Merge "Fix the benchmark fail by caliper's change"
diff --git a/Android.mk b/Android.mk
index a518d2f..25796a0 100644
--- a/Android.mk
+++ b/Android.mk
@@ -558,3 +558,10 @@
 TEST_ART_TARGET_SYNC_DEPS :=
 
 include $(art_path)/runtime/openjdkjvm/Android.mk
+
+# Helper target that depends on boot image creation.
+#
+# Can be used, for example, to dump initialization failures:
+#   m art-boot-image ART_BOOT_IMAGE_EXTRA_ARGS=--dump-init-failures=fails.txt
+.PHONY: art-boot-image
+art-boot-image: $(DEFAULT_DEX_PREOPT_BUILT_IMAGE_FILENAME)
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 6952d69..6befec5 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -63,7 +63,11 @@
     ART_TARGET_ARCH_64 := $(TARGET_ARCH)
   else
     # TODO: ???
-    $(error Do not know what to do with this multi-target configuration!)
+    $(warning Do not know what to do with this multi-target configuration!)
+    ART_PHONY_TEST_TARGET_SUFFIX := 32
+    2ND_ART_PHONY_TEST_TARGET_SUFFIX :=
+    ART_TARGET_ARCH_32 := $(TARGET_ARCH)
+    ART_TARGET_ARCH_64 :=
   endif
 else
   ifneq ($(filter %64,$(TARGET_ARCH)),)
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 2294ddb..0235a30 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -363,7 +363,21 @@
 ifndef LIBART_IMG_TARGET_BASE_ADDRESS
   $(error LIBART_IMG_TARGET_BASE_ADDRESS unset)
 endif
-ART_TARGET_CFLAGS += $(art_cflags) -DART_TARGET -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS)
+
+ART_TARGET_CFLAGS += $(art_cflags) -DART_TARGET \
+                     -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS)
+
+ifeq ($(ART_TARGET_LINUX),true)
+# Setting ART_TARGET_LINUX to true compiles art/ assuming that the target
+# device will be running Linux rather than Android.
+ART_TARGET_CFLAGS += -DART_TARGET_LINUX
+else
+# The ART_TARGET_ANDROID macro is passed to target builds, which check
+# against it instead of against __ANDROID__ (which is provided by target
+# toolchains).
+ART_TARGET_CFLAGS += -DART_TARGET_ANDROID
+endif
+
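
These two macros replace direct __ANDROID__ checks throughout art/. As a minimal sketch of the consumer side, mirroring the dwarf_test.cc change later in this patch (the function name here is illustrative, not part of the patch):

    #include <cstdio>

    // Exactly one of ART_TARGET_ANDROID / ART_TARGET_LINUX is defined for
    // target builds by the flags above; host builds define neither.
    void PrintArtTargetOs() {
    #if defined(ART_TARGET_ANDROID)
      std::printf("target: Android\n");
    #elif defined(ART_TARGET_LINUX)
      std::printf("target: Linux, not Android\n");
    #else
      std::printf("host build\n");
    #endif
    }
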
 ART_TARGET_CFLAGS += $(art_target_cflags)
 ART_TARGET_ASFLAGS += $(art_asflags)
 
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk
index ecc9e76..7be1894 100644
--- a/build/Android.common_path.mk
+++ b/build/Android.common_path.mk
@@ -93,11 +93,10 @@
 HOST_CORE_DEX_FILES   := $(foreach jar,$(HOST_CORE_JARS),  $(call intermediates-dir-for,JAVA_LIBRARIES,$(jar),t,COMMON)/javalib.jar)
 TARGET_CORE_DEX_FILES := $(foreach jar,$(TARGET_CORE_JARS),$(call intermediates-dir-for,JAVA_LIBRARIES,$(jar), ,COMMON)/javalib.jar)
 
-ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
 # Classpath for Jack compilation: we only need core-libart.
 HOST_JACK_CLASSPATH_DEPENDENCIES   := $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj-hostdex,t,COMMON)/classes.jack $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack
 HOST_JACK_CLASSPATH                := $(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj-hostdex,t,COMMON)/classes.jack):$(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack)
 TARGET_JACK_CLASSPATH_DEPENDENCIES := $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj, ,COMMON)/classes.jack $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack
 TARGET_JACK_CLASSPATH              := $(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-oj, ,COMMON)/classes.jack):$(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack)
-endif
+
 endif # ART_ANDROID_COMMON_PATH_MK
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index cde41e0..df7df26 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -201,7 +201,6 @@
     LOCAL_MODULE_PATH := $(3)
     LOCAL_DEX_PREOPT_IMAGE_LOCATION := $(TARGET_CORE_IMG_OUT)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
-      LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
       LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
     endif
     include $(BUILD_JAVA_LIBRARY)
@@ -217,7 +216,6 @@
     LOCAL_JAVA_LIBRARIES := $(HOST_CORE_JARS)
     LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_LOCATION)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
-      LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
       LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
     endif
     include $(BUILD_HOST_DALVIK_JAVA_LIBRARY)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index f67da3f..3b459c3 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -172,6 +172,7 @@
   runtime/arch/x86/instruction_set_features_x86_test.cc \
   runtime/arch/x86_64/instruction_set_features_x86_64_test.cc \
   runtime/barrier_test.cc \
+  runtime/base/arena_allocator_test.cc \
   runtime/base/bit_field_test.cc \
   runtime/base/bit_utils_test.cc \
   runtime/base/bit_vector_test.cc \
@@ -195,7 +196,6 @@
   runtime/entrypoints/math_entrypoints_test.cc \
   runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc \
   runtime/entrypoints_order_test.cc \
-  runtime/exception_test.cc \
   runtime/gc/accounting/card_table_test.cc \
   runtime/gc/accounting/mod_union_table_test.cc \
   runtime/gc/accounting/space_bitmap_test.cc \
@@ -251,6 +251,7 @@
   compiler/driver/compiled_method_storage_test.cc \
   compiler/driver/compiler_driver_test.cc \
   compiler/elf_writer_test.cc \
+  compiler/exception_test.cc \
   compiler/image_test.cc \
   compiler/jni/jni_compiler_test.cc \
   compiler/linker/multi_oat_relative_patcher_test.cc \
@@ -269,12 +270,13 @@
   compiler/optimizing/nodes_test.cc \
   compiler/optimizing/parallel_move_test.cc \
   compiler/optimizing/pretty_printer_test.cc \
+  compiler/optimizing/reference_type_propagation_test.cc \
   compiler/optimizing/side_effects_test.cc \
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
   compiler/optimizing/suspend_check_test.cc \
-  compiler/utils/arena_allocator_test.cc \
   compiler/utils/dedupe_set_test.cc \
+  compiler/utils/intrusive_forward_list_test.cc \
   compiler/utils/swap_space_test.cc \
   compiler/utils/test_dex_file_builder_test.cc \
 
@@ -648,11 +650,11 @@
 
 ifeq ($(ART_BUILD_TARGET),true)
   $(foreach file,$(RUNTIME_GTEST_TARGET_SRC_FILES), $(eval $(call define-art-gtest,target,$(file),,libbacktrace)))
-  $(foreach file,$(COMPILER_GTEST_TARGET_SRC_FILES), $(eval $(call define-art-gtest,target,$(file),art/compiler,libartd-compiler libbacktrace)))
+  $(foreach file,$(COMPILER_GTEST_TARGET_SRC_FILES), $(eval $(call define-art-gtest,target,$(file),art/compiler,libartd-compiler libbacktrace libnativeloader)))
 endif
 ifeq ($(ART_BUILD_HOST),true)
   $(foreach file,$(RUNTIME_GTEST_HOST_SRC_FILES), $(eval $(call define-art-gtest,host,$(file),,libbacktrace)))
-  $(foreach file,$(COMPILER_GTEST_HOST_SRC_FILES), $(eval $(call define-art-gtest,host,$(file),art/compiler,libartd-compiler libbacktrace)))
+  $(foreach file,$(COMPILER_GTEST_HOST_SRC_FILES), $(eval $(call define-art-gtest,host,$(file),art/compiler,libartd-compiler libbacktrace libnativeloader)))
 endif
 
 # Used outside the art project to get a list of the current tests
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 81b854e..7c53e01 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -461,8 +461,8 @@
   * Test successes
   */
   {
-    EXPECT_SINGLE_PARSE_VALUE(true, "-Xusejit:true", M::UseJIT);
-    EXPECT_SINGLE_PARSE_VALUE(false, "-Xusejit:false", M::UseJIT);
+    EXPECT_SINGLE_PARSE_VALUE(true, "-Xusejit:true", M::UseJitCompilation);
+    EXPECT_SINGLE_PARSE_VALUE(false, "-Xusejit:false", M::UseJitCompilation);
   }
   {
     EXPECT_SINGLE_PARSE_VALUE(
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index c0a00cc..4797540 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -620,6 +620,8 @@
         log_verbosity.verifier = true;
       } else if (verbose_options[j] == "image") {
         log_verbosity.image = true;
+      } else if (verbose_options[j] == "systrace-locks") {
+        log_verbosity.systrace_lock_logging = true;
       } else {
         return Result::Usage(std::string("Unknown -verbose option ") + verbose_options[j]);
       }
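
The new flag is selected with -verbose:systrace-locks on the runtime command line, alongside the existing -verbose:image and friends. A hedged sketch of a consumer, assuming ART's VLOG_IS_ON helper from base/logging.h; the call site itself is illustrative and not part of this patch:

    // Gate the extra lock tracing on the new verbosity flag (assumed usage).
    void MaybeTraceLockContention(const char* lock_name) {
      if (VLOG_IS_ON(systrace_lock_logging)) {
        LOG(INFO) << "Lock contention on " << lock_name;
      }
    }
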
diff --git a/compiler/Android.mk b/compiler/Android.mk
index f12f007..e9c22d2 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -40,6 +40,7 @@
 	jit/jit_compiler.cc \
 	jni/quick/calling_convention.cc \
 	jni/quick/jni_compiler.cc \
+	optimizing/block_builder.cc \
 	optimizing/bounds_check_elimination.cc \
 	optimizing/builder.cc \
 	optimizing/code_generator.cc \
@@ -53,6 +54,7 @@
 	optimizing/induction_var_analysis.cc \
 	optimizing/induction_var_range.cc \
 	optimizing/inliner.cc \
+	optimizing/instruction_builder.cc \
 	optimizing/instruction_simplifier.cc \
 	optimizing/intrinsics.cc \
 	optimizing/licm.cc \
@@ -148,6 +150,7 @@
 LIBART_COMPILER_CFLAGS :=
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES := \
+  compiled_method.h \
   dex/compiler_enums.h \
   dex/dex_to_dex_compiler.h \
   driver/compiler_driver.h \
@@ -299,28 +302,6 @@
     endif
   endif
 
-  ifeq ($$(art_target_or_host),target)
-    ifeq ($$(art_ndebug_or_debug),debug)
-      $(TARGET_OUT_EXECUTABLES)/dex2oatd: $$(LOCAL_INSTALLED_MODULE)
-    else
-      $(TARGET_OUT_EXECUTABLES)/dex2oat: $$(LOCAL_INSTALLED_MODULE)
-    endif
-  else # host
-    ifeq ($$(art_ndebug_or_debug),debug)
-      ifeq ($$(art_static_or_shared),static)
-        $(HOST_OUT_EXECUTABLES)/dex2oatds: $$(LOCAL_INSTALLED_MODULE)
-      else
-        $(HOST_OUT_EXECUTABLES)/dex2oatd: $$(LOCAL_INSTALLED_MODULE)
-      endif
-    else
-      ifeq ($$(art_static_or_shared),static)
-        $(HOST_OUT_EXECUTABLES)/dex2oats: $$(LOCAL_INSTALLED_MODULE)
-      else
-        $(HOST_OUT_EXECUTABLES)/dex2oat: $$(LOCAL_INSTALLED_MODULE)
-      endif
-    endif
-  endif
-
   # Clear locally defined variables.
   art_target_or_host :=
   art_ndebug_or_debug :=
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
index 230cb9a..f8b7460 100644
--- a/compiler/cfi_test.h
+++ b/compiler/cfi_test.h
@@ -55,7 +55,9 @@
                     kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
     ReformatCfi(Objdump(false, "-W"), &lines);
     // Pretty-print assembly.
-    auto* opts = new DisassemblerOptions(false, actual_asm.data(), true);
+    const uint8_t* asm_base = actual_asm.data();
+    const uint8_t* asm_end = asm_base + actual_asm.size();
+    auto* opts = new DisassemblerOptions(false, asm_base, asm_end, true);
     std::unique_ptr<Disassembler> disasm(Disassembler::Create(isa, opts));
     std::stringstream stream;
     const uint8_t* base = actual_asm.data() + (isa == kThumb2 ? 1 : 0);
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 6483ef6..bf29e1c 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -59,36 +59,20 @@
     ArrayRef<const uint8_t> vmap_table = compiled_method->GetVmapTable();
     uint32_t vmap_table_offset = vmap_table.empty() ? 0u
         : sizeof(OatQuickMethodHeader) + vmap_table.size();
-    ArrayRef<const uint8_t> mapping_table = compiled_method->GetMappingTable();
-    bool mapping_table_used = !mapping_table.empty();
-    size_t mapping_table_size = mapping_table.size();
-    uint32_t mapping_table_offset = !mapping_table_used ? 0u
-        : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table_size;
-    ArrayRef<const uint8_t> gc_map = compiled_method->GetGcMap();
-    bool gc_map_used = !gc_map.empty();
-    size_t gc_map_size = gc_map.size();
-    uint32_t gc_map_offset = !gc_map_used ? 0u
-        : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table_size + gc_map_size;
-    OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset, gc_map_offset,
+    OatQuickMethodHeader method_header(vmap_table_offset,
                                        compiled_method->GetFrameSizeInBytes(),
                                        compiled_method->GetCoreSpillMask(),
-                                       compiled_method->GetFpSpillMask(), code_size);
+                                       compiled_method->GetFpSpillMask(),
+                                       code_size);
 
     header_code_and_maps_chunks_.push_back(std::vector<uint8_t>());
     std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back();
     const size_t max_padding = GetInstructionSetAlignment(compiled_method->GetInstructionSet());
-    const size_t size =
-        gc_map_size + mapping_table_size + vmap_table.size() + sizeof(method_header) + code_size;
+    const size_t size = vmap_table.size() + sizeof(method_header) + code_size;
     chunk->reserve(size + max_padding);
     chunk->resize(sizeof(method_header));
     memcpy(&(*chunk)[0], &method_header, sizeof(method_header));
     chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end());
-    if (mapping_table_used) {
-      chunk->insert(chunk->begin(), mapping_table.begin(), mapping_table.end());
-    }
-    if (gc_map_used) {
-      chunk->insert(chunk->begin(), gc_map.begin(), gc_map.end());
-    }
     chunk->insert(chunk->end(), code.begin(), code.end());
     CHECK_EQ(chunk->size(), size);
     const void* unaligned_code_ptr = chunk->data() + (size - code_size);
@@ -196,6 +180,7 @@
                                             isa,
                                             instruction_set_features_.get(),
                                             /* boot_image */ true,
+                                            /* app_image */ false,
                                             GetImageClasses(),
                                             GetCompiledClasses(),
                                             GetCompiledMethods(),
@@ -301,7 +286,7 @@
   MemMap::Init();
   image_reservation_.reset(MemMap::MapAnonymous("image reservation",
                                                 reinterpret_cast<uint8_t*>(ART_BASE_ADDRESS),
-                                                (size_t)100 * 1024 * 1024,  // 100MB
+                                                (size_t)120 * 1024 * 1024,  // 120MB
                                                 PROT_NONE,
                                                 false /* no need for 4gb flag with fixed mmap*/,
                                                 false /* not reusing existing reservation */,
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 7c2c844..2d139eb 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -115,13 +115,6 @@
   std::list<std::vector<uint8_t>> header_code_and_maps_chunks_;
 };
 
-// TODO: When heap reference poisoning works with all compilers in use, get rid of this.
-#define TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK() \
-  if (kPoisonHeapReferences && GetCompilerKind() == Compiler::kQuick) { \
-    printf("WARNING: TEST DISABLED FOR HEAP REFERENCE POISONING WITH QUICK\n"); \
-    return; \
-  }
-
 // TODO: When read barrier works with all tests, get rid of this.
 #define TEST_DISABLED_FOR_READ_BARRIER() \
   if (kUseReadBarrier) { \
@@ -129,13 +122,6 @@
     return; \
   }
 
-// TODO: When read barrier works with all compilers in use, get rid of this.
-#define TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK() \
-  if (kUseReadBarrier && GetCompilerKind() == Compiler::kQuick) { \
-    printf("WARNING: TEST DISABLED FOR READ BARRIER WITH QUICK\n"); \
-    return; \
-  }
-
 // TODO: When read barrier works with all Optimizing back ends, get rid of this.
 #define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \
   if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) {                    \
@@ -155,13 +141,6 @@
     }                                                                                     \
   }
 
-// TODO: When non-PIC works with all compilers in use, get rid of this.
-#define TEST_DISABLED_FOR_NON_PIC_COMPILING_WITH_OPTIMIZING() \
-  if (GetCompilerKind() == Compiler::kOptimizing) { \
-    printf("WARNING: TEST DISABLED FOR NON-PIC COMPILING WITH OPTIMIZING\n"); \
-    return; \
-  }
-
 }  // namespace art
 
 #endif  // ART_COMPILER_COMMON_COMPILER_TEST_H_
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 9551d22..f06d90c 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -106,9 +106,7 @@
                                const uint32_t core_spill_mask,
                                const uint32_t fp_spill_mask,
                                const ArrayRef<const SrcMapElem>& src_mapping_table,
-                               const ArrayRef<const uint8_t>& mapping_table,
                                const ArrayRef<const uint8_t>& vmap_table,
-                               const ArrayRef<const uint8_t>& native_gc_map,
                                const ArrayRef<const uint8_t>& cfi_info,
                                const ArrayRef<const LinkerPatch>& patches)
     : CompiledCode(driver, instruction_set, quick_code),
@@ -116,9 +114,7 @@
       fp_spill_mask_(fp_spill_mask),
       src_mapping_table_(
           driver->GetCompiledMethodStorage()->DeduplicateSrcMappingTable(src_mapping_table)),
-      mapping_table_(driver->GetCompiledMethodStorage()->DeduplicateMappingTable(mapping_table)),
       vmap_table_(driver->GetCompiledMethodStorage()->DeduplicateVMapTable(vmap_table)),
-      gc_map_(driver->GetCompiledMethodStorage()->DeduplicateGCMap(native_gc_map)),
       cfi_info_(driver->GetCompiledMethodStorage()->DeduplicateCFIInfo(cfi_info)),
       patches_(driver->GetCompiledMethodStorage()->DeduplicateLinkerPatches(patches)) {
 }
@@ -131,15 +127,20 @@
     const uint32_t core_spill_mask,
     const uint32_t fp_spill_mask,
     const ArrayRef<const SrcMapElem>& src_mapping_table,
-    const ArrayRef<const uint8_t>& mapping_table,
     const ArrayRef<const uint8_t>& vmap_table,
-    const ArrayRef<const uint8_t>& native_gc_map,
     const ArrayRef<const uint8_t>& cfi_info,
     const ArrayRef<const LinkerPatch>& patches) {
   SwapAllocator<CompiledMethod> alloc(driver->GetCompiledMethodStorage()->GetSwapSpaceAllocator());
   CompiledMethod* ret = alloc.allocate(1);
-  alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask,
-                  fp_spill_mask, src_mapping_table, mapping_table, vmap_table, native_gc_map,
+  alloc.construct(ret,
+                  driver,
+                  instruction_set,
+                  quick_code,
+                  frame_size_in_bytes,
+                  core_spill_mask,
+                  fp_spill_mask,
+                  src_mapping_table,
+                  vmap_table,
                   cfi_info, patches);
   return ret;
 }
@@ -154,9 +155,7 @@
   CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage();
   storage->ReleaseLinkerPatches(patches_);
   storage->ReleaseCFIInfo(cfi_info_);
-  storage->ReleaseGCMap(gc_map_);
   storage->ReleaseVMapTable(vmap_table_);
-  storage->ReleaseMappingTable(mapping_table_);
   storage->ReleaseSrcMappingTable(src_mapping_table_);
 }
 
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 5887620..9479ff3 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_COMPILED_METHOD_H_
 
 #include <memory>
+#include <iosfwd>
 #include <string>
 #include <vector>
 
@@ -158,21 +159,34 @@
 
 using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;
 
-
-enum LinkerPatchType {
-  kLinkerPatchMethod,
-  kLinkerPatchCall,
-  kLinkerPatchCallRelative,  // NOTE: Actual patching is instruction_set-dependent.
-  kLinkerPatchType,
-  kLinkerPatchDexCacheArray,  // NOTE: Actual patching is instruction_set-dependent.
-};
-
 class LinkerPatch {
  public:
+  // Note: We explicitly specify the underlying type of the enum because GCC
+  // would otherwise select a bigger underlying type and then complain that
+  //     'art::LinkerPatch::patch_type_' is too small to hold all
+  //     values of 'enum class art::LinkerPatch::Type'
+  // which is ridiculous given we have only a handful of values here. If we
+  // choose to squeeze the Type into fewer than 8 bits, we'll have to declare
+  // patch_type_ as an uintN_t and do explicit static_cast<>s.
+  enum class Type : uint8_t {
+    kRecordPosition,   // Just record patch position for patchoat.
+    kMethod,
+    kCall,
+    kCallRelative,     // NOTE: Actual patching is instruction_set-dependent.
+    kType,
+    kString,
+    kStringRelative,   // NOTE: Actual patching is instruction_set-dependent.
+    kDexCacheArray,    // NOTE: Actual patching is instruction_set-dependent.
+  };
+
+  static LinkerPatch RecordPosition(size_t literal_offset) {
+    return LinkerPatch(literal_offset, Type::kRecordPosition, /* target_dex_file */ nullptr);
+  }
+
   static LinkerPatch MethodPatch(size_t literal_offset,
                                  const DexFile* target_dex_file,
                                  uint32_t target_method_idx) {
-    LinkerPatch patch(literal_offset, kLinkerPatchMethod, target_dex_file);
+    LinkerPatch patch(literal_offset, Type::kMethod, target_dex_file);
     patch.method_idx_ = target_method_idx;
     return patch;
   }
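
The explicit uint8_t underlying type matters because of the 8-bit bit-field declared further down (patch_type_ : 8). A standalone sketch of the GCC behavior the comment above describes, with hypothetical names:

    #include <cstdint>

    enum class Kind : uint8_t { kA, kB, kC };  // Underlying type pinned to 8 bits.

    struct Packed {
      uint32_t offset : 24;
      Kind kind : 8;  // OK with uint8_t; with a plain "enum class Kind", GCC
                      // warns that the bit-field may be too small for all values.
    };
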
@@ -180,7 +194,7 @@
   static LinkerPatch CodePatch(size_t literal_offset,
                                const DexFile* target_dex_file,
                                uint32_t target_method_idx) {
-    LinkerPatch patch(literal_offset, kLinkerPatchCall, target_dex_file);
+    LinkerPatch patch(literal_offset, Type::kCall, target_dex_file);
     patch.method_idx_ = target_method_idx;
     return patch;
   }
@@ -188,7 +202,7 @@
   static LinkerPatch RelativeCodePatch(size_t literal_offset,
                                        const DexFile* target_dex_file,
                                        uint32_t target_method_idx) {
-    LinkerPatch patch(literal_offset, kLinkerPatchCallRelative, target_dex_file);
+    LinkerPatch patch(literal_offset, Type::kCallRelative, target_dex_file);
     patch.method_idx_ = target_method_idx;
     return patch;
   }
@@ -196,17 +210,35 @@
   static LinkerPatch TypePatch(size_t literal_offset,
                                const DexFile* target_dex_file,
                                uint32_t target_type_idx) {
-    LinkerPatch patch(literal_offset, kLinkerPatchType, target_dex_file);
+    LinkerPatch patch(literal_offset, Type::kType, target_dex_file);
     patch.type_idx_ = target_type_idx;
     return patch;
   }
 
+  static LinkerPatch StringPatch(size_t literal_offset,
+                                 const DexFile* target_dex_file,
+                                 uint32_t target_string_idx) {
+    LinkerPatch patch(literal_offset, Type::kString, target_dex_file);
+    patch.string_idx_ = target_string_idx;
+    return patch;
+  }
+
+  static LinkerPatch RelativeStringPatch(size_t literal_offset,
+                                         const DexFile* target_dex_file,
+                                         uint32_t pc_insn_offset,
+                                         uint32_t target_string_idx) {
+    LinkerPatch patch(literal_offset, Type::kStringRelative, target_dex_file);
+    patch.string_idx_ = target_string_idx;
+    patch.pc_insn_offset_ = pc_insn_offset;
+    return patch;
+  }
+
   static LinkerPatch DexCacheArrayPatch(size_t literal_offset,
                                         const DexFile* target_dex_file,
                                         uint32_t pc_insn_offset,
                                         size_t element_offset) {
     DCHECK(IsUint<32>(element_offset));
-    LinkerPatch patch(literal_offset, kLinkerPatchDexCacheArray, target_dex_file);
+    LinkerPatch patch(literal_offset, Type::kDexCacheArray, target_dex_file);
     patch.pc_insn_offset_ = pc_insn_offset;
     patch.element_offset_ = element_offset;
     return patch;
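
For illustration, a hypothetical call site using the new string-patch factories above; the variable names are placeholders supplied by whichever code generator emits the load, and per IsPcRelative() below a kStringRelative patch reports itself as PC-relative:

    LinkerPatch patch = LinkerPatch::RelativeStringPatch(
        literal_offset, &dex_file, pc_insn_offset, string_idx);
    DCHECK(patch.IsPcRelative());
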
@@ -219,47 +251,65 @@
     return literal_offset_;
   }
 
-  LinkerPatchType Type() const {
+  Type GetType() const {
     return patch_type_;
   }
 
   bool IsPcRelative() const {
-    return Type() == kLinkerPatchCallRelative || Type() == kLinkerPatchDexCacheArray;
+    switch (GetType()) {
+      case Type::kCallRelative:
+      case Type::kStringRelative:
+      case Type::kDexCacheArray:
+        return true;
+      default:
+        return false;
+    }
   }
 
   MethodReference TargetMethod() const {
-    DCHECK(patch_type_ == kLinkerPatchMethod ||
-           patch_type_ == kLinkerPatchCall || patch_type_ == kLinkerPatchCallRelative);
+    DCHECK(patch_type_ == Type::kMethod ||
+           patch_type_ == Type::kCall ||
+           patch_type_ == Type::kCallRelative);
     return MethodReference(target_dex_file_, method_idx_);
   }
 
   const DexFile* TargetTypeDexFile() const {
-    DCHECK(patch_type_ == kLinkerPatchType);
+    DCHECK(patch_type_ == Type::kType);
     return target_dex_file_;
   }
 
   uint32_t TargetTypeIndex() const {
-    DCHECK(patch_type_ == kLinkerPatchType);
+    DCHECK(patch_type_ == Type::kType);
     return type_idx_;
   }
 
+  const DexFile* TargetStringDexFile() const {
+    DCHECK(patch_type_ == Type::kString || patch_type_ == Type::kStringRelative);
+    return target_dex_file_;
+  }
+
+  uint32_t TargetStringIndex() const {
+    DCHECK(patch_type_ == Type::kString || patch_type_ == Type::kStringRelative);
+    return string_idx_;
+  }
+
   const DexFile* TargetDexCacheDexFile() const {
-    DCHECK(patch_type_ == kLinkerPatchDexCacheArray);
+    DCHECK(patch_type_ == Type::kDexCacheArray);
     return target_dex_file_;
   }
 
   size_t TargetDexCacheElementOffset() const {
-    DCHECK(patch_type_ == kLinkerPatchDexCacheArray);
+    DCHECK(patch_type_ == Type::kDexCacheArray);
     return element_offset_;
   }
 
   uint32_t PcInsnOffset() const {
-    DCHECK(patch_type_ == kLinkerPatchDexCacheArray);
+    DCHECK(patch_type_ == Type::kStringRelative || patch_type_ == Type::kDexCacheArray);
     return pc_insn_offset_;
   }
 
  private:
-  LinkerPatch(size_t literal_offset, LinkerPatchType patch_type, const DexFile* target_dex_file)
+  LinkerPatch(size_t literal_offset, Type patch_type, const DexFile* target_dex_file)
       : target_dex_file_(target_dex_file),
         literal_offset_(literal_offset),
         patch_type_(patch_type) {
@@ -272,14 +322,16 @@
 
   const DexFile* target_dex_file_;
   uint32_t literal_offset_ : 24;  // Method code size up to 16MiB.
-  LinkerPatchType patch_type_ : 8;
+  Type patch_type_ : 8;
   union {
     uint32_t cmp1_;             // Used for relational operators.
     uint32_t method_idx_;       // Method index for Call/Method patches.
     uint32_t type_idx_;         // Type index for Type patches.
+    uint32_t string_idx_;       // String index for String patches.
     uint32_t element_offset_;   // Element offset in the dex cache arrays.
     static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators");
     static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators");
+    static_assert(sizeof(string_idx_) == sizeof(cmp1_), "needed by relational operators");
     static_assert(sizeof(element_offset_) == sizeof(cmp1_), "needed by relational operators");
   };
   union {
@@ -295,6 +347,7 @@
   friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs);
   friend bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs);
 };
+std::ostream& operator<<(std::ostream& os, const LinkerPatch::Type& type);
 
 inline bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs) {
   return lhs.literal_offset_ == rhs.literal_offset_ &&
@@ -324,9 +377,7 @@
                  const uint32_t core_spill_mask,
                  const uint32_t fp_spill_mask,
                  const ArrayRef<const SrcMapElem>& src_mapping_table,
-                 const ArrayRef<const uint8_t>& mapping_table,
                  const ArrayRef<const uint8_t>& vmap_table,
-                 const ArrayRef<const uint8_t>& native_gc_map,
                  const ArrayRef<const uint8_t>& cfi_info,
                  const ArrayRef<const LinkerPatch>& patches);
 
@@ -340,9 +391,7 @@
       const uint32_t core_spill_mask,
       const uint32_t fp_spill_mask,
       const ArrayRef<const SrcMapElem>& src_mapping_table,
-      const ArrayRef<const uint8_t>& mapping_table,
       const ArrayRef<const uint8_t>& vmap_table,
-      const ArrayRef<const uint8_t>& native_gc_map,
       const ArrayRef<const uint8_t>& cfi_info,
       const ArrayRef<const LinkerPatch>& patches);
 
@@ -364,18 +413,10 @@
     return GetArray(src_mapping_table_);
   }
 
-  ArrayRef<const uint8_t> GetMappingTable() const {
-    return GetArray(mapping_table_);
-  }
-
   ArrayRef<const uint8_t> GetVmapTable() const {
     return GetArray(vmap_table_);
   }
 
-  ArrayRef<const uint8_t> GetGcMap() const {
-    return GetArray(gc_map_);
-  }
-
   ArrayRef<const uint8_t> GetCFIInfo() const {
     return GetArray(cfi_info_);
   }
@@ -393,14 +434,8 @@
   const uint32_t fp_spill_mask_;
   // For quick code, a set of pairs (PC, DEX) mapping from native PC offset to DEX offset.
   const LengthPrefixedArray<SrcMapElem>* const src_mapping_table_;
-  // For quick code, a uleb128 encoded map from native PC offset to dex PC aswell as dex PC to
-  // native PC offset. Size prefixed.
-  const LengthPrefixedArray<uint8_t>* const mapping_table_;
   // For quick code, a uleb128 encoded map from GPR/FPR register to dex register. Size prefixed.
   const LengthPrefixedArray<uint8_t>* const vmap_table_;
-  // For quick code, a map keyed by native PC indices to bitmaps describing what dalvik registers
-  // are live.
-  const LengthPrefixedArray<uint8_t>* const gc_map_;
   // For quick code, a FDE entry for the debug_frame section.
   const LengthPrefixedArray<uint8_t>* const cfi_info_;
   // For quick code, linker patches needed by the method.
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 97c60de..487a27f 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -27,8 +27,6 @@
 }
 
 class ArtMethod;
-class Backend;
-struct CompilationUnit;
 class CompilerDriver;
 class CompiledMethod;
 class OatWriter;
@@ -46,8 +44,7 @@
 
   virtual void UnInit() const = 0;
 
-  virtual bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, CompilationUnit* cu)
-      const = 0;
+  virtual bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const = 0;
 
   virtual CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                                   uint32_t access_flags,
@@ -77,8 +74,6 @@
     return maximum_compilation_time_before_warning_;
   }
 
-  virtual void InitCompilationUnit(CompilationUnit& cu) const = 0;
-
   virtual ~Compiler() {}
 
   /*
diff --git a/compiler/debug/dwarf/dwarf_test.cc b/compiler/debug/dwarf/dwarf_test.cc
index 2ba3af5..866bf43 100644
--- a/compiler/debug/dwarf/dwarf_test.cc
+++ b/compiler/debug/dwarf/dwarf_test.cc
@@ -27,7 +27,7 @@
 namespace dwarf {
 
 // Run the tests only on host since we need objdump.
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 
 constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT;
 
@@ -341,7 +341,7 @@
   CheckObjdumpOutput(is64bit, "-W");
 }
 
-#endif  // __ANDROID__
+#endif  // ART_TARGET_ANDROID
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index a6e6f8b..e8e278d 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -180,8 +180,8 @@
       std::vector<DexRegisterMap> dex_reg_maps;
       if (mi->code_info != nullptr) {
         const CodeInfo code_info(mi->code_info);
-        StackMapEncoding encoding = code_info.ExtractEncoding();
-        for (size_t s = 0; s < code_info.GetNumberOfStackMaps(); ++s) {
+        CodeInfoEncoding encoding = code_info.ExtractEncoding();
+        for (size_t s = 0; s < code_info.GetNumberOfStackMaps(encoding); ++s) {
           const StackMap& stack_map = code_info.GetStackMapAt(s, encoding);
           dex_reg_maps.push_back(code_info.GetDexRegisterMapOf(
               stack_map, encoding, dex_code->registers_size_));
diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h
index 66e135f..3db7306 100644
--- a/compiler/debug/elf_debug_line_writer.h
+++ b/compiler/debug/elf_debug_line_writer.h
@@ -98,15 +98,15 @@
       if (mi->code_info != nullptr) {
         // Use stack maps to create mapping table from pc to dex.
         const CodeInfo code_info(mi->code_info);
-        const StackMapEncoding encoding = code_info.ExtractEncoding();
-        pc2dex_map.reserve(code_info.GetNumberOfStackMaps());
-        for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
+        const CodeInfoEncoding encoding = code_info.ExtractEncoding();
+        pc2dex_map.reserve(code_info.GetNumberOfStackMaps(encoding));
+        for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(encoding); s++) {
           StackMap stack_map = code_info.GetStackMapAt(s, encoding);
           DCHECK(stack_map.IsValid());
-          const uint32_t pc = stack_map.GetNativePcOffset(encoding);
-          const int32_t dex = stack_map.GetDexPc(encoding);
+          const uint32_t pc = stack_map.GetNativePcOffset(encoding.stack_map_encoding);
+          const int32_t dex = stack_map.GetDexPc(encoding.stack_map_encoding);
           pc2dex_map.push_back({pc, dex});
-          if (stack_map.HasDexRegisterMap(encoding)) {
+          if (stack_map.HasDexRegisterMap(encoding.stack_map_encoding)) {
             // Guess that the first map with local variables is the end of prologue.
             prologue_end = std::min(prologue_end, pc);
           }
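
The same migration recurs across all the debug writers in this patch: ExtractEncoding() now returns a CodeInfoEncoding, the stack-map count query takes the encoding, and per-map accessors take encoding.stack_map_encoding. Condensed to the new idiom (types come from ART's stack_map.h; the loop body is illustrative):

    const CodeInfo code_info(mi->code_info);
    const CodeInfoEncoding encoding = code_info.ExtractEncoding();
    for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(encoding); ++s) {
      StackMap stack_map = code_info.GetStackMapAt(s, encoding);
      // Per-stack-map queries now take the nested stack_map_encoding.
      const uint32_t pc = stack_map.GetNativePcOffset(encoding.stack_map_encoding);
      const int32_t dex_pc = stack_map.GetDexPc(encoding.stack_map_encoding);
    }
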
diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h
index 4712d47..9645643 100644
--- a/compiler/debug/elf_debug_loc_writer.h
+++ b/compiler/debug/elf_debug_loc_writer.h
@@ -98,12 +98,12 @@
   // Get stack maps sorted by pc (they might not be sorted internally).
   // TODO(dsrbecky) Remove this once stackmaps get sorted by pc.
   const CodeInfo code_info(method_info->code_info);
-  const StackMapEncoding encoding = code_info.ExtractEncoding();
+  const CodeInfoEncoding encoding = code_info.ExtractEncoding();
   std::map<uint32_t, uint32_t> stack_maps;  // low_pc -> stack_map_index.
-  for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
+  for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(encoding); s++) {
     StackMap stack_map = code_info.GetStackMapAt(s, encoding);
     DCHECK(stack_map.IsValid());
-    if (!stack_map.HasDexRegisterMap(encoding)) {
+    if (!stack_map.HasDexRegisterMap(encoding.stack_map_encoding)) {
       // The compiler creates stackmaps without register maps at the start of
       // basic blocks in order to keep instruction-accurate line number mapping.
       // However, we never stop at those (breakpoint locations always have map).
@@ -111,7 +111,7 @@
       // The main reason for this is to save space by avoiding undefined gaps.
       continue;
     }
-    const uint32_t pc_offset = stack_map.GetNativePcOffset(encoding);
+    const uint32_t pc_offset = stack_map.GetNativePcOffset(encoding.stack_map_encoding);
     DCHECK_LE(pc_offset, method_info->code_size);
     DCHECK_LE(compilation_unit_code_address, method_info->code_address);
     const uint32_t low_pc = dchecked_integral_cast<uint32_t>(
@@ -135,7 +135,7 @@
     }
 
     // Check that the stack map is in the requested range.
-    uint32_t dex_pc = stack_map.GetDexPc(encoding);
+    uint32_t dex_pc = stack_map.GetDexPc(encoding.stack_map_encoding);
     if (!(dex_pc_low <= dex_pc && dex_pc < dex_pc_high)) {
       // The variable is not in scope at this PC. Therefore omit the entry.
       // Note that this is different to None() entry which means in scope, but unknown location.
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc
index 4dd8024..b7e000a 100644
--- a/compiler/debug/elf_debug_writer.cc
+++ b/compiler/debug/elf_debug_writer.cc
@@ -110,7 +110,7 @@
 }
 
 template <typename ElfTypes>
-static ArrayRef<const uint8_t> WriteDebugElfFileForMethodsInternal(
+static std::vector<uint8_t> WriteDebugElfFileForMethodsInternal(
     InstructionSet isa,
     const InstructionSetFeatures* features,
     const ArrayRef<const MethodDebugInfo>& method_infos) {
@@ -126,14 +126,10 @@
                  false /* write_oat_patches */);
   builder->End();
   CHECK(builder->Good());
-  // Make a copy of the buffer.  We want to shrink it anyway.
-  uint8_t* result = new uint8_t[buffer.size()];
-  CHECK(result != nullptr);
-  memcpy(result, buffer.data(), buffer.size());
-  return ArrayRef<const uint8_t>(result, buffer.size());
+  return buffer;
 }
 
-ArrayRef<const uint8_t> WriteDebugElfFileForMethods(
+std::vector<uint8_t> WriteDebugElfFileForMethods(
     InstructionSet isa,
     const InstructionSetFeatures* features,
     const ArrayRef<const MethodDebugInfo>& method_infos) {
@@ -145,7 +141,7 @@
 }
 
 template <typename ElfTypes>
-static ArrayRef<const uint8_t> WriteDebugElfFileForClassesInternal(
+static std::vector<uint8_t> WriteDebugElfFileForClassesInternal(
     InstructionSet isa,
     const InstructionSetFeatures* features,
     const ArrayRef<mirror::Class*>& types)
@@ -164,16 +160,12 @@
 
   builder->End();
   CHECK(builder->Good());
-  // Make a copy of the buffer.  We want to shrink it anyway.
-  uint8_t* result = new uint8_t[buffer.size()];
-  CHECK(result != nullptr);
-  memcpy(result, buffer.data(), buffer.size());
-  return ArrayRef<const uint8_t>(result, buffer.size());
+  return buffer;
 }
 
-ArrayRef<const uint8_t> WriteDebugElfFileForClasses(InstructionSet isa,
-                                                    const InstructionSetFeatures* features,
-                                                    const ArrayRef<mirror::Class*>& types) {
+std::vector<uint8_t> WriteDebugElfFileForClasses(InstructionSet isa,
+                                                 const InstructionSetFeatures* features,
+                                                 const ArrayRef<mirror::Class*>& types) {
   if (Is64BitInstructionSet(isa)) {
     return WriteDebugElfFileForClassesInternal<ElfTypes64>(isa, features, types);
   } else {
diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h
index 736370e..6f52249 100644
--- a/compiler/debug/elf_debug_writer.h
+++ b/compiler/debug/elf_debug_writer.h
@@ -47,12 +47,12 @@
     size_t text_section_size,
     const ArrayRef<const MethodDebugInfo>& method_infos);
 
-ArrayRef<const uint8_t> WriteDebugElfFileForMethods(
+std::vector<uint8_t> WriteDebugElfFileForMethods(
     InstructionSet isa,
     const InstructionSetFeatures* features,
     const ArrayRef<const MethodDebugInfo>& method_infos);
 
-ArrayRef<const uint8_t> WriteDebugElfFileForClasses(
+std::vector<uint8_t> WriteDebugElfFileForClasses(
     InstructionSet isa,
     const InstructionSetFeatures* features,
     const ArrayRef<mirror::Class*>& types)
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 4836041..3ce786e 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -327,10 +327,16 @@
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    art::DexCompilationUnit unit(nullptr, class_loader, class_linker,
-                                 dex_file, code_item, class_def_idx, method_idx, access_flags,
-                                 driver->GetVerifiedMethod(&dex_file, method_idx),
-                                 hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)));
+    art::DexCompilationUnit unit(
+        class_loader,
+        class_linker,
+        dex_file,
+        code_item,
+        class_def_idx,
+        method_idx,
+        access_flags,
+        driver->GetVerifiedMethod(&dex_file, method_idx),
+        hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)));
     art::optimizer::DexCompiler dex_compiler(*driver, unit, dex_to_dex_compilation_level);
     dex_compiler.Compile();
     if (dex_compiler.GetQuickenedInfo().empty()) {
@@ -357,9 +363,7 @@
         0,
         0,
         ArrayRef<const SrcMapElem>(),                // src_mapping_table
-        ArrayRef<const uint8_t>(),                   // mapping_table
         ArrayRef<const uint8_t>(builder.GetData()),  // vmap_table
-        ArrayRef<const uint8_t>(),                   // gc_map
         ArrayRef<const uint8_t>(),                   // cfi data
         ArrayRef<const LinkerPatch>());
   }
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 7c9ce1e..606302b 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -60,11 +60,10 @@
     // TODO: Investigate why are we doing the work again for this method and try to avoid it.
     LOG(WARNING) << "Method processed more than once: "
         << PrettyMethod(ref.dex_method_index, *ref.dex_file);
-    if (!Runtime::Current()->UseJit()) {
+    if (!Runtime::Current()->UseJitCompilation()) {
       DCHECK_EQ(it->second->GetDevirtMap().size(), verified_method->GetDevirtMap().size());
       DCHECK_EQ(it->second->GetSafeCastSet().size(), verified_method->GetSafeCastSet().size());
     }
-    DCHECK_EQ(it->second->GetDexGcMap().size(), verified_method->GetDexGcMap().size());
     // Delete the new verified method since there was already an existing one registered. It
     // is unsafe to replace the existing one since the JIT may be using it to generate a
     // native GC map.
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 9ae2164..bace014 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -30,7 +30,6 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "utils.h"
-#include "verifier/dex_gc_map.h"
 #include "verifier/method_verifier-inl.h"
 #include "verifier/reg_type-inl.h"
 #include "verifier/register_line-inl.h"
@@ -49,21 +48,14 @@
                          method_verifier->HasInstructionThatWillThrow()));
 
   if (compile) {
-    /* Generate a register map. */
-    if (!verified_method->GenerateGcMap(method_verifier)) {
-      return nullptr;  // Not a real failure, but a failure to encode.
-    }
-    if (kIsDebugBuild) {
-      VerifyGcMap(method_verifier, verified_method->dex_gc_map_);
-    }
-
     // TODO: move this out when DEX-to-DEX supports devirtualization.
     if (method_verifier->HasVirtualOrInterfaceInvokes()) {
       verified_method->GenerateDevirtMap(method_verifier);
     }
 
     // Only need dequicken info for JIT so far.
-    if (Runtime::Current()->UseJit() && !verified_method->GenerateDequickenMap(method_verifier)) {
+    if (Runtime::Current()->UseJitCompilation() &&
+        !verified_method->GenerateDequickenMap(method_verifier)) {
       return nullptr;
     }
   }
@@ -81,7 +73,7 @@
 }
 
 const DexFileReference* VerifiedMethod::GetDequickenIndex(uint32_t dex_pc) const {
-  DCHECK(Runtime::Current()->UseJit());
+  DCHECK(Runtime::Current()->UseJitCompilation());
   auto it = dequicken_map_.find(dex_pc);
   return (it != dequicken_map_.end()) ? &it->second : nullptr;
 }
@@ -90,120 +82,6 @@
   return std::binary_search(safe_cast_set_.begin(), safe_cast_set_.end(), pc);
 }
 
-bool VerifiedMethod::GenerateGcMap(verifier::MethodVerifier* method_verifier) {
-  DCHECK(dex_gc_map_.empty());
-  size_t num_entries, ref_bitmap_bits, pc_bits;
-  ComputeGcMapSizes(method_verifier, &num_entries, &ref_bitmap_bits, &pc_bits);
-  const size_t ref_bitmap_bytes = RoundUp(ref_bitmap_bits, kBitsPerByte) / kBitsPerByte;
-  static constexpr size_t kFormatBits = 3;
-  // We have 16 - kFormatBits available for the ref_bitmap_bytes.
-  if ((ref_bitmap_bytes >> (16u - kFormatBits)) != 0) {
-    LOG(WARNING) << "Cannot encode GC map for method with " << ref_bitmap_bits << " registers: "
-                 << PrettyMethod(method_verifier->GetMethodReference().dex_method_index,
-                                 *method_verifier->GetMethodReference().dex_file);
-    return false;
-  }
-  // There are 2 bytes to encode the number of entries.
-  if (num_entries > std::numeric_limits<uint16_t>::max()) {
-    LOG(WARNING) << "Cannot encode GC map for method with " << num_entries << " entries: "
-                 << PrettyMethod(method_verifier->GetMethodReference().dex_method_index,
-                                 *method_verifier->GetMethodReference().dex_file);
-    return false;
-  }
-  size_t pc_bytes;
-  verifier::RegisterMapFormat format;
-  if (pc_bits <= kBitsPerByte) {
-    format = verifier::kRegMapFormatCompact8;
-    pc_bytes = 1;
-  } else if (pc_bits <= kBitsPerByte * 2) {
-    format = verifier::kRegMapFormatCompact16;
-    pc_bytes = 2;
-  } else {
-    LOG(WARNING) << "Cannot encode GC map for method with "
-                 << (1 << pc_bits) << " instructions (number is rounded up to nearest power of 2): "
-                 << PrettyMethod(method_verifier->GetMethodReference().dex_method_index,
-                                 *method_verifier->GetMethodReference().dex_file);
-    return false;
-  }
-  size_t table_size = ((pc_bytes + ref_bitmap_bytes) * num_entries) + 4;
-  dex_gc_map_.reserve(table_size);
-  // Write table header.
-  dex_gc_map_.push_back(format | ((ref_bitmap_bytes & ~0xFF) >> (kBitsPerByte - kFormatBits)));
-  dex_gc_map_.push_back(ref_bitmap_bytes & 0xFF);
-  dex_gc_map_.push_back(num_entries & 0xFF);
-  dex_gc_map_.push_back((num_entries >> 8) & 0xFF);
-  // Write table data.
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
-    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
-      dex_gc_map_.push_back(i & 0xFF);
-      if (pc_bytes == 2) {
-        dex_gc_map_.push_back((i >> 8) & 0xFF);
-      }
-      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
-      line->WriteReferenceBitMap(method_verifier, &dex_gc_map_, ref_bitmap_bytes);
-    }
-  }
-  DCHECK_EQ(dex_gc_map_.size(), table_size);
-  return true;
-}
-
-void VerifiedMethod::VerifyGcMap(verifier::MethodVerifier* method_verifier,
-                                 const std::vector<uint8_t>& data) {
-  // Check that for every GC point there is a map entry, there aren't entries for non-GC points,
-  // that the table data is well formed and all references are marked (or not) in the bitmap.
-  verifier::DexPcToReferenceMap map(&data[0]);
-  CHECK_EQ(data.size(), map.RawSize()) << map.NumEntries() << " " << map.RegWidth();
-  size_t map_index = 0;
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
-    const uint8_t* reg_bitmap = map.FindBitMap(i, false);
-    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
-      DCHECK_LT(map_index, map.NumEntries());
-      DCHECK_EQ(map.GetDexPc(map_index), i);
-      DCHECK_EQ(map.GetBitMap(map_index), reg_bitmap);
-      map_index++;
-      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
-      for (size_t j = 0; j < code_item->registers_size_; j++) {
-        if (line->GetRegisterType(method_verifier, j).IsNonZeroReferenceTypes()) {
-          DCHECK_LT(j / kBitsPerByte, map.RegWidth());
-          DCHECK_EQ((reg_bitmap[j / kBitsPerByte] >> (j % kBitsPerByte)) & 1, 1);
-        } else if ((j / kBitsPerByte) < map.RegWidth()) {
-          DCHECK_EQ((reg_bitmap[j / kBitsPerByte] >> (j % kBitsPerByte)) & 1, 0);
-        } else {
-          // If a register doesn't contain a reference then the bitmap may be shorter than the line.
-        }
-      }
-    } else {
-      DCHECK(i >= 65536 || reg_bitmap == nullptr);
-    }
-  }
-}
-
-void VerifiedMethod::ComputeGcMapSizes(verifier::MethodVerifier* method_verifier,
-                                       size_t* gc_points, size_t* ref_bitmap_bits,
-                                       size_t* log2_max_gc_pc) {
-  size_t local_gc_points = 0;
-  size_t max_insn = 0;
-  size_t max_ref_reg = -1;
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
-    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
-      local_gc_points++;
-      max_insn = i;
-      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
-      max_ref_reg = line->GetMaxNonZeroReferenceReg(method_verifier, max_ref_reg);
-    }
-  }
-  *gc_points = local_gc_points;
-  *ref_bitmap_bits = max_ref_reg + 1;  // If max register is 0 we need 1 bit to encode (ie +1).
-  size_t i = 0;
-  while ((1U << i) <= max_insn) {
-    i++;
-  }
-  *log2_max_gc_pc = i;
-}
-
 bool VerifiedMethod::GenerateDequickenMap(verifier::MethodVerifier* method_verifier) {
   if (method_verifier->HasFailures()) {
     return false;
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index 12d0219..495acf0 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -47,10 +47,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   ~VerifiedMethod() = default;
 
-  const std::vector<uint8_t>& GetDexGcMap() const {
-    return dex_gc_map_;
-  }
-
   const DevirtualizationMap& GetDevirtMap() const {
     return devirt_map_;
   }
@@ -114,7 +110,6 @@
   void GenerateSafeCastSet(verifier::MethodVerifier* method_verifier)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  std::vector<uint8_t> dex_gc_map_;
   DevirtualizationMap devirt_map_;
   // Dequicken map is required for compiling quickened byte codes. The quicken maps from
   // dex PC to dex method index or dex field index based on the instruction.
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index 510613e..a0a8f81 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -174,11 +174,8 @@
       dedupe_code_("dedupe code", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
       dedupe_src_mapping_table_("dedupe source mapping table",
                                 LengthPrefixedArrayAlloc<SrcMapElem>(swap_space_.get())),
-      dedupe_mapping_table_("dedupe mapping table",
-                            LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
       dedupe_vmap_table_("dedupe vmap table",
                          LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
-      dedupe_gc_map_("dedupe gc map", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
       dedupe_cfi_info_("dedupe cfi info", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
       dedupe_linker_patches_("dedupe cfi info",
                              LengthPrefixedArrayAlloc<LinkerPatch>(swap_space_.get())) {
@@ -196,9 +193,7 @@
   if (extended) {
     Thread* self = Thread::Current();
     os << "\nCode dedupe: " << dedupe_code_.DumpStats(self);
-    os << "\nMapping table dedupe: " << dedupe_mapping_table_.DumpStats(self);
     os << "\nVmap table dedupe: " << dedupe_vmap_table_.DumpStats(self);
-    os << "\nGC map dedupe: " << dedupe_gc_map_.DumpStats(self);
     os << "\nCFI info dedupe: " << dedupe_cfi_info_.DumpStats(self);
   }
 }
@@ -221,15 +216,6 @@
   ReleaseArrayIfNotDeduplicated(src_map);
 }
 
-const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateMappingTable(
-    const ArrayRef<const uint8_t>& table) {
-  return AllocateOrDeduplicateArray(table, &dedupe_mapping_table_);
-}
-
-void CompiledMethodStorage::ReleaseMappingTable(const LengthPrefixedArray<uint8_t>* table) {
-  ReleaseArrayIfNotDeduplicated(table);
-}
-
 const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateVMapTable(
     const ArrayRef<const uint8_t>& table) {
   return AllocateOrDeduplicateArray(table, &dedupe_vmap_table_);
@@ -239,15 +225,6 @@
   ReleaseArrayIfNotDeduplicated(table);
 }
 
-const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateGCMap(
-    const ArrayRef<const uint8_t>& gc_map) {
-  return AllocateOrDeduplicateArray(gc_map, &dedupe_gc_map_);
-}
-
-void CompiledMethodStorage::ReleaseGCMap(const LengthPrefixedArray<uint8_t>* gc_map) {
-  ReleaseArrayIfNotDeduplicated(gc_map);
-}
-
 const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateCFIInfo(
     const ArrayRef<const uint8_t>& cfi_info) {
   return AllocateOrDeduplicateArray(cfi_info, &dedupe_cfi_info_);
diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h
index d6961a0..8674abf 100644
--- a/compiler/driver/compiled_method_storage.h
+++ b/compiler/driver/compiled_method_storage.h
@@ -56,15 +56,9 @@
       const ArrayRef<const SrcMapElem>& src_map);
   void ReleaseSrcMappingTable(const LengthPrefixedArray<SrcMapElem>* src_map);
 
-  const LengthPrefixedArray<uint8_t>* DeduplicateMappingTable(const ArrayRef<const uint8_t>& table);
-  void ReleaseMappingTable(const LengthPrefixedArray<uint8_t>* table);
-
   const LengthPrefixedArray<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& table);
   void ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table);
 
-  const LengthPrefixedArray<uint8_t>* DeduplicateGCMap(const ArrayRef<const uint8_t>& gc_map);
-  void ReleaseGCMap(const LengthPrefixedArray<uint8_t>* gc_map);
-
   const LengthPrefixedArray<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info);
   void ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info);
 
@@ -103,9 +97,7 @@
 
   ArrayDedupeSet<uint8_t> dedupe_code_;
   ArrayDedupeSet<SrcMapElem> dedupe_src_mapping_table_;
-  ArrayDedupeSet<uint8_t> dedupe_mapping_table_;
   ArrayDedupeSet<uint8_t> dedupe_vmap_table_;
-  ArrayDedupeSet<uint8_t> dedupe_gc_map_;
   ArrayDedupeSet<uint8_t> dedupe_cfi_info_;
   ArrayDedupeSet<LinkerPatch> dedupe_linker_patches_;
 
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index 0695cb5..6863f42 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -36,6 +36,7 @@
                         /* instruction_set_ */ kNone,
                         /* instruction_set_features */ nullptr,
                         /* boot_image */ false,
+                        /* app_image */ false,
                         /* image_classes */ nullptr,
                         /* compiled_classes */ nullptr,
                         /* compiled_methods */ nullptr,
@@ -61,24 +62,12 @@
       ArrayRef<const SrcMapElem>(raw_src_map1),
       ArrayRef<const SrcMapElem>(raw_src_map2),
   };
-  const uint8_t raw_mapping_table1[] = { 5, 6, 7 };
-  const uint8_t raw_mapping_table2[] = { 7, 6, 5, 4 };
-  ArrayRef<const uint8_t> mapping_table[] = {
-      ArrayRef<const uint8_t>(raw_mapping_table1),
-      ArrayRef<const uint8_t>(raw_mapping_table2),
-  };
   const uint8_t raw_vmap_table1[] = { 2, 4, 6 };
   const uint8_t raw_vmap_table2[] = { 7, 5, 3, 1 };
   ArrayRef<const uint8_t> vmap_table[] = {
       ArrayRef<const uint8_t>(raw_vmap_table1),
       ArrayRef<const uint8_t>(raw_vmap_table2),
   };
-  const uint8_t raw_gc_map1[] = { 9, 8, 7 };
-  const uint8_t raw_gc_map2[] = { 6, 7, 8, 9 };
-  ArrayRef<const uint8_t> gc_map[] = {
-      ArrayRef<const uint8_t>(raw_gc_map1),
-      ArrayRef<const uint8_t>(raw_gc_map2),
-  };
   const uint8_t raw_cfi_info1[] = { 1, 3, 5 };
   const uint8_t raw_cfi_info2[] = { 8, 6, 4, 2 };
   ArrayRef<const uint8_t> cfi_info[] = {
@@ -102,49 +91,37 @@
   compiled_methods.reserve(1u << 7);
   for (auto&& c : code) {
     for (auto&& s : src_map) {
-      for (auto&& m : mapping_table) {
-        for (auto&& v : vmap_table) {
-          for (auto&& g : gc_map) {
-            for (auto&& f : cfi_info) {
-              for (auto&& p : patches) {
-                compiled_methods.push_back(CompiledMethod::SwapAllocCompiledMethod(
-                        &driver, kNone, c, 0u, 0u, 0u, s, m, v, g, f, p));
-              }
-            }
+      for (auto&& v : vmap_table) {
+        for (auto&& f : cfi_info) {
+          for (auto&& p : patches) {
+            compiled_methods.push_back(CompiledMethod::SwapAllocCompiledMethod(
+                &driver, kNone, c, 0u, 0u, 0u, s, v, f, p));
           }
         }
       }
     }
   }
-  constexpr size_t code_bit = 1u << 6;
-  constexpr size_t src_map_bit = 1u << 5;
-  constexpr size_t mapping_table_bit = 1u << 4;
-  constexpr size_t vmap_table_bit = 1u << 3;
-  constexpr size_t gc_map_bit = 1u << 2;
+  constexpr size_t code_bit = 1u << 4;
+  constexpr size_t src_map_bit = 1u << 3;
+  constexpr size_t vmap_table_bit = 1u << 2;
   constexpr size_t cfi_info_bit = 1u << 1;
   constexpr size_t patches_bit = 1u << 0;
-  CHECK_EQ(compiled_methods.size(), 1u << 7);
+  CHECK_EQ(compiled_methods.size(), 1u << 5);
   for (size_t i = 0; i != compiled_methods.size(); ++i) {
     for (size_t j = 0; j != compiled_methods.size(); ++j) {
       CompiledMethod* lhs = compiled_methods[i];
       CompiledMethod* rhs = compiled_methods[j];
       bool same_code = ((i ^ j) & code_bit) == 0u;
       bool same_src_map = ((i ^ j) & src_map_bit) == 0u;
-      bool same_mapping_table = ((i ^ j) & mapping_table_bit) == 0u;
       bool same_vmap_table = ((i ^ j) & vmap_table_bit) == 0u;
-      bool same_gc_map = ((i ^ j) & gc_map_bit) == 0u;
       bool same_cfi_info = ((i ^ j) & cfi_info_bit) == 0u;
       bool same_patches = ((i ^ j) & patches_bit) == 0u;
       ASSERT_EQ(same_code, lhs->GetQuickCode().data() == rhs->GetQuickCode().data())
           << i << " " << j;
       ASSERT_EQ(same_src_map, lhs->GetSrcMappingTable().data() == rhs->GetSrcMappingTable().data())
           << i << " " << j;
-      ASSERT_EQ(same_mapping_table, lhs->GetMappingTable().data() == rhs->GetMappingTable().data())
-          << i << " " << j;
       ASSERT_EQ(same_vmap_table, lhs->GetVmapTable().data() == rhs->GetVmapTable().data())
           << i << " " << j;
-      ASSERT_EQ(same_gc_map, lhs->GetGcMap().data() == rhs->GetGcMap().data())
-          << i << " " << j;
       ASSERT_EQ(same_cfi_info, lhs->GetCFIInfo().data() == rhs->GetCFIInfo().data())
           << i << " " << j;
       ASSERT_EQ(same_patches, lhs->GetPatches().data() == rhs->GetPatches().data())
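The updated test relies on an index-bit encoding: each input kind owns one bit of the loop index, so two CompiledMethods share a deduplicated array exactly when their indices agree on that kind's bit. A minimal standalone sketch of the same invariant, using std::set as a stand-in for ART's ArrayDedupeSet:

    #include <cassert>
    #include <cstdint>
    #include <set>
    #include <vector>

    // Toy dedupe store: identical inputs share one stable allocation.
    const std::vector<uint8_t>* Dedupe(std::set<std::vector<uint8_t>>& storage,
                                       const std::vector<uint8_t>& data) {
      return &*storage.insert(data).first;
    }

    int main() {
      std::set<std::vector<uint8_t>> storage;
      const std::vector<uint8_t> code[] = {{1, 2}, {3, 4}};
      const std::vector<uint8_t> vmap[] = {{5}, {6, 7}};
      constexpr size_t code_bit = 1u << 1;
      constexpr size_t vmap_bit = 1u << 0;
      const std::vector<uint8_t>* deduped[4][2];
      for (size_t i = 0; i != 4; ++i) {
        deduped[i][0] = Dedupe(storage, code[(i & code_bit) != 0]);
        deduped[i][1] = Dedupe(storage, vmap[(i & vmap_bit) != 0]);
      }
      for (size_t i = 0; i != 4; ++i) {
        for (size_t j = 0; j != 4; ++j) {
          // Deduplicated pointers match exactly when the index bit matches.
          assert((deduped[i][0] == deduped[j][0]) == (((i ^ j) & code_bit) == 0));
          assert((deduped[i][1] == deduped[j][1]) == (((i ^ j) & vmap_bit) == 0));
        }
      }
    }
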
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index ea16cb2..1ab1d31 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -341,6 +341,7 @@
     InstructionSet instruction_set,
     const InstructionSetFeatures* instruction_set_features,
     bool boot_image,
+    bool app_image,
     std::unordered_set<std::string>* image_classes,
     std::unordered_set<std::string>* compiled_classes,
     std::unordered_set<std::string>* compiled_methods,
@@ -357,12 +358,13 @@
       compiler_kind_(compiler_kind),
       instruction_set_(instruction_set),
       instruction_set_features_(instruction_set_features),
-      freezing_constructor_lock_("freezing constructor lock"),
+      requires_constructor_barrier_lock_("constructor barrier lock"),
       compiled_classes_lock_("compiled classes lock"),
       compiled_methods_lock_("compiled method lock"),
       compiled_methods_(MethodTable::key_compare()),
       non_relative_linker_patch_count_(0u),
       boot_image_(boot_image),
+      app_image_(app_image),
       image_classes_(image_classes),
       classes_to_compile_(compiled_classes),
       methods_to_compile_(compiled_methods),
@@ -416,23 +418,27 @@
                                 type ## _ENTRYPOINT_OFFSET(4, offset)); \
     }
 
-const std::vector<uint8_t>* CompilerDriver::CreateJniDlsymLookup() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateJniDlsymLookup() const {
   CREATE_TRAMPOLINE(JNI, kJniAbi, pDlsymLookup)
 }
 
-const std::vector<uint8_t>* CompilerDriver::CreateQuickGenericJniTrampoline() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickGenericJniTrampoline()
+    const {
   CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickGenericJniTrampoline)
 }
 
-const std::vector<uint8_t>* CompilerDriver::CreateQuickImtConflictTrampoline() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickImtConflictTrampoline()
+    const {
   CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickImtConflictTrampoline)
 }
 
-const std::vector<uint8_t>* CompilerDriver::CreateQuickResolutionTrampoline() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickResolutionTrampoline()
+    const {
   CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickResolutionTrampoline)
 }
 
-const std::vector<uint8_t>* CompilerDriver::CreateQuickToInterpreterBridge() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickToInterpreterBridge()
+    const {
   CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickToInterpreterBridge)
 }
 #undef CREATE_TRAMPOLINE
@@ -469,7 +475,7 @@
     const DexFile& dex_file, const DexFile::ClassDef& class_def)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   auto* const runtime = Runtime::Current();
-  if (runtime->UseJit() || driver.GetCompilerOptions().VerifyAtRuntime()) {
+  if (runtime->UseJitCompilation() || driver.GetCompilerOptions().VerifyAtRuntime()) {
     // Verify at runtime shouldn't dex to dex since we didn't resolve or verify.
     return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
   }
@@ -941,7 +947,7 @@
 
 class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
  public:
-  ResolveCatchBlockExceptionsClassVisitor(
+  explicit ResolveCatchBlockExceptionsClassVisitor(
       std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
      : exceptions_to_resolve_(exceptions_to_resolve) {}
 
@@ -1264,7 +1270,7 @@
 bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) {
   Runtime* runtime = Runtime::Current();
   if (!runtime->IsAotCompiler()) {
-    DCHECK(runtime->UseJit());
+    DCHECK(runtime->UseJitCompilation());
     // Having the klass reference here implies that the klass is already loaded.
     return true;
   }
@@ -1279,14 +1285,13 @@
   return IsImageClass(descriptor);
 }
 
-bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) {
+bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(Handle<mirror::DexCache> dex_cache,
+                                                      uint32_t type_idx) {
   bool result = false;
   if ((IsBootImage() &&
-       IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) ||
-      Runtime::Current()->UseJit()) {
-    ScopedObjectAccess soa(Thread::Current());
-    mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-        soa.Self(), dex_file, false);
+       IsImageClass(dex_cache->GetDexFile()->StringDataByIdx(
+           dex_cache->GetDexFile()->GetTypeId(type_idx).descriptor_idx_))) ||
+      Runtime::Current()->UseJitCompilation()) {
     mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
     result = (resolved_class != nullptr);
   }
@@ -1304,7 +1309,7 @@
   // See also Compiler::ResolveDexFile
 
   bool result = false;
-  if (IsBootImage() || Runtime::Current()->UseJit()) {
+  if (IsBootImage() || Runtime::Current()->UseJitCompilation()) {
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
@@ -1316,7 +1321,7 @@
       result = true;
     } else {
       // Just check whether the dex cache already has the string.
-      DCHECK(Runtime::Current()->UseJit());
+      DCHECK(Runtime::Current()->UseJitCompilation());
       result = (dex_cache->GetResolvedString(string_idx) != nullptr);
     }
   }
@@ -1328,32 +1333,16 @@
   return result;
 }
 
-bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file,
-                                                uint32_t type_idx,
-                                                bool* type_known_final, bool* type_known_abstract,
-                                                bool* equals_referrers_class) {
-  if (type_known_final != nullptr) {
-    *type_known_final = false;
-  }
-  if (type_known_abstract != nullptr) {
-    *type_known_abstract = false;
-  }
-  if (equals_referrers_class != nullptr) {
-    *equals_referrers_class = false;
-  }
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-      soa.Self(), dex_file, false);
+bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx,
+                                                Handle<mirror::DexCache> dex_cache,
+                                                uint32_t type_idx) {
   // Get type from dex cache assuming it was populated by the verifier
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
     stats_->TypeNeedsAccessCheck();
     return false;  // Unknown class needs access checks.
   }
-  const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
-  if (equals_referrers_class != nullptr) {
-    *equals_referrers_class = (method_id.class_idx_ == type_idx);
-  }
+  const DexFile::MethodId& method_id = dex_cache->GetDexFile()->GetMethodId(referrer_idx);
   bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
   if (!is_accessible) {
     mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
@@ -1367,12 +1356,6 @@
   }
   if (is_accessible) {
     stats_->TypeDoesntNeedAccessCheck();
-    if (type_known_final != nullptr) {
-      *type_known_final = resolved_class->IsFinal() && !resolved_class->IsArrayClass();
-    }
-    if (type_known_abstract != nullptr) {
-      *type_known_abstract = resolved_class->IsAbstract() && !resolved_class->IsArrayClass();
-    }
   } else {
     stats_->TypeNeedsAccessCheck();
   }
@@ -1380,12 +1363,9 @@
 }
 
 bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
-                                                            const DexFile& dex_file,
+                                                            Handle<mirror::DexCache> dex_cache,
                                                             uint32_t type_idx,
                                                             bool* finalizable) {
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-      soa.Self(), dex_file, false);
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
@@ -1395,7 +1375,7 @@
     return false;  // Unknown class needs access checks.
   }
   *finalizable = resolved_class->IsFinalizable();
-  const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
+  const DexFile::MethodId& method_id = dex_cache->GetDexFile()->GetMethodId(referrer_idx);
   bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
   if (!is_accessible) {
     mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
@@ -1449,7 +1429,7 @@
     } else {
       return false;
     }
-  } else if (runtime->UseJit() && !heap->IsMovableObject(resolved_class)) {
+  } else if (runtime->UseJitCompilation() && !heap->IsMovableObject(resolved_class)) {
     *is_type_initialized = resolved_class->IsInitialized();
     // If the class may move around, then don't embed it as a direct pointer.
     *use_direct_type_ptr = true;
@@ -1583,53 +1563,6 @@
   }
 }
 
-bool CompilerDriver::ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
-                                            bool is_put, MemberOffset* field_offset,
-                                            uint32_t* storage_index, bool* is_referrers_class,
-                                            bool* is_volatile, bool* is_initialized,
-                                            Primitive::Type* type) {
-  ScopedObjectAccess soa(Thread::Current());
-  // Try to resolve the field and compiling method's class.
-  ArtField* resolved_field;
-  mirror::Class* referrer_class;
-  Handle<mirror::DexCache> dex_cache(mUnit->GetDexCache());
-  {
-    StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::ClassLoader> class_loader_handle(
-        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
-    resolved_field =
-        ResolveField(soa, dex_cache, class_loader_handle, mUnit, field_idx, true);
-    referrer_class = resolved_field != nullptr
-        ? ResolveCompilingMethodsClass(soa, dex_cache, class_loader_handle, mUnit) : nullptr;
-  }
-  bool result = false;
-  if (resolved_field != nullptr && referrer_class != nullptr) {
-    *is_volatile = IsFieldVolatile(resolved_field);
-    std::pair<bool, bool> fast_path = IsFastStaticField(
-        dex_cache.Get(), referrer_class, resolved_field, field_idx, storage_index);
-    result = is_put ? fast_path.second : fast_path.first;
-  }
-  if (result) {
-    *field_offset = GetFieldOffset(resolved_field);
-    *is_referrers_class = IsStaticFieldInReferrerClass(referrer_class, resolved_field);
-    // *is_referrers_class == true implies no worrying about class initialization.
-    *is_initialized = (*is_referrers_class) ||
-        (IsStaticFieldsClassInitialized(referrer_class, resolved_field) &&
-         CanAssumeTypeIsPresentInDexCache(*mUnit->GetDexFile(), *storage_index));
-    *type = resolved_field->GetTypeAsPrimitiveType();
-  } else {
-    // Conservative defaults.
-    *is_volatile = true;
-    *field_offset = MemberOffset(static_cast<size_t>(-1));
-    *storage_index = -1;
-    *is_referrers_class = false;
-    *is_initialized = false;
-    *type = Primitive::kPrimVoid;
-  }
-  ProcessedStaticField(result, *is_referrers_class);
-  return result;
-}
-
 void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType sharp_type,
                                                    bool no_guarantee_of_dex_cache_entry,
                                                    const mirror::Class* referrer_class,
@@ -1673,7 +1606,7 @@
       }
     }
   }
-  if (runtime->UseJit()) {
+  if (runtime->UseJitCompilation()) {
     // If we are the JIT, then don't allow a direct call to the interpreter bridge since this will
     // never be updated even after we compile the method.
     if (cl->IsQuickToInterpreterBridge(
@@ -1705,7 +1638,7 @@
   bool must_use_direct_pointers = false;
   mirror::DexCache* dex_cache = declaring_class->GetDexCache();
   if (target_method->dex_file == dex_cache->GetDexFile() &&
-    !(runtime->UseJit() && dex_cache->GetResolvedMethod(
+    !(runtime->UseJitCompilation() && dex_cache->GetResolvedMethod(
         method->GetDexMethodIndex(), pointer_size) == nullptr)) {
     target_method->dex_method_index = method->GetDexMethodIndex();
   } else {
@@ -1742,7 +1675,7 @@
         break;
       }
     }
-    if (method_in_image || compiling_boot || runtime->UseJit()) {
+    if (method_in_image || compiling_boot || runtime->UseJitCompilation()) {
       // We know we must be able to get to the method in the image, so use that pointer.
       // In the case where we are the JIT, we can always use direct pointers since we know where
       // the method and its code are / will be. We don't sharpen to interpreter bridge since we
@@ -2006,6 +1939,28 @@
   self->ClearException();
 }
 
+bool CompilerDriver::RequiresConstructorBarrier(const DexFile& dex_file,
+                                                uint16_t class_def_idx) const {
+  const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_idx);
+  const uint8_t* class_data = dex_file.GetClassData(class_def);
+  if (class_data == nullptr) {
+    // Empty class such as a marker interface.
+    return false;
+  }
+  ClassDataItemIterator it(dex_file, class_data);
+  while (it.HasNextStaticField()) {
+    it.Next();
+  }
+  // We require a constructor barrier if there are final instance fields.
+  while (it.HasNextInstanceField()) {
+    if (it.MemberIsFinal()) {
+      return true;
+    }
+    it.Next();
+  }
+  return false;
+}
+
 class ResolveClassFieldsAndMethodsVisitor : public CompilationVisitor {
  public:
   explicit ResolveClassFieldsAndMethodsVisitor(const ParallelCompilationManager* manager)
@@ -2110,9 +2065,10 @@
         DCHECK(!it.HasNext());
       }
     }
-    if (requires_constructor_barrier) {
-      manager_->GetCompiler()->AddRequiresConstructorBarrier(self, &dex_file, class_def_index);
-    }
+    manager_->GetCompiler()->SetRequiresConstructorBarrier(self,
+                                                           &dex_file,
+                                                           class_def_index,
+                                                           requires_constructor_barrier);
   }
 
  private:
@@ -2212,7 +2168,8 @@
 
 class VerifyClassVisitor : public CompilationVisitor {
  public:
-  explicit VerifyClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
+  VerifyClassVisitor(const ParallelCompilationManager* manager, LogSeverity log_level)
+     : manager_(manager), log_level_(log_level) {}
 
   virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
     ATRACE_CALL();
@@ -2250,7 +2207,7 @@
                                                 &class_def,
                                                 Runtime::Current()->GetCompilerCallbacks(),
                                                 true /* allow soft failures */,
-                                                true /* log hard failures */,
+                                                log_level_,
                                                 &error_msg) ==
                                                     verifier::MethodVerifier::kHardFailure) {
         LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(descriptor)
@@ -2259,7 +2216,7 @@
       }
     } else if (!SkipClass(jclass_loader, dex_file, klass.Get())) {
       CHECK(klass->IsResolved()) << PrettyClass(klass.Get());
-      class_linker->VerifyClass(soa.Self(), klass);
+      class_linker->VerifyClass(soa.Self(), klass, log_level_);
 
       if (klass->IsErroneous()) {
         // ClassLinker::VerifyClass throws, which isn't useful in the compiler.
@@ -2282,6 +2239,7 @@
 
  private:
   const ParallelCompilationManager* const manager_;
+  const LogSeverity log_level_;
 };
 
 void CompilerDriver::VerifyDexFile(jobject class_loader,
@@ -2294,7 +2252,10 @@
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
-  VerifyClassVisitor visitor(&context);
+  LogSeverity log_level = GetCompilerOptions().AbortOnHardVerifierFailure()
+                              ? LogSeverity::INTERNAL_FATAL
+                              : LogSeverity::WARNING;
+  VerifyClassVisitor visitor(&context, log_level);
   context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
@@ -2481,6 +2442,27 @@
   context.ForAll(0, dex_file.NumClassDefs(), &visitor, init_thread_count);
 }
 
+class InitializeArrayClassesAndCreateConflictTablesVisitor : public ClassVisitor {
+ public:
+  virtual bool operator()(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+      return true;
+    }
+    if (klass->IsArrayClass()) {
+      StackHandleScope<1> hs(Thread::Current());
+      Runtime::Current()->GetClassLinker()->EnsureInitialized(hs.Self(),
+                                                              hs.NewHandle(klass),
+                                                              true,
+                                                              true);
+    }
+    // Create the conflict tables.
+    if (!klass->IsTemp() && klass->ShouldHaveEmbeddedImtAndVTable()) {
+      Runtime::Current()->GetClassLinker()->FillIMTAndConflictTables(klass);
+    }
+    return true;
+  }
+};
+
 void CompilerDriver::InitializeClasses(jobject class_loader,
                                        const std::vector<const DexFile*>& dex_files,
                                        TimingLogger* timings) {
@@ -2489,6 +2471,16 @@
     CHECK(dex_file != nullptr);
     InitializeClasses(class_loader, *dex_file, dex_files, timings);
   }
+  if (boot_image_ || app_image_) {
+    // Make sure that we call EnsureInitialized on all the array classes to call
+    // SetVerificationAttempted so that the access flags are set. If we do not do this, they get
+    // changed at runtime, resulting in more dirty image pages.
+    // Also create conflict tables.
+    // Only useful if we are compiling an image (image_classes_ is not null).
+    ScopedObjectAccess soa(Thread::Current());
+    InitializeArrayClassesAndCreateConflictTablesVisitor visitor;
+    Runtime::Current()->GetClassLinker()->VisitClassesWithoutClassesLock(&visitor);
+  }
   if (IsBootImage()) {
     // Prune garbage objects created during aborted transactions.
     Runtime::Current()->GetHeap()->CollectGarbage(true);
@@ -2742,16 +2734,29 @@
   return non_relative_linker_patch_count_;
 }
 
-void CompilerDriver::AddRequiresConstructorBarrier(Thread* self, const DexFile* dex_file,
-                                                   uint16_t class_def_index) {
-  WriterMutexLock mu(self, freezing_constructor_lock_);
-  freezing_constructor_classes_.insert(ClassReference(dex_file, class_def_index));
+void CompilerDriver::SetRequiresConstructorBarrier(Thread* self,
+                                                   const DexFile* dex_file,
+                                                   uint16_t class_def_index,
+                                                   bool requires) {
+  WriterMutexLock mu(self, requires_constructor_barrier_lock_);
+  requires_constructor_barrier_.emplace(ClassReference(dex_file, class_def_index), requires);
 }
 
-bool CompilerDriver::RequiresConstructorBarrier(Thread* self, const DexFile* dex_file,
-                                                uint16_t class_def_index) const {
-  ReaderMutexLock mu(self, freezing_constructor_lock_);
-  return freezing_constructor_classes_.count(ClassReference(dex_file, class_def_index)) != 0;
+bool CompilerDriver::RequiresConstructorBarrier(Thread* self,
+                                                const DexFile* dex_file,
+                                                uint16_t class_def_index) {
+  ClassReference class_ref(dex_file, class_def_index);
+  {
+    ReaderMutexLock mu(self, requires_constructor_barrier_lock_);
+    auto it = requires_constructor_barrier_.find(class_ref);
+    if (it != requires_constructor_barrier_.end()) {
+      return it->second;
+    }
+  }
+  WriterMutexLock mu(self, requires_constructor_barrier_lock_);
+  const bool requires = RequiresConstructorBarrier(*dex_file, class_def_index);
+  requires_constructor_barrier_.emplace(class_ref, requires);
+  return requires;
 }
 
 std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
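The rewritten RequiresConstructorBarrier above is a classic memoize-under-reader-lock pattern: probe the cache with the shared lock, and only take the exclusive lock to compute and insert on a miss. A self-contained sketch of the same pattern with std::shared_mutex (ART uses its own ReaderWriterMutex; the key type and predicate here are stand-ins):

    #include <map>
    #include <mutex>
    #include <shared_mutex>

    class MemoizedPredicate {
     public:
      bool Get(int key) {
        {
          std::shared_lock<std::shared_mutex> reader(lock_);
          auto it = cache_.find(key);
          if (it != cache_.end()) {
            return it->second;  // Fast path: result already cached.
          }
        }
        // Slow path: recompute under the exclusive lock. If another thread
        // won the race in between, emplace is a no-op and both threads still
        // agree because the predicate is deterministic.
        std::unique_lock<std::shared_mutex> writer(lock_);
        const bool value = Compute(key);
        cache_.emplace(key, value);
        return value;
      }

     private:
      static bool Compute(int key) { return key % 2 == 0; }  // Stand-in predicate.
      std::shared_mutex lock_;
      std::map<int, bool> cache_;
    };
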
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 64a06a2..19a1ecc 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -92,6 +92,7 @@
                  InstructionSet instruction_set,
                  const InstructionSetFeatures* instruction_set_features,
                  bool boot_image,
+                 bool app_image,
                  std::unordered_set<std::string>* image_classes,
                  std::unordered_set<std::string>* compiled_classes,
                  std::unordered_set<std::string>* compiled_methods,
@@ -161,11 +162,11 @@
   }
 
   // Generate the trampolines that are invoked by unresolved direct methods.
-  const std::vector<uint8_t>* CreateJniDlsymLookup() const;
-  const std::vector<uint8_t>* CreateQuickGenericJniTrampoline() const;
-  const std::vector<uint8_t>* CreateQuickImtConflictTrampoline() const;
-  const std::vector<uint8_t>* CreateQuickResolutionTrampoline() const;
-  const std::vector<uint8_t>* CreateQuickToInterpreterBridge() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateJniDlsymLookup() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateQuickGenericJniTrampoline() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateQuickImtConflictTrampoline() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateQuickResolutionTrampoline() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateQuickToInterpreterBridge() const;
 
   CompiledClass* GetCompiledClass(ClassReference ref) const
       REQUIRES(!compiled_classes_lock_);
@@ -183,34 +184,38 @@
   // Remove and delete a compiled method.
   void RemoveCompiledMethod(const MethodReference& method_ref) REQUIRES(!compiled_methods_lock_);
 
-  void AddRequiresConstructorBarrier(Thread* self, const DexFile* dex_file,
-                                     uint16_t class_def_index)
-      REQUIRES(!freezing_constructor_lock_);
-  bool RequiresConstructorBarrier(Thread* self, const DexFile* dex_file,
-                                  uint16_t class_def_index) const
-      REQUIRES(!freezing_constructor_lock_);
+  void SetRequiresConstructorBarrier(Thread* self,
+                                     const DexFile* dex_file,
+                                     uint16_t class_def_index,
+                                     bool requires)
+      REQUIRES(!requires_constructor_barrier_lock_);
+  bool RequiresConstructorBarrier(Thread* self,
+                                  const DexFile* dex_file,
+                                  uint16_t class_def_index)
+      REQUIRES(!requires_constructor_barrier_lock_);
 
   // Callbacks from compiler to see what runtime checks must be generated.
 
-  bool CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx);
+  bool CanAssumeTypeIsPresentInDexCache(Handle<mirror::DexCache> dex_cache,
+                                        uint32_t type_idx)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool CanAssumeStringIsPresentInDexCache(const DexFile& dex_file, uint32_t string_idx)
       REQUIRES(!Locks::mutator_lock_);
 
   // Are runtime access checks necessary in the compiled code?
-  bool CanAccessTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file,
-                                  uint32_t type_idx, bool* type_known_final = nullptr,
-                                  bool* type_known_abstract = nullptr,
-                                  bool* equals_referrers_class = nullptr)
-      REQUIRES(!Locks::mutator_lock_);
+  bool CanAccessTypeWithoutChecks(uint32_t referrer_idx,
+                                  Handle<mirror::DexCache> dex_cache,
+                                  uint32_t type_idx)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Are runtime access and instantiable checks necessary in the code?
   // out_is_finalizable is set to whether the type is finalizable.
   bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
-                                              const DexFile& dex_file,
+                                              Handle<mirror::DexCache> dex_cache,
                                               uint32_t type_idx,
                                               bool* out_is_finalizable)
-      REQUIRES(!Locks::mutator_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
                           bool* is_type_initialized, bool* use_direct_type_ptr,
@@ -365,14 +370,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
 
-  // Can we fastpath static field access? Computes field's offset, volatility and whether the
-  // field is within the referrer (which can avoid checking class initialization).
-  bool ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put,
-                              MemberOffset* field_offset, uint32_t* storage_index,
-                              bool* is_referrers_class, bool* is_volatile, bool* is_initialized,
-                              Primitive::Type* type)
-      REQUIRES(!Locks::mutator_lock_);
-
   // Can we fastpath an interface, super class or virtual method call? Computes method's vtable
   // index.
   bool ComputeInvokeInfo(const DexCompilationUnit* mUnit, const uint32_t dex_pc,
@@ -619,6 +616,8 @@
   void FreeThreadPools();
   void CheckThreadPools();
 
+  bool RequiresConstructorBarrier(const DexFile& dex_file, uint16_t class_def_idx) const;
+
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -629,9 +628,11 @@
   const InstructionSet instruction_set_;
   const InstructionSetFeatures* const instruction_set_features_;
 
-  // All class references that require
-  mutable ReaderWriterMutex freezing_constructor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  std::set<ClassReference> freezing_constructor_classes_ GUARDED_BY(freezing_constructor_lock_);
+  // All class references that require constructor barriers. If the class reference is not in the
+  // map, then the result has not yet been computed.
+  mutable ReaderWriterMutex requires_constructor_barrier_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  std::map<ClassReference, bool> requires_constructor_barrier_
+      GUARDED_BY(requires_constructor_barrier_lock_);
 
   typedef SafeMap<const ClassReference, CompiledClass*> ClassTable;
   // All class references that this compiler has compiled.
@@ -652,6 +653,7 @@
   size_t non_relative_linker_patch_count_ GUARDED_BY(compiled_methods_lock_);
 
   const bool boot_image_;
+  const bool app_image_;
 
   // If image_ is true, specifies the classes that will be included in the image.
   // Note if image_classes_ is null, all classes are included in the image.
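One header change worth calling out: the trampoline factories now return std::unique_ptr<const std::vector<uint8_t>> instead of a raw owning pointer, so the ownership transfer is visible in the signature. A minimal illustration of the idiom (the bytes are placeholders, not a real trampoline):

    #include <cstdint>
    #include <memory>
    #include <vector>

    // Before: const std::vector<uint8_t>* Create();  // Caller must remember to delete.
    // After: ownership is explicit and leak-free by construction.
    std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline() {
      auto code = std::make_unique<std::vector<uint8_t>>();
      code->push_back(0xE8);  // Placeholder byte.
      return code;  // Implicitly converts to unique_ptr<const vector<uint8_t>>.
    }
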
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 0037564..b9a5a78 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -143,21 +143,11 @@
   // TODO: check that all Method::GetCode() values are non-null
 }
 
-TEST_F(CompilerDriverTest, DISABLED_AbstractMethodErrorStub) {
-  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
-  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
+TEST_F(CompilerDriverTest, AbstractMethodErrorStub) {
   TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
-    CompileVirtualMethod(ScopedNullHandle<mirror::ClassLoader>(),
-                         "java.lang.Class",
-                         "isFinalizable",
-                         "()Z");
-    CompileDirectMethod(ScopedNullHandle<mirror::ClassLoader>(),
-                        "java.lang.Object",
-                        "<init>",
-                        "()V");
     class_loader = LoadDex("AbstractMethod");
   }
   ASSERT_TRUE(class_loader != nullptr);
@@ -197,8 +187,6 @@
 };
 
 TEST_F(CompilerDriverMethodsTest, Selection) {
-  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
-  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
   TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   Thread* self = Thread::Current();
   jobject class_loader;
@@ -303,8 +291,6 @@
 };
 
 TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) {
-  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
-  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
   TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   Thread* self = Thread::Current();
   jobject class_loader;
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 1bd4c3a..f20dba3 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -21,7 +21,7 @@
 namespace art {
 
 CompilerOptions::CompilerOptions()
-    : compiler_filter_(kDefaultCompilerFilter),
+    : compiler_filter_(CompilerFilter::kDefaultCompilerFilter),
       huge_method_threshold_(kDefaultHugeMethodThreshold),
       large_method_threshold_(kDefaultLargeMethodThreshold),
       small_method_threshold_(kDefaultSmallMethodThreshold),
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index c67ab6e..6bbd3c5 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -31,7 +31,6 @@
 class CompilerOptions FINAL {
  public:
   // Guide heuristics to determine whether to compile method if profile data not available.
-  static const CompilerFilter::Filter kDefaultCompilerFilter = CompilerFilter::kSpeed;
   static const size_t kDefaultHugeMethodThreshold = 10000;
   static const size_t kDefaultLargeMethodThreshold = 600;
   static const size_t kDefaultSmallMethodThreshold = 60;
diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc
index e458b98..b0ee448 100644
--- a/compiler/driver/dex_compilation_unit.cc
+++ b/compiler/driver/dex_compilation_unit.cc
@@ -22,8 +22,7 @@
 
 namespace art {
 
-DexCompilationUnit::DexCompilationUnit(CompilationUnit* cu,
-                                       jobject class_loader,
+DexCompilationUnit::DexCompilationUnit(jobject class_loader,
                                        ClassLinker* class_linker,
                                        const DexFile& dex_file,
                                        const DexFile::CodeItem* code_item,
@@ -32,8 +31,7 @@
                                        uint32_t access_flags,
                                        const VerifiedMethod* verified_method,
                                        Handle<mirror::DexCache> dex_cache)
-    : cu_(cu),
-      class_loader_(class_loader),
+    : class_loader_(class_loader),
       class_linker_(class_linker),
       dex_file_(&dex_file),
       code_item_(code_item),
diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h
index 16872f4..854927d 100644
--- a/compiler/driver/dex_compilation_unit.h
+++ b/compiler/driver/dex_compilation_unit.h
@@ -30,15 +30,11 @@
 class DexCache;
 }  // namespace mirror
 class ClassLinker;
-struct CompilationUnit;
 class VerifiedMethod;
 
 class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> {
  public:
-  explicit DexCompilationUnit(CompilationUnit* cu);
-
-  DexCompilationUnit(CompilationUnit* cu,
-                     jobject class_loader,
+  DexCompilationUnit(jobject class_loader,
                      ClassLinker* class_linker,
                      const DexFile& dex_file,
                      const DexFile::CodeItem* code_item,
@@ -48,10 +44,6 @@
                      const VerifiedMethod* verified_method,
                      Handle<mirror::DexCache> dex_cache);
 
-  CompilationUnit* GetCompilationUnit() const {
-    return cu_;
-  }
-
   jobject GetClassLoader() const {
     return class_loader_;
   }
@@ -121,8 +113,6 @@
   }
 
  private:
-  CompilationUnit* const cu_;
-
   const jobject class_loader_;
 
   ClassLinker* const class_linker_;
diff --git a/runtime/exception_test.cc b/compiler/exception_test.cc
similarity index 76%
rename from runtime/exception_test.cc
rename to compiler/exception_test.cc
index 18ccd08..38ac052 100644
--- a/runtime/exception_test.cc
+++ b/compiler/exception_test.cc
@@ -16,6 +16,7 @@
 
 #include <memory>
 
+#include "base/arena_allocator.h"
 #include "class_linker.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
@@ -27,11 +28,11 @@
 #include "mirror/object-inl.h"
 #include "mirror/stack_trace_element.h"
 #include "oat_quick_method_header.h"
+#include "optimizing/stack_map_stream.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
 #include "thread.h"
-#include "vmap_table.h"
 
 namespace art {
 
@@ -57,40 +58,27 @@
       fake_code_.push_back(0x70 | i);
     }
 
-    fake_mapping_data_.PushBackUnsigned(4);  // first element is count
-    fake_mapping_data_.PushBackUnsigned(4);  // total (non-length) elements
-    fake_mapping_data_.PushBackUnsigned(2);  // count of pc to dex elements
-                                      // ---  pc to dex table
-    fake_mapping_data_.PushBackUnsigned(3 - 0);  // offset 3
-    fake_mapping_data_.PushBackSigned(3 - 0);    // maps to dex offset 3
-                                      // ---  dex to pc table
-    fake_mapping_data_.PushBackUnsigned(3 - 0);  // offset 3
-    fake_mapping_data_.PushBackSigned(3 - 0);    // maps to dex offset 3
+    ArenaPool pool;
+    ArenaAllocator allocator(&pool);
+    StackMapStream stack_maps(&allocator);
+    stack_maps.BeginStackMapEntry(/* dex_pc */ 3u,
+                                  /* native_pc_offset */ 3u,
+                                  /* register_mask */ 0u,
+                                  /* sp_mask */ nullptr,
+                                  /* num_dex_registers */ 0u,
+                                  /* inlining_depth */ 0u);
+    stack_maps.EndStackMapEntry();
+    size_t stack_maps_size = stack_maps.PrepareForFillIn();
+    size_t stack_maps_offset = stack_maps_size + sizeof(OatQuickMethodHeader);
 
-    fake_vmap_table_data_.PushBackUnsigned(0 + VmapTable::kEntryAdjustment);
-
-    fake_gc_map_.push_back(0);  // 0 bytes to encode references and native pc offsets.
-    fake_gc_map_.push_back(0);
-    fake_gc_map_.push_back(0);  // 0 entries.
-    fake_gc_map_.push_back(0);
-
-    const std::vector<uint8_t>& fake_vmap_table_data = fake_vmap_table_data_.GetData();
-    const std::vector<uint8_t>& fake_mapping_data = fake_mapping_data_.GetData();
-    uint32_t vmap_table_offset = sizeof(OatQuickMethodHeader) + fake_vmap_table_data.size();
-    uint32_t mapping_table_offset = vmap_table_offset + fake_mapping_data.size();
-    uint32_t gc_map_offset = mapping_table_offset + fake_gc_map_.size();
-    OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset, gc_map_offset,
-                                       4 * sizeof(void*), 0u, 0u, code_size);
-    fake_header_code_and_maps_.resize(sizeof(method_header));
-    memcpy(&fake_header_code_and_maps_[0], &method_header, sizeof(method_header));
-    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(),
-                                      fake_vmap_table_data.begin(), fake_vmap_table_data.end());
-    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(),
-                                      fake_mapping_data.begin(), fake_mapping_data.end());
-    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(),
-                                      fake_gc_map_.begin(), fake_gc_map_.end());
-    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.end(),
-                                      fake_code_.begin(), fake_code_.end());
+    fake_header_code_and_maps_.resize(stack_maps_offset + fake_code_.size());
+    MemoryRegion stack_maps_region(&fake_header_code_and_maps_[0], stack_maps_size);
+    stack_maps.FillIn(stack_maps_region);
+    OatQuickMethodHeader method_header(stack_maps_offset, 4 * sizeof(void*), 0u, 0u, code_size);
+    memcpy(&fake_header_code_and_maps_[stack_maps_size], &method_header, sizeof(method_header));
+    std::copy(fake_code_.begin(),
+              fake_code_.end(),
+              fake_header_code_and_maps_.begin() + stack_maps_offset);
 
     // Align the code.
     const size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
@@ -109,7 +97,7 @@
 
     if (kRuntimeISA == kArm) {
       // Check that the Thumb2 adjustment will be a NOP, see EntryPointToCodePointer().
-      CHECK_ALIGNED(mapping_table_offset, 2);
+      CHECK_ALIGNED(stack_maps_offset, 2);
     }
 
     method_f_ = my_klass_->FindVirtualMethod("f", "()I", sizeof(void*));
@@ -124,9 +112,6 @@
   const DexFile* dex_;
 
   std::vector<uint8_t> fake_code_;
-  Leb128EncodingVector<> fake_mapping_data_;
-  Leb128EncodingVector<> fake_vmap_table_data_;
-  std::vector<uint8_t> fake_gc_map_;
   std::vector<uint8_t> fake_header_code_and_maps_;
 
   ArtMethod* method_f_;
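The rewritten fixture replaces the hand-rolled LEB128 mapping/vmap/GC-map blobs with a single StackMapStream-generated region, laid out as [stack maps][OatQuickMethodHeader][code], so the header sits immediately before the code it describes. A small sketch of that layout arithmetic, with a plain struct standing in for ART's method header:

    #include <algorithm>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct FakeHeader { uint32_t maps_offset; uint32_t code_size; };

    int main() {
      std::vector<uint8_t> stack_maps(12, 0xAB);  // Pretend FillIn() output.
      std::vector<uint8_t> code(7, 0x70);
      const size_t code_offset = stack_maps.size() + sizeof(FakeHeader);
      std::vector<uint8_t> buffer(code_offset + code.size());
      std::copy(stack_maps.begin(), stack_maps.end(), buffer.begin());
      const FakeHeader header{static_cast<uint32_t>(code_offset),
                              static_cast<uint32_t>(code.size())};
      std::memcpy(&buffer[stack_maps.size()], &header, sizeof(header));
      std::copy(code.begin(), code.end(), buffer.begin() + code_offset);
      // Code starts at buffer.data() + code_offset; the header is recovered by
      // stepping back sizeof(FakeHeader) bytes from the code pointer.
      return buffer[code_offset] == 0x70 ? 0 : 1;
    }
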
diff --git a/compiler/gc_map_builder.h b/compiler/gc_map_builder.h
deleted file mode 100644
index 2ef7f1a..0000000
--- a/compiler/gc_map_builder.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_GC_MAP_BUILDER_H_
-#define ART_COMPILER_GC_MAP_BUILDER_H_
-
-#include <vector>
-
-#include "base/bit_utils.h"
-#include "gc_map.h"
-
-namespace art {
-
-class GcMapBuilder {
- public:
-  template <typename Vector>
-  GcMapBuilder(Vector* table, size_t entries, uint32_t max_native_offset,
-               size_t references_width)
-      : entries_(entries), references_width_(entries != 0u ? references_width : 0u),
-        native_offset_width_(entries != 0 && max_native_offset != 0
-                             ? sizeof(max_native_offset) - CLZ(max_native_offset) / 8u
-                             : 0u),
-        in_use_(entries) {
-    static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-
-    // Resize table and set up header.
-    table->resize((EntryWidth() * entries) + sizeof(uint32_t));
-    table_ = table->data();
-    CHECK_LT(native_offset_width_, 1U << 3);
-    (*table)[0] = native_offset_width_ & 7;
-    CHECK_LT(references_width_, 1U << 13);
-    (*table)[0] |= (references_width_ << 3) & 0xFF;
-    (*table)[1] = (references_width_ >> 5) & 0xFF;
-    CHECK_LT(entries, 1U << 16);
-    (*table)[2] = entries & 0xFF;
-    (*table)[3] = (entries >> 8) & 0xFF;
-  }
-
-  void AddEntry(uint32_t native_offset, const uint8_t* references) {
-    size_t table_index = TableIndex(native_offset);
-    while (in_use_[table_index]) {
-      table_index = (table_index + 1) % entries_;
-    }
-    in_use_[table_index] = true;
-    SetCodeOffset(table_index, native_offset);
-    DCHECK_EQ(native_offset, GetCodeOffset(table_index));
-    SetReferences(table_index, references);
-  }
-
- private:
-  size_t TableIndex(uint32_t native_offset) {
-    return NativePcOffsetToReferenceMap::Hash(native_offset) % entries_;
-  }
-
-  uint32_t GetCodeOffset(size_t table_index) {
-    uint32_t native_offset = 0;
-    size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
-    for (size_t i = 0; i < native_offset_width_; i++) {
-      native_offset |= table_[table_offset + i] << (i * 8);
-    }
-    return native_offset;
-  }
-
-  void SetCodeOffset(size_t table_index, uint32_t native_offset) {
-    size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
-    for (size_t i = 0; i < native_offset_width_; i++) {
-      table_[table_offset + i] = (native_offset >> (i * 8)) & 0xFF;
-    }
-  }
-
-  void SetReferences(size_t table_index, const uint8_t* references) {
-    size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
-    memcpy(&table_[table_offset + native_offset_width_], references, references_width_);
-  }
-
-  size_t EntryWidth() const {
-    return native_offset_width_ + references_width_;
-  }
-
-  // Number of entries in the table.
-  const size_t entries_;
-  // Number of bytes used to encode the reference bitmap.
-  const size_t references_width_;
-  // Number of bytes used to encode a native offset.
-  const size_t native_offset_width_;
-  // Entries that are in use.
-  std::vector<bool> in_use_;
-  // The table we're building.
-  uint8_t* table_;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_GC_MAP_BUILDER_H_
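GcMapBuilder goes away with the rest of the GC-map plumbing; the Optimizing stack maps carry this information instead. For the record, the deleted builder packed its 4-byte header as: bits 0-2 the native-offset width, bits 3-15 the references width, bits 16-31 the entry count. A standalone round-trip check of that packing, independent of ART:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t native_offset_width = 3;  // Must fit in 3 bits.
      const uint32_t references_width = 150;   // Must fit in 13 bits.
      const uint32_t entries = 1000;           // Must fit in 16 bits.
      uint8_t header[4];
      header[0] = (native_offset_width & 7) | ((references_width << 3) & 0xFF);
      header[1] = (references_width >> 5) & 0xFF;
      header[2] = entries & 0xFF;
      header[3] = (entries >> 8) & 0xFF;
      // Decode and verify the round trip.
      assert((header[0] & 7u) == native_offset_width);
      assert(((header[0] >> 3) | (uint32_t{header[1]} << 5)) == references_width);
      assert((uint32_t{header[2]} | (uint32_t{header[3]} << 8)) == entries);
    }
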
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7779e44..91579e9 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -288,17 +288,14 @@
 }
 
 TEST_F(ImageTest, WriteReadUncompressed) {
-  TEST_DISABLED_FOR_READ_BARRIER();  // b/27578460
   TestWriteRead(ImageHeader::kStorageModeUncompressed);
 }
 
 TEST_F(ImageTest, WriteReadLZ4) {
-  TEST_DISABLED_FOR_READ_BARRIER();  // b/27578460
   TestWriteRead(ImageHeader::kStorageModeLZ4);
 }
 
 TEST_F(ImageTest, WriteReadLZ4HC) {
-  TEST_DISABLED_FOR_READ_BARRIER();  // b/27578460
   TestWriteRead(ImageHeader::kStorageModeLZ4HC);
 }
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index c747ffa..00ff522 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -576,7 +576,16 @@
       }
     } else if (object->GetClass<kVerifyNone>()->IsStringClass()) {
       bin = kBinString;  // Strings are almost always immutable (except for object header).
-    }  // else bin = kBinRegular
+    } else if (object->GetClass<kVerifyNone>() ==
+        Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kJavaLangObject)) {
+      // Instance of java.lang.Object, probably a lock object. This means it will be dirty when we
+      // synchronize on it.
+      bin = kBinMiscDirty;
+    } else if (object->IsDexCache()) {
+      // Dex file field becomes dirty when the image is loaded.
+      bin = kBinMiscDirty;
+    }
+    // else bin = kBinRegular
   }
 
   size_t oat_index = GetOatIndex(object);
@@ -644,8 +653,7 @@
   for (ImageInfo& image_info : image_infos_) {
     ImageSection unused_sections[ImageHeader::kSectionCount];
     const size_t length = RoundUp(
-        image_info.CreateImageSections(target_ptr_size_, unused_sections),
-        kPageSize);
+        image_info.CreateImageSections(unused_sections), kPageSize);
 
     std::string error_msg;
     image_info.image_.reset(MemMap::MapAnonymous("image writer image",
@@ -1205,6 +1213,20 @@
           AssignMethodOffset(&m, type, oat_index);
         }
         (any_dirty ? dirty_methods_ : clean_methods_) += num_methods;
+
+        // Assign offsets for all runtime methods in the IMT since these may keep conflict tables
+        // live.
+        if (as_klass->ShouldHaveEmbeddedImtAndVTable()) {
+          for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+            ArtMethod* imt_method = as_klass->GetEmbeddedImTableEntry(i, target_ptr_size_);
+            DCHECK(imt_method != nullptr);
+            if (imt_method->IsRuntimeMethod() &&
+                !IsInBootImage(imt_method) &&
+                !NativeRelocationAssigned(imt_method)) {
+              AssignMethodOffset(imt_method, kNativeObjectRelocationTypeRuntimeMethod, oat_index);
+            }
+          }
+        }
       }
     } else if (h_obj->IsObjectArray()) {
       // Walk elements of an object array.
@@ -1228,13 +1250,37 @@
   }
 }
 
+bool ImageWriter::NativeRelocationAssigned(void* ptr) const {
+  return native_object_relocations_.find(ptr) != native_object_relocations_.end();
+}
+
+void ImageWriter::TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index) {
+  // No offset, or already assigned.
+  if (table == nullptr || NativeRelocationAssigned(table)) {
+    return;
+  }
+  CHECK(!IsInBootImage(table));
+  // If the method is a conflict method we also want to assign the conflict table offset.
+  ImageInfo& image_info = GetImageInfo(oat_index);
+  const size_t size = table->ComputeSize(target_ptr_size_);
+  native_object_relocations_.emplace(
+      table,
+      NativeObjectRelocation {
+          oat_index,
+          image_info.bin_slot_sizes_[kBinIMTConflictTable],
+          kNativeObjectRelocationTypeIMTConflictTable});
+  image_info.bin_slot_sizes_[kBinIMTConflictTable] += size;
+}
+
 void ImageWriter::AssignMethodOffset(ArtMethod* method,
                                      NativeObjectRelocationType type,
                                      size_t oat_index) {
   DCHECK(!IsInBootImage(method));
-  auto it = native_object_relocations_.find(method);
-  CHECK(it == native_object_relocations_.end()) << "Method " << method << " already assigned "
+  CHECK(!NativeRelocationAssigned(method)) << "Method " << method << " already assigned "
       << PrettyMethod(method);
+  if (method->IsRuntimeMethod()) {
+    TryAssignConflictTableOffset(method->GetImtConflictTable(target_ptr_size_), oat_index);
+  }
   ImageInfo& image_info = GetImageInfo(oat_index);
   size_t& offset = image_info.bin_slot_sizes_[BinTypeForNativeRelocationType(type)];
   native_object_relocations_.emplace(method, NativeObjectRelocation { oat_index, offset, type });
@@ -1283,8 +1329,7 @@
   // know where image_roots is going to end up
   image_objects_offset_begin_ = RoundUp(sizeof(ImageHeader), kObjectAlignment);  // 64-bit-alignment
 
-  // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots.
-  heap->VisitObjects(WalkFieldsCallback, this);
+  const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
   // Write the image runtime methods.
   image_methods_[ImageHeader::kResolutionMethod] = runtime->GetResolutionMethod();
   image_methods_[ImageHeader::kImtConflictMethod] = runtime->GetImtConflictMethod();
@@ -1294,31 +1339,19 @@
       runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
   image_methods_[ImageHeader::kRefsAndArgsSaveMethod] =
       runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
-
-  // Add room for fake length prefixed array for holding the image methods.
-  const auto image_method_type = kNativeObjectRelocationTypeArtMethodArrayClean;
-  auto it = native_object_relocations_.find(&image_method_array_);
-  CHECK(it == native_object_relocations_.end());
-  ImageInfo& default_image_info = GetImageInfo(GetDefaultOatIndex());
-  size_t& offset =
-      default_image_info.bin_slot_sizes_[BinTypeForNativeRelocationType(image_method_type)];
-  if (!compile_app_image_) {
-    native_object_relocations_.emplace(&image_method_array_,
-        NativeObjectRelocation { GetDefaultOatIndex(), offset, image_method_type });
-  }
-  size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
-  const size_t array_size = LengthPrefixedArray<ArtMethod>::ComputeSize(
-      0, ArtMethod::Size(target_ptr_size_), method_alignment);
-  CHECK_ALIGNED_PARAM(array_size, method_alignment);
-  offset += array_size;
+  // Visit image methods first to have the main runtime methods in the first image.
   for (auto* m : image_methods_) {
     CHECK(m != nullptr);
     CHECK(m->IsRuntimeMethod());
     DCHECK_EQ(compile_app_image_, IsInBootImage(m)) << "Trampolines should be in boot image";
     if (!IsInBootImage(m)) {
-      AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean, GetDefaultOatIndex());
+      AssignMethodOffset(m, kNativeObjectRelocationTypeRuntimeMethod, GetDefaultOatIndex());
     }
   }
+
+  // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots.
+  heap->VisitObjects(WalkFieldsCallback, this);
+
   // Calculate size of the dex cache arrays slot and prepare offsets.
   PrepareDexCacheArraySlots();
 
@@ -1337,15 +1370,22 @@
   for (ImageInfo& image_info : image_infos_) {
     size_t bin_offset = image_objects_offset_begin_;
     for (size_t i = 0; i != kBinSize; ++i) {
+      switch (i) {
+        case kBinArtMethodClean:
+        case kBinArtMethodDirty: {
+          bin_offset = RoundUp(bin_offset, method_alignment);
+          break;
+        }
+        case kBinIMTConflictTable: {
+          bin_offset = RoundUp(bin_offset, target_ptr_size_);
+          break;
+        }
+        default: {
+          // Normal alignment.
+        }
+      }
       image_info.bin_slot_offsets_[i] = bin_offset;
       bin_offset += image_info.bin_slot_sizes_[i];
-      if (i == kBinArtField) {
-        static_assert(kBinArtField + 1 == kBinArtMethodClean, "Methods follow fields.");
-        static_assert(alignof(ArtField) == 4u, "ArtField alignment is 4.");
-        DCHECK_ALIGNED(bin_offset, 4u);
-        DCHECK(method_alignment == 4u || method_alignment == 8u);
-        bin_offset = RoundUp(bin_offset, method_alignment);
-      }
     }
     // NOTE: There may be additional padding between the bin slots and the intern table.
     DCHECK_EQ(image_info.image_end_,
@@ -1358,9 +1398,7 @@
     image_info.image_begin_ = global_image_begin_ + image_offset;
     image_info.image_offset_ = image_offset;
     ImageSection unused_sections[ImageHeader::kSectionCount];
-    image_info.image_size_ = RoundUp(
-        image_info.CreateImageSections(target_ptr_size_, unused_sections),
-        kPageSize);
+    image_info.image_size_ = RoundUp(image_info.CreateImageSections(unused_sections), kPageSize);
     // There should be no gaps until the next image.
     image_offset += image_info.image_size_;
   }
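The bin layout above is a running-offset pass: each bin begins where the previous one ended, rounded up to that bin's required alignment (method alignment for the ArtMethod bins, pointer size for the IMT conflict tables). A standalone sketch of the accumulation, with illustrative sizes and alignments:

    #include <cstddef>
    #include <cstdio>

    constexpr size_t RoundUp(size_t x, size_t alignment) {  // alignment: power of two.
      return (x + alignment - 1) & ~(alignment - 1);
    }

    int main() {
      // sizes[i]: bytes binned into bin i; alignments[i]: required start alignment
      // (1 means no extra alignment, as for most bins).
      const size_t sizes[] = {100, 37, 64, 20};
      const size_t alignments[] = {1, 8, 1, 8};
      size_t offsets[4];
      size_t bin_offset = 128;  // Stand-in for image_objects_offset_begin_.
      for (size_t i = 0; i != 4; ++i) {
        bin_offset = RoundUp(bin_offset, alignments[i]);
        offsets[i] = bin_offset;
        bin_offset += sizes[i];
      }
      for (size_t i = 0; i != 4; ++i) {
        std::printf("bin %zu: offset %zu size %zu\n", i, offsets[i], sizes[i]);
      }
    }
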
@@ -1387,42 +1425,52 @@
   // Note that image_info.image_end_ is left at end of used mirror object section.
 }
 
-size_t ImageWriter::ImageInfo::CreateImageSections(size_t target_ptr_size,
-                                                   ImageSection* out_sections) const {
+size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) const {
   DCHECK(out_sections != nullptr);
+
+  // Do not round up any sections here that are represented by the bins, since doing so would
+  // break the bin offsets.
+
   // Objects section
-  auto* objects_section = &out_sections[ImageHeader::kSectionObjects];
+  ImageSection* objects_section = &out_sections[ImageHeader::kSectionObjects];
   *objects_section = ImageSection(0u, image_end_);
-  size_t cur_pos = objects_section->End();
+
   // Add field section.
-  auto* field_section = &out_sections[ImageHeader::kSectionArtFields];
-  *field_section = ImageSection(cur_pos, bin_slot_sizes_[kBinArtField]);
+  ImageSection* field_section = &out_sections[ImageHeader::kSectionArtFields];
+  *field_section = ImageSection(bin_slot_offsets_[kBinArtField], bin_slot_sizes_[kBinArtField]);
   CHECK_EQ(bin_slot_offsets_[kBinArtField], field_section->Offset());
-  cur_pos = field_section->End();
-  // Round up to the alignment the required by the method section.
-  cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size));
+
   // Add method section.
-  auto* methods_section = &out_sections[ImageHeader::kSectionArtMethods];
-  *methods_section = ImageSection(cur_pos,
-                                  bin_slot_sizes_[kBinArtMethodClean] +
-                                      bin_slot_sizes_[kBinArtMethodDirty]);
-  CHECK_EQ(bin_slot_offsets_[kBinArtMethodClean], methods_section->Offset());
-  cur_pos = methods_section->End();
+  ImageSection* methods_section = &out_sections[ImageHeader::kSectionArtMethods];
+  *methods_section = ImageSection(
+      bin_slot_offsets_[kBinArtMethodClean],
+      bin_slot_sizes_[kBinArtMethodClean] + bin_slot_sizes_[kBinArtMethodDirty]);
+
+  // Conflict tables section.
+  ImageSection* imt_conflict_tables_section = &out_sections[ImageHeader::kSectionIMTConflictTables];
+  *imt_conflict_tables_section = ImageSection(bin_slot_offsets_[kBinIMTConflictTable],
+                                              bin_slot_sizes_[kBinIMTConflictTable]);
+
+  // Runtime methods section.
+  ImageSection* runtime_methods_section = &out_sections[ImageHeader::kSectionRuntimeMethods];
+  *runtime_methods_section = ImageSection(bin_slot_offsets_[kBinRuntimeMethod],
+                                          bin_slot_sizes_[kBinRuntimeMethod]);
+
   // Add dex cache arrays section.
-  auto* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays];
-  *dex_cache_arrays_section = ImageSection(cur_pos, bin_slot_sizes_[kBinDexCacheArray]);
-  CHECK_EQ(bin_slot_offsets_[kBinDexCacheArray], dex_cache_arrays_section->Offset());
-  cur_pos = dex_cache_arrays_section->End();
+  ImageSection* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays];
+  *dex_cache_arrays_section = ImageSection(bin_slot_offsets_[kBinDexCacheArray],
+                                           bin_slot_sizes_[kBinDexCacheArray]);
+
   // Round up to the alignment the string table expects. See HashSet::WriteToMemory.
-  cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
+  size_t cur_pos = RoundUp(dex_cache_arrays_section->End(), sizeof(uint64_t));
   // Calculate the size of the interned strings.
-  auto* interned_strings_section = &out_sections[ImageHeader::kSectionInternedStrings];
+  ImageSection* interned_strings_section = &out_sections[ImageHeader::kSectionInternedStrings];
   *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
   cur_pos = interned_strings_section->End();
   // Round up to the alignment the class table expects. See HashSet::WriteToMemory.
   cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
   // Calculate the size of the class table section.
-  auto* class_table_section = &out_sections[ImageHeader::kSectionClassTable];
+  ImageSection* class_table_section = &out_sections[ImageHeader::kSectionClassTable];
   *class_table_section = ImageSection(cur_pos, class_table_bytes_);
   cur_pos = class_table_section->End();
   // Image end goes right before the start of the image bitmap.
@@ -1437,7 +1485,7 @@
 
   // Create the image sections.
   ImageSection sections[ImageHeader::kSectionCount];
-  const size_t image_end = image_info.CreateImageSections(target_ptr_size_, sections);
+  const size_t image_end = image_info.CreateImageSections(sections);
 
   // Finally bitmap section.
   const size_t bitmap_bytes = image_info.image_bitmap_->Size();
@@ -1522,8 +1570,20 @@
   ImageWriter* const image_writer_;
 };
 
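+// Rewrite each entry of a copied IMT conflict table so that both the interface method and
+// the implementation method point at their locations in the image.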
+void ImageWriter::CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy) {
+  const size_t count = orig->NumEntries(target_ptr_size_);
+  for (size_t i = 0; i < count; ++i) {
+    ArtMethod* interface_method = orig->GetInterfaceMethod(i, target_ptr_size_);
+    ArtMethod* implementation_method = orig->GetImplementationMethod(i, target_ptr_size_);
+    copy->SetInterfaceMethod(i, target_ptr_size_, NativeLocationInImage(interface_method));
+    copy->SetImplementationMethod(i,
+                                  target_ptr_size_,
+                                  NativeLocationInImage(implementation_method));
+  }
+}
+
 void ImageWriter::CopyAndFixupNativeData(size_t oat_index) {
-  ImageInfo& image_info = GetImageInfo(oat_index);
+  const ImageInfo& image_info = GetImageInfo(oat_index);
   // Copy ArtFields and methods to their locations and update the array for convenience.
   for (auto& pair : native_object_relocations_) {
     NativeObjectRelocation& relocation = pair.second;
@@ -1541,6 +1601,7 @@
             GetImageAddress(reinterpret_cast<ArtField*>(pair.first)->GetDeclaringClass()));
         break;
       }
+      case kNativeObjectRelocationTypeRuntimeMethod:
       case kNativeObjectRelocationTypeArtMethodClean:
       case kNativeObjectRelocationTypeArtMethodDirty: {
         CopyAndFixupMethod(reinterpret_cast<ArtMethod*>(pair.first),
@@ -1566,26 +1627,22 @@
       case kNativeObjectRelocationTypeDexCacheArray:
        // Nothing to copy here; everything is done in FixupDexCache().
         break;
+      case kNativeObjectRelocationTypeIMTConflictTable: {
+        auto* orig_table = reinterpret_cast<ImtConflictTable*>(pair.first);
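+        // Placement-new the copy into image memory; the fixup call fills in its entries.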
+        CopyAndFixupImtConflictTable(
+            orig_table,
+            new(dest)ImtConflictTable(orig_table->NumEntries(target_ptr_size_), target_ptr_size_));
+        break;
+      }
     }
   }
   // Fixup the image method roots.
   auto* image_header = reinterpret_cast<ImageHeader*>(image_info.image_->Begin());
-  const ImageSection& methods_section = image_header->GetMethodsSection();
   for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) {
     ArtMethod* method = image_methods_[i];
     CHECK(method != nullptr);
-    // Only place runtime methods in the image of the default oat file.
-    if (method->IsRuntimeMethod() && oat_index != GetDefaultOatIndex()) {
-      continue;
-    }
     if (!IsInBootImage(method)) {
-      auto it = native_object_relocations_.find(method);
-      CHECK(it != native_object_relocations_.end()) << "No forwarding for " << PrettyMethod(method);
-      NativeObjectRelocation& relocation = it->second;
-      CHECK(methods_section.Contains(relocation.offset)) << relocation.offset << " not in "
-          << methods_section;
-      CHECK(relocation.IsArtMethodRelocation()) << relocation.type;
-      method = reinterpret_cast<ArtMethod*>(global_image_begin_ + it->second.offset);
+      method = NativeLocationInImage(method);
     }
     image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), method);
   }
@@ -2048,24 +2105,28 @@
 
   // The resolution method has a special trampoline to call.
   Runtime* runtime = Runtime::Current();
-  if (UNLIKELY(orig == runtime->GetResolutionMethod())) {
-    copy->SetEntryPointFromQuickCompiledCodePtrSize(
-        GetOatAddress(kOatAddressQuickResolutionTrampoline), target_ptr_size_);
-  } else if (UNLIKELY(orig == runtime->GetImtConflictMethod() ||
-                      orig == runtime->GetImtUnimplementedMethod())) {
-    copy->SetEntryPointFromQuickCompiledCodePtrSize(
-        GetOatAddress(kOatAddressQuickIMTConflictTrampoline), target_ptr_size_);
-  } else if (UNLIKELY(orig->IsRuntimeMethod())) {
-    bool found_one = false;
-    for (size_t i = 0; i < static_cast<size_t>(Runtime::kLastCalleeSaveType); ++i) {
-      auto idx = static_cast<Runtime::CalleeSaveType>(i);
-      if (runtime->HasCalleeSaveMethod(idx) && runtime->GetCalleeSaveMethod(idx) == orig) {
-        found_one = true;
-        break;
+  if (orig->IsRuntimeMethod()) {
+    ImtConflictTable* orig_table = orig->GetImtConflictTable(target_ptr_size_);
+    if (orig_table != nullptr) {
+      // Special IMT conflict method, normal IMT conflict method or unimplemented IMT method.
+      copy->SetEntryPointFromQuickCompiledCodePtrSize(
+          GetOatAddress(kOatAddressQuickIMTConflictTrampoline), target_ptr_size_);
+      copy->SetImtConflictTable(NativeLocationInImage(orig_table), target_ptr_size_);
+    } else if (UNLIKELY(orig == runtime->GetResolutionMethod())) {
+      copy->SetEntryPointFromQuickCompiledCodePtrSize(
+          GetOatAddress(kOatAddressQuickResolutionTrampoline), target_ptr_size_);
+    } else {
+      bool found_one = false;
+      for (size_t i = 0; i < static_cast<size_t>(Runtime::kLastCalleeSaveType); ++i) {
+        auto idx = static_cast<Runtime::CalleeSaveType>(i);
+        if (runtime->HasCalleeSaveMethod(idx) && runtime->GetCalleeSaveMethod(idx) == orig) {
+          found_one = true;
+          break;
+        }
       }
+      CHECK(found_one) << "Expected to find callee save method but got " << PrettyMethod(orig);
+      CHECK(copy->IsRuntimeMethod());
     }
-    CHECK(found_one) << "Expected to find callee save method but got " << PrettyMethod(orig);
-    CHECK(copy->IsRuntimeMethod());
   } else {
    // We assume all methods have code. If they don't currently, we set them to use the
     // resolution trampoline. Abstract methods never have code and so we need to make sure their
@@ -2132,6 +2193,10 @@
       return kBinArtMethodDirty;
     case kNativeObjectRelocationTypeDexCacheArray:
       return kBinDexCacheArray;
+    case kNativeObjectRelocationTypeRuntimeMethod:
+      return kBinRuntimeMethod;
+    case kNativeObjectRelocationTypeIMTConflictTable:
+      return kBinIMTConflictTable;
   }
   UNREACHABLE();
 }
@@ -2233,7 +2298,6 @@
       compile_app_image_(compile_app_image),
       target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
       image_infos_(oat_filenames.size()),
-      image_method_array_(ImageHeader::kImageMethodsCount),
       dirty_methods_(0u),
       clean_methods_(0u),
       image_storage_mode_(image_storage_mode),
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index f204b28..51976c5 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -149,16 +149,17 @@
   void RecordImageAllocations() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Classify different kinds of bins that objects end up getting packed into during image writing.
+  // Ordered from dirtiest to cleanest (until ArtMethods).
   enum Bin {
-    // Likely-clean:
-    kBinString,                        // [String] Almost always immutable (except for obj header).
+    kBinMiscDirty,                // Dex caches, object locks, etc...
+    kBinClassVerified,            // Class verified, but initializers haven't been run
     // Unknown mix of clean/dirty:
     kBinRegular,
-    // Likely-dirty:
+    kBinClassInitialized,         // Class initializers have been run
     // All classes get their own bins since their fields are often dirty
     kBinClassInitializedFinalStatics,  // Class initializers have been run, no non-final statics
-    kBinClassInitialized,         // Class initializers have been run
-    kBinClassVerified,            // Class verified, but initializers haven't been run
+    // Likely-clean:
+    kBinString,                        // [String] Almost always immutable (except for obj header).
     // Add more bins here if we add more segregation code.
     // Non-mirror fields must be below.
     // ArtFields should be always clean.
@@ -168,6 +169,10 @@
     // ArtMethods may be dirty if the class has native methods or a declaring class that isn't
     // initialized.
     kBinArtMethodDirty,
+    // Conflict tables (clean).
+    kBinIMTConflictTable,
+    // Runtime methods (always clean; not stored in a LengthPrefixedArray).
+    kBinRuntimeMethod,
     // Dex cache arrays have a special slot for PC-relative addressing. Since they are
     // huge, and as such their dirtiness is not important for the clean/dirty separation,
     // we arbitrarily keep them at the end of the native data.
@@ -185,6 +190,8 @@
     kNativeObjectRelocationTypeArtMethodArrayClean,
     kNativeObjectRelocationTypeArtMethodDirty,
     kNativeObjectRelocationTypeArtMethodArrayDirty,
+    kNativeObjectRelocationTypeRuntimeMethod,
+    kNativeObjectRelocationTypeIMTConflictTable,
     kNativeObjectRelocationTypeDexCacheArray,
   };
   friend std::ostream& operator<<(std::ostream& stream, const NativeObjectRelocationType& type);
@@ -239,7 +246,7 @@
 
     // Create the image sections into the out sections variable, returns the size of the image
     // excluding the bitmap.
-    size_t CreateImageSections(size_t target_ptr_size, ImageSection* out_sections) const;
+    size_t CreateImageSections(ImageSection* out_sections) const;
 
     std::unique_ptr<MemMap> image_;  // Memory mapped for generating the image.
 
@@ -394,6 +401,8 @@
   void CopyAndFixupObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
   void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy, const ImageInfo& image_info)
       SHARED_REQUIRES(Locks::mutator_lock_);
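+  // Fix up a copied IMT conflict table by mapping its methods to their image locations.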
+  void CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupClass(mirror::Class* orig, mirror::Class* copy)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupObject(mirror::Object* orig, mirror::Object* copy)
@@ -424,6 +433,11 @@
                           size_t oat_index)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Assign the offset for an IMT conflict table. Does nothing if the table already has a native
+  // relocation.
+  void TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Return true if klass is loaded by the boot class loader but not in the boot image.
   bool IsBootClassLoaderNonImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -480,6 +494,9 @@
   // remove duplicates in the multi image and app image case.
   mirror::String* FindInternedString(mirror::String* string) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Return true if a native relocation has already been assigned for the given object.
+  bool NativeRelocationAssigned(void* ptr) const;
+
   const CompilerDriver& compiler_driver_;
 
   // Beginning target image address for the first image.
@@ -516,16 +533,14 @@
 
     bool IsArtMethodRelocation() const {
       return type == kNativeObjectRelocationTypeArtMethodClean ||
-          type == kNativeObjectRelocationTypeArtMethodDirty;
+          type == kNativeObjectRelocationTypeArtMethodDirty ||
+          type == kNativeObjectRelocationTypeRuntimeMethod;
     }
   };
   std::unordered_map<void*, NativeObjectRelocation> native_object_relocations_;
 
   // Runtime ArtMethods which aren't reachable from any Class but need to be copied into the image.
   ArtMethod* image_methods_[ImageHeader::kImageMethodsCount];
-  // Fake length prefixed array for image methods. This array does not contain the actual
-  // ArtMethods. We only use it for the header and relocation addresses.
-  LengthPrefixedArray<ArtMethod> image_method_array_;
 
   // Counters for measurements, used for logging only.
   uint64_t dirty_methods_;
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index cda2e27..1785338 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -69,9 +69,9 @@
   DCHECK(jit_compiler != nullptr);
   if (jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo()) {
     const ArrayRef<mirror::Class*> types_array(types, count);
-    ArrayRef<const uint8_t> elf_file = debug::WriteDebugElfFileForClasses(
+    std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForClasses(
         kRuntimeISA, jit_compiler->GetCompilerDriver()->GetInstructionSetFeatures(), types_array);
-    CreateJITCodeEntry(std::unique_ptr<const uint8_t[]>(elf_file.data()), elf_file.size());
+    CreateJITCodeEntry(std::move(elf_file));
   }
 }
 
@@ -88,7 +88,7 @@
 
 JitCompiler::JitCompiler() {
   compiler_options_.reset(new CompilerOptions(
-      CompilerOptions::kDefaultCompilerFilter,
+      CompilerFilter::kDefaultCompilerFilter,
       CompilerOptions::kDefaultHugeMethodThreshold,
       CompilerOptions::kDefaultLargeMethodThreshold,
       CompilerOptions::kDefaultSmallMethodThreshold,
@@ -155,7 +155,8 @@
       Compiler::kOptimizing,
       instruction_set,
       instruction_set_features_.get(),
-      /* image */ false,
+      /* boot_image */ false,
+      /* app_image */ false,
       /* image_classes */ nullptr,
       /* compiled_classes */ nullptr,
       /* compiled_methods */ nullptr,
@@ -171,7 +172,7 @@
 
   size_t thread_count = compiler_driver_->GetThreadCount();
   if (compiler_options_->GetGenerateDebugInfo()) {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     const char* prefix = "/data/misc/trace";
 #else
     const char* prefix = "/tmp";
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 8832c84..371019a 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -18,6 +18,7 @@
 #include <vector>
 
 #include "arch/instruction_set.h"
+#include "base/arena_allocator.h"
 #include "cfi_test.h"
 #include "gtest/gtest.h"
 #include "jni/quick/calling_convention.h"
@@ -28,7 +29,7 @@
 namespace art {
 
 // Run the tests only on host.
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 
 class JNICFITest : public CFITest {
  public:
@@ -42,15 +43,19 @@
     const bool is_static = true;
     const bool is_synchronized = false;
     const char* shorty = "IIFII";
+
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
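+    // The calling conventions and the assembler below are all allocated from this arena.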
+
     std::unique_ptr<JniCallingConvention> jni_conv(
-        JniCallingConvention::Create(is_static, is_synchronized, shorty, isa));
+        JniCallingConvention::Create(&arena, is_static, is_synchronized, shorty, isa));
     std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
-        ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, isa));
+        ManagedRuntimeCallingConvention::Create(&arena, is_static, is_synchronized, shorty, isa));
     const int frame_size(jni_conv->FrameSize());
     const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters();
 
     // Assemble the method.
-    std::unique_ptr<Assembler> jni_asm(Assembler::Create(isa));
+    std::unique_ptr<Assembler> jni_asm(Assembler::Create(&arena, isa));
     jni_asm->cfi().SetEnabled(true);
     jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(),
                         callee_save_regs, mr_conv->EntrySpills());
@@ -89,6 +94,6 @@
 TEST_ISA(kMips)
 TEST_ISA(kMips64)
 
-#endif  // __ANDROID__
+#endif  // ART_TARGET_ANDROID
 
 }  // namespace art
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index cf836a9..c4c2399 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -31,6 +31,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/stack_trace_element.h"
+#include "nativeloader/native_loader.h"
 #include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
@@ -53,6 +54,11 @@
     check_generic_jni_ = false;
   }
 
+  void TearDown() OVERRIDE {
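+    // Balance the InitializeNativeLoader() call made when the runtime was started.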
+    android::ResetNativeLoader();
+    CommonCompilerTest::TearDown();
+  }
+
   void SetCheckGenericJni(bool generic) {
     check_generic_jni_ = generic;
   }
@@ -92,11 +98,13 @@
       CompileForTest(class_loader_, direct, method_name, method_sig);
       // Start runtime.
       Thread::Current()->TransitionFromSuspendedToRunnable();
+      android::InitializeNativeLoader();
       bool started = runtime_->Start();
       CHECK(started);
     }
     // JNI operations after runtime start.
     env_ = Thread::Current()->GetJniEnv();
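+    // An empty search path, passed to LoadNativeLibrary() in place of a null jstring.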
+    library_search_path_ = env_->NewStringUTF("");
     jklass_ = env_->FindClass("MyClassNatives");
     ASSERT_TRUE(jklass_ != nullptr) << method_name << " " << method_sig;
 
@@ -168,6 +176,7 @@
   void StackArgsSignExtendedMips64Impl();
 
   JNIEnv* env_;
+  jstring library_search_path_;
   jmethodID jmethod_;
   bool check_generic_jni_;
 };
@@ -220,7 +229,7 @@
 
   std::string reason;
   ASSERT_TRUE(Runtime::Current()->GetJavaVM()->
-                  LoadNativeLibrary(env_, "", class_loader_, nullptr, &reason))
+                  LoadNativeLibrary(env_, "", class_loader_, library_search_path_, &reason))
       << reason;
 
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 24);
@@ -235,7 +244,7 @@
 
   std::string reason;
   ASSERT_TRUE(Runtime::Current()->GetJavaVM()->
-                  LoadNativeLibrary(env_, "", class_loader_, nullptr, &reason))
+                  LoadNativeLibrary(env_, "", class_loader_, library_search_path_, &reason))
       << reason;
 
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 42);
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index cef8c5d..e21f554 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -46,37 +46,51 @@
 
 // Managed runtime calling convention
 
-ManagedRuntimeCallingConvention* ManagedRuntimeCallingConvention::Create(
-    bool is_static, bool is_synchronized, const char* shorty, InstructionSet instruction_set) {
+std::unique_ptr<ManagedRuntimeCallingConvention> ManagedRuntimeCallingConvention::Create(
+    ArenaAllocator* arena,
+    bool is_static,
+    bool is_synchronized,
+    const char* shorty,
+    InstructionSet instruction_set) {
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
-      return new arm::ArmManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) arm::ArmManagedRuntimeCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
-      return new arm64::Arm64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) arm64::Arm64ManagedRuntimeCallingConvention(
+              is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return new mips::MipsManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) mips::MipsManagedRuntimeCallingConvention(
+              is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
-      return new mips64::Mips64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) mips64::Mips64ManagedRuntimeCallingConvention(
+              is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
-      return new x86::X86ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) x86::X86ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
-      return new x86_64::X86_64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) x86_64::X86_64ManagedRuntimeCallingConvention(
+              is_static, is_synchronized, shorty));
 #endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
-      return nullptr;
+      UNREACHABLE();
   }
 }
 
@@ -132,38 +146,46 @@
 
 // JNI calling convention
 
-JniCallingConvention* JniCallingConvention::Create(bool is_static, bool is_synchronized,
-                                                   const char* shorty,
-                                                   InstructionSet instruction_set) {
+std::unique_ptr<JniCallingConvention> JniCallingConvention::Create(ArenaAllocator* arena,
+                                                                   bool is_static,
+                                                                   bool is_synchronized,
+                                                                   const char* shorty,
+                                                                   InstructionSet instruction_set) {
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
-      return new arm::ArmJniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) arm::ArmJniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
-      return new arm64::Arm64JniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) arm64::Arm64JniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return new mips::MipsJniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) mips::MipsJniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
-      return new mips64::Mips64JniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) mips64::Mips64JniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
-      return new x86::X86JniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) x86::X86JniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
-      return new x86_64::X86_64JniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) x86_64::X86_64JniCallingConvention(is_static, is_synchronized, shorty));
 #endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
-      return nullptr;
+      UNREACHABLE();
   }
 }
 
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 243d124..2c4b15c 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -18,6 +18,8 @@
 #define ART_COMPILER_JNI_QUICK_CALLING_CONVENTION_H_
 
 #include <vector>
+
+#include "base/arena_object.h"
 #include "handle_scope.h"
 #include "primitive.h"
 #include "thread.h"
@@ -26,7 +28,7 @@
 namespace art {
 
 // Top-level abstraction for different calling conventions.
-class CallingConvention {
+class CallingConvention : public DeletableArenaObject<kArenaAllocCallingConvention> {
  public:
   bool IsReturnAReference() const { return shorty_[0] == 'L'; }
 
@@ -221,9 +223,11 @@
 // | { Method* }             | <-- SP
 class ManagedRuntimeCallingConvention : public CallingConvention {
  public:
-  static ManagedRuntimeCallingConvention* Create(bool is_static, bool is_synchronized,
-                                                 const char* shorty,
-                                                 InstructionSet instruction_set);
+  static std::unique_ptr<ManagedRuntimeCallingConvention> Create(ArenaAllocator* arena,
+                                                                 bool is_static,
+                                                                 bool is_synchronized,
+                                                                 const char* shorty,
+                                                                 InstructionSet instruction_set);
 
   // Register that holds the incoming method argument
   virtual ManagedRegister MethodRegister() = 0;
@@ -249,7 +253,9 @@
   virtual const ManagedRegisterEntrySpills& EntrySpills() = 0;
 
  protected:
-  ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty,
+  ManagedRuntimeCallingConvention(bool is_static,
+                                  bool is_synchronized,
+                                  const char* shorty,
                                   size_t frame_pointer_size)
       : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size) {}
 };
@@ -270,8 +276,11 @@
 // callee saves for frames above this one.
 class JniCallingConvention : public CallingConvention {
  public:
-  static JniCallingConvention* Create(bool is_static, bool is_synchronized, const char* shorty,
-                                      InstructionSet instruction_set);
+  static std::unique_ptr<JniCallingConvention> Create(ArenaAllocator* arena,
+                                                      bool is_static,
+                                                      bool is_synchronized,
+                                                      const char* shorty,
+                                                      InstructionSet instruction_set);
 
  // Size of frame excluding space for outgoing args (it's assumed Method* is
   // always at the bottom of a frame, but this doesn't work for outgoing
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index e920460..27714b8 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -22,6 +22,7 @@
 #include <fstream>
 
 #include "art_method.h"
+#include "base/arena_allocator.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "calling_convention.h"
@@ -69,13 +70,18 @@
   InstructionSet instruction_set = driver->GetInstructionSet();
   const InstructionSetFeatures* instruction_set_features = driver->GetInstructionSetFeatures();
   const bool is_64_bit_target = Is64BitInstructionSet(instruction_set);
+
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
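+  // Single arena backing the calling conventions and the assembler created below.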
+
   // Calling conventions used to iterate over parameters to method
   std::unique_ptr<JniCallingConvention> main_jni_conv(
-      JniCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
+      JniCallingConvention::Create(&arena, is_static, is_synchronized, shorty, instruction_set));
   bool reference_return = main_jni_conv->IsReturnAReference();
 
   std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
-      ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
+      ManagedRuntimeCallingConvention::Create(
+          &arena, is_static, is_synchronized, shorty, instruction_set));
 
   // Calling conventions to call into JNI method "end" possibly passing a returned reference, the
   //     method and the current thread.
@@ -90,11 +96,12 @@
     jni_end_shorty = "V";
   }
 
-  std::unique_ptr<JniCallingConvention> end_jni_conv(
-      JniCallingConvention::Create(is_static, is_synchronized, jni_end_shorty, instruction_set));
+  std::unique_ptr<JniCallingConvention> end_jni_conv(JniCallingConvention::Create(
+      &arena, is_static, is_synchronized, jni_end_shorty, instruction_set));
 
   // Assembler that holds generated instructions
-  std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set, instruction_set_features));
+  std::unique_ptr<Assembler> jni_asm(
+      Assembler::Create(&arena, instruction_set, instruction_set_features));
   jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GenerateAnyDebugInfo());
 
   // Offsets into data structures
@@ -488,9 +495,7 @@
                                                  main_jni_conv->CoreSpillMask(),
                                                  main_jni_conv->FpSpillMask(),
                                                  ArrayRef<const SrcMapElem>(),
-                                                 ArrayRef<const uint8_t>(),  // mapping_table.
                                                  ArrayRef<const uint8_t>(),  // vmap_table.
-                                                 ArrayRef<const uint8_t>(),  // native_gc_map.
                                                  ArrayRef<const uint8_t>(*jni_asm->cfi().data()),
                                                  ArrayRef<const LinkerPatch>());
 }
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index 682b008..d4dd978 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -112,7 +112,7 @@
     }
   }
   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
-    if (patch.Type() == kLinkerPatchCallRelative) {
+    if (patch.GetType() == LinkerPatch::Type::kCallRelative) {
       unprocessed_patches_.emplace_back(patch.TargetMethod(),
                                         quick_code_offset + patch.LiteralOffset());
     }
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index c090dff..fa49fc4 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -56,10 +56,10 @@
   SetInsn32(code, literal_offset, value);
 }
 
-void Thumb2RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
-                                                   const LinkerPatch& patch,
-                                                   uint32_t patch_offset,
-                                                   uint32_t target_offset) {
+void Thumb2RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                                     const LinkerPatch& patch,
+                                                     uint32_t patch_offset,
+                                                     uint32_t target_offset) {
   uint32_t literal_offset = patch.LiteralOffset();
   uint32_t pc_literal_offset = patch.PcInsnOffset();
   uint32_t pc_base = patch_offset + (pc_literal_offset - literal_offset) + 4u /* PC adjustment */;
@@ -79,7 +79,9 @@
 std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() {
   // The thunk just uses the entry point in the ArtMethod. This works even for calls
   // to the generic JNI and interpreter trampolines.
-  arm::Thumb2Assembler assembler;
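+  // Back the assembler with a local arena; the thunk is assembled and copied out immediately.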
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  arm::Thumb2Assembler assembler(&arena);
   assembler.LoadFromOffset(
       arm::kLoadWord, arm::PC, arm::R0,
       ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index 0d903c0..d85739c 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -30,10 +30,10 @@
                  uint32_t literal_offset,
                  uint32_t patch_offset,
                  uint32_t target_offset) OVERRIDE;
-  void PatchDexCacheReference(std::vector<uint8_t>* code,
-                              const LinkerPatch& patch,
-                              uint32_t patch_offset,
-                              uint32_t target_offset) OVERRIDE;
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) OVERRIDE;
 
  private:
   static std::vector<uint8_t> CompileThunkCode();
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index a259cda..a8078e3 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -30,6 +30,9 @@
   static const ArrayRef<const uint8_t> kCallCode;
   static const uint8_t kNopRawCode[];
   static const ArrayRef<const uint8_t> kNopCode;
+  static const uint8_t kUnpatchedPcRelativeRawCode[];
+  static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode;
+  static const uint32_t kPcInsnOffset;
 
   // Branches within range [-256, 256) can be created from these by adding the low 8 bits.
   static constexpr uint32_t kBlPlus0 = 0xf000f800;
@@ -123,47 +126,9 @@
     return result;
   }
 
-  void TestDexCachereference(uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
-    dex_cache_arrays_begin_ = dex_cache_arrays_begin;
-    static const uint8_t raw_code[] = {
-        0x40, 0xf2, 0x00, 0x00,   // MOVW r0, #0 (placeholder)
-        0xc0, 0xf2, 0x00, 0x00,   // MOVT r0, #0 (placeholder)
-        0x78, 0x44,               // ADD r0, pc
-    };
-    constexpr uint32_t pc_insn_offset = 8u;
-    const ArrayRef<const uint8_t> code(raw_code);
-    LinkerPatch patches[] = {
-        LinkerPatch::DexCacheArrayPatch(0u, nullptr, pc_insn_offset, element_offset),
-        LinkerPatch::DexCacheArrayPatch(4u, nullptr, pc_insn_offset, element_offset),
-    };
-    AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
-    Link();
-
-    uint32_t method1_offset = GetMethodOffset(1u);
-    uint32_t pc_base_offset = method1_offset + pc_insn_offset + 4u /* PC adjustment */;
-    uint32_t diff = dex_cache_arrays_begin_ + element_offset - pc_base_offset;
-    // Distribute the bits of the diff between the MOVW and MOVT:
-    uint32_t diffw = diff & 0xffffu;
-    uint32_t difft = diff >> 16;
-    uint32_t movw = 0xf2400000u |           // MOVW r0, #0 (placeholder),
-        ((diffw & 0xf000u) << (16 - 12)) |  // move imm4 from bits 12-15 to bits 16-19,
-        ((diffw & 0x0800u) << (26 - 11)) |  // move imm from bit 11 to bit 26,
-        ((diffw & 0x0700u) << (12 - 8)) |   // move imm3 from bits 8-10 to bits 12-14,
-        ((diffw & 0x00ffu));                // keep imm8 at bits 0-7.
-    uint32_t movt = 0xf2c00000u |           // MOVT r0, #0 (placeholder),
-        ((difft & 0xf000u) << (16 - 12)) |  // move imm4 from bits 12-15 to bits 16-19,
-        ((difft & 0x0800u) << (26 - 11)) |  // move imm from bit 11 to bit 26,
-        ((difft & 0x0700u) << (12 - 8)) |   // move imm3 from bits 8-10 to bits 12-14,
-        ((difft & 0x00ffu));                // keep imm8 at bits 0-7.
-    const uint8_t expected_code[] = {
-        static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24),
-        static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8),
-        static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24),
-        static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8),
-        0x78, 0x44,
-    };
-    EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
-  }
+  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringReference(uint32_t string_offset);
+  void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
 };
 
 const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = {
@@ -178,6 +143,67 @@
 
 const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kNopCode(kNopRawCode);
 
+const uint8_t Thumb2RelativePatcherTest::kUnpatchedPcRelativeRawCode[] = {
+    0x40, 0xf2, 0x00, 0x00,   // MOVW r0, #0 (placeholder)
+    0xc0, 0xf2, 0x00, 0x00,   // MOVT r0, #0 (placeholder)
+    0x78, 0x44,               // ADD r0, pc
+};
+const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kUnpatchedPcRelativeCode(
+    kUnpatchedPcRelativeRawCode);
+const uint32_t Thumb2RelativePatcherTest::kPcInsnOffset = 8u;
+
+void Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
+                                                      uint32_t element_offset) {
+  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(0u, nullptr, kPcInsnOffset, element_offset),
+      LinkerPatch::DexCacheArrayPatch(4u, nullptr, kPcInsnOffset, element_offset),
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
+                       dex_cache_arrays_begin_ + element_offset);
+}
+
+void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_offset);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex),
+      LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex),
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
+}
+
+void Thumb2RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches,
+                                                     uint32_t target_offset) {
+  AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  uint32_t method1_offset = GetMethodOffset(1u);
+  uint32_t pc_base_offset = method1_offset + kPcInsnOffset + 4u /* PC adjustment */;
+  uint32_t diff = target_offset - pc_base_offset;
+  // Distribute the bits of the diff between the MOVW and MOVT:
+  uint32_t diffw = diff & 0xffffu;
+  uint32_t difft = diff >> 16;
+  uint32_t movw = 0xf2400000u |           // MOVW r0, #0 (placeholder),
+      ((diffw & 0xf000u) << (16 - 12)) |  // move imm4 from bits 12-15 to bits 16-19,
+      ((diffw & 0x0800u) << (26 - 11)) |  // move imm from bit 11 to bit 26,
+      ((diffw & 0x0700u) << (12 - 8)) |   // move imm3 from bits 8-10 to bits 12-14,
+      ((diffw & 0x00ffu));                // keep imm8 at bits 0-7.
+  uint32_t movt = 0xf2c00000u |           // MOVT r0, #0 (placeholder),
+      ((difft & 0xf000u) << (16 - 12)) |  // move imm4 from bits 12-15 to bits 16-19,
+      ((difft & 0x0800u) << (26 - 11)) |  // move imm from bit 11 to bit 26,
+      ((difft & 0x0700u) << (12 - 8)) |   // move imm3 from bits 8-10 to bits 12-14,
+      ((difft & 0x00ffu));                // keep imm8 at bits 0-7.
+  const uint8_t expected_code[] = {
+      static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24),
+      static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8),
+      static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24),
+      static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8),
+      0x78, 0x44,
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
 TEST_F(Thumb2RelativePatcherTest, CallSelf) {
   LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
@@ -366,23 +392,43 @@
   EXPECT_TRUE(CheckThunk(thunk_offset));
 }
 
-TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm8) {
-  TestDexCachereference(0x00ff0000u, 0x00fcu);
+TEST_F(Thumb2RelativePatcherTest, DexCacheReference1) {
+  TestDexCacheReference(0x00ff0000u, 0x00fcu);
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
-TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm3) {
-  TestDexCachereference(0x02ff0000u, 0x05fcu);
+TEST_F(Thumb2RelativePatcherTest, DexCacheReference2) {
+  TestDexCacheReference(0x02ff0000u, 0x05fcu);
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
-TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm) {
-  TestDexCachereference(0x08ff0000u, 0x08fcu);
+TEST_F(Thumb2RelativePatcherTest, DexCacheReference3) {
+  TestDexCacheReference(0x08ff0000u, 0x08fcu);
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
-TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceimm4) {
-  TestDexCachereference(0xd0ff0000u, 0x60fcu);
+TEST_F(Thumb2RelativePatcherTest, DexCacheReference4) {
+  TestDexCacheReference(0xd0ff0000u, 0x60fcu);
+  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, StringReference1) {
+  TestStringReference(0x00ff00fcu);
+  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, StringReference2) {
+  TestStringReference(0x02ff05fcu);
+  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, StringReference3) {
+  TestStringReference(0x08ff08fcu);
+  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, StringReference4) {
+  TestStringReference(0xd0ff60fcu);
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index a81c85c..7277107 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -28,6 +28,17 @@
 namespace art {
 namespace linker {
 
+namespace {
+
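+// Matches PC-relative patches that apply to the ADRP instruction itself, i.e. those whose
+// literal offset equals the offset of the ADRP.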
+inline bool IsAdrpPatch(const LinkerPatch& patch) {
+  LinkerPatch::Type type = patch.GetType();
+  return
+      (type == LinkerPatch::Type::kStringRelative || type == LinkerPatch::Type::kDexCacheArray) &&
+      patch.LiteralOffset() == patch.PcInsnOffset();
+}
+
+}  // anonymous namespace
+
 Arm64RelativePatcher::Arm64RelativePatcher(RelativePatcherTargetProvider* provider,
                                            const Arm64InstructionSetFeatures* features)
     : ArmBaseRelativePatcher(provider, kArm64, CompileThunkCode(),
@@ -61,8 +72,7 @@
   size_t num_adrp = 0u;
   DCHECK(compiled_method != nullptr);
   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
-    if (patch.Type() == kLinkerPatchDexCacheArray &&
-        patch.LiteralOffset() == patch.PcInsnOffset()) {  // ADRP patch
+    if (IsAdrpPatch(patch)) {
       ++num_adrp;
     }
   }
@@ -78,8 +88,7 @@
   uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size());
   DCHECK(compiled_method != nullptr);
   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
-    if (patch.Type() == kLinkerPatchDexCacheArray &&
-        patch.LiteralOffset() == patch.PcInsnOffset()) {  // ADRP patch
+    if (IsAdrpPatch(patch)) {
       uint32_t patch_offset = quick_code_offset + patch.LiteralOffset();
       if (NeedsErratum843419Thunk(code, patch.LiteralOffset(), patch_offset)) {
         adrp_thunk_locations_.emplace_back(patch_offset, thunk_offset);
@@ -151,10 +160,10 @@
   SetInsn(code, literal_offset, insn);
 }
 
-void Arm64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
-                                                  const LinkerPatch& patch,
-                                                  uint32_t patch_offset,
-                                                  uint32_t target_offset) {
+void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                                    const LinkerPatch& patch,
+                                                    uint32_t patch_offset,
+                                                    uint32_t target_offset) {
   DCHECK_EQ(patch_offset & 3u, 0u);
   DCHECK_EQ(target_offset & 3u, 0u);
   uint32_t literal_offset = patch.LiteralOffset();
@@ -199,8 +208,22 @@
     // Write the new ADRP (or B to the erratum 843419 thunk).
     SetInsn(code, literal_offset, insn);
   } else {
-    // LDR 32-bit or 64-bit with imm12 == 0 (unset).
-    DCHECK_EQ(insn & 0xbffffc00, 0xb9400000) << insn;
+    if ((insn & 0xfffffc00) == 0x91000000) {
+      // ADD immediate, 64-bit with imm12 == 0 (unset).
+      if (!kEmitCompilerReadBarrier) {
+        DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative) << patch.GetType();
+      } else {
+        // With the read barrier (non-baker) enabled, it could be kDexCacheArray in the
+        // HLoadString::LoadKind::kDexCachePcRelative case of VisitLoadString().
+        DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
+               patch.GetType() == LinkerPatch::Type::kDexCacheArray) << patch.GetType();
+      }
+      shift = 0u;  // No shift for ADD.
+    } else {
+      // LDR 32-bit or 64-bit with imm12 == 0 (unset).
+      DCHECK(patch.GetType() == LinkerPatch::Type::kDexCacheArray) << patch.GetType();
+      DCHECK_EQ(insn & 0xbffffc00, 0xb9400000) << std::hex << insn;
+    }
     if (kIsDebugBuild) {
       uint32_t adrp = GetInsn(code, pc_insn_offset);
       if ((adrp & 0x9f000000u) != 0x90000000u) {
@@ -231,7 +254,9 @@
 std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() {
   // The thunk just uses the entry point in the ArtMethod. This works even for calls
   // to the generic JNI and interpreter trampolines.
-  arm64::Arm64Assembler assembler;
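+  // As in the Thumb2 patcher, back the assembler with a local arena.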
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  arm64::Arm64Assembler assembler(&arena);
   Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
       kArm64PointerSize).Int32Value());
   assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
@@ -263,7 +288,7 @@
   DCHECK_EQ(patch_offset & 0x3u, 0u);
   if ((patch_offset & 0xff8) == 0xff8) {  // ...ff8 or ...ffc
     uint32_t adrp = GetInsn(code, literal_offset);
-    DCHECK_EQ(adrp & 0xff000000, 0x90000000);
+    DCHECK_EQ(adrp & 0x9f000000, 0x90000000);
     uint32_t next_offset = patch_offset + 4u;
     uint32_t next_insn = GetInsn(code, literal_offset + 4u);
 
@@ -277,6 +302,15 @@
       return false;
     }
 
+    // And since LinkerPatch::Type::kStringRelative is using the result of the ADRP
+    // for an ADD immediate, check for that as well. We generalize a bit to include
+    // ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination or stores
+    // the result to a different register.
+    if ((next_insn & 0x1f000000) == 0x11000000 &&
+        ((((next_insn >> 5) ^ adrp) & 0x1f) == 0 || ((next_insn ^ adrp) & 0x1f) != 0)) {
+      return false;
+    }
+
     // LDR <Wt>, <label> is always aligned and thus it doesn't cause boundary crossing.
     if ((next_insn & 0xff000000) == 0x18000000) {
       return false;
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index f9b76e6..48ad105 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -37,10 +37,10 @@
                  uint32_t literal_offset,
                  uint32_t patch_offset,
                  uint32_t target_offset) OVERRIDE;
-  void PatchDexCacheReference(std::vector<uint8_t>* code,
-                              const LinkerPatch& patch,
-                              uint32_t patch_offset,
-                              uint32_t target_offset) OVERRIDE;
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) OVERRIDE;
 
  private:
   static std::vector<uint8_t> CompileThunkCode();
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index 0bfef5e..09729fd 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -40,6 +40,15 @@
   static constexpr uint32_t kBlPlusMax = 0x95ffffffu;
   static constexpr uint32_t kBlMinusMax = 0x96000000u;
 
+  // LDR immediate, 32-bit.
+  static constexpr uint32_t kLdrWInsn = 0xb9400000u;
+
+  // ADD/ADDS/SUB/SUBS immediate, 64-bit.
+  static constexpr uint32_t kAddXInsn = 0x91000000u;
+  static constexpr uint32_t kAddsXInsn = 0xb1000000u;
+  static constexpr uint32_t kSubXInsn = 0xd1000000u;
+  static constexpr uint32_t kSubsXInsn = 0xf1000000u;
+
   // LDUR x2, [sp, #4], i.e. unaligned load crossing 64-bit boundary (assuming aligned sp).
   static constexpr uint32_t kLdurInsn = 0xf840405fu;
 
@@ -109,7 +118,7 @@
   uint32_t GetMethodOffset(uint32_t method_idx) {
     auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
     CHECK(result.first);
-    CHECK_EQ(result.second & 3u, 0u);
+    CHECK_ALIGNED(result.second, 4u);
     return result.second;
   }
 
@@ -147,20 +156,29 @@
     return result;
   }
 
-  std::vector<uint8_t> GenNopsAndAdrpLdr(size_t num_nops,
-                                         uint32_t method_offset, uint32_t target_offset) {
+  std::vector<uint8_t> GenNopsAndAdrpAndUse(size_t num_nops,
+                                            uint32_t method_offset,
+                                            uint32_t target_offset,
+                                            uint32_t use_insn) {
     std::vector<uint8_t> result;
     result.reserve(num_nops * 4u + 8u);
     for (size_t i = 0; i != num_nops; ++i) {
       result.insert(result.end(), kNopCode.begin(), kNopCode.end());
     }
-    DCHECK_EQ(method_offset & 3u, 0u);
-    DCHECK_EQ(target_offset & 3u, 0u);
+    CHECK_ALIGNED(method_offset, 4u);
+    CHECK_ALIGNED(target_offset, 4u);
     uint32_t adrp_offset = method_offset + num_nops * 4u;
     uint32_t disp = target_offset - (adrp_offset & ~0xfffu);
-    DCHECK_EQ(disp & 3u, 0u);
-    uint32_t ldr = 0xb9400001 |               // LDR w1, [x0, #(imm12 * 2)]
-        ((disp & 0xfffu) << (10 - 2));        // imm12 = ((disp & 0xfffu) >> 2) is at bit 10.
+    if (use_insn == kLdrWInsn) {
+      DCHECK_ALIGNED(disp, 1u << 2);
+      use_insn |= 1 |                         // LDR w1, [x0, #(imm12 << 2)]
+          ((disp & 0xfffu) << (10 - 2));      // imm12 = ((disp & 0xfffu) >> 2) is at bit 10.
+    } else if (use_insn == kAddXInsn) {
+      use_insn |= 1 |                         // ADD x1, x0, #imm
+          (disp & 0xfffu) << 10;              // imm12 = (disp & 0xfffu) is at bit 10.
+    } else {
+      LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn;
+    }
     uint32_t adrp = 0x90000000 |              // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
         ((disp & 0x3000u) << (29 - 12)) |     // immlo = ((disp & 0x3000u) >> 12) is at bit 29,
         ((disp & 0xffffc000) >> (14 - 5)) |   // immhi = (disp >> 14) is at bit 5,
@@ -170,13 +188,19 @@
     result.push_back(static_cast<uint8_t>(adrp >> 8));
     result.push_back(static_cast<uint8_t>(adrp >> 16));
     result.push_back(static_cast<uint8_t>(adrp >> 24));
-    result.push_back(static_cast<uint8_t>(ldr));
-    result.push_back(static_cast<uint8_t>(ldr >> 8));
-    result.push_back(static_cast<uint8_t>(ldr >> 16));
-    result.push_back(static_cast<uint8_t>(ldr >> 24));
+    result.push_back(static_cast<uint8_t>(use_insn));
+    result.push_back(static_cast<uint8_t>(use_insn >> 8));
+    result.push_back(static_cast<uint8_t>(use_insn >> 16));
+    result.push_back(static_cast<uint8_t>(use_insn >> 24));
     return result;
   }
 
+  std::vector<uint8_t> GenNopsAndAdrpLdr(size_t num_nops,
+                                         uint32_t method_offset,
+                                         uint32_t target_offset) {
+    return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kLdrWInsn);
+  }
+
   void TestNopsAdrpLdr(size_t num_nops, uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
     dex_cache_arrays_begin_ = dex_cache_arrays_begin;
     auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u);  // Unpatched.
@@ -184,7 +208,8 @@
         LinkerPatch::DexCacheArrayPatch(num_nops * 4u     , nullptr, num_nops * 4u, element_offset),
         LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, element_offset),
     };
-    AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code),
+    AddCompiledMethod(MethodRef(1u),
+                      ArrayRef<const uint8_t>(code),
                       ArrayRef<const LinkerPatch>(patches));
     Link();
 
@@ -194,6 +219,30 @@
     EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
   }
 
+  std::vector<uint8_t> GenNopsAndAdrpAdd(size_t num_nops,
+                                         uint32_t method_offset,
+                                         uint32_t target_offset) {
+    return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kAddXInsn);
+  }
+
+  void TestNopsAdrpAdd(size_t num_nops, uint32_t string_offset) {
+    constexpr uint32_t kStringIndex = 1u;
+    string_index_to_offset_map_.Put(kStringIndex, string_offset);
+    auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u);  // Unpatched.
+    LinkerPatch patches[] = {
+        LinkerPatch::RelativeStringPatch(num_nops * 4u     , nullptr, num_nops * 4u, kStringIndex),
+        LinkerPatch::RelativeStringPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex),
+    };
+    AddCompiledMethod(MethodRef(1u),
+                      ArrayRef<const uint8_t>(code),
+                      ArrayRef<const LinkerPatch>(patches));
+    Link();
+
+    uint32_t method1_offset = GetMethodOffset(1u);
+    auto expected_code = GenNopsAndAdrpAdd(num_nops, method1_offset, string_offset);
+    EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+  }
+
   void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
     CHECK_LE(pos, code->size());
     const uint8_t insn_code[] = {
@@ -204,8 +253,10 @@
     code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
   }
 
-  void PrepareNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2,
-                               uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+  void PrepareNopsAdrpInsn2Ldr(size_t num_nops,
+                               uint32_t insn2,
+                               uint32_t dex_cache_arrays_begin,
+                               uint32_t element_offset) {
     dex_cache_arrays_begin_ = dex_cache_arrays_begin;
     auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u);  // Unpatched.
     InsertInsn(&code, num_nops * 4u + 4u, insn2);
@@ -213,26 +264,41 @@
         LinkerPatch::DexCacheArrayPatch(num_nops * 4u     , nullptr, num_nops * 4u, element_offset),
         LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, element_offset),
     };
-    AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code),
+    AddCompiledMethod(MethodRef(1u),
+                      ArrayRef<const uint8_t>(code),
                       ArrayRef<const LinkerPatch>(patches));
     Link();
   }
 
-  void TestNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2,
-                            uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
-    PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
+  void PrepareNopsAdrpInsn2Add(size_t num_nops, uint32_t insn2, uint32_t string_offset) {
+    constexpr uint32_t kStringIndex = 1u;
+    string_index_to_offset_map_.Put(kStringIndex, string_offset);
+    auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u);  // Unpatched.
+    InsertInsn(&code, num_nops * 4u + 4u, insn2);
+    LinkerPatch patches[] = {
+        LinkerPatch::RelativeStringPatch(num_nops * 4u     , nullptr, num_nops * 4u, kStringIndex),
+        LinkerPatch::RelativeStringPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex),
+    };
+    AddCompiledMethod(MethodRef(1u),
+                      ArrayRef<const uint8_t>(code),
+                      ArrayRef<const LinkerPatch>(patches));
+    Link();
+  }
 
+  void TestNopsAdrpInsn2AndUse(size_t num_nops,
+                               uint32_t insn2,
+                               uint32_t target_offset,
+                               uint32_t use_insn) {
     uint32_t method1_offset = GetMethodOffset(1u);
-    uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
-    auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset);
+    auto expected_code = GenNopsAndAdrpAndUse(num_nops, method1_offset, target_offset, use_insn);
     InsertInsn(&expected_code, num_nops * 4u + 4u, insn2);
     EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
   }
 
-  void TestNopsAdrpInsn2LdrHasThunk(size_t num_nops, uint32_t insn2,
-                                    uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
-    PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
-
+  void TestNopsAdrpInsn2AndUseHasThunk(size_t num_nops,
+                                       uint32_t insn2,
+                                       uint32_t target_offset,
+                                       uint32_t use_insn) {
     uint32_t method1_offset = GetMethodOffset(1u);
     CHECK(!compiled_method_refs_.empty());
     CHECK_EQ(compiled_method_refs_[0].dex_method_index, 1u);
@@ -240,13 +306,12 @@
     uint32_t method1_size = compiled_methods_[0]->GetQuickCode().size();
     uint32_t thunk_offset = CompiledCode::AlignCode(method1_offset + method1_size, kArm64);
     uint32_t b_diff = thunk_offset - (method1_offset + num_nops * 4u);
-    ASSERT_EQ(b_diff & 3u, 0u);
+    CHECK_ALIGNED(b_diff, 4u);
     ASSERT_LT(b_diff, 128 * MB);
     uint32_t b_out = kBPlus0 + ((b_diff >> 2) & 0x03ffffffu);
     uint32_t b_in = kBPlus0 + ((-b_diff >> 2) & 0x03ffffffu);
 
-    uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
-    auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset);
+    auto expected_code = GenNopsAndAdrpAndUse(num_nops, method1_offset, target_offset, use_insn);
     InsertInsn(&expected_code, num_nops * 4u + 4u, insn2);
     // Replace adrp with bl.
     expected_code.erase(expected_code.begin() + num_nops * 4u,
@@ -270,29 +335,39 @@
     }
   }
 
-  void TestAdrpInsn2Ldr(uint32_t insn2, uint32_t adrp_offset, bool has_thunk,
-                        uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+  void TestAdrpInsn2Ldr(uint32_t insn2,
+                        uint32_t adrp_offset,
+                        bool has_thunk,
+                        uint32_t dex_cache_arrays_begin,
+                        uint32_t element_offset) {
     uint32_t method1_offset =
         CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
     ASSERT_LT(method1_offset, adrp_offset);
-    ASSERT_EQ(adrp_offset & 3u, 0u);
+    CHECK_ALIGNED(adrp_offset, 4u);
     uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
+    PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
+    uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
     if (has_thunk) {
-      TestNopsAdrpInsn2LdrHasThunk(num_nops, insn2, dex_cache_arrays_begin, element_offset);
+      TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, target_offset, kLdrWInsn);
     } else {
-      TestNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
+      TestNopsAdrpInsn2AndUse(num_nops, insn2, target_offset, kLdrWInsn);
     }
     ASSERT_EQ(method1_offset, GetMethodOffset(1u));  // If this fails, num_nops is wrong.
   }
 
-  void TestAdrpLdurLdr(uint32_t adrp_offset, bool has_thunk,
-                       uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+  void TestAdrpLdurLdr(uint32_t adrp_offset,
+                       bool has_thunk,
+                       uint32_t dex_cache_arrays_begin,
+                       uint32_t element_offset) {
     TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
   }
 
-  void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn, int32_t pcrel_disp,
-                           uint32_t adrp_offset, bool has_thunk,
-                           uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+  void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn,
+                           int32_t pcrel_disp,
+                           uint32_t adrp_offset,
+                           bool has_thunk,
+                           uint32_t dex_cache_arrays_begin,
+                           uint32_t element_offset) {
     ASSERT_LT(pcrel_disp, 0x100000);
     ASSERT_GE(pcrel_disp, -0x100000);
     ASSERT_EQ(pcrel_disp & 0x3, 0);
@@ -300,13 +375,60 @@
     TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
   }
 
-  void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn, uint32_t sprel_disp_in_load_units,
-                           uint32_t adrp_offset, bool has_thunk,
-                           uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+  void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn,
+                           uint32_t sprel_disp_in_load_units,
+                           uint32_t adrp_offset,
+                           bool has_thunk,
+                           uint32_t dex_cache_arrays_begin,
+                           uint32_t element_offset) {
     ASSERT_LT(sprel_disp_in_load_units, 0x1000u);
     uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10);
     TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
   }
+
+  void TestAdrpInsn2Add(uint32_t insn2,
+                        uint32_t adrp_offset,
+                        bool has_thunk,
+                        uint32_t string_offset) {
+    uint32_t method1_offset =
+        CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+    ASSERT_LT(method1_offset, adrp_offset);
+    CHECK_ALIGNED(adrp_offset, 4u);
+    uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
+    PrepareNopsAdrpInsn2Add(num_nops, insn2, string_offset);
+    if (has_thunk) {
+      TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, string_offset, kAddXInsn);
+    } else {
+      TestNopsAdrpInsn2AndUse(num_nops, insn2, string_offset, kAddXInsn);
+    }
+    ASSERT_EQ(method1_offset, GetMethodOffset(1u));  // If this fails, num_nops is wrong.
+  }
+
+  void TestAdrpLdurAdd(uint32_t adrp_offset, bool has_thunk, uint32_t string_offset) {
+    TestAdrpInsn2Add(kLdurInsn, adrp_offset, has_thunk, string_offset);
+  }
+
+  void TestAdrpLdrPcRelAdd(uint32_t pcrel_ldr_insn,
+                           int32_t pcrel_disp,
+                           uint32_t adrp_offset,
+                           bool has_thunk,
+                           uint32_t string_offset) {
+    ASSERT_LT(pcrel_disp, 0x100000);
+    ASSERT_GE(pcrel_disp, -0x100000);
+    ASSERT_EQ(pcrel_disp & 0x3, 0);
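+    // The scaled 19-bit displacement (imm19) occupies bits [23:5] of the
+    // PC-relative LDR encoding, hence the >> 2 scale and << 5 shift below.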
+    uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5);
+    TestAdrpInsn2Add(insn2, adrp_offset, has_thunk, string_offset);
+  }
+
+  void TestAdrpLdrSpRelAdd(uint32_t sprel_ldr_insn,
+                           uint32_t sprel_disp_in_load_units,
+                           uint32_t adrp_offset,
+                           bool has_thunk,
+                           uint32_t string_offset) {
+    ASSERT_LT(sprel_disp_in_load_units, 0x1000u);
+    uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10);
+    TestAdrpInsn2Add(insn2, adrp_offset, has_thunk, string_offset);
+  }
 };
 
 const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = {
@@ -358,14 +480,14 @@
   uint32_t method1_offset = GetMethodOffset(1u);
   uint32_t method2_offset = GetMethodOffset(2u);
   uint32_t diff_after = method2_offset - method1_offset;
-  ASSERT_EQ(diff_after & 3u, 0u);
+  CHECK_ALIGNED(diff_after, 4u);
   ASSERT_LT(diff_after >> 2, 1u << 8);  // Simple encoding, (diff_after >> 2) fits into 8 bits.
   static const uint8_t method1_expected_code[] = {
       static_cast<uint8_t>(diff_after >> 2), 0x00, 0x00, 0x94
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code)));
   uint32_t diff_before = method1_offset - method2_offset;
-  ASSERT_EQ(diff_before & 3u, 0u);
+  CHECK_ALIGNED(diff_before, 4u);
   ASSERT_GE(diff_before, -1u << 27);
   auto method2_expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff_before >> 2) & 0x03ffffffu));
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code)));
@@ -411,7 +533,7 @@
   uint32_t thunk_offset =
       CompiledCode::AlignCode(last_method_offset + last_method_code.size(), kArm64);
   uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method);
-  ASSERT_EQ(diff & 3u, 0u);
+  CHECK_ALIGNED(diff, 4u);
   ASSERT_LT(diff, 128 * MB);
   auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2));
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx),
@@ -497,7 +619,7 @@
   uint32_t thunk_offset = last_method_header_offset - CompiledCode::AlignCode(ThunkSize(), kArm64);
   ASSERT_TRUE(IsAligned<kArm64Alignment>(thunk_offset));
   uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1);
-  ASSERT_EQ(diff & 3u, 0u);
+  CHECK_ALIGNED(diff, 4u);
   ASSERT_LT(diff, 128 * MB);
   auto expected_code = GenNopsAndBl(0u, kBlPlus0 | (diff >> 2));
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
@@ -527,7 +649,7 @@
   uint32_t thunk_offset =
       CompiledCode::AlignCode(last_method_offset + last_method_code.size(), kArm64);
   uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method);
-  ASSERT_EQ(diff & 3u, 0u);
+  CHECK_ALIGNED(diff, 4u);
   ASSERT_LT(diff, 128 * MB);
   auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2));
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx),
@@ -551,74 +673,158 @@
   TestNopsAdrpLdr(0u, 0x12345000u, 0x4000u);
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff4) {
-  TestAdrpLdurLdr(0xff4u, false, 0x12345678u, 0x1234u);
+TEST_F(Arm64RelativePatcherTestDefault, StringReference1) {
+  TestNopsAdrpAdd(0u, 0x12345678u);
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff8) {
-  TestAdrpLdurLdr(0xff8u, true, 0x12345678u, 0x1234u);
+TEST_F(Arm64RelativePatcherTestDefault, StringReference2) {
+  TestNopsAdrpAdd(0u, -0x12345678u);
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xffc) {
-  TestAdrpLdurLdr(0xffcu, true, 0x12345678u, 0x1234u);
+TEST_F(Arm64RelativePatcherTestDefault, StringReference3) {
+  TestNopsAdrpAdd(0u, 0x12345000u);
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0x1000) {
-  TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u);
-}
-
-TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xff4) {
-  TestAdrpLdurLdr(0xff4u, false, 0x12345678u, 0x1234u);
-}
-
-TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xff8) {
-  TestAdrpLdurLdr(0xff8u, false, 0x12345678u, 0x1234u);
-}
-
-TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xffc) {
-  TestAdrpLdurLdr(0xffcu, false, 0x12345678u, 0x1234u);
-}
-
-TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0x1000) {
-  TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u);
+TEST_F(Arm64RelativePatcherTestDefault, StringReference4) {
+  TestNopsAdrpAdd(0u, 0x12345ffcu);
 }
 
 #define TEST_FOR_OFFSETS(test, disp1, disp2) \
   test(0xff4u, disp1) test(0xff8u, disp1) test(0xffcu, disp1) test(0x1000u, disp1) \
   test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2)
 
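+// Note: adrp_offset values 0xff8 and 0xffc place the ADRP in the last two
+// instruction slots of a 4KiB region, the pattern rewritten by the arm64
+// patcher's Cortex-A53 erratum 843419 workaround. This is why the default
+// tests below expect a thunk at those offsets while the Denver64 tests,
+// which do not need the workaround, never do.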
+#define DEFAULT_LDUR_LDR_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## Ldur ## disp) { \
+    bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu); \
+    TestAdrpLdurLdr(adrp_offset, has_thunk, 0x12345678u, disp); \
+  }
+
+TEST_FOR_OFFSETS(DEFAULT_LDUR_LDR_TEST, 0x1234, 0x1238)
+
+#define DENVER64_LDUR_LDR_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference ## adrp_offset ## Ldur ## disp) { \
+    TestAdrpLdurLdr(adrp_offset, false, 0x12345678u, disp); \
+  }
+
+TEST_FOR_OFFSETS(DENVER64_LDUR_LDR_TEST, 0x1234, 0x1238)
+
 // LDR <Wt>, <label> is always aligned. We should never have to use a fixup.
-#define LDRW_PCREL_TEST(adrp_offset, disp) \
+#define LDRW_PCREL_LDR_TEST(adrp_offset, disp) \
   TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WPcRel ## disp) { \
     TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \
   }
 
-TEST_FOR_OFFSETS(LDRW_PCREL_TEST, 0x1234, 0x1238)
+TEST_FOR_OFFSETS(LDRW_PCREL_LDR_TEST, 0x1234, 0x1238)
 
 // LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8.
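+// For example, at adrp_offset 0xff8 the LDR literal is at 0xffc: 0xffc +
+// 0x1234 == 0x2230 is a multiple of 8 (no thunk), while 0xffc + 0x1238 ==
+// 0x2234 is not (thunk). At adrp_offset 0xffc the two displacements swap
+// roles.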
-#define LDRX_PCREL_TEST(adrp_offset, disp) \
+#define LDRX_PCREL_LDR_TEST(adrp_offset, disp) \
   TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XPcRel ## disp) { \
-    bool unaligned = ((adrp_offset + 4u + static_cast<uint32_t>(disp)) & 7u) != 0; \
+    bool unaligned = !IsAligned<8u>(adrp_offset + 4u + static_cast<uint32_t>(disp)); \
     bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu) && unaligned; \
     TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \
   }
 
-TEST_FOR_OFFSETS(LDRX_PCREL_TEST, 0x1234, 0x1238)
+TEST_FOR_OFFSETS(LDRX_PCREL_LDR_TEST, 0x1234, 0x1238)
 
 // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed.
-#define LDRW_SPREL_TEST(adrp_offset, disp) \
+#define LDRW_SPREL_LDR_TEST(adrp_offset, disp) \
   TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WSpRel ## disp) { \
     TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, disp >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \
   }
 
-TEST_FOR_OFFSETS(LDRW_SPREL_TEST, 0, 4)
+TEST_FOR_OFFSETS(LDRW_SPREL_LDR_TEST, 0, 4)
 
-#define LDRX_SPREL_TEST(adrp_offset, disp) \
+#define LDRX_SPREL_LDR_TEST(adrp_offset, disp) \
   TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XSpRel ## disp) { \
     TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, disp >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \
   }
 
-TEST_FOR_OFFSETS(LDRX_SPREL_TEST, 0, 8)
+TEST_FOR_OFFSETS(LDRX_SPREL_LDR_TEST, 0, 8)
+
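+// Note: the 0xffffc840 string offset used below is negative when treated as
+// signed, exercising ADRP references to a lower page, just like
+// TestNopsAdrpAdd(0u, -0x12345678u) above.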
+#define DEFAULT_LDUR_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## Ldur ## disp) { \
+    bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu); \
+    TestAdrpLdurAdd(adrp_offset, has_thunk, disp); \
+  }
+
+TEST_FOR_OFFSETS(DEFAULT_LDUR_ADD_TEST, 0x12345678, 0xffffc840)
+
+#define DENVER64_LDUR_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDenver64, StringReference ## adrp_offset ## Ldur ## disp) { \
+    TestAdrpLdurAdd(adrp_offset, false, disp); \
+  }
+
+TEST_FOR_OFFSETS(DENVER64_LDUR_ADD_TEST, 0x12345678, 0xffffc840)
+
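+// In the A64 ADD/SUB (immediate) encodings used below, imm12 occupies bits
+// [21:10], Rn bits [9:5] and Rd bits [4:0], so (100 << 10) | (2u << 5) | 3u
+// reads as "x3, x2, #100".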
+#define DEFAULT_SUBX3X2_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## SubX3X2 ## disp) { \
+    /* SUB unrelated to "ADRP x0, addr". */ \
+    uint32_t sub = kSubXInsn | (100 << 10) | (2u << 5) | 3u;  /* SUB x3, x2, #100 */ \
+    TestAdrpInsn2Add(sub, adrp_offset, false, disp); \
+  }
+
+TEST_FOR_OFFSETS(DEFAULT_SUBX3X2_ADD_TEST, 0x12345678, 0xffffc840)
+
+#define DEFAULT_SUBSX3X0_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## SubsX3X0 ## disp) { \
+    /* SUBS that uses the result of "ADRP x0, addr". */ \
+    uint32_t subs = kSubsXInsn | (100 << 10) | (0u << 5) | 3u;  /* SUBS x3, x0, #100 */ \
+    TestAdrpInsn2Add(subs, adrp_offset, false, disp); \
+  }
+
+TEST_FOR_OFFSETS(DEFAULT_SUBSX3X0_ADD_TEST, 0x12345678, 0xffffc840)
+
+#define DEFAULT_ADDX0X0_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## AddX0X0 ## disp) { \
+    /* ADD that uses the result register of "ADRP x0, addr" as both source and destination. */ \
+    uint32_t add = kAddXInsn | (100 << 10) | (0u << 5) | 0u;  /* ADD x0, x0, #100 */ \
+    TestAdrpInsn2Add(add, adrp_offset, false, disp); \
+  }
+
+TEST_FOR_OFFSETS(DEFAULT_ADDX0X0_ADD_TEST, 0x12345678, 0xffffc840)
+
+#define DEFAULT_ADDSX0X2_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## AddsX0X2 ## disp) { \
+    /* ADDS that does not use the result of "ADRP x0, addr" but overwrites that register. */ \
+    uint32_t adds = kAddsXInsn | (100 << 10) | (2u << 5) | 0u;  /* ADDS x0, x2, #100 */ \
+    bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu); \
+    TestAdrpInsn2Add(adds, adrp_offset, has_thunk, disp); \
+  }
+
+TEST_FOR_OFFSETS(DEFAULT_ADDSX0X2_ADD_TEST, 0x12345678, 0xffffc840)
+
+// LDR <Wt>, <label> is always aligned. We should never have to use a fixup.
+#define LDRW_PCREL_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## WPcRel ## disp) { \
+    TestAdrpLdrPcRelAdd(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u); \
+  }
+
+TEST_FOR_OFFSETS(LDRW_PCREL_ADD_TEST, 0x1234, 0x1238)
+
+// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8.
+#define LDRX_PCREL_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## XPcRel ## disp) { \
+    bool unaligned = !IsAligned<8u>(adrp_offset + 4u + static_cast<uint32_t>(disp)); \
+    bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu) && unaligned; \
+    TestAdrpLdrPcRelAdd(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u); \
+  }
+
+TEST_FOR_OFFSETS(LDRX_PCREL_ADD_TEST, 0x1234, 0x1238)
+
+// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed.
+#define LDRW_SPREL_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## WSpRel ## disp) { \
+    TestAdrpLdrSpRelAdd(kLdrWSpRelInsn, disp >> 2, adrp_offset, false, 0x12345678u); \
+  }
+
+TEST_FOR_OFFSETS(LDRW_SPREL_ADD_TEST, 0, 4)
+
+#define LDRX_SPREL_ADD_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## XSpRel ## disp) { \
+    TestAdrpLdrSpRelAdd(kLdrXSpRelInsn, disp >> 3, adrp_offset, false, 0x12345678u); \
+  }
+
+TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8)
 
 }  // namespace linker
 }  // namespace art
diff --git a/compiler/linker/multi_oat_relative_patcher.h b/compiler/linker/multi_oat_relative_patcher.h
index 1727d52..dbda03f 100644
--- a/compiler/linker/multi_oat_relative_patcher.h
+++ b/compiler/linker/multi_oat_relative_patcher.h
@@ -103,13 +103,13 @@
   }
 
   // Wrapper around RelativePatcher::PatchDexCacheReference(), doing offset adjustment.
-  void PatchDexCacheReference(std::vector<uint8_t>* code,
-                              const LinkerPatch& patch,
-                              uint32_t patch_offset,
-                              uint32_t target_offset) {
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) {
     patch_offset += adjustment_;
     target_offset += adjustment_;
-    relative_patcher_->PatchDexCacheReference(code, patch, patch_offset, target_offset);
+    relative_patcher_->PatchPcRelativeReference(code, patch, patch_offset, target_offset);
   }
 
   // Wrappers around RelativePatcher for statistics retrieval.
diff --git a/compiler/linker/multi_oat_relative_patcher_test.cc b/compiler/linker/multi_oat_relative_patcher_test.cc
index 792cdfe..92a96a0 100644
--- a/compiler/linker/multi_oat_relative_patcher_test.cc
+++ b/compiler/linker/multi_oat_relative_patcher_test.cc
@@ -86,10 +86,10 @@
       last_target_offset_ = target_offset;
     }
 
-    void PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
-                                const LinkerPatch& patch,
-                                uint32_t patch_offset,
-                                uint32_t target_offset) OVERRIDE {
+    void PatchPcRelativeReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                  const LinkerPatch& patch,
+                                  uint32_t patch_offset,
+                                  uint32_t target_offset) OVERRIDE {
       last_literal_offset_ = patch.LiteralOffset();
       last_patch_offset_ = patch_offset;
       last_target_offset_ = target_offset;
@@ -277,7 +277,7 @@
   uint32_t method2_target_offset = 0xccccu;
   LinkerPatch method2_patch =
       LinkerPatch::DexCacheArrayPatch(method2_literal_offset, nullptr, 0u, 1234u);
-  patcher_.PatchDexCacheReference(
+  patcher_.PatchPcRelativeReference(
       &code, method2_patch, method2_patch_offset, method2_target_offset);
   DCHECK_EQ(method2_literal_offset, mock_->last_literal_offset_);
   DCHECK_EQ(method2_patch_offset + adjustment1, mock_->last_patch_offset_);
diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc
index 6727c17..3a22983 100644
--- a/compiler/linker/relative_patcher.cc
+++ b/compiler/linker/relative_patcher.cc
@@ -62,10 +62,10 @@
       LOG(FATAL) << "Unexpected relative call patch.";
     }
 
-    virtual void PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
-                                        const LinkerPatch& patch ATTRIBUTE_UNUSED,
-                                        uint32_t patch_offset ATTRIBUTE_UNUSED,
-                                        uint32_t target_offset ATTRIBUTE_UNUSED) {
+    void PatchPcRelativeReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                  const LinkerPatch& patch ATTRIBUTE_UNUSED,
+                                  uint32_t patch_offset ATTRIBUTE_UNUSED,
+                                  uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE {
       LOG(FATAL) << "Unexpected relative dex cache array patch.";
     }
 
diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h
index ba37451..a22b9f2 100644
--- a/compiler/linker/relative_patcher.h
+++ b/compiler/linker/relative_patcher.h
@@ -104,10 +104,10 @@
                          uint32_t target_offset) = 0;
 
   // Patch a reference to a dex cache location.
-  virtual void PatchDexCacheReference(std::vector<uint8_t>* code,
-                                      const LinkerPatch& patch,
-                                      uint32_t patch_offset,
-                                      uint32_t target_offset) = 0;
+  virtual void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                        const LinkerPatch& patch,
+                                        uint32_t patch_offset,
+                                        uint32_t target_offset) = 0;
 
  protected:
   RelativePatcher()
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index 704135a..ec69107 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -51,6 +51,7 @@
                 instruction_set,
                 /* instruction_set_features*/ nullptr,
                 /* boot_image */ false,
+                /* app_image */ false,
                 /* image_classes */ nullptr,
                 /* compiled_classes */ nullptr,
                 /* compiled_methods */ nullptr,
@@ -85,9 +86,15 @@
                          const ArrayRef<const LinkerPatch>& patches) {
     compiled_method_refs_.push_back(method_ref);
     compiled_methods_.emplace_back(new CompiledMethod(
-        &driver_, instruction_set_, code,
-        0u, 0u, 0u, ArrayRef<const SrcMapElem>(), ArrayRef<const uint8_t>(),
-        ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
+        &driver_,
+        instruction_set_,
+        code,
+        /* frame_size_in_bytes */ 0u,
+        /* core_spill_mask */ 0u,
+        /* fp_spill_mask */ 0u,
+        /* src_mapping_table */ ArrayRef<const SrcMapElem>(),
+        /* vmap_table */ ArrayRef<const uint8_t>(),
+        /* cfi_info */ ArrayRef<const uint8_t>(),
         patches));
   }
 
@@ -142,20 +149,27 @@
         patched_code_.assign(code.begin(), code.end());
         code = ArrayRef<const uint8_t>(patched_code_);
         for (const LinkerPatch& patch : compiled_method->GetPatches()) {
-          if (patch.Type() == kLinkerPatchCallRelative) {
+          if (patch.GetType() == LinkerPatch::Type::kCallRelative) {
             auto result = method_offset_map_.FindMethodOffset(patch.TargetMethod());
             uint32_t target_offset =
                 result.first ? result.second : kTrampolineOffset + compiled_method->CodeDelta();
             patcher_->PatchCall(&patched_code_, patch.LiteralOffset(),
                                 offset + patch.LiteralOffset(), target_offset);
-          } else if (patch.Type() == kLinkerPatchDexCacheArray) {
+          } else if (patch.GetType() == LinkerPatch::Type::kDexCacheArray) {
             uint32_t target_offset = dex_cache_arrays_begin_ + patch.TargetDexCacheElementOffset();
-            patcher_->PatchDexCacheReference(&patched_code_,
-                                             patch,
-                                             offset + patch.LiteralOffset(),
-                                             target_offset);
+            patcher_->PatchPcRelativeReference(&patched_code_,
+                                               patch,
+                                               offset + patch.LiteralOffset(),
+                                               target_offset);
+          } else if (patch.GetType() == LinkerPatch::Type::kStringRelative) {
+            uint32_t target_offset = string_index_to_offset_map_.Get(patch.TargetStringIndex());
+            patcher_->PatchPcRelativeReference(&patched_code_,
+                                               patch,
+                                               offset + patch.LiteralOffset(),
+                                               target_offset);
           } else {
-            LOG(FATAL) << "Bad patch type.";
+            LOG(FATAL) << "Bad patch type. " << patch.GetType();
+            UNREACHABLE();
           }
         }
       }
@@ -257,6 +271,7 @@
   MethodOffsetMap method_offset_map_;
   std::unique_ptr<RelativePatcher> patcher_;
   uint32_t dex_cache_arrays_begin_;
+  SafeMap<uint32_t, uint32_t> string_index_to_offset_map_;
   std::vector<MethodReference> compiled_method_refs_;
   std::vector<std::unique_ptr<CompiledMethod>> compiled_methods_;
   std::vector<uint8_t> patched_code_;
diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc
index 24b1481..768d31a 100644
--- a/compiler/linker/x86/relative_patcher_x86.cc
+++ b/compiler/linker/x86/relative_patcher_x86.cc
@@ -21,10 +21,10 @@
 namespace art {
 namespace linker {
 
-void X86RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
-                                                const LinkerPatch& patch,
-                                                uint32_t patch_offset,
-                                                uint32_t target_offset) {
+void X86RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                                  const LinkerPatch& patch,
+                                                  uint32_t patch_offset,
+                                                  uint32_t target_offset) {
   uint32_t anchor_literal_offset = patch.PcInsnOffset();
   uint32_t literal_offset = patch.LiteralOffset();
 
diff --git a/compiler/linker/x86/relative_patcher_x86.h b/compiler/linker/x86/relative_patcher_x86.h
index ddc244c..fbf9ad4 100644
--- a/compiler/linker/x86/relative_patcher_x86.h
+++ b/compiler/linker/x86/relative_patcher_x86.h
@@ -26,10 +26,10 @@
  public:
   X86RelativePatcher() { }
 
-  void PatchDexCacheReference(std::vector<uint8_t>* code,
-                              const LinkerPatch& patch,
-                              uint32_t patch_offset,
-                              uint32_t target_offset) OVERRIDE;
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) OVERRIDE;
 };
 
 }  // namespace linker
diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc
index 7acc330..2a44b79 100644
--- a/compiler/linker/x86/relative_patcher_x86_test.cc
+++ b/compiler/linker/x86/relative_patcher_x86_test.cc
@@ -70,15 +70,19 @@
   uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */);
   static const uint8_t method1_expected_code[] = {
       0xe8,
-      static_cast<uint8_t>(diff_after), static_cast<uint8_t>(diff_after >> 8),
-      static_cast<uint8_t>(diff_after >> 16), static_cast<uint8_t>(diff_after >> 24)
+      static_cast<uint8_t>(diff_after),
+      static_cast<uint8_t>(diff_after >> 8),
+      static_cast<uint8_t>(diff_after >> 16),
+      static_cast<uint8_t>(diff_after >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code)));
   uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */);
   static const uint8_t method2_expected_code[] = {
       0xe8,
-      static_cast<uint8_t>(diff_before), static_cast<uint8_t>(diff_before >> 8),
-      static_cast<uint8_t>(diff_before >> 16), static_cast<uint8_t>(diff_before >> 24)
+      static_cast<uint8_t>(diff_before),
+      static_cast<uint8_t>(diff_before >> 8),
+      static_cast<uint8_t>(diff_before >> 16),
+      static_cast<uint8_t>(diff_before >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code)));
 }
@@ -95,8 +99,10 @@
   uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size());
   static const uint8_t expected_code[] = {
       0xe8,
-      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
-      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+      static_cast<uint8_t>(diff),
+      static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16),
+      static_cast<uint8_t>(diff >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
@@ -125,8 +131,42 @@
       0xe8, 0x00, 0x00, 0x00, 0x00,         // call +0
       0x5b,                                 // pop ebx
       0x8b, 0x83,                           // mov eax, [ebx + diff]
-      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
-      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+      static_cast<uint8_t>(diff),
+      static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16),
+      static_cast<uint8_t>(diff >> 24)
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(X86RelativePatcherTest, StringReference) {
+  constexpr uint32_t kStringIndex = 1u;
+  constexpr uint32_t kStringOffset = 0x12345678;
+  string_index_to_offset_map_.Put(kStringIndex, kStringOffset);
+  static const uint8_t raw_code[] = {
+      0xe8, 0x00, 0x00, 0x00, 0x00,         // call +0
+      0x5b,                                 // pop ebx
+      0x8d, 0x83, 0x00, 0x01, 0x00, 0x00,   // lea eax, [ebx + 256 (kDummy32BitValue)]
+  };
+  constexpr uint32_t anchor_offset = 5u;  // After call +0.
+  ArrayRef<const uint8_t> code(raw_code);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(code.size() - 4u, nullptr, anchor_offset, kStringIndex),
+  };
+  AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+  uint32_t diff = kStringOffset - (result.second + anchor_offset);
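+  // The "call +0; pop ebx" sequence leaves the address of the pop (the
+  // anchor) in ebx, so the patched displacement is string minus anchor.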
+  static const uint8_t expected_code[] = {
+      0xe8, 0x00, 0x00, 0x00, 0x00,         // call +0
+      0x5b,                                 // pop ebx
+      0x8d, 0x83,                           // lea eax, [ebx + diff]
+      static_cast<uint8_t>(diff),
+      static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16),
+      static_cast<uint8_t>(diff >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc
index e571f50..2ff6930 100644
--- a/compiler/linker/x86_64/relative_patcher_x86_64.cc
+++ b/compiler/linker/x86_64/relative_patcher_x86_64.cc
@@ -21,10 +21,10 @@
 namespace art {
 namespace linker {
 
-void X86_64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
-                                                   const LinkerPatch& patch,
-                                                   uint32_t patch_offset,
-                                                   uint32_t target_offset) {
+void X86_64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                                     const LinkerPatch& patch,
+                                                     uint32_t patch_offset,
+                                                     uint32_t target_offset) {
   DCHECK_LE(patch.LiteralOffset() + 4u, code->size());
   // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
   uint32_t displacement = target_offset - patch_offset;
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.h b/compiler/linker/x86_64/relative_patcher_x86_64.h
index feecb3a..11bb6d5 100644
--- a/compiler/linker/x86_64/relative_patcher_x86_64.h
+++ b/compiler/linker/x86_64/relative_patcher_x86_64.h
@@ -26,10 +26,10 @@
  public:
   X86_64RelativePatcher() { }
 
-  void PatchDexCacheReference(std::vector<uint8_t>* code,
-                              const LinkerPatch& patch,
-                              uint32_t patch_offset,
-                              uint32_t target_offset) OVERRIDE;
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) OVERRIDE;
 };
 
 }  // namespace linker
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
index 36e0f01..2b46453 100644
--- a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
+++ b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
@@ -29,6 +29,8 @@
   static const ArrayRef<const uint8_t> kCallCode;
   static const uint8_t kDexCacheLoadRawCode[];
   static const ArrayRef<const uint8_t> kDexCacheLoadCode;
+  static const uint8_t kStringReferenceRawCode[];
+  static const ArrayRef<const uint8_t> kStringReferenceCode;
 
   uint32_t GetMethodOffset(uint32_t method_idx) {
     auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
@@ -51,6 +53,14 @@
 const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kDexCacheLoadCode(
     kDexCacheLoadRawCode);
 
+const uint8_t X86_64RelativePatcherTest::kStringReferenceRawCode[] = {
+    0x8d, 0x05,  // lea eax, [rip + <offset>]
+    0x00, 0x01, 0x00, 0x00
+};
+
+const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kStringReferenceCode(
+    kStringReferenceRawCode);
+
 TEST_F(X86_64RelativePatcherTest, CallSelf) {
   LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u),
@@ -80,15 +90,19 @@
   uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */);
   static const uint8_t method1_expected_code[] = {
       0xe8,
-      static_cast<uint8_t>(diff_after), static_cast<uint8_t>(diff_after >> 8),
-      static_cast<uint8_t>(diff_after >> 16), static_cast<uint8_t>(diff_after >> 24)
+      static_cast<uint8_t>(diff_after),
+      static_cast<uint8_t>(diff_after >> 8),
+      static_cast<uint8_t>(diff_after >> 16),
+      static_cast<uint8_t>(diff_after >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code)));
   uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */);
   static const uint8_t method2_expected_code[] = {
       0xe8,
-      static_cast<uint8_t>(diff_before), static_cast<uint8_t>(diff_before >> 8),
-      static_cast<uint8_t>(diff_before >> 16), static_cast<uint8_t>(diff_before >> 24)
+      static_cast<uint8_t>(diff_before),
+      static_cast<uint8_t>(diff_before >> 8),
+      static_cast<uint8_t>(diff_before >> 16),
+      static_cast<uint8_t>(diff_before >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code)));
 }
@@ -105,8 +119,10 @@
   uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size());
   static const uint8_t expected_code[] = {
       0xe8,
-      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
-      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+      static_cast<uint8_t>(diff),
+      static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16),
+      static_cast<uint8_t>(diff >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
@@ -126,8 +142,34 @@
       dex_cache_arrays_begin_ + kElementOffset - (result.second + kDexCacheLoadCode.size());
   static const uint8_t expected_code[] = {
       0x8b, 0x05,
-      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
-      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+      static_cast<uint8_t>(diff),
+      static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16),
+      static_cast<uint8_t>(diff >> 24)
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(X86_64RelativePatcherTest, StringReference) {
+  constexpr uint32_t kStringIndex = 1u;
+  constexpr uint32_t kStringOffset = 0x12345678;
+  string_index_to_offset_map_.Put(kStringIndex, kStringOffset);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(
+          kStringReferenceCode.size() - 4u, nullptr, 0u, kStringIndex),
+  };
+  AddCompiledMethod(MethodRef(1u), kStringReferenceCode, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+  uint32_t diff = kStringOffset - (result.second + kStringReferenceCode.size());
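+  // RIP-relative addressing is relative to the end of the instruction; the
+  // lea is the entire method here, hence subtracting the full code size.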
+  static const uint8_t expected_code[] = {
+      0x8d, 0x05,
+      static_cast<uint8_t>(diff),
+      static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16),
+      static_cast<uint8_t>(diff >> 24)
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index eaf0e17..5b19284 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -112,6 +112,7 @@
                                               insn_set,
                                               insn_features_.get(),
                                               /* boot_image */ false,
+                                              /* app_image */ false,
                                               /* image_classes */ nullptr,
                                               /* compiled_classes */ nullptr,
                                               /* compiled_methods */ nullptr,
@@ -442,7 +443,7 @@
   // it is time to update OatHeader::kOatVersion
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
-  EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
+  EXPECT_EQ(20U, sizeof(OatQuickMethodHeader));
   EXPECT_EQ(132 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index c2f19c9..8da9f06 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -275,9 +275,7 @@
     size_code_alignment_(0),
     size_relative_call_thunks_(0),
     size_misc_thunks_(0),
-    size_mapping_table_(0),
     size_vmap_table_(0),
-    size_gc_map_(0),
     size_oat_dex_file_location_size_(0),
     size_oat_dex_file_location_data_(0),
     size_oat_dex_file_location_checksum_(0),
@@ -498,72 +496,6 @@
 OatWriter::~OatWriter() {
 }
 
-struct OatWriter::GcMapDataAccess {
-  static ArrayRef<const uint8_t> GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
-    return compiled_method->GetGcMap();
-  }
-
-  static uint32_t GetOffset(OatClass* oat_class, size_t method_offsets_index) ALWAYS_INLINE {
-    uint32_t offset = oat_class->method_headers_[method_offsets_index].gc_map_offset_;
-    return offset == 0u ? 0u :
-        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
-  }
-
-  static void SetOffset(OatClass* oat_class, size_t method_offsets_index, uint32_t offset)
-      ALWAYS_INLINE {
-    oat_class->method_headers_[method_offsets_index].gc_map_offset_ =
-        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
-  }
-
-  static const char* Name() {
-    return "GC map";
-  }
-};
-
-struct OatWriter::MappingTableDataAccess {
-  static ArrayRef<const uint8_t> GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
-    return compiled_method->GetMappingTable();
-  }
-
-  static uint32_t GetOffset(OatClass* oat_class, size_t method_offsets_index) ALWAYS_INLINE {
-    uint32_t offset = oat_class->method_headers_[method_offsets_index].mapping_table_offset_;
-    return offset == 0u ? 0u :
-        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
-  }
-
-  static void SetOffset(OatClass* oat_class, size_t method_offsets_index, uint32_t offset)
-      ALWAYS_INLINE {
-    oat_class->method_headers_[method_offsets_index].mapping_table_offset_ =
-        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
-  }
-
-  static const char* Name() {
-    return "mapping table";
-  }
-};
-
-struct OatWriter::VmapTableDataAccess {
-  static ArrayRef<const uint8_t> GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
-    return compiled_method->GetVmapTable();
-  }
-
-  static uint32_t GetOffset(OatClass* oat_class, size_t method_offsets_index) ALWAYS_INLINE {
-    uint32_t offset = oat_class->method_headers_[method_offsets_index].vmap_table_offset_;
-    return offset == 0u ? 0u :
-        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
-  }
-
-  static void SetOffset(OatClass* oat_class, size_t method_offsets_index, uint32_t offset)
-      ALWAYS_INLINE {
-    oat_class->method_headers_[method_offsets_index].vmap_table_offset_ =
-        (oat_class->method_offsets_[method_offsets_index].code_offset_ & ~1) - offset;
-  }
-
-  static const char* Name() {
-    return "vmap table";
-  }
-};
-
 class OatWriter::DexMethodVisitor {
  public:
   DexMethodVisitor(OatWriter* writer, size_t offset)
@@ -726,26 +658,24 @@
       uint32_t thumb_offset = compiled_method->CodeDelta();
 
       // Deduplicate code arrays if we are not producing debuggable code.
-      bool deduped = false;
+      bool deduped = true;
       MethodReference method_ref(dex_file_, it.GetMemberIndex());
       if (debuggable_) {
         quick_code_offset = writer_->relative_patcher_->GetOffset(method_ref);
         if (quick_code_offset != 0u) {
           // Duplicate methods, we want the same code for both of them so that the oat writer puts
           // the same code in both ArtMethods so that we do not get different oat code at runtime.
-          deduped = true;
         } else {
           quick_code_offset = NewQuickCodeOffset(compiled_method, it, thumb_offset);
+          deduped = false;
         }
       } else {
-        auto lb = dedupe_map_.lower_bound(compiled_method);
-        if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(compiled_method, lb->first)) {
-          quick_code_offset = lb->second;
-          deduped = true;
-        } else {
-          quick_code_offset = NewQuickCodeOffset(compiled_method, it, thumb_offset);
-          dedupe_map_.PutBefore(lb, compiled_method, quick_code_offset);
-        }
+        quick_code_offset = dedupe_map_.GetOrCreate(
+            compiled_method,
+            [this, &deduped, compiled_method, &it, thumb_offset]() {
+              deduped = false;
+              return NewQuickCodeOffset(compiled_method, it, thumb_offset);
+            });
       }
 
       if (code_size != 0) {
@@ -763,33 +693,25 @@
       // Update quick method header.
       DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size());
       OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
-      uint32_t mapping_table_offset = method_header->mapping_table_offset_;
       uint32_t vmap_table_offset = method_header->vmap_table_offset_;
       // If we don't have quick code, then we must have a vmap, as that is how the dex2dex
       // compiler records its transformations.
       DCHECK(!quick_code.empty() || vmap_table_offset != 0);
-      uint32_t gc_map_offset = method_header->gc_map_offset_;
       // The code offset was 0 when the mapping/vmap table offset was set, so it's set
       // to 0-offset and we need to adjust it by code_offset.
       uint32_t code_offset = quick_code_offset - thumb_offset;
-      if (mapping_table_offset != 0u && code_offset != 0u) {
-        mapping_table_offset += code_offset;
-        DCHECK_LT(mapping_table_offset, code_offset) << "Overflow in oat offsets";
-      }
       if (vmap_table_offset != 0u && code_offset != 0u) {
         vmap_table_offset += code_offset;
         DCHECK_LT(vmap_table_offset, code_offset) << "Overflow in oat offsets";
       }
-      if (gc_map_offset != 0u && code_offset != 0u) {
-        gc_map_offset += code_offset;
-        DCHECK_LT(gc_map_offset, code_offset) << "Overflow in oat offsets";
-      }
       uint32_t frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
       uint32_t core_spill_mask = compiled_method->GetCoreSpillMask();
       uint32_t fp_spill_mask = compiled_method->GetFpSpillMask();
-      *method_header = OatQuickMethodHeader(mapping_table_offset, vmap_table_offset,
-                                            gc_map_offset, frame_size_in_bytes, core_spill_mask,
-                                            fp_spill_mask, code_size);
+      *method_header = OatQuickMethodHeader(vmap_table_offset,
+                                            frame_size_in_bytes,
+                                            core_spill_mask,
+                                            fp_spill_mask,
+                                            code_size);
 
       if (!deduped) {
         // Update offsets. (Checksum is updated when writing.)
@@ -831,30 +753,6 @@
         writer_->method_info_.push_back(info);
       }
 
-      if (kIsDebugBuild) {
-        // We expect GC maps except when the class hasn't been verified or the method is native.
-        const CompilerDriver* compiler_driver = writer_->compiler_driver_;
-        ClassReference class_ref(dex_file_, class_def_index_);
-        CompiledClass* compiled_class = compiler_driver->GetCompiledClass(class_ref);
-        mirror::Class::Status status;
-        if (compiled_class != nullptr) {
-          status = compiled_class->GetStatus();
-        } else if (compiler_driver->GetVerificationResults()->IsClassRejected(class_ref)) {
-          status = mirror::Class::kStatusError;
-        } else {
-          status = mirror::Class::kStatusNotReady;
-        }
-        ArrayRef<const uint8_t> gc_map = compiled_method->GetGcMap();
-        if (!gc_map.empty()) {
-          size_t gc_map_size = gc_map.size() * sizeof(gc_map[0]);
-          bool is_native = it.MemberIsNative();
-          CHECK(gc_map_size != 0 || is_native || status < mirror::Class::kStatusVerified)
-              << gc_map_size << " " << (is_native ? "true" : "false") << " "
-              << (status < mirror::Class::kStatusVerified) << " " << status << " "
-              << PrettyMethod(it.GetMemberIndex(), *dex_file_);
-        }
-      }
-
       DCHECK_LT(method_offsets_index_, oat_class->method_offsets_.size());
       OatMethodOffsets* offsets = &oat_class->method_offsets_[method_offsets_index_];
       offsets->code_offset_ = quick_code_offset;
@@ -872,15 +770,9 @@
         return lhs->GetQuickCode().data() < rhs->GetQuickCode().data();
       }
       // If the code is the same, all other fields are likely to be the same as well.
-      if (UNLIKELY(lhs->GetMappingTable().data() != rhs->GetMappingTable().data())) {
-        return lhs->GetMappingTable().data() < rhs->GetMappingTable().data();
-      }
       if (UNLIKELY(lhs->GetVmapTable().data() != rhs->GetVmapTable().data())) {
         return lhs->GetVmapTable().data() < rhs->GetVmapTable().data();
       }
-      if (UNLIKELY(lhs->GetGcMap().data() != rhs->GetGcMap().data())) {
-        return lhs->GetGcMap().data() < rhs->GetGcMap().data();
-      }
       if (UNLIKELY(lhs->GetPatches().data() != rhs->GetPatches().data())) {
         return lhs->GetPatches().data() < rhs->GetPatches().data();
       }
@@ -907,7 +799,6 @@
   const bool debuggable_;
 };
 
-template <typename DataAccess>
 class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor {
  public:
   InitMapMethodVisitor(OatWriter* writer, size_t offset)
@@ -921,19 +812,21 @@
 
     if (compiled_method != nullptr) {
       DCHECK_LT(method_offsets_index_, oat_class->method_offsets_.size());
-      DCHECK_EQ(DataAccess::GetOffset(oat_class, method_offsets_index_), 0u);
+      DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].vmap_table_offset_, 0u);
 
-      ArrayRef<const uint8_t> map = DataAccess::GetData(compiled_method);
+      ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
       uint32_t map_size = map.size() * sizeof(map[0]);
       if (map_size != 0u) {
-        auto lb = dedupe_map_.lower_bound(map.data());
-        if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(map.data(), lb->first)) {
-          DataAccess::SetOffset(oat_class, method_offsets_index_, lb->second);
-        } else {
-          DataAccess::SetOffset(oat_class, method_offsets_index_, offset_);
-          dedupe_map_.PutBefore(lb, map.data(), offset_);
-          offset_ += map_size;
-        }
+        size_t offset = dedupe_map_.GetOrCreate(
+            map.data(),
+            [this, map_size]() {
+              uint32_t new_offset = offset_;
+              offset_ += map_size;
+              return new_offset;
+            });
+        // Code offset is not initialized yet, so store the negated map
+        // offset (0u - offset) for now; it is adjusted once code offsets
+        // are assigned.
+        DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u);
+        oat_class->method_headers_[method_offsets_index_].vmap_table_offset_ = 0u - offset;
       }
       ++method_offsets_index_;
     }
@@ -1046,6 +939,7 @@
     OatDexMethodVisitor::StartClass(dex_file, class_def_index);
     if (dex_cache_ == nullptr || dex_cache_->GetDexFile() != dex_file) {
       dex_cache_ = class_linker_->FindDexCache(Thread::Current(), *dex_file);
+      DCHECK(dex_cache_ != nullptr);
     }
     return true;
   }
@@ -1115,28 +1009,56 @@
           quick_code = ArrayRef<const uint8_t>(patched_code_);
           for (const LinkerPatch& patch : compiled_method->GetPatches()) {
             uint32_t literal_offset = patch.LiteralOffset();
-            if (patch.Type() == kLinkerPatchCallRelative) {
-              // NOTE: Relative calls across oat files are not supported.
-              uint32_t target_offset = GetTargetOffset(patch);
-              writer_->relative_patcher_->PatchCall(&patched_code_,
-                                                    literal_offset,
-                                                    offset_ + literal_offset,
-                                                    target_offset);
-            } else if (patch.Type() == kLinkerPatchDexCacheArray) {
-              uint32_t target_offset = GetDexCacheOffset(patch);
-              writer_->relative_patcher_->PatchDexCacheReference(&patched_code_,
-                                                                 patch,
-                                                                 offset_ + literal_offset,
-                                                                 target_offset);
-            } else if (patch.Type() == kLinkerPatchCall) {
-              uint32_t target_offset = GetTargetOffset(patch);
-              PatchCodeAddress(&patched_code_, literal_offset, target_offset);
-            } else if (patch.Type() == kLinkerPatchMethod) {
-              ArtMethod* method = GetTargetMethod(patch);
-              PatchMethodAddress(&patched_code_, literal_offset, method);
-            } else if (patch.Type() == kLinkerPatchType) {
-              mirror::Class* type = GetTargetType(patch);
-              PatchObjectAddress(&patched_code_, literal_offset, type);
+            switch (patch.GetType()) {
+              case LinkerPatch::Type::kCallRelative: {
+                // NOTE: Relative calls across oat files are not supported.
+                uint32_t target_offset = GetTargetOffset(patch);
+                writer_->relative_patcher_->PatchCall(&patched_code_,
+                                                      literal_offset,
+                                                      offset_ + literal_offset,
+                                                      target_offset);
+                break;
+              }
+              case LinkerPatch::Type::kDexCacheArray: {
+                uint32_t target_offset = GetDexCacheOffset(patch);
+                writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                     patch,
+                                                                     offset_ + literal_offset,
+                                                                     target_offset);
+                break;
+              }
+              case LinkerPatch::Type::kStringRelative: {
+                uint32_t target_offset = GetTargetObjectOffset(GetTargetString(patch));
+                writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                     patch,
+                                                                     offset_ + literal_offset,
+                                                                     target_offset);
+                break;
+              }
+              case LinkerPatch::Type::kCall: {
+                uint32_t target_offset = GetTargetOffset(patch);
+                PatchCodeAddress(&patched_code_, literal_offset, target_offset);
+                break;
+              }
+              case LinkerPatch::Type::kMethod: {
+                ArtMethod* method = GetTargetMethod(patch);
+                PatchMethodAddress(&patched_code_, literal_offset, method);
+                break;
+              }
+              case LinkerPatch::Type::kString: {
+                mirror::String* string = GetTargetString(patch);
+                PatchObjectAddress(&patched_code_, literal_offset, string);
+                break;
+              }
+              case LinkerPatch::Type::kType: {
+                mirror::Class* type = GetTargetType(patch);
+                PatchObjectAddress(&patched_code_, literal_offset, type);
+                break;
+              }
+              default: {
+                DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kRecordPosition);
+                break;
+              }
             }
           }
         }
@@ -1205,15 +1127,29 @@
     return target_offset;
   }
 
-  mirror::Class* GetTargetType(const LinkerPatch& patch)
+  mirror::DexCache* GetDexCache(const DexFile* target_dex_file)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    mirror::DexCache* dex_cache = (dex_file_ == patch.TargetTypeDexFile())
-        ? dex_cache_ : class_linker_->FindDexCache(Thread::Current(), *patch.TargetTypeDexFile());
+    return (target_dex_file == dex_file_)
+        ? dex_cache_
+        : class_linker_->FindDexCache(Thread::Current(), *target_dex_file);
+  }
+
+  mirror::Class* GetTargetType(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
+    mirror::DexCache* dex_cache = GetDexCache(patch.TargetTypeDexFile());
     mirror::Class* type = dex_cache->GetResolvedType(patch.TargetTypeIndex());
     CHECK(type != nullptr);
     return type;
   }
 
+  mirror::String* GetTargetString(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
+    mirror::DexCache* dex_cache = GetDexCache(patch.TargetStringDexFile());
+    mirror::String* string = dex_cache->GetResolvedString(patch.TargetStringIndex());
+    DCHECK(string != nullptr);
+    DCHECK(writer_->HasBootImage() ||
+           Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(string));
+    return string;
+  }
+
   uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
     if (writer_->HasBootImage()) {
       uintptr_t element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<uintptr_t>(
@@ -1227,6 +1163,15 @@
     }
   }
 
+  uint32_t GetTargetObjectOffset(mirror::Object* object) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(writer_->HasBootImage());
+    object = writer_->image_writer_->GetImageAddress(object);
+    size_t oat_index = writer_->image_writer_->GetOatIndexForDexFile(dex_file_);
+    uintptr_t oat_data_begin = writer_->image_writer_->GetOatDataBegin(oat_index);
+    // TODO: Clean up offset types. The target offset must be treated as signed.
+    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(object) - oat_data_begin);
+  }
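For intuition, a minimal sketch of the arithmetic in GetTargetObjectOffset, using made-up addresses (the real values come from the image writer):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Hypothetical numbers, for illustration only.
      uintptr_t object_in_image = 0x71001234;  // image address of the target object
      uintptr_t oat_data_begin  = 0x70800000;  // start of this oat file's data section
      // The stored value is the object's address relative to oat data begin,
      // so the runtime can rebase it by adding the loaded oat data address.
      uint32_t target_offset =
          static_cast<uint32_t>(object_in_image - oat_data_begin);
      assert(target_offset == 0x00801234u);
      return 0;
    }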
+
   void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     if (writer_->HasBootImage()) {
@@ -1296,10 +1241,11 @@
   }
 };
 
-template <typename DataAccess>
 class OatWriter::WriteMapMethodVisitor : public OatDexMethodVisitor {
  public:
-  WriteMapMethodVisitor(OatWriter* writer, OutputStream* out, const size_t file_offset,
+  WriteMapMethodVisitor(OatWriter* writer,
+                        OutputStream* out,
+                        const size_t file_offset,
                         size_t relative_offset)
     : OatDexMethodVisitor(writer, relative_offset),
       out_(out),
@@ -1314,22 +1260,31 @@
       size_t file_offset = file_offset_;
       OutputStream* out = out_;
 
-      uint32_t map_offset = DataAccess::GetOffset(oat_class, method_offsets_index_);
+      uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].vmap_table_offset_;
+      uint32_t code_offset = oat_class->method_offsets_[method_offsets_index_].code_offset_;
       ++method_offsets_index_;
 
-      // Write deduplicated map.
-      ArrayRef<const uint8_t> map = DataAccess::GetData(compiled_method);
-      size_t map_size = map.size() * sizeof(map[0]);
-      DCHECK((map_size == 0u && map_offset == 0u) ||
-            (map_size != 0u && map_offset != 0u && map_offset <= offset_))
-          << map_size << " " << map_offset << " " << offset_ << " "
-          << PrettyMethod(it.GetMemberIndex(), *dex_file_) << " for " << DataAccess::Name();
-      if (map_size != 0u && map_offset == offset_) {
-        if (UNLIKELY(!writer_->WriteData(out, map.data(), map_size))) {
-          ReportWriteFailure(it);
-          return false;
+      DCHECK((compiled_method->GetVmapTable().size() == 0u && map_offset == 0u) ||
+             (compiled_method->GetVmapTable().size() != 0u && map_offset != 0u))
+          << compiled_method->GetVmapTable().size() << " " << map_offset << " "
+          << PrettyMethod(it.GetMemberIndex(), *dex_file_);
+
+      if (map_offset != 0u) {
+        // Transform map_offset to actual oat data offset.
+        map_offset = (code_offset - compiled_method->CodeDelta()) - map_offset;
+        DCHECK_NE(map_offset, 0u);
+        DCHECK_LE(map_offset, offset_) << PrettyMethod(it.GetMemberIndex(), *dex_file_);
+
+        ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
+        size_t map_size = map.size() * sizeof(map[0]);
+        if (map_offset == offset_) {
+          // Write deduplicated map (code info for Optimizing or transformation info for dex2dex).
+          if (UNLIKELY(!writer_->WriteData(out, map.data(), map_size))) {
+            ReportWriteFailure(it);
+            return false;
+          }
+          offset_ += map_size;
         }
-        offset_ += map_size;
       }
       DCHECK_OFFSET_();
     }
@@ -1342,7 +1297,7 @@
   size_t const file_offset_;
 
   void ReportWriteFailure(const ClassDataItemIterator& it) {
-    PLOG(ERROR) << "Failed to write " << DataAccess::Name() << " for "
+    PLOG(ERROR) << "Failed to write map for "
         << PrettyMethod(it.GetMemberIndex(), *dex_file_) << " to " << out_->GetLocation();
   }
 };
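The map_offset transformation above can be restated standalone; a hedged sketch with hypothetical numbers (the method header records the vmap table as a delta from the CodeDelta-adjusted code start, and the writer recovers the absolute oat-data offset):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Hypothetical values, for illustration only.
      uint32_t code_offset = 0x4000;  // oat-data offset of the method's code
      uint32_t code_delta  = 0x1;     // e.g. the Thumb-2 bit on ARM
      uint32_t stored      = 0x0fff;  // vmap_table_offset_ from the method header
      // Recover the absolute oat-data offset of the vmap table.
      uint32_t map_offset = (code_offset - code_delta) - stored;
      assert(map_offset == 0x3000u);
      return 0;
    }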
@@ -1435,19 +1390,10 @@
 }
 
 size_t OatWriter::InitOatMaps(size_t offset) {
-  #define VISIT(VisitorType)                          \
-    do {                                              \
-      VisitorType visitor(this, offset);              \
-      bool success = VisitDexMethods(&visitor);       \
-      DCHECK(success);                                \
-      offset = visitor.GetOffset();                   \
-    } while (false)
-
-  VISIT(InitMapMethodVisitor<GcMapDataAccess>);
-  VISIT(InitMapMethodVisitor<MappingTableDataAccess>);
-  VISIT(InitMapMethodVisitor<VmapTableDataAccess>);
-
-  #undef VISIT
+  InitMapMethodVisitor visitor(this, offset);
+  bool success = VisitDexMethods(&visitor);
+  DCHECK(success);
+  offset = visitor.GetOffset();
 
   return offset;
 }
@@ -1467,7 +1413,7 @@
       offset = CompiledCode::AlignCode(offset, instruction_set); \
       adjusted_offset = offset + CompiledCode::CodeDelta(instruction_set); \
       oat_header_->Set ## fn_name ## Offset(adjusted_offset); \
-      field.reset(compiler_driver_->Create ## fn_name()); \
+      field = compiler_driver_->Create ## fn_name(); \
       offset += field->size();
 
     DO_TRAMPOLINE(jni_dlsym_lookup_, JniDlsymLookup);
@@ -1601,9 +1547,7 @@
     DO_STAT(size_code_alignment_);
     DO_STAT(size_relative_call_thunks_);
     DO_STAT(size_misc_thunks_);
-    DO_STAT(size_mapping_table_);
     DO_STAT(size_vmap_table_);
-    DO_STAT(size_gc_map_);
     DO_STAT(size_oat_dex_file_location_size_);
     DO_STAT(size_oat_dex_file_location_data_);
     DO_STAT(size_oat_dex_file_location_checksum_);
@@ -1718,29 +1662,14 @@
 }
 
 size_t OatWriter::WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset) {
-  #define VISIT(VisitorType)                                              \
-    do {                                                                  \
-      VisitorType visitor(this, out, file_offset, relative_offset);       \
-      if (UNLIKELY(!VisitDexMethods(&visitor))) {                         \
-        return 0;                                                         \
-      }                                                                   \
-      relative_offset = visitor.GetOffset();                              \
-    } while (false)
-
-  size_t gc_maps_offset = relative_offset;
-  VISIT(WriteMapMethodVisitor<GcMapDataAccess>);
-  size_gc_map_ = relative_offset - gc_maps_offset;
-
-  size_t mapping_tables_offset = relative_offset;
-  VISIT(WriteMapMethodVisitor<MappingTableDataAccess>);
-  size_mapping_table_ = relative_offset - mapping_tables_offset;
-
   size_t vmap_tables_offset = relative_offset;
-  VISIT(WriteMapMethodVisitor<VmapTableDataAccess>);
+  WriteMapMethodVisitor visitor(this, out, file_offset, relative_offset);
+  if (UNLIKELY(!VisitDexMethods(&visitor))) {
+    return 0;
+  }
+  relative_offset = visitor.GetOffset();
   size_vmap_table_ = relative_offset - vmap_tables_offset;
 
-  #undef VISIT
-
   return relative_offset;
 }
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5e7a4a3..3862798 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -219,13 +219,6 @@
   class OatClass;
   class OatDexFile;
 
-  // The DataAccess classes are helper classes that provide access to members related to
-  // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away
-  // we can share a lot of code for processing the maps with template classes below.
-  struct GcMapDataAccess;
-  struct MappingTableDataAccess;
-  struct VmapTableDataAccess;
-
   // The function VisitDexMethods() below iterates through all the methods in all
   // the compiled dex files in order of their definitions. The method visitor
   // classes provide individual bits of processing for each of the passes we need to
@@ -235,11 +228,9 @@
   class OatDexMethodVisitor;
   class InitOatClassesMethodVisitor;
   class InitCodeMethodVisitor;
-  template <typename DataAccess>
   class InitMapMethodVisitor;
   class InitImageMethodVisitor;
   class WriteCodeMethodVisitor;
-  template <typename DataAccess>
   class WriteMapMethodVisitor;
 
   // Visit all the methods in all the compiled dex files in their definition order
@@ -354,9 +345,7 @@
   uint32_t size_code_alignment_;
   uint32_t size_relative_call_thunks_;
   uint32_t size_misc_thunks_;
-  uint32_t size_mapping_table_;
   uint32_t size_vmap_table_;
-  uint32_t size_gc_map_;
   uint32_t size_oat_dex_file_location_size_;
   uint32_t size_oat_dex_file_location_data_;
   uint32_t size_oat_dex_file_location_checksum_;
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
new file mode 100644
index 0000000..5e70a82
--- /dev/null
+++ b/compiler/optimizing/block_builder.cc
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "block_builder.h"
+
+#include "bytecode_utils.h"
+
+namespace art {
+
+HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t dex_pc) {
+  return MaybeCreateBlockAt(dex_pc, dex_pc);
+}
+
+HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t semantic_dex_pc,
+                                                    uint32_t store_dex_pc) {
+  HBasicBlock* block = branch_targets_[store_dex_pc];
+  if (block == nullptr) {
+    block = new (arena_) HBasicBlock(graph_, semantic_dex_pc);
+    branch_targets_[store_dex_pc] = block;
+  }
+  DCHECK_EQ(block->GetDexPc(), semantic_dex_pc);
+  return block;
+}
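The two-argument overload is easiest to see with a toy version; a hedged sketch (hypothetical types, not ART code) of the get-or-create pattern and the store-key/semantic-dex_pc split:

    #include <cassert>
    #include <map>

    struct Block { unsigned semantic_dex_pc; };

    // Blocks are keyed by `store_dex_pc` but stamped with `semantic_dex_pc`.
    std::map<unsigned, Block> targets;

    Block* MaybeCreateBlockAt(unsigned semantic_dex_pc, unsigned store_dex_pc) {
      auto inserted = targets.emplace(store_dex_pc, Block{semantic_dex_pc});
      assert(inserted.first->second.semantic_dex_pc == semantic_dex_pc);
      return &inserted.first->second;
    }

    int main() {
      // Switch at dex_pc 0x10 with payload at 0x40: decision blocks are keyed
      // inside the payload so each key is unique, yet all report dex_pc 0x10.
      Block* b = MaybeCreateBlockAt(/*semantic*/ 0x10, /*store*/ 0x41);
      assert(MaybeCreateBlockAt(0x10, 0x41) == b);  // second call reuses it
      return 0;
    }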
+
+bool HBasicBlockBuilder::CreateBranchTargets() {
+  // Create the first block for the dex instructions, single successor of the entry block.
+  MaybeCreateBlockAt(0u);
+
+  if (code_item_.tries_size_ != 0) {
+    // Create branch targets at the start/end of the TryItem range. These are
+    // places where the program might fall through into/out of a block and
+    // where TryBoundary instructions will be inserted later. Other edges which
+    // enter/exit the try blocks are a result of branches/switches.
+    for (size_t idx = 0; idx < code_item_.tries_size_; ++idx) {
+      const DexFile::TryItem* try_item = DexFile::GetTryItems(code_item_, idx);
+      uint32_t dex_pc_start = try_item->start_addr_;
+      uint32_t dex_pc_end = dex_pc_start + try_item->insn_count_;
+      MaybeCreateBlockAt(dex_pc_start);
+      if (dex_pc_end < code_item_.insns_size_in_code_units_) {
+        // TODO: Do not create block if the last instruction cannot fall through.
+        MaybeCreateBlockAt(dex_pc_end);
+      } else if (dex_pc_end == code_item_.insns_size_in_code_units_) {
+        // The TryItem spans until the very end of the CodeItem and therefore
+        // cannot have any code afterwards.
+      } else {
+        // The TryItem spans beyond the end of the CodeItem. This is invalid code.
+        return false;
+      }
+    }
+
+    // Create branch targets for exception handlers.
+    const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item_, 0);
+    uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
+    for (uint32_t idx = 0; idx < handlers_size; ++idx) {
+      CatchHandlerIterator iterator(handlers_ptr);
+      for (; iterator.HasNext(); iterator.Next()) {
+        MaybeCreateBlockAt(iterator.GetHandlerAddress());
+      }
+      handlers_ptr = iterator.EndDataPointer();
+    }
+  }
+
+  // Iterate over all instructions and find branching instructions. Create blocks for
+  // the locations these instructions branch to.
+  for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
+    uint32_t dex_pc = it.CurrentDexPc();
+    const Instruction& instruction = it.CurrentInstruction();
+
+    if (instruction.IsBranch()) {
+      number_of_branches_++;
+      MaybeCreateBlockAt(dex_pc + instruction.GetTargetOffset());
+    } else if (instruction.IsSwitch()) {
+      DexSwitchTable table(instruction, dex_pc);
+      for (DexSwitchTableIterator s_it(table); !s_it.Done(); s_it.Advance()) {
+        MaybeCreateBlockAt(dex_pc + s_it.CurrentTargetOffset());
+
+        // Create N-1 blocks where we will insert comparisons of the input value
+        // against the Switch's case keys.
+        if (table.ShouldBuildDecisionTree() && !s_it.IsLast()) {
+          // Store the block under the dex_pc of the current key in the switch
+          // data for uniqueness, but give it the dex_pc of the SWITCH
+          // instruction to which it semantically belongs.
+          MaybeCreateBlockAt(dex_pc, s_it.GetDexPcForCurrentIndex());
+        }
+      }
+    } else if (instruction.Opcode() == Instruction::MOVE_EXCEPTION) {
+      // End the basic block after MOVE_EXCEPTION. This simplifies the later
+      // stage of TryBoundary-block insertion.
+    } else {
+      continue;
+    }
+
+    if (instruction.CanFlowThrough()) {
+      if (it.IsLast()) {
+        // In the normal case we should never hit this, but someone can artificially forge a dex
+        // file to fall through out of the method code. In this case we bail out of compilation.
+        return false;
+      } else {
+        MaybeCreateBlockAt(dex_pc + it.CurrentInstruction().SizeInCodeUnits());
+      }
+    }
+  }
+
+  return true;
+}
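As a hypothetical worked example (made-up bytecode and dex_pcs) of what this pass records:

    0x00: if-eqz v0, +3    // branch target: block at 0x03; flows through: block at 0x02
    0x02: const/4 v0, #1   // not a branch: records no new block start
    0x03: return-void

together with the mandatory block at dex_pc 0, this yields block starts at 0x00, 0x02 and 0x03, and number_of_branches_ == 1.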
+
+void HBasicBlockBuilder::ConnectBasicBlocks() {
+  HBasicBlock* block = graph_->GetEntryBlock();
+  graph_->AddBlock(block);
+
+  bool is_throwing_block = false;
+  for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
+    uint32_t dex_pc = it.CurrentDexPc();
+
+    // Check if this dex_pc address starts a new basic block.
+    HBasicBlock* next_block = GetBlockAt(dex_pc);
+    if (next_block != nullptr) {
+      if (block != nullptr) {
+        // Last instruction did not end its basic block but a new one starts here.
+        // It must have been a block falling through into the next one.
+        block->AddSuccessor(next_block);
+      }
+      block = next_block;
+      is_throwing_block = false;
+      graph_->AddBlock(block);
+    }
+
+    if (block == nullptr) {
+      // Ignore dead code.
+      continue;
+    }
+
+    const Instruction& instruction = it.CurrentInstruction();
+
+    if (!is_throwing_block && IsThrowingDexInstruction(instruction)) {
+      DCHECK(!ContainsElement(throwing_blocks_, block));
+      is_throwing_block = true;
+      throwing_blocks_.push_back(block);
+    }
+
+    if (instruction.IsBranch()) {
+      uint32_t target_dex_pc = dex_pc + instruction.GetTargetOffset();
+      block->AddSuccessor(GetBlockAt(target_dex_pc));
+    } else if (instruction.IsReturn() || (instruction.Opcode() == Instruction::THROW)) {
+      block->AddSuccessor(graph_->GetExitBlock());
+    } else if (instruction.IsSwitch()) {
+      DexSwitchTable table(instruction, dex_pc);
+      for (DexSwitchTableIterator s_it(table); !s_it.Done(); s_it.Advance()) {
+        uint32_t target_dex_pc = dex_pc + s_it.CurrentTargetOffset();
+        block->AddSuccessor(GetBlockAt(target_dex_pc));
+
+        if (table.ShouldBuildDecisionTree() && !s_it.IsLast()) {
+          uint32_t next_case_dex_pc = s_it.GetDexPcForCurrentIndex();
+          HBasicBlock* next_case_block = GetBlockAt(next_case_dex_pc);
+          block->AddSuccessor(next_case_block);
+          block = next_case_block;
+          graph_->AddBlock(block);
+        }
+      }
+    } else {
+      // Remaining code only applies to instructions which end their basic block.
+      continue;
+    }
+
+    if (instruction.CanFlowThrough()) {
+      uint32_t next_dex_pc = dex_pc + instruction.SizeInCodeUnits();
+      block->AddSuccessor(GetBlockAt(next_dex_pc));
+    }
+
+    // The basic block ends here. Do not add any more instructions.
+    block = nullptr;
+  }
+
+  graph_->AddBlock(graph_->GetExitBlock());
+}
+
+// Returns the TryItem stored for `block` or nullptr if there is no info for it.
+static const DexFile::TryItem* GetTryItem(
+    HBasicBlock* block,
+    const ArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) {
+  auto iterator = try_block_info.find(block->GetBlockId());
+  return (iterator == try_block_info.end()) ? nullptr : iterator->second;
+}
+
+// Iterates over the exception handlers of `try_item`, finds the corresponding
+// catch blocks and makes them successors of `try_boundary`. The order of
+// successors matches the order in which runtime exception delivery searches
+// for a handler.
+static void LinkToCatchBlocks(HTryBoundary* try_boundary,
+                              const DexFile::CodeItem& code_item,
+                              const DexFile::TryItem* try_item,
+                              const ArenaSafeMap<uint32_t, HBasicBlock*>& catch_blocks) {
+  for (CatchHandlerIterator it(code_item, *try_item); it.HasNext(); it.Next()) {
+    try_boundary->AddExceptionHandler(catch_blocks.Get(it.GetHandlerAddress()));
+  }
+}
+
+bool HBasicBlockBuilder::MightHaveLiveNormalPredecessors(HBasicBlock* catch_block) {
+  if (kIsDebugBuild) {
+    DCHECK_NE(catch_block->GetDexPc(), kNoDexPc) << "Should not be called on synthetic blocks";
+    DCHECK(!graph_->GetEntryBlock()->GetSuccessors().empty())
+        << "Basic blocks must have been created and connected";
+    for (HBasicBlock* predecessor : catch_block->GetPredecessors()) {
+      DCHECK(!predecessor->IsSingleTryBoundary())
+          << "TryBoundary blocks must not have not been created yet";
+    }
+  }
+
+  const Instruction& first = GetDexInstructionAt(code_item_, catch_block->GetDexPc());
+  if (first.Opcode() == Instruction::MOVE_EXCEPTION) {
+    // Verifier guarantees that if a catch block begins with MOVE_EXCEPTION then
+    // it has no live normal predecessors.
+    return false;
+  } else if (catch_block->GetPredecessors().empty()) {
+    // Normal control-flow edges have already been created. Since block's list of
+    // predecessors is empty, it cannot have any live or dead normal predecessors.
+    return false;
+  }
+
+  // The catch block has normal predecessors but we do not know which are live
+  // and which will be removed during the initial DCE. Return `true` to signal
+  // that it may have live normal predecessors.
+  return true;
+}
+
+void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
+  if (code_item_.tries_size_ == 0) {
+    return;
+  }
+
+  // Keep a map of all try blocks and their respective TryItems. We do not use
+  // the block's pointer but rather its id to ensure deterministic iteration.
+  ArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info(
+      std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder));
+
+  // Obtain TryItem information for blocks with throwing instructions, and split
+  // blocks which are both try & catch to simplify the graph.
+  for (HBasicBlock* block : graph_->GetBlocks()) {
+    if (block->GetDexPc() == kNoDexPc) {
+      continue;
+    }
+
+    // Do not bother creating exceptional edges for try blocks which have no
+    // throwing instructions. In that case we simply assume that the block is
+    // not covered by a TryItem. This prevents us from creating a throw-catch
+    // loop for synchronized blocks.
+    if (ContainsElement(throwing_blocks_, block)) {
+      // Try to find a TryItem covering the block.
+      const int32_t try_item_idx = DexFile::FindTryItem(code_item_, block->GetDexPc());
+      if (try_item_idx != -1) {
+        // Block throwing and in a TryItem. Store the try block information.
+        try_block_info.Put(block->GetBlockId(), DexFile::GetTryItems(code_item_, try_item_idx));
+      }
+    }
+  }
+
+  // Map from a handler dex_pc to the corresponding catch block.
+  ArenaSafeMap<uint32_t, HBasicBlock*> catch_blocks(
+      std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder));
+
+  // Iterate over catch blocks, create artificial landing pads if necessary to
+  // simplify the CFG, and set metadata.
+  const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item_, 0);
+  uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
+  for (uint32_t idx = 0; idx < handlers_size; ++idx) {
+    CatchHandlerIterator iterator(handlers_ptr);
+    for (; iterator.HasNext(); iterator.Next()) {
+      uint32_t address = iterator.GetHandlerAddress();
+      if (catch_blocks.find(address) != catch_blocks.end()) {
+        // Catch block already processed.
+        continue;
+      }
+
+      // Check if we should create an artificial landing pad for the catch block.
+      // We create one if the catch block is also a try block because we do not
+      // have a strategy for inserting TryBoundaries on exceptional edges.
+      // We also create one if the block might have normal predecessors so as to
+      // simplify register allocation.
+      HBasicBlock* catch_block = GetBlockAt(address);
+      bool is_try_block = (try_block_info.find(catch_block->GetBlockId()) != try_block_info.end());
+      if (is_try_block || MightHaveLiveNormalPredecessors(catch_block)) {
+        HBasicBlock* new_catch_block = new (arena_) HBasicBlock(graph_, address);
+        new_catch_block->AddInstruction(new (arena_) HGoto(address));
+        new_catch_block->AddSuccessor(catch_block);
+        graph_->AddBlock(new_catch_block);
+        catch_block = new_catch_block;
+      }
+
+      catch_blocks.Put(address, catch_block);
+      catch_block->SetTryCatchInformation(
+        new (arena_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_));
+    }
+    handlers_ptr = iterator.EndDataPointer();
+  }
+
+  // Do a pass over the try blocks and insert entering TryBoundaries where at
+  // least one predecessor is not covered by the same TryItem as the try block.
+  // We do not split each edge separately, but rather create one boundary block
+  // that all predecessors are relinked to. This preserves loop headers (b/23895756).
+  for (auto entry : try_block_info) {
+    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+    for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
+      if (GetTryItem(predecessor, try_block_info) != entry.second) {
+        // Found a predecessor not covered by the same TryItem. Insert entering
+        // boundary block.
+        HTryBoundary* try_entry =
+            new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc());
+        try_block->CreateImmediateDominator()->AddInstruction(try_entry);
+        LinkToCatchBlocks(try_entry, code_item_, entry.second, catch_blocks);
+        break;
+      }
+    }
+  }
+
+  // Do a second pass over the try blocks and insert exit TryBoundaries where
+  // the successor is not in the same TryItem.
+  for (auto entry : try_block_info) {
+    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+    // NOTE: Do not use iterators because SplitEdge would invalidate them.
+    for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
+      HBasicBlock* successor = try_block->GetSuccessors()[i];
+
+      // If the successor is a try block, all of its predecessors must be
+      // covered by the same TryItem. Otherwise the previous pass would have
+      // created a non-throwing boundary block.
+      if (GetTryItem(successor, try_block_info) != nullptr) {
+        DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info));
+        continue;
+      }
+
+      // Insert TryBoundary and link to catch blocks.
+      HTryBoundary* try_exit =
+          new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc());
+      graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
+      LinkToCatchBlocks(try_exit, code_item_, entry.second, catch_blocks);
+    }
+  }
+}
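For intuition, a hypothetical before/after of the two passes on a try block B2 with one predecessor B1 and one successor B3, neither covered by the same TryItem:

    before:  B1 --> B2 (try) --> B3
    after:   B1 --> [TryBoundary kEntry] --> B2 (try) --> [TryBoundary kExit] --> B3

with both boundary blocks additionally linked to B2's catch handlers in runtime lookup order.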
+
+bool HBasicBlockBuilder::Build() {
+  DCHECK(graph_->GetBlocks().empty());
+
+  graph_->SetEntryBlock(new (arena_) HBasicBlock(graph_, kNoDexPc));
+  graph_->SetExitBlock(new (arena_) HBasicBlock(graph_, kNoDexPc));
+
+  // TODO(dbrazdil): Do CreateBranchTargets and ConnectBasicBlocks in one pass.
+  if (!CreateBranchTargets()) {
+    return false;
+  }
+
+  ConnectBasicBlocks();
+  InsertTryBoundaryBlocks();
+
+  return true;
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h
new file mode 100644
index 0000000..1be0b4c
--- /dev/null
+++ b/compiler/optimizing/block_builder.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
+#define ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
+
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "dex_file.h"
+#include "nodes.h"
+
+namespace art {
+
+class HBasicBlockBuilder : public ValueObject {
+ public:
+  HBasicBlockBuilder(HGraph* graph,
+                     const DexFile* const dex_file,
+                     const DexFile::CodeItem& code_item)
+      : arena_(graph->GetArena()),
+        graph_(graph),
+        dex_file_(dex_file),
+        code_item_(code_item),
+        branch_targets_(code_item.insns_size_in_code_units_,
+                        nullptr,
+                        arena_->Adapter(kArenaAllocGraphBuilder)),
+        throwing_blocks_(kDefaultNumberOfThrowingBlocks, arena_->Adapter(kArenaAllocGraphBuilder)),
+        number_of_branches_(0u) {}
+
+  // Creates basic blocks in `graph_` at branch target dex_pc positions of the
+  // `code_item_`. Blocks are connected but left unpopulated with instructions.
+  // TryBoundary blocks are inserted at positions where control-flow enters/
+  // exits a try block.
+  bool Build();
+
+  size_t GetNumberOfBranches() const { return number_of_branches_; }
+  HBasicBlock* GetBlockAt(uint32_t dex_pc) const { return branch_targets_[dex_pc]; }
+
+ private:
+  // Returns the basic block starting at the given `dex_pc`, creating it if needed.
+  HBasicBlock* MaybeCreateBlockAt(uint32_t dex_pc);
+
+  // Creates a basic block for bytecode instructions at `semantic_dex_pc` and
+  // stores it under the `store_dex_pc` key. This is used when multiple blocks
+  // share the same semantic dex_pc, e.g. when building switch decision trees.
+  HBasicBlock* MaybeCreateBlockAt(uint32_t semantic_dex_pc, uint32_t store_dex_pc);
+
+  bool CreateBranchTargets();
+  void ConnectBasicBlocks();
+  void InsertTryBoundaryBlocks();
+
+  // Helper method which decides whether `catch_block` may have live normal
+  // predecessors and thus whether a synthetic catch block needs to be created
+  // to avoid mixing normal and exceptional predecessors.
+  // Should only be called during InsertTryBoundaryBlocks on blocks at catch
+  // handler dex_pcs.
+  bool MightHaveLiveNormalPredecessors(HBasicBlock* catch_block);
+
+  ArenaAllocator* const arena_;
+  HGraph* const graph_;
+
+  const DexFile* const dex_file_;
+  const DexFile::CodeItem& code_item_;
+
+  ArenaVector<HBasicBlock*> branch_targets_;
+  ArenaVector<HBasicBlock*> throwing_blocks_;
+  size_t number_of_branches_;
+
+  static constexpr size_t kDefaultNumberOfThrowingBlocks = 2u;
+
+  DISALLOW_COPY_AND_ASSIGN(HBasicBlockBuilder);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
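A hedged usage sketch of the new builder, mirroring how BuildGraph() drives it below (fragment only: the graph, dex file and code item setup are ART-internal and assumed to exist):

    HBasicBlockBuilder builder(graph, &dex_file, code_item);
    if (!builder.Build()) {
      // Malformed bytecode, e.g. control flow falling off the end of the method.
      return kAnalysisInvalidBytecode;
    }
    size_t branches = builder.GetNumberOfBranches();    // feeds SkipCompilation()
    HBasicBlock* first_block = builder.GetBlockAt(0u);  // block at dex_pc 0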
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 084360f..6c6e5af 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -552,7 +552,11 @@
     DCHECK(!IsAddedBlock(block));
     first_index_bounds_check_map_.clear();
     HGraphVisitor::VisitBasicBlock(block);
-    AddComparesWithDeoptimization(block);
+    // We should never deoptimize from an osr method, otherwise we might wrongly optimize
+    // code dominated by the deoptimization.
+    if (!GetGraph()->IsCompilingOsr()) {
+      AddComparesWithDeoptimization(block);
+    }
   }
 
   void Finish() {
@@ -796,6 +800,27 @@
             ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper);
         ApplyRangeFromComparison(left, block, false_successor, new_range);
       }
+    } else if (cond == kCondNE || cond == kCondEQ) {
+      if (left->IsArrayLength() && lower.IsConstant() && upper.IsConstant()) {
+        // Special case:
+        //   length == [c,d] yields [c, d] along true
+        //   length != [c,d] yields [c, d] along false
+        if (!lower.Equals(ValueBound::Min()) || !upper.Equals(ValueBound::Max())) {
+          ValueRange* new_range = new (GetGraph()->GetArena())
+              ValueRange(GetGraph()->GetArena(), lower, upper);
+          ApplyRangeFromComparison(
+              left, block, cond == kCondEQ ? true_successor : false_successor, new_range);
+        }
+        // In addition:
+        //   length == 0 yields [1, max] along false
+        //   length != 0 yields [1, max] along true
+        if (lower.GetConstant() == 0 && upper.GetConstant() == 0) {
+          ValueRange* new_range = new (GetGraph()->GetArena())
+              ValueRange(GetGraph()->GetArena(), ValueBound(nullptr, 1), ValueBound::Max());
+          ApplyRangeFromComparison(
+              left, block, cond == kCondEQ ? false_successor : true_successor, new_range);
+        }
+      }
     }
   }
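Restating the new equality cases as a standalone sketch, simplified to a single constant (hypothetical helpers, not ART code): the true edge of `length == c` narrows length to [c, c], and for the zero constant the false edge additionally learns [1, max]:

    #include <cassert>
    #include <climits>

    struct Range { int lo, hi; };

    // Hypothetical helpers mirroring the interval reasoning above.
    Range OnEqualConstantTrueEdge(int c) { return {c, c}; }
    Range OnEqualZeroFalseEdge() { return {1, INT_MAX}; }

    int main() {
      // if (length == 0) { ... } else { /* here length is in [1, INT_MAX] */ }
      Range nonzero = OnEqualZeroFalseEdge();
      assert(nonzero.lo == 1 && nonzero.hi == INT_MAX);
      // if (length == 3) { /* here length is in [3, 3] */ }
      Range three = OnEqualConstantTrueEdge(3);
      assert(three.lo == 3 && three.hi == 3);
      return 0;
    }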
 
@@ -951,13 +976,7 @@
   void VisitIf(HIf* instruction) OVERRIDE {
     if (instruction->InputAt(0)->IsCondition()) {
       HCondition* cond = instruction->InputAt(0)->AsCondition();
-      IfCondition cmp = cond->GetCondition();
-      if (cmp == kCondGT || cmp == kCondGE ||
-          cmp == kCondLT || cmp == kCondLE) {
-        HInstruction* left = cond->GetLeft();
-        HInstruction* right = cond->GetRight();
-        HandleIf(instruction, left, right, cmp);
-      }
+      HandleIf(instruction, cond->GetLeft(), cond->GetRight(), cond->GetCondition());
     }
   }
 
@@ -1206,9 +1225,9 @@
           GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
       ArenaVector<HBoundsCheck*> standby(
           GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
-      for (HUseIterator<HInstruction*> it2(array_length->GetUses()); !it2.Done(); it2.Advance()) {
+      for (const HUseListNode<HInstruction*>& use : array_length->GetUses()) {
         // Another bounds check in same or dominated block?
-        HInstruction* user = it2.Current()->GetUser();
+        HInstruction* user = use.GetUser();
         HBasicBlock* other_block = user->GetBlock();
         if (user->IsBoundsCheck() && block->Dominates(other_block)) {
           HBoundsCheck* other_bounds_check = user->AsBoundsCheck();
@@ -1358,6 +1377,11 @@
       if (loop->IsIrreducible()) {
         return false;
       }
+      // We should never deoptimize from an osr method, otherwise we might wrongly optimize
+      // code dominated by the deoptimization.
+      if (GetGraph()->IsCompilingOsr()) {
+        return false;
+      }
       // A try boundary preheader is hard to handle.
       // TODO: remove this restriction.
       if (loop->GetPreHeader()->GetLastInstruction()->IsTryBoundary()) {
@@ -1635,29 +1659,33 @@
         Primitive::Type type = instruction->GetType();
         HPhi* phi = nullptr;
         // Scan all uses of an instruction and replace each later use with a phi node.
-        for (HUseIterator<HInstruction*> it2(instruction->GetUses());
-             !it2.Done();
-             it2.Advance()) {
-          HInstruction* user = it2.Current()->GetUser();
+        const HUseList<HInstruction*>& uses = instruction->GetUses();
+        for (auto it2 = uses.begin(), end2 = uses.end(); it2 != end2; /* ++it2 below */) {
+          HInstruction* user = it2->GetUser();
+          size_t index = it2->GetIndex();
+          // Increment `it2` now because `*it2` may disappear thanks to user->ReplaceInput().
+          ++it2;
           if (user->GetBlock() != true_block) {
             if (phi == nullptr) {
               phi = NewPhi(new_preheader, instruction, type);
             }
-            user->ReplaceInput(phi, it2.Current()->GetIndex());
+            user->ReplaceInput(phi, index);  // Removes the use node from the list.
           }
         }
         // Scan all environment uses of an instruction and replace each later use with a phi node.
-        for (HUseIterator<HEnvironment*> it2(instruction->GetEnvUses());
-             !it2.Done();
-             it2.Advance()) {
-          HEnvironment* user = it2.Current()->GetUser();
+        const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
+        for (auto it2 = env_uses.begin(), end2 = env_uses.end(); it2 != end2; /* ++it2 below */) {
+          HEnvironment* user = it2->GetUser();
+          size_t index = it2->GetIndex();
+          // Increment `it2` now because `*it2` may disappear thanks to user->RemoveAsUserOfInput().
+          ++it2;
           if (user->GetHolder()->GetBlock() != true_block) {
             if (phi == nullptr) {
               phi = NewPhi(new_preheader, instruction, type);
             }
-            user->RemoveAsUserOfInput(it2.Current()->GetIndex());
-            user->SetRawEnvAt(it2.Current()->GetIndex(), phi);
-            phi->AddEnvUseAt(user, it2.Current()->GetIndex());
+            user->RemoveAsUserOfInput(index);
+            user->SetRawEnvAt(index, phi);
+            phi->AddEnvUseAt(user, index);
           }
         }
       }
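The iterator discipline in this hunk (advance before mutating) generalizes beyond HUseList; a minimal standalone sketch with std::list standing in for the intrusive use list:

    #include <cassert>
    #include <list>

    int main() {
      std::list<int> uses = {1, 2, 3, 4};
      for (auto it = uses.begin(); it != uses.end(); ) {
        auto cur = it++;  // advance first: erasing *cur must not invalidate `it`
        if (*cur % 2 == 0) {
          uses.erase(cur);  // analogous to ReplaceInput() unlinking the use node
        }
      }
      assert(uses.size() == 2);  // 1 and 3 remain
      return 0;
    }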
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 1b62531..86742e6 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -20,148 +20,49 @@
 #include "base/arena_bit_vector.h"
 #include "base/bit_vector-inl.h"
 #include "base/logging.h"
-#include "class_linker.h"
 #include "dex/verified_method.h"
-#include "dex_file-inl.h"
-#include "dex_instruction-inl.h"
-#include "dex/verified_method.h"
-#include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "nodes.h"
 #include "primitive.h"
-#include "scoped_thread_state_change.h"
-#include "ssa_builder.h"
 #include "thread.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 
 namespace art {
 
-void HGraphBuilder::InitializeLocals(uint16_t count) {
-  graph_->SetNumberOfVRegs(count);
-  locals_.resize(count);
-  for (int i = 0; i < count; i++) {
-    HLocal* local = new (arena_) HLocal(i);
-    entry_block_->AddInstruction(local);
-    locals_[i] = local;
-  }
-}
-
-void HGraphBuilder::InitializeParameters(uint16_t number_of_parameters) {
-  // dex_compilation_unit_ is null only when unit testing.
-  if (dex_compilation_unit_ == nullptr) {
-    return;
-  }
-
-  graph_->SetNumberOfInVRegs(number_of_parameters);
-  const char* shorty = dex_compilation_unit_->GetShorty();
-  int locals_index = locals_.size() - number_of_parameters;
-  int parameter_index = 0;
-
-  const DexFile::MethodId& referrer_method_id =
-      dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
-  if (!dex_compilation_unit_->IsStatic()) {
-    // Add the implicit 'this' argument, not expressed in the signature.
-    HParameterValue* parameter = new (arena_) HParameterValue(*dex_file_,
-                                                              referrer_method_id.class_idx_,
-                                                              parameter_index++,
-                                                              Primitive::kPrimNot,
-                                                              true);
-    entry_block_->AddInstruction(parameter);
-    HLocal* local = GetLocalAt(locals_index++);
-    entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter, local->GetDexPc()));
-    number_of_parameters--;
-  }
-
-  const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
-  const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto);
-  for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) {
-    HParameterValue* parameter = new (arena_) HParameterValue(
-        *dex_file_,
-        arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
-        parameter_index++,
-        Primitive::GetType(shorty[shorty_pos]),
-        false);
-    ++shorty_pos;
-    entry_block_->AddInstruction(parameter);
-    HLocal* local = GetLocalAt(locals_index++);
-    // Store the parameter value in the local that the dex code will use
-    // to reference that parameter.
-    entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter, local->GetDexPc()));
-    bool is_wide = (parameter->GetType() == Primitive::kPrimLong)
-        || (parameter->GetType() == Primitive::kPrimDouble);
-    if (is_wide) {
-      i++;
-      locals_index++;
-      parameter_index++;
-    }
-  }
-}
-
-template<typename T>
-void HGraphBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) {
-  int32_t target_offset = instruction.GetTargetOffset();
-  HBasicBlock* branch_target = FindBlockStartingAt(dex_pc + target_offset);
-  HBasicBlock* fallthrough_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
-  DCHECK(branch_target != nullptr);
-  DCHECK(fallthrough_target != nullptr);
-  HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  T* comparison = new (arena_) T(first, second, dex_pc);
-  current_block_->AddInstruction(comparison);
-  HInstruction* ifinst = new (arena_) HIf(comparison, dex_pc);
-  current_block_->AddInstruction(ifinst);
-  current_block_->AddSuccessor(branch_target);
-  current_block_->AddSuccessor(fallthrough_target);
-  current_block_ = nullptr;
-}
-
-template<typename T>
-void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
-  int32_t target_offset = instruction.GetTargetOffset();
-  HBasicBlock* branch_target = FindBlockStartingAt(dex_pc + target_offset);
-  HBasicBlock* fallthrough_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
-  DCHECK(branch_target != nullptr);
-  DCHECK(fallthrough_target != nullptr);
-  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-  T* comparison = new (arena_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc);
-  current_block_->AddInstruction(comparison);
-  HInstruction* ifinst = new (arena_) HIf(comparison, dex_pc);
-  current_block_->AddInstruction(ifinst);
-  current_block_->AddSuccessor(branch_target);
-  current_block_->AddSuccessor(fallthrough_target);
-  current_block_ = nullptr;
-}
-
 void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
   if (compilation_stats_ != nullptr) {
     compilation_stats_->RecordStat(compilation_stat);
   }
 }
 
-bool HGraphBuilder::SkipCompilation(const DexFile::CodeItem& code_item,
-                                    size_t number_of_branches) {
+bool HGraphBuilder::SkipCompilation(size_t number_of_branches) {
+  if (compiler_driver_ == nullptr) {
+    // Note that the compiler driver is null when unit testing.
+    return false;
+  }
+
   const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
   CompilerFilter::Filter compiler_filter = compiler_options.GetCompilerFilter();
   if (compiler_filter == CompilerFilter::kEverything) {
     return false;
   }
 
-  if (compiler_options.IsHugeMethod(code_item.insns_size_in_code_units_)) {
+  if (compiler_options.IsHugeMethod(code_item_.insns_size_in_code_units_)) {
     VLOG(compiler) << "Skip compilation of huge method "
                    << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                   << ": " << code_item.insns_size_in_code_units_ << " code units";
+                   << ": " << code_item_.insns_size_in_code_units_ << " code units";
     MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod);
     return true;
   }
 
   // If it's large and contains no branches, it's likely to be machine generated initialization.
-  if (compiler_options.IsLargeMethod(code_item.insns_size_in_code_units_)
+  if (compiler_options.IsLargeMethod(code_item_.insns_size_in_code_units_)
       && (number_of_branches == 0)) {
     VLOG(compiler) << "Skip compilation of large method with no branch "
                    << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                   << ": " << code_item.insns_size_in_code_units_ << " code units";
+                   << ": " << code_item_.insns_size_in_code_units_ << " code units";
     MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
     return true;
   }
@@ -169,2711 +70,39 @@
   return false;
 }
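A hedged restatement of the SkipCompilation filter (the real thresholds come from CompilerOptions; the numbers below are hypothetical):

    #include <cassert>
    #include <cstddef>

    // Hypothetical thresholds, for illustration only.
    constexpr size_t kHugeMethodCodeUnits  = 10000;
    constexpr size_t kLargeMethodCodeUnits = 600;

    bool ShouldSkip(size_t code_units, size_t branches) {
      if (code_units > kHugeMethodCodeUnits) return true;  // huge: always skip
      // Large but branch-free code is likely machine-generated initialization.
      if (code_units > kLargeMethodCodeUnits && branches == 0) return true;
      return false;
    }

    int main() {
      assert(ShouldSkip(20000, 5));   // huge method
      assert(ShouldSkip(1000, 0));    // large, no branches
      assert(!ShouldSkip(1000, 3));   // large but branchy: compile it
      return 0;
    }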
 
-void HGraphBuilder::CreateBlocksForTryCatch(const DexFile::CodeItem& code_item) {
-  if (code_item.tries_size_ == 0) {
-    return;
-  }
-
-  // Create branch targets at the start/end of the TryItem range. These are
-  // places where the program might fall through into/out of the a block and
-  // where TryBoundary instructions will be inserted later. Other edges which
-  // enter/exit the try blocks are a result of branches/switches.
-  for (size_t idx = 0; idx < code_item.tries_size_; ++idx) {
-    const DexFile::TryItem* try_item = DexFile::GetTryItems(code_item, idx);
-    uint32_t dex_pc_start = try_item->start_addr_;
-    uint32_t dex_pc_end = dex_pc_start + try_item->insn_count_;
-    FindOrCreateBlockStartingAt(dex_pc_start);
-    if (dex_pc_end < code_item.insns_size_in_code_units_) {
-      // TODO: Do not create block if the last instruction cannot fall through.
-      FindOrCreateBlockStartingAt(dex_pc_end);
-    } else {
-      // The TryItem spans until the very end of the CodeItem (or beyond if
-      // invalid) and therefore cannot have any code afterwards.
-    }
-  }
-
-  // Create branch targets for exception handlers.
-  const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item, 0);
-  uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
-  for (uint32_t idx = 0; idx < handlers_size; ++idx) {
-    CatchHandlerIterator iterator(handlers_ptr);
-    for (; iterator.HasNext(); iterator.Next()) {
-      uint32_t address = iterator.GetHandlerAddress();
-      HBasicBlock* block = FindOrCreateBlockStartingAt(address);
-      block->SetTryCatchInformation(
-        new (arena_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_));
-    }
-    handlers_ptr = iterator.EndDataPointer();
-  }
-}
-
-// Returns the TryItem stored for `block` or nullptr if there is no info for it.
-static const DexFile::TryItem* GetTryItem(
-    HBasicBlock* block,
-    const ArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) {
-  auto iterator = try_block_info.find(block->GetBlockId());
-  return (iterator == try_block_info.end()) ? nullptr : iterator->second;
-}
-
-void HGraphBuilder::LinkToCatchBlocks(HTryBoundary* try_boundary,
-                                      const DexFile::CodeItem& code_item,
-                                      const DexFile::TryItem* try_item) {
-  for (CatchHandlerIterator it(code_item, *try_item); it.HasNext(); it.Next()) {
-    try_boundary->AddExceptionHandler(FindBlockStartingAt(it.GetHandlerAddress()));
-  }
-}
-
-void HGraphBuilder::InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item) {
-  if (code_item.tries_size_ == 0) {
-    return;
-  }
-
-  // Keep a map of all try blocks and their respective TryItems. We do not use
-  // the block's pointer but rather its id to ensure deterministic iteration.
-  ArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info(
-      std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder));
-
-  // Obtain TryItem information for blocks with throwing instructions, and split
-  // blocks which are both try & catch to simplify the graph.
-  // NOTE: We are appending new blocks inside the loop, so we need to use index
-  // because iterators can be invalidated. We remember the initial size to avoid
-  // iterating over the new blocks which cannot throw.
-  for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) {
-    HBasicBlock* block = graph_->GetBlocks()[i];
-
-    // Do not bother creating exceptional edges for try blocks which have no
-    // throwing instructions. In that case we simply assume that the block is
-    // not covered by a TryItem. This prevents us from creating a throw-catch
-    // loop for synchronized blocks.
-    if (block->HasThrowingInstructions()) {
-      // Try to find a TryItem covering the block.
-      DCHECK_NE(block->GetDexPc(), kNoDexPc) << "Block must have a dex_pc to find its TryItem.";
-      const int32_t try_item_idx = DexFile::FindTryItem(code_item, block->GetDexPc());
-      if (try_item_idx != -1) {
-        // Block throwing and in a TryItem. Store the try block information.
-        HBasicBlock* throwing_block = block;
-        if (block->IsCatchBlock()) {
-          // Simplify blocks which are both try and catch, otherwise we would
-          // need a strategy for splitting exceptional edges. We split the block
-          // after the move-exception (if present) and mark the first part not
-          // throwing. The normal-flow edge between them will be split later.
-          throwing_block = block->SplitCatchBlockAfterMoveException();
-          // Move-exception does not throw and the block has throwing insructions
-          // so it must have been possible to split it.
-          DCHECK(throwing_block != nullptr);
-        }
-
-        try_block_info.Put(throwing_block->GetBlockId(),
-                           DexFile::GetTryItems(code_item, try_item_idx));
-      }
-    }
-  }
-
-  // Do a pass over the try blocks and insert entering TryBoundaries where at
-  // least one predecessor is not covered by the same TryItem as the try block.
-  // We do not split each edge separately, but rather create one boundary block
-  // that all predecessors are relinked to. This preserves loop headers (b/23895756).
-  for (auto entry : try_block_info) {
-    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
-    for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
-      if (GetTryItem(predecessor, try_block_info) != entry.second) {
-        // Found a predecessor not covered by the same TryItem. Insert entering
-        // boundary block.
-        HTryBoundary* try_entry =
-            new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc());
-        try_block->CreateImmediateDominator()->AddInstruction(try_entry);
-        LinkToCatchBlocks(try_entry, code_item, entry.second);
-        break;
-      }
-    }
-  }
-
-  // Do a second pass over the try blocks and insert exit TryBoundaries where
-  // the successor is not in the same TryItem.
-  for (auto entry : try_block_info) {
-    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
-    // NOTE: Do not use iterators because SplitEdge would invalidate them.
-    for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
-      HBasicBlock* successor = try_block->GetSuccessors()[i];
-
-      // If the successor is a try block, all of its predecessors must be
-      // covered by the same TryItem. Otherwise the previous pass would have
-      // created a non-throwing boundary block.
-      if (GetTryItem(successor, try_block_info) != nullptr) {
-        DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info));
-        continue;
-      }
-
-      // Preserve the invariant that Return(Void) always jumps to Exit by moving
-      // it outside the try block if necessary.
-      HInstruction* last_instruction = try_block->GetLastInstruction();
-      if (last_instruction->IsReturn() || last_instruction->IsReturnVoid()) {
-        DCHECK_EQ(successor, exit_block_);
-        successor = try_block->SplitBefore(last_instruction);
-      }
-
-      // Insert TryBoundary and link to catch blocks.
-      HTryBoundary* try_exit =
-          new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc());
-      graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
-      LinkToCatchBlocks(try_exit, code_item, entry.second);
-    }
-  }
-}
-
-GraphAnalysisResult HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item,
-                                              StackHandleScopeCollection* handles) {
+GraphAnalysisResult HGraphBuilder::BuildGraph() {
   DCHECK(graph_->GetBlocks().empty());
 
-  const uint16_t* code_ptr = code_item.insns_;
-  const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
-  code_start_ = code_ptr;
+  graph_->SetNumberOfVRegs(code_item_.registers_size_);
+  graph_->SetNumberOfInVRegs(code_item_.ins_size_);
+  graph_->SetMaximumNumberOfOutVRegs(code_item_.outs_size_);
+  graph_->SetHasTryCatch(code_item_.tries_size_ != 0);
 
-  // Setup the graph with the entry block and exit block.
-  entry_block_ = new (arena_) HBasicBlock(graph_, 0);
-  graph_->AddBlock(entry_block_);
-  exit_block_ = new (arena_) HBasicBlock(graph_, kNoDexPc);
-  graph_->SetEntryBlock(entry_block_);
-  graph_->SetExitBlock(exit_block_);
-
-  graph_->SetHasTryCatch(code_item.tries_size_ != 0);
-
-  InitializeLocals(code_item.registers_size_);
-  graph_->SetMaximumNumberOfOutVRegs(code_item.outs_size_);
-
-  // Compute the number of dex instructions, blocks, and branches. We will
-  // check these values against limits given to the compiler.
-  size_t number_of_branches = 0;
-
-  // To avoid splitting blocks, we compute ahead of time the instructions that
-  // start a new block, and create these blocks.
-  if (!ComputeBranchTargets(code_ptr, code_end, &number_of_branches)) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledBranchOutsideMethodCode);
+  // 1) Create basic blocks and link them together. Basic blocks are left
+  //    unpopulated with the exception of synthetic blocks, e.g. HTryBoundaries.
+  if (!block_builder_.Build()) {
     return kAnalysisInvalidBytecode;
   }
 
-  // Note that the compiler driver is null when unit testing.
-  if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) {
-    return kAnalysisInvalidBytecode;
+  // 2) Decide whether to skip this method based on its code size and number
+  //    of branches.
+  if (SkipCompilation(block_builder_.GetNumberOfBranches())) {
+    return kAnalysisSkipped;
   }
 
-  // Find locations where we want to generate extra stackmaps for native debugging.
-  // This allows us to generate the info only at interesting points (for example,
-  // at start of java statement) rather than before every dex instruction.
-  const bool native_debuggable = compiler_driver_ != nullptr &&
-                                 compiler_driver_->GetCompilerOptions().GetNativeDebuggable();
-  ArenaBitVector* native_debug_info_locations;
-  if (native_debuggable) {
-    const uint32_t num_instructions = code_item.insns_size_in_code_units_;
-    native_debug_info_locations =
-        ArenaBitVector::Create(arena_, num_instructions, false, kArenaAllocGraphBuilder);
-    FindNativeDebugInfoLocations(code_item, native_debug_info_locations);
-  }
-
-  CreateBlocksForTryCatch(code_item);
-
-  InitializeParameters(code_item.ins_size_);
-
-  size_t dex_pc = 0;
-  while (code_ptr < code_end) {
-    // Update the current block if dex_pc starts a new block.
-    MaybeUpdateCurrentBlock(dex_pc);
-    const Instruction& instruction = *Instruction::At(code_ptr);
-    if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) {
-      if (current_block_ != nullptr) {
-        current_block_->AddInstruction(new (arena_) HNativeDebugInfo(dex_pc));
-      }
-    }
-    if (!AnalyzeDexInstruction(instruction, dex_pc)) {
-      return kAnalysisInvalidBytecode;
-    }
-    dex_pc += instruction.SizeInCodeUnits();
-    code_ptr += instruction.SizeInCodeUnits();
-  }
-
-  // Add Exit to the exit block.
-  exit_block_->AddInstruction(new (arena_) HExit());
-  // Add the suspend check to the entry block.
-  entry_block_->AddInstruction(new (arena_) HSuspendCheck(0));
-  entry_block_->AddInstruction(new (arena_) HGoto());
-  // Add the exit block at the end.
-  graph_->AddBlock(exit_block_);
-
-  // Iterate over blocks covered by TryItems and insert TryBoundaries at entry
-  // and exit points. This requires all control-flow instructions and
-  // non-exceptional edges to have been created.
-  InsertTryBoundaryBlocks(code_item);
-
+  // 3) Build the dominator tree and fill in loop and try/catch metadata.
   GraphAnalysisResult result = graph_->BuildDominatorTree();
   if (result != kAnalysisSuccess) {
     return result;
   }
 
-  graph_->InitializeInexactObjectRTI(handles);
-  return SsaBuilder(graph_, handles).BuildSsa();
-}
-
-void HGraphBuilder::MaybeUpdateCurrentBlock(size_t dex_pc) {
-  HBasicBlock* block = FindBlockStartingAt(dex_pc);
-  if (block == nullptr) {
-    return;
+  // 4) Populate basic blocks with instructions.
+  if (!instruction_builder_.Build()) {
+    return kAnalysisInvalidBytecode;
   }
 
-  if (current_block_ != nullptr) {
-    // Branching instructions clear current_block, so we know
-    // the last instruction of the current block is not a branching
-    // instruction. We add an unconditional goto to the found block.
-    current_block_->AddInstruction(new (arena_) HGoto(dex_pc));
-    current_block_->AddSuccessor(block);
-  }
-  graph_->AddBlock(block);
-  current_block_ = block;
-}
-
-void HGraphBuilder::FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item,
-                                                 ArenaBitVector* locations) {
-  // The callback gets called when the line number changes.
-  // In other words, it marks the start of new java statement.
-  struct Callback {
-    static bool Position(void* ctx, const DexFile::PositionInfo& entry) {
-      static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_);
-      return false;
-    }
-  };
-  dex_file_->DecodeDebugPositionInfo(&code_item, Callback::Position, locations);
-  // Instruction-specific tweaks.
-  const Instruction* const begin = Instruction::At(code_item.insns_);
-  const Instruction* const end = begin->RelativeAt(code_item.insns_size_in_code_units_);
-  for (const Instruction* inst = begin; inst < end; inst = inst->Next()) {
-    switch (inst->Opcode()) {
-      case Instruction::MOVE_EXCEPTION: {
-        // Stop in native debugger after the exception has been moved.
-        // The compiler also expects the move at the start of basic block so
-        // we do not want to interfere by inserting native-debug-info before it.
-        locations->ClearBit(inst->GetDexPc(code_item.insns_));
-        const Instruction* next = inst->Next();
-        if (next < end) {
-          locations->SetBit(next->GetDexPc(code_item.insns_));
-        }
-        break;
-      }
-      default:
-        break;
-    }
-  }
-}
-
-bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
-                                         const uint16_t* code_end,
-                                         size_t* number_of_branches) {
-  branch_targets_.resize(code_end - code_ptr, nullptr);
-
-  // Create the first block for the dex instructions, single successor of the entry block.
-  HBasicBlock* block = new (arena_) HBasicBlock(graph_, 0);
-  branch_targets_[0] = block;
-  entry_block_->AddSuccessor(block);
-
-  // Iterate over all instructions and find branching instructions. Create blocks for
-  // the locations these instructions branch to.
-  uint32_t dex_pc = 0;
-  while (code_ptr < code_end) {
-    const Instruction& instruction = *Instruction::At(code_ptr);
-    if (instruction.IsBranch()) {
-      (*number_of_branches)++;
-      int32_t target = instruction.GetTargetOffset() + dex_pc;
-      // Create a block for the target instruction.
-      FindOrCreateBlockStartingAt(target);
-
-      dex_pc += instruction.SizeInCodeUnits();
-      code_ptr += instruction.SizeInCodeUnits();
-
-      if (instruction.CanFlowThrough()) {
-        if (code_ptr >= code_end) {
-          // In the normal case we should never hit this, but someone can artificially forge a dex
-          // file to fall through out of the method code. In this case we bail out of compilation.
-          return false;
-        } else {
-          FindOrCreateBlockStartingAt(dex_pc);
-        }
-      }
-    } else if (instruction.IsSwitch()) {
-      SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
-
-      uint16_t num_entries = table.GetNumEntries();
-
-      // In a packed-switch, the entry at index 0 is the starting key. In a sparse-switch, the
-      // entry at index 0 is the first key, and values are after *all* keys.
-      size_t offset = table.GetFirstValueIndex();
-
-      // Use a larger loop counter type to avoid overflow issues.
-      for (size_t i = 0; i < num_entries; ++i) {
-        // The target of the case.
-        uint32_t target = dex_pc + table.GetEntryAt(i + offset);
-        FindOrCreateBlockStartingAt(target);
-
-        // Create a block for the switch-case logic. The block gets the dex_pc
-        // of the SWITCH instruction because it is part of its semantics.
-        block = new (arena_) HBasicBlock(graph_, dex_pc);
-        branch_targets_[table.GetDexPcForIndex(i)] = block;
-      }
-
-      // Fall-through. Add a block if there is more code afterwards.
-      dex_pc += instruction.SizeInCodeUnits();
-      code_ptr += instruction.SizeInCodeUnits();
-      if (code_ptr >= code_end) {
-        // In the normal case we should never hit this, but someone can artificially forge a dex
-        // file to fall through out of the method code. In this case we bail out of compilation.
-        // (A switch can fall through, so we don't need to check CanFlowThrough().)
-        return false;
-      } else {
-        FindOrCreateBlockStartingAt(dex_pc);
-      }
-    } else {
-      code_ptr += instruction.SizeInCodeUnits();
-      dex_pc += instruction.SizeInCodeUnits();
-    }
-  }
-  return true;
-}
-
-HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t dex_pc) const {
-  DCHECK_GE(dex_pc, 0);
-  return branch_targets_[dex_pc];
-}
-
-HBasicBlock* HGraphBuilder::FindOrCreateBlockStartingAt(int32_t dex_pc) {
-  HBasicBlock* block = FindBlockStartingAt(dex_pc);
-  if (block == nullptr) {
-    block = new (arena_) HBasicBlock(graph_, dex_pc);
-    branch_targets_[dex_pc] = block;
-  }
-  return block;
-}
-
-template<typename T>
-void HGraphBuilder::Unop_12x(const Instruction& instruction,
-                             Primitive::Type type,
-                             uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-void HGraphBuilder::Conversion_12x(const Instruction& instruction,
-                                   Primitive::Type input_type,
-                                   Primitive::Type result_type,
-                                   uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), input_type, dex_pc);
-  current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_23x(const Instruction& instruction,
-                              Primitive::Type type,
-                              uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegC(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_23x_shift(const Instruction& instruction,
-                                    Primitive::Type type,
-                                    uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
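-  // Dex shift instructions always encode the shift distance as a 32-bit int,
-  // even when the shifted value is a long.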
-  HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-void HGraphBuilder::Binop_23x_cmp(const Instruction& instruction,
-                                  Primitive::Type type,
-                                  ComparisonBias bias,
-                                  uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegC(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) HCompare(type, first, second, bias, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_12x_shift(const Instruction& instruction, Primitive::Type type,
-                                    uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegA(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_12x(const Instruction& instruction,
-                              Primitive::Type type,
-                              uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegA(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegB(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22s(), dex_pc);
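-  // `reverse` is used for rsub-int variants, where the constant is the left-hand operand.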
-  if (reverse) {
-    std::swap(first, second);
-  }
-  current_block_->AddInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22b(), dex_pc);
-  if (reverse) {
-    std::swap(first, second);
-  }
-  current_block_->AddInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, const CompilerDriver& driver) {
-  Thread* self = Thread::Current();
-  return cu->IsConstructor()
-      && driver.RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
-}
-
-void HGraphBuilder::BuildReturn(const Instruction& instruction,
-                                Primitive::Type type,
-                                uint32_t dex_pc) {
-  if (type == Primitive::kPrimVoid) {
-    if (graph_->ShouldGenerateConstructorBarrier()) {
-      // The compilation unit is null during testing.
-      if (dex_compilation_unit_ != nullptr) {
-        DCHECK(RequiresConstructorBarrier(dex_compilation_unit_, *compiler_driver_))
-          << "Inconsistent use of ShouldGenerateConstructorBarrier. Should not generate a barrier.";
-      }
-      current_block_->AddInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
-    }
-    current_block_->AddInstruction(new (arena_) HReturnVoid(dex_pc));
-  } else {
-    HInstruction* value = LoadLocal(instruction.VRegA(), type, dex_pc);
-    current_block_->AddInstruction(new (arena_) HReturn(value, dex_pc));
-  }
-  current_block_->AddSuccessor(exit_block_);
-  current_block_ = nullptr;
-}
-
-static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) {
-  switch (opcode) {
-    case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_STATIC_RANGE:
-      return kStatic;
-    case Instruction::INVOKE_DIRECT:
-    case Instruction::INVOKE_DIRECT_RANGE:
-      return kDirect;
-    case Instruction::INVOKE_VIRTUAL:
-    case Instruction::INVOKE_VIRTUAL_QUICK:
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
-      return kVirtual;
-    case Instruction::INVOKE_INTERFACE:
-    case Instruction::INVOKE_INTERFACE_RANGE:
-      return kInterface;
-    case Instruction::INVOKE_SUPER_RANGE:
-    case Instruction::INVOKE_SUPER:
-      return kSuper;
-    default:
-      LOG(FATAL) << "Unexpected invoke opcode: " << opcode;
-      UNREACHABLE();
-  }
-}
-
-ArtMethod* HGraphBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<3> hs(soa.Self());
-
-  ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
-
-  ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>(
-      *dex_compilation_unit_->GetDexFile(),
-      method_idx,
-      dex_compilation_unit_->GetDexCache(),
-      class_loader,
-      /* referrer */ nullptr,
-      invoke_type);
-
-  if (UNLIKELY(resolved_method == nullptr)) {
-    // Clean up any exception left by type resolution.
-    soa.Self()->ClearException();
-    return nullptr;
-  }
-
-  // Check access. The class linker has a fast path for looking into the dex cache
-  // and does not check access if it hits the cache.
-  if (compiling_class.Get() == nullptr) {
-    if (!resolved_method->IsPublic()) {
-      return nullptr;
-    }
-  } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
-                                                       resolved_method,
-                                                       dex_compilation_unit_->GetDexCache().Get(),
-                                                       method_idx)) {
-    return nullptr;
-  }
-
-  // We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not.
-  // We need to look at the referrer's super class vtable. We need to do this to know if we need to
-  // make this an invoke-unresolved to handle cross-dex invokes or abstract super methods, both of
-  // which require runtime handling.
-  if (invoke_type == kSuper) {
-    if (compiling_class.Get() == nullptr) {
-      // We could not determine the method's class; we need to wait until runtime.
-      DCHECK(Runtime::Current()->IsAotCompiler());
-      return nullptr;
-    }
-    ArtMethod* current_method = graph_->GetArtMethod();
-    DCHECK(current_method != nullptr);
-    Handle<mirror::Class> methods_class(hs.NewHandle(
-        dex_compilation_unit_->GetClassLinker()->ResolveReferencedClassOfMethod(Thread::Current(),
-                                                                                method_idx,
-                                                                                current_method)));
-    if (methods_class.Get() == nullptr) {
-      // Invoking a super method requires knowing the actual super class. If we did not resolve
-      // the compiling method's declaring class (which only happens for ahead-of-time
-      // compilation), bail out.
-      DCHECK(Runtime::Current()->IsAotCompiler());
-      return nullptr;
-    } else {
-      ArtMethod* actual_method;
-      if (methods_class->IsInterface()) {
-        actual_method = methods_class->FindVirtualMethodForInterfaceSuper(
-            resolved_method, class_linker->GetImagePointerSize());
-      } else {
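-        // For non-interface targets, invoke-super dispatches through the superclass vtable.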
-        uint16_t vtable_index = resolved_method->GetMethodIndex();
-        actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
-            vtable_index, class_linker->GetImagePointerSize());
-      }
-      if (actual_method != resolved_method &&
-          !IsSameDexFile(*actual_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
-        // The back-end code generator relies on this check to ensure that it will not
-        // attempt to read the dex_cache with a dex_method_index that is not from the correct
-        // dex_file. If we didn't do this check, the dex_method_index would not be updated in the
-        // builder, which means that the code generator (and possibly the compiler driver during
-        // sharpening and inlining) might invoke an incorrect method.
-        // TODO: The actual method could still be referenced in the current dex file, so we
-        //       could try locating it.
-        // TODO: Remove the dex_file restriction.
-        return nullptr;
-      }
-      if (!actual_method->IsInvokable()) {
-        // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
-        // could resolve the callee to the wrong method.
-        return nullptr;
-      }
-      resolved_method = actual_method;
-    }
-  }
-
-  // Check for incompatible class changes. The class linker has a fast path for
-  // looking into the dex cache and skips this check on a cache hit.
-  if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
-    return nullptr;
-  }
-
-  return resolved_method;
-}
-
-bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
-                                uint32_t dex_pc,
-                                uint32_t method_idx,
-                                uint32_t number_of_vreg_arguments,
-                                bool is_range,
-                                uint32_t* args,
-                                uint32_t register_index) {
-  InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
-  const char* descriptor = dex_file_->GetMethodShorty(method_idx);
-  Primitive::Type return_type = Primitive::GetType(descriptor[0]);
-
-  // Remove the return type from the 'proto'.
-  size_t number_of_arguments = strlen(descriptor) - 1;
-  if (invoke_type != kStatic) {  // instance call
-    // One extra argument for 'this'.
-    number_of_arguments++;
-  }
-
-  MethodReference target_method(dex_file_, method_idx);
-
-  // Special handling for string init.
-  int32_t string_init_offset = 0;
-  bool is_string_init = compiler_driver_->IsStringInit(method_idx,
-                                                       dex_file_,
-                                                       &string_init_offset);
-  // Replace calls to String.<init> with StringFactory.
-  if (is_string_init) {
-    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
-        HInvokeStaticOrDirect::MethodLoadKind::kStringInit,
-        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        dchecked_integral_cast<uint64_t>(string_init_offset),
-        0u
-    };
-    HInvoke* invoke = new (arena_) HInvokeStaticOrDirect(
-        arena_,
-        number_of_arguments - 1,
-        Primitive::kPrimNot /* return_type */,
-        dex_pc,
-        method_idx,
-        target_method,
-        dispatch_info,
-        invoke_type,
-        kStatic /* optimized_invoke_type */,
-        HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
-    return HandleStringInit(invoke,
-                            number_of_vreg_arguments,
-                            args,
-                            register_index,
-                            is_range,
-                            descriptor);
-  }
-
-  ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
-
-  if (UNLIKELY(resolved_method == nullptr)) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
-    HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
-                                                     number_of_arguments,
-                                                     return_type,
-                                                     dex_pc,
-                                                     method_idx,
-                                                     invoke_type);
-    return HandleInvoke(invoke,
-                        number_of_vreg_arguments,
-                        args,
-                        register_index,
-                        is_range,
-                        descriptor,
-                        nullptr /* clinit_check */);
-  }
-
-  // Potential class initialization check, in the case of a static method call.
-  HClinitCheck* clinit_check = nullptr;
-  HInvoke* invoke = nullptr;
-  if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
-    // By default, consider that the called method implicitly requires
-    // an initialization check of its declaring class.
-    HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
-        = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
-    ScopedObjectAccess soa(Thread::Current());
-    if (invoke_type == kStatic) {
-      clinit_check = ProcessClinitCheckForInvoke(
-          dex_pc, resolved_method, method_idx, &clinit_check_requirement);
-    } else if (invoke_type == kSuper) {
-      if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
-        // Update the target method to the one resolved. Note that this may be a no-op if
-        // we resolved to the method referenced by the instruction.
-        method_idx = resolved_method->GetDexMethodIndex();
-        target_method = MethodReference(dex_file_, method_idx);
-      }
-    }
-
-    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
-        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        0u,
-        0u
-    };
-    invoke = new (arena_) HInvokeStaticOrDirect(arena_,
-                                                number_of_arguments,
-                                                return_type,
-                                                dex_pc,
-                                                method_idx,
-                                                target_method,
-                                                dispatch_info,
-                                                invoke_type,
-                                                invoke_type,
-                                                clinit_check_requirement);
-  } else if (invoke_type == kVirtual) {
-    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
-    invoke = new (arena_) HInvokeVirtual(arena_,
-                                         number_of_arguments,
-                                         return_type,
-                                         dex_pc,
-                                         method_idx,
-                                         resolved_method->GetMethodIndex());
-  } else {
-    DCHECK_EQ(invoke_type, kInterface);
-    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
-    invoke = new (arena_) HInvokeInterface(arena_,
-                                           number_of_arguments,
-                                           return_type,
-                                           dex_pc,
-                                           method_idx,
-                                           resolved_method->GetDexMethodIndex());
-  }
-
-  return HandleInvoke(invoke,
-                      number_of_vreg_arguments,
-                      args,
-                      register_index,
-                      is_range,
-                      descriptor,
-                      clinit_check);
-}
-
-bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
-  bool finalizable;
-  bool can_throw = NeedsAccessCheck(type_index, &finalizable);
-
-  // Only the non-resolved entrypoint handles the finalizable class case. If we
-  // need access checks, then the class is not resolved yet and may turn out to
-  // be finalizable.
-  QuickEntrypointEnum entrypoint = (finalizable || can_throw)
-      ? kQuickAllocObject
-      : kQuickAllocObjectInitialized;
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<3> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
-
-  if (outer_dex_cache.Get() != dex_cache.Get()) {
-    // We currently do not support inlining allocations across dex files.
-    return false;
-  }
-
-  HLoadClass* load_class = new (arena_) HLoadClass(
-      graph_->GetCurrentMethod(),
-      type_index,
-      outer_dex_file,
-      IsOutermostCompilingClass(type_index),
-      dex_pc,
-      /*needs_access_check*/ can_throw,
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index));
-
-  current_block_->AddInstruction(load_class);
-  HInstruction* cls = load_class;
-  if (!IsInitialized(resolved_class)) {
-    cls = new (arena_) HClinitCheck(load_class, dex_pc);
-    current_block_->AddInstruction(cls);
-  }
-
-  current_block_->AddInstruction(new (arena_) HNewInstance(
-      cls,
-      graph_->GetCurrentMethod(),
-      dex_pc,
-      type_index,
-      *dex_compilation_unit_->GetDexFile(),
-      can_throw,
-      finalizable,
-      entrypoint));
-  return true;
-}
-
-static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
-}
-
-bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls) const {
-  if (cls.Get() == nullptr) {
-    return false;
-  }
-
-  // `CanAssumeClassIsLoaded` will return true if we're JITting, or will
-  // check whether the class is in an image for the AOT compilation.
-  if (cls->IsInitialized() &&
-      compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) {
-    return true;
-  }
-
-  if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) {
-    return true;
-  }
-
-  // TODO: We should walk over the inlined methods, but we don't pass
-  //       that information to the builder.
-  if (IsSubClass(GetCompilingClass(), cls.Get())) {
-    return true;
-  }
-
-  return false;
-}
-
-HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
-      uint32_t dex_pc,
-      ArtMethod* resolved_method,
-      uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Thread* self = Thread::Current();
-  StackHandleScope<4> hs(self);
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          self, *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(
-          self, outer_dex_file)));
-  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-  Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
-
-  // The index at which the method's class is stored in the DexCache's type array.
-  uint32_t storage_index = DexFile::kDexNoIndex;
-  bool is_outer_class = (resolved_method->GetDeclaringClass() == outer_class.Get());
-  if (is_outer_class) {
-    storage_index = outer_class->GetDexTypeIndex();
-  } else if (outer_dex_cache.Get() == dex_cache.Get()) {
-    // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer.
-    compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(),
-                                                               GetCompilingClass(),
-                                                               resolved_method,
-                                                               method_idx,
-                                                               &storage_index);
-  }
-
-  HClinitCheck* clinit_check = nullptr;
-
-  if (IsInitialized(resolved_method_class)) {
-    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
-  } else if (storage_index != DexFile::kDexNoIndex) {
-    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
-    HLoadClass* load_class = new (arena_) HLoadClass(
-        graph_->GetCurrentMethod(),
-        storage_index,
-        outer_dex_file,
-        is_outer_class,
-        dex_pc,
-        /*needs_access_check*/ false,
-        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index));
-    current_block_->AddInstruction(load_class);
-    clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
-    current_block_->AddInstruction(clinit_check);
-  }
-  return clinit_check;
-}
-
-bool HGraphBuilder::SetupInvokeArguments(HInvoke* invoke,
-                                         uint32_t number_of_vreg_arguments,
-                                         uint32_t* args,
-                                         uint32_t register_index,
-                                         bool is_range,
-                                         const char* descriptor,
-                                         size_t start_index,
-                                         size_t* argument_index) {
-  uint32_t descriptor_index = 1;  // Skip the return type.
-  uint32_t dex_pc = invoke->GetDexPc();
-
-  for (size_t i = start_index;
-       // Make sure we don't go over the expected arguments or over the number of
-       // dex registers given. If the instruction was seen as dead by the verifier,
-       // it hasn't been properly checked.
-       (i < number_of_vreg_arguments) && (*argument_index < invoke->GetNumberOfArguments());
-       i++, (*argument_index)++) {
-    Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
-    bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
-    if (!is_range
-        && is_wide
-        && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) {
-      // Longs and doubles should be in pairs, that is, sequential registers. The verifier should
-      // reject any class where this is violated. However, the verifier only does these checks
-      // on instructions that are not trivially dead, so we just bail out of the compilation.
-      VLOG(compiler) << "Did not compile "
-                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                     << " because of non-sequential dex register pair in wide argument";
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
-      return false;
-    }
-    HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type, dex_pc);
-    invoke->SetArgumentAt(*argument_index, arg);
-    if (is_wide) {
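-      // A wide value occupies a pair of vregs; skip the high half.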
-      i++;
-    }
-  }
-
-  if (*argument_index != invoke->GetNumberOfArguments()) {
-    VLOG(compiler) << "Did not compile "
-                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                   << " because of wrong number of arguments in invoke instruction";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
-    return false;
-  }
-
-  if (invoke->IsInvokeStaticOrDirect() &&
-      HInvokeStaticOrDirect::NeedsCurrentMethodInput(
-          invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) {
-    invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod());
-    (*argument_index)++;
-  }
-
-  return true;
-}
-
-bool HGraphBuilder::HandleInvoke(HInvoke* invoke,
-                                 uint32_t number_of_vreg_arguments,
-                                 uint32_t* args,
-                                 uint32_t register_index,
-                                 bool is_range,
-                                 const char* descriptor,
-                                 HClinitCheck* clinit_check) {
-  DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit());
-
-  size_t start_index = 0;
-  size_t argument_index = 0;
-  if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) {  // Instance call.
-    HInstruction* arg = LoadLocal(
-        is_range ? register_index : args[0], Primitive::kPrimNot, invoke->GetDexPc());
-    HNullCheck* null_check = new (arena_) HNullCheck(arg, invoke->GetDexPc());
-    current_block_->AddInstruction(null_check);
-    invoke->SetArgumentAt(0, null_check);
-    start_index = 1;
-    argument_index = 1;
-  }
-
-  if (!SetupInvokeArguments(invoke,
-                            number_of_vreg_arguments,
-                            args,
-                            register_index,
-                            is_range,
-                            descriptor,
-                            start_index,
-                            &argument_index)) {
-    return false;
-  }
-
-  if (clinit_check != nullptr) {
-    // Add the class initialization check as the last input of `invoke`.
-    DCHECK(invoke->IsInvokeStaticOrDirect());
-    DCHECK(invoke->AsInvokeStaticOrDirect()->GetClinitCheckRequirement()
-        == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit);
-    invoke->SetArgumentAt(argument_index, clinit_check);
-    argument_index++;
-  }
-
-  current_block_->AddInstruction(invoke);
-  latest_result_ = invoke;
-
-  return true;
-}
-
-bool HGraphBuilder::HandleStringInit(HInvoke* invoke,
-                                     uint32_t number_of_vreg_arguments,
-                                     uint32_t* args,
-                                     uint32_t register_index,
-                                     bool is_range,
-                                     const char* descriptor) {
-  DCHECK(invoke->IsInvokeStaticOrDirect());
-  DCHECK(invoke->AsInvokeStaticOrDirect()->IsStringInit());
-
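-  // Skip the 'this' vreg of String.<init>: the StringFactory call produces the string instead.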
-  size_t start_index = 1;
-  size_t argument_index = 0;
-  if (!SetupInvokeArguments(invoke,
-                            number_of_vreg_arguments,
-                            args,
-                            register_index,
-                            is_range,
-                            descriptor,
-                            start_index,
-                            &argument_index)) {
-    return false;
-  }
-
-  // Add move-result for StringFactory method.
-  uint32_t orig_this_reg = is_range ? register_index : args[0];
-  HInstruction* new_instance = LoadLocal(orig_this_reg, Primitive::kPrimNot, invoke->GetDexPc());
-  invoke->SetArgumentAt(argument_index, new_instance);
-  current_block_->AddInstruction(invoke);
-
-  latest_result_ = invoke;
-  return true;
-}
-
-static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) {
-  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-  const char* type = dex_file.GetFieldTypeDescriptor(field_id);
-  return Primitive::GetType(type[0]);
-}
-
-bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
-                                             uint32_t dex_pc,
-                                             bool is_put) {
-  uint32_t source_or_dest_reg = instruction.VRegA_22c();
-  uint32_t obj_reg = instruction.VRegB_22c();
-  uint16_t field_index;
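-  // For quickened opcodes, the original field index must be recovered from the quickening info.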
-  if (instruction.IsQuickened()) {
-    if (!CanDecodeQuickenedInfo()) {
-      return false;
-    }
-    field_index = LookupQuickenedInfo(dex_pc);
-  } else {
-    field_index = instruction.VRegC_22c();
-  }
-
-  ScopedObjectAccess soa(Thread::Current());
-  ArtField* resolved_field =
-      compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
-
-  HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot, dex_pc);
-  HInstruction* null_check = new (arena_) HNullCheck(object, dex_pc);
-  current_block_->AddInstruction(null_check);
-
-  Primitive::Type field_type = (resolved_field == nullptr)
-      ? GetFieldAccessType(*dex_file_, field_index)
-      : resolved_field->GetTypeAsPrimitiveType();
-  if (is_put) {
-    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    HInstruction* field_set = nullptr;
-    if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-      field_set = new (arena_) HUnresolvedInstanceFieldSet(null_check,
-                                                           value,
-                                                           field_type,
-                                                           field_index,
-                                                           dex_pc);
-    } else {
-      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
-      field_set = new (arena_) HInstanceFieldSet(null_check,
-                                                 value,
-                                                 field_type,
-                                                 resolved_field->GetOffset(),
-                                                 resolved_field->IsVolatile(),
-                                                 field_index,
-                                                 class_def_index,
-                                                 *dex_file_,
-                                                 dex_compilation_unit_->GetDexCache(),
-                                                 dex_pc);
-    }
-    current_block_->AddInstruction(field_set);
-  } else {
-    HInstruction* field_get = nullptr;
-    if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-      field_get = new (arena_) HUnresolvedInstanceFieldGet(null_check,
-                                                           field_type,
-                                                           field_index,
-                                                           dex_pc);
-    } else {
-      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
-      field_get = new (arena_) HInstanceFieldGet(null_check,
-                                                 field_type,
-                                                 resolved_field->GetOffset(),
-                                                 resolved_field->IsVolatile(),
-                                                 field_index,
-                                                 class_def_index,
-                                                 *dex_file_,
-                                                 dex_compilation_unit_->GetDexCache(),
-                                                 dex_pc);
-    }
-    current_block_->AddInstruction(field_get);
-    UpdateLocal(source_or_dest_reg, field_get, dex_pc);
-  }
-
-  return true;
-}
-
-static mirror::Class* GetClassFrom(CompilerDriver* driver,
-                                   const DexCompilationUnit& compilation_unit) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<2> hs(soa.Self());
-  const DexFile& dex_file = *compilation_unit.GetDexFile();
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      compilation_unit.GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
-
-  return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
-}
-
-mirror::Class* HGraphBuilder::GetOutermostCompilingClass() const {
-  return GetClassFrom(compiler_driver_, *outer_compilation_unit_);
-}
-
-mirror::Class* HGraphBuilder::GetCompilingClass() const {
-  return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
-}
-
-bool HGraphBuilder::IsOutermostCompilingClass(uint16_t type_index) const {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
-      soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
-  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-
-  // GetOutermostCompilingClass returns null when the class is unresolved
-  // (e.g. if it derives from an unresolved class). This is bogus given that
-  // we are compiling it.
-  // When this happens we cannot establish a direct relation between the current
-  // class and the outer class, so we return false.
-  // (Note that this is only used for optimizing invokes and field accesses.)
-  return (cls.Get() != nullptr) && (outer_class.Get() == cls.Get());
-}
-
-void HGraphBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
-                                                     uint32_t dex_pc,
-                                                     bool is_put,
-                                                     Primitive::Type field_type) {
-  uint32_t source_or_dest_reg = instruction.VRegA_21c();
-  uint16_t field_index = instruction.VRegB_21c();
-
-  if (is_put) {
-    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    current_block_->AddInstruction(
-        new (arena_) HUnresolvedStaticFieldSet(value, field_type, field_index, dex_pc));
-  } else {
-    current_block_->AddInstruction(
-        new (arena_) HUnresolvedStaticFieldGet(field_type, field_index, dex_pc));
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
-  }
-}
-
-bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
-                                           uint32_t dex_pc,
-                                           bool is_put) {
-  uint32_t source_or_dest_reg = instruction.VRegA_21c();
-  uint16_t field_index = instruction.VRegB_21c();
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<5> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  ArtField* resolved_field = compiler_driver_->ResolveField(
-      soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true);
-
-  if (resolved_field == nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-    Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index);
-    BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
-    return true;
-  }
-
-  Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
-  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-
-  // The index at which the field's class is stored in the DexCache's type array.
-  uint32_t storage_index;
-  bool is_outer_class = (outer_class.Get() == resolved_field->GetDeclaringClass());
-  if (is_outer_class) {
-    storage_index = outer_class->GetDexTypeIndex();
-  } else if (outer_dex_cache.Get() != dex_cache.Get()) {
-    // The compiler driver cannot currently handle multiple dex caches. Just bail out.
-    return false;
-  } else {
-    // TODO: This is rather expensive. Perf it and cache the results if needed.
-    std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
-        outer_dex_cache.Get(),
-        GetCompilingClass(),
-        resolved_field,
-        field_index,
-        &storage_index);
-    bool can_easily_access = is_put ? pair.second : pair.first;
-    if (!can_easily_access) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
-      BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
-      return true;
-    }
-  }
-
-  bool is_in_cache =
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index);
-  HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
-                                                 storage_index,
-                                                 outer_dex_file,
-                                                 is_outer_class,
-                                                 dex_pc,
-                                                 /*needs_access_check*/ false,
-                                                 is_in_cache);
-  current_block_->AddInstruction(constant);
-
-  HInstruction* cls = constant;
-
-  Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
-  if (!IsInitialized(klass)) {
-    cls = new (arena_) HClinitCheck(constant, dex_pc);
-    current_block_->AddInstruction(cls);
-  }
-
-  uint16_t class_def_index = klass->GetDexClassDefIndex();
-  if (is_put) {
-    // We need to keep the class alive before loading the value.
-    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    DCHECK_EQ(value->GetType(), field_type);
-    current_block_->AddInstruction(new (arena_) HStaticFieldSet(cls,
-                                                                value,
-                                                                field_type,
-                                                                resolved_field->GetOffset(),
-                                                                resolved_field->IsVolatile(),
-                                                                field_index,
-                                                                class_def_index,
-                                                                *dex_file_,
-                                                                dex_cache_,
-                                                                dex_pc));
-  } else {
-    current_block_->AddInstruction(new (arena_) HStaticFieldGet(cls,
-                                                                field_type,
-                                                                resolved_field->GetOffset(),
-                                                                resolved_field->IsVolatile(),
-                                                                field_index,
-                                                                class_def_index,
-                                                                *dex_file_,
-                                                                dex_cache_,
-                                                                dex_pc));
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
-  }
-  return true;
-}
-
-void HGraphBuilder::BuildCheckedDivRem(uint16_t out_vreg,
-                                       uint16_t first_vreg,
-                                       int64_t second_vreg_or_constant,
-                                       uint32_t dex_pc,
-                                       Primitive::Type type,
-                                       bool second_is_constant,
-                                       bool isDiv) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  HInstruction* first = LoadLocal(first_vreg, type, dex_pc);
-  HInstruction* second = nullptr;
-  if (second_is_constant) {
-    if (type == Primitive::kPrimInt) {
-      second = graph_->GetIntConstant(second_vreg_or_constant, dex_pc);
-    } else {
-      second = graph_->GetLongConstant(second_vreg_or_constant, dex_pc);
-    }
-  } else {
-    second = LoadLocal(second_vreg_or_constant, type, dex_pc);
-  }
-
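-  // A divide-by-zero check is needed unless the divisor is a non-zero constant.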
-  if (!second_is_constant
-      || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0)
-      || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) {
-    second = new (arena_) HDivZeroCheck(second, dex_pc);
-    current_block_->AddInstruction(second);
-  }
-
-  if (isDiv) {
-    current_block_->AddInstruction(new (arena_) HDiv(type, first, second, dex_pc));
-  } else {
-    current_block_->AddInstruction(new (arena_) HRem(type, first, second, dex_pc));
-  }
-  UpdateLocal(out_vreg, current_block_->GetLastInstruction(), dex_pc);
-}
-
-void HGraphBuilder::BuildArrayAccess(const Instruction& instruction,
-                                     uint32_t dex_pc,
-                                     bool is_put,
-                                     Primitive::Type anticipated_type) {
-  uint8_t source_or_dest_reg = instruction.VRegA_23x();
-  uint8_t array_reg = instruction.VRegB_23x();
-  uint8_t index_reg = instruction.VRegC_23x();
-
-  HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot, dex_pc);
-  object = new (arena_) HNullCheck(object, dex_pc);
-  current_block_->AddInstruction(object);
-
-  HInstruction* length = new (arena_) HArrayLength(object, dex_pc);
-  current_block_->AddInstruction(length);
-  HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt, dex_pc);
-  index = new (arena_) HBoundsCheck(index, length, dex_pc);
-  current_block_->AddInstruction(index);
-  if (is_put) {
-    HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type, dex_pc);
-    // TODO: Insert a type check node if the type is Object.
-    current_block_->AddInstruction(new (arena_) HArraySet(
-        object, index, value, anticipated_type, dex_pc));
-  } else {
-    current_block_->AddInstruction(new (arena_) HArrayGet(object, index, anticipated_type, dex_pc));
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
-  }
-  graph_->SetHasBoundsChecks(true);
-}
-
-void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc,
-                                        uint32_t type_index,
-                                        uint32_t number_of_vreg_arguments,
-                                        bool is_range,
-                                        uint32_t* args,
-                                        uint32_t register_index) {
-  HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
-  bool finalizable;
-  QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
-      ? kQuickAllocArrayWithAccessCheck
-      : kQuickAllocArray;
-  HInstruction* object = new (arena_) HNewArray(length,
-                                                graph_->GetCurrentMethod(),
-                                                dex_pc,
-                                                type_index,
-                                                *dex_compilation_unit_->GetDexFile(),
-                                                entrypoint);
-  current_block_->AddInstruction(object);
-
-  const char* descriptor = dex_file_->StringByTypeIdx(type_index);
-  DCHECK_EQ(descriptor[0], '[') << descriptor;
-  char primitive = descriptor[1];
-  DCHECK(primitive == 'I'
-      || primitive == 'L'
-      || primitive == '[') << descriptor;
-  bool is_reference_array = (primitive == 'L') || (primitive == '[');
-  Primitive::Type type = is_reference_array ? Primitive::kPrimNot : Primitive::kPrimInt;
-
-  for (size_t i = 0; i < number_of_vreg_arguments; ++i) {
-    HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type, dex_pc);
-    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
-    current_block_->AddInstruction(
-        new (arena_) HArraySet(object, index, value, type, dex_pc));
-  }
-  latest_result_ = object;
-}
-
-template <typename T>
-void HGraphBuilder::BuildFillArrayData(HInstruction* object,
-                                       const T* data,
-                                       uint32_t element_count,
-                                       Primitive::Type anticipated_type,
-                                       uint32_t dex_pc) {
-  for (uint32_t i = 0; i < element_count; ++i) {
-    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
-    HInstruction* value = graph_->GetIntConstant(data[i], dex_pc);
-    current_block_->AddInstruction(new (arena_) HArraySet(
-      object, index, value, anticipated_type, dex_pc));
-  }
-}
-
-void HGraphBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
-  HInstruction* array = LoadLocal(instruction.VRegA_31t(), Primitive::kPrimNot, dex_pc);
-  HNullCheck* null_check = new (arena_) HNullCheck(array, dex_pc);
-  current_block_->AddInstruction(null_check);
-
-  HInstruction* length = new (arena_) HArrayLength(null_check, dex_pc);
-  current_block_->AddInstruction(length);
-
-  int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
-  const Instruction::ArrayDataPayload* payload =
-      reinterpret_cast<const Instruction::ArrayDataPayload*>(code_start_ + payload_offset);
-  const uint8_t* data = payload->data;
-  uint32_t element_count = payload->element_count;
-
-  // The implementation of this DEX instruction seems to be that the bounds
-  // check is done before doing any of the stores.
-  HInstruction* last_index = graph_->GetIntConstant(payload->element_count - 1, dex_pc);
-  current_block_->AddInstruction(new (arena_) HBoundsCheck(last_index, length, dex_pc));
-
-  switch (payload->element_width) {
-    case 1:
-      BuildFillArrayData(null_check,
-                         reinterpret_cast<const int8_t*>(data),
-                         element_count,
-                         Primitive::kPrimByte,
-                         dex_pc);
-      break;
-    case 2:
-      BuildFillArrayData(null_check,
-                         reinterpret_cast<const int16_t*>(data),
-                         element_count,
-                         Primitive::kPrimShort,
-                         dex_pc);
-      break;
-    case 4:
-      BuildFillArrayData(null_check,
-                         reinterpret_cast<const int32_t*>(data),
-                         element_count,
-                         Primitive::kPrimInt,
-                         dex_pc);
-      break;
-    case 8:
-      BuildFillWideArrayData(null_check,
-                             reinterpret_cast<const int64_t*>(data),
-                             element_count,
-                             dex_pc);
-      break;
-    default:
-      LOG(FATAL) << "Unknown element width: " << payload->element_width;
-  }
-  graph_->SetHasBoundsChecks(true);
-}
-
-void HGraphBuilder::BuildFillWideArrayData(HInstruction* object,
-                                           const int64_t* data,
-                                           uint32_t element_count,
-                                           uint32_t dex_pc) {
-  for (uint32_t i = 0; i < element_count; ++i) {
-    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
-    HInstruction* value = graph_->GetLongConstant(data[i], dex_pc);
-    current_block_->AddInstruction(new (arena_) HArraySet(
-      object, index, value, Primitive::kPrimLong, dex_pc));
-  }
-}
-
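-// Picks the type check strategy based on what is statically known about the target class.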
-static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (cls.Get() == nullptr) {
-    return TypeCheckKind::kUnresolvedCheck;
-  } else if (cls->IsInterface()) {
-    return TypeCheckKind::kInterfaceCheck;
-  } else if (cls->IsArrayClass()) {
-    if (cls->GetComponentType()->IsObjectClass()) {
-      return TypeCheckKind::kArrayObjectCheck;
-    } else if (cls->CannotBeAssignedFromOtherTypes()) {
-      return TypeCheckKind::kExactCheck;
-    } else {
-      return TypeCheckKind::kArrayCheck;
-    }
-  } else if (cls->IsFinal()) {
-    return TypeCheckKind::kExactCheck;
-  } else if (cls->IsAbstract()) {
-    return TypeCheckKind::kAbstractClassCheck;
-  } else {
-    return TypeCheckKind::kClassHierarchyCheck;
-  }
-}
-
-void HGraphBuilder::BuildTypeCheck(const Instruction& instruction,
-                                   uint8_t destination,
-                                   uint8_t reference,
-                                   uint16_t type_index,
-                                   uint32_t dex_pc) {
-  bool type_known_final, type_known_abstract, use_declaring_class;
-  bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
-      dex_compilation_unit_->GetDexMethodIndex(),
-      *dex_compilation_unit_->GetDexFile(),
-      type_index,
-      &type_known_final,
-      &type_known_abstract,
-      &use_declaring_class);
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<2> hs(soa.Self());
-  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
-  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
-
-  HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc);
-  HLoadClass* cls = new (arena_) HLoadClass(
-      graph_->GetCurrentMethod(),
-      type_index,
-      dex_file,
-      IsOutermostCompilingClass(type_index),
-      dex_pc,
-      !can_access,
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index));
-  current_block_->AddInstruction(cls);
-
-  TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
-  if (instruction.Opcode() == Instruction::INSTANCE_OF) {
-    current_block_->AddInstruction(new (arena_) HInstanceOf(object, cls, check_kind, dex_pc));
-    UpdateLocal(destination, current_block_->GetLastInstruction(), dex_pc);
-  } else {
-    DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
-    // We emit a CheckCast followed by a BoundType. CheckCast is a statement
-    // which may throw. If it succeeds, BoundType sets the new type of `object`
-    // for all subsequent uses.
-    current_block_->AddInstruction(new (arena_) HCheckCast(object, cls, check_kind, dex_pc));
-    current_block_->AddInstruction(new (arena_) HBoundType(object, dex_pc));
-    UpdateLocal(reference, current_block_->GetLastInstruction(), dex_pc);
-  }
-}
-
-bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
-  return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
-      dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, finalizable);
-}
-
-void HGraphBuilder::BuildSwitchJumpTable(const SwitchTable& table,
-                                         const Instruction& instruction,
-                                         HInstruction* value,
-                                         uint32_t dex_pc) {
-  // Add the successor blocks to the current block.
-  uint16_t num_entries = table.GetNumEntries();
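-  // Entry 0 of the (packed) table holds the starting key; target offsets begin at index 1.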
-  for (size_t i = 1; i <= num_entries; i++) {
-    int32_t target_offset = table.GetEntryAt(i);
-    HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset);
-    DCHECK(case_target != nullptr);
-
-    // Add the target block as a successor.
-    current_block_->AddSuccessor(case_target);
-  }
-
-  // Add the default target block as the last successor.
-  HBasicBlock* default_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
-  DCHECK(default_target != nullptr);
-  current_block_->AddSuccessor(default_target);
-
-  // Now add the Switch instruction.
-  int32_t starting_key = table.GetEntryAt(0);
-  current_block_->AddInstruction(
-      new (arena_) HPackedSwitch(starting_key, num_entries, value, dex_pc));
-  // This block ends with control flow.
-  current_block_ = nullptr;
-}
-
-void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc) {
-  // Verifier guarantees that the payload for PackedSwitch contains:
-  //   (a) number of entries (may be zero)
-  //   (b) first and lowest switch case value (entry 0, always present)
-  //   (c) list of target pcs (entries 1 <= i <= N)
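-  //
-  // For example, a switch over cases {10, 11, 12} yields a payload with
-  // num_entries = 3, entry 0 holding the first key (10), and entries 1..3
-  // holding the three relative branch targets.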
-  SwitchTable table(instruction, dex_pc, false);
-
-  // Value to test against.
-  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-
-  // Starting key value.
-  int32_t starting_key = table.GetEntryAt(0);
-
-  // Retrieve number of entries.
-  uint16_t num_entries = table.GetNumEntries();
-  if (num_entries == 0) {
-    return;
-  }
-
-  // Don't use a packed switch if there are very few entries.
-  if (num_entries > kSmallSwitchThreshold) {
-    BuildSwitchJumpTable(table, instruction, value, dex_pc);
-  } else {
-    // Chained cmp-and-branch, starting from starting_key.
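-    // For example, cases {10, 11, 12} lower to:
-    //   if (value == 10) goto target_10;
-    //   else if (value == 11) goto target_11;
-    //   else if (value == 12) goto target_12;
-    //   else goto default_target;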
-    for (size_t i = 1; i <= num_entries; i++) {
-      BuildSwitchCaseHelper(instruction,
-                            i,
-                            i == num_entries,
-                            table,
-                            value,
-                            starting_key + i - 1,
-                            table.GetEntryAt(i),
-                            dex_pc);
-    }
-  }
-}
-
-void HGraphBuilder::BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc) {
-  // Verifier guarantees that the payload for SparseSwitch contains:
-  //   (a) number of entries (may be zero)
-  //   (b) sorted key values (entries 0 <= i < N)
-  //   (c) target pcs corresponding to the switch values (entries N <= i < 2*N)
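-  //
-  // For example, a switch over cases {1, 10, 100} yields num_entries = 3,
-  // entries 0..2 holding the sorted keys {1, 10, 100}, and entries 3..5
-  // holding the corresponding relative branch targets.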
-  SwitchTable table(instruction, dex_pc, true);
-
-  // Value to test against.
-  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-
-  uint16_t num_entries = table.GetNumEntries();
-
-  for (size_t i = 0; i < num_entries; i++) {
-    BuildSwitchCaseHelper(instruction, i, i == static_cast<size_t>(num_entries) - 1, table, value,
-                          table.GetEntryAt(i), table.GetEntryAt(i + num_entries), dex_pc);
-  }
-}
-
-void HGraphBuilder::BuildSwitchCaseHelper(const Instruction& instruction, size_t index,
-                                          bool is_last_case, const SwitchTable& table,
-                                          HInstruction* value, int32_t case_value_int,
-                                          int32_t target_offset, uint32_t dex_pc) {
-  HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset);
-  DCHECK(case_target != nullptr);
-
-  // The current case's value.
-  HInstruction* this_case_value = graph_->GetIntConstant(case_value_int, dex_pc);
-
-  // Compare value and this_case_value.
-  HEqual* comparison = new (arena_) HEqual(value, this_case_value, dex_pc);
-  current_block_->AddInstruction(comparison);
-  HInstruction* ifinst = new (arena_) HIf(comparison, dex_pc);
-  current_block_->AddInstruction(ifinst);
-
-  // Case hit: use the target offset to determine where to go.
-  current_block_->AddSuccessor(case_target);
-
-  // Case miss: go to the next case (or default fall-through).
-  // When there is a next case, we use the block stored at the table offset representing this
-  // case (that is where it was registered in ComputeBranchTargets).
-  // When there is no next case, we use the following instruction.
-  // TODO: Find a good way to peel the last iteration to avoid conditional, but still have re-use.
-  if (!is_last_case) {
-    HBasicBlock* next_case_target = FindBlockStartingAt(table.GetDexPcForIndex(index));
-    DCHECK(next_case_target != nullptr);
-    current_block_->AddSuccessor(next_case_target);
-
-    // Need to manually add the block, as there is no dex-pc transition for the cases.
-    graph_->AddBlock(next_case_target);
-
-    current_block_ = next_case_target;
-  } else {
-    HBasicBlock* default_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
-    DCHECK(default_target != nullptr);
-    current_block_->AddSuccessor(default_target);
-    current_block_ = nullptr;
-  }
-}
-
-bool HGraphBuilder::CanDecodeQuickenedInfo() const {
-  return interpreter_metadata_ != nullptr;
-}
-
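-// Entries are decoded sequentially from the metadata stream, so quickened
-// instructions must be looked up in the order in which they appear in the
-// code; the DCHECK below verifies this.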
-uint16_t HGraphBuilder::LookupQuickenedInfo(uint32_t dex_pc) {
-  DCHECK(interpreter_metadata_ != nullptr);
-  uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
-  DCHECK_EQ(dex_pc, dex_pc_in_map);
-  return DecodeUnsignedLeb128(&interpreter_metadata_);
-}
-
-bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
-  if (current_block_ == nullptr) {
-    return true;  // Dead code
-  }
-
-  switch (instruction.Opcode()) {
-    case Instruction::CONST_4: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_11n(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_16: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21s(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_31i(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_HIGH16: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21h() << 16, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE_16: {
-      int32_t register_index = instruction.VRegA();
-      // Get 16 bits of constant value, sign extended to 64 bits.
-      int64_t value = instruction.VRegB_21s();
-      value <<= 48;
-      value >>= 48;
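-      // For example, the 16-bit pattern 0x8000 ends up as 0xFFFFFFFFFFFF8000
-      // (-32768) after the shift pair.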
-      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE_32: {
-      int32_t register_index = instruction.VRegA();
-      // Get 32 bits of constant value, sign extended to 64 bits.
-      int64_t value = instruction.VRegB_31i();
-      value <<= 32;
-      value >>= 32;
-      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE: {
-      int32_t register_index = instruction.VRegA();
-      HLongConstant* constant = graph_->GetLongConstant(instruction.VRegB_51l(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE_HIGH16: {
-      int32_t register_index = instruction.VRegA();
-      int64_t value = static_cast<int64_t>(instruction.VRegB_21h()) << 48;
-      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    // Note that the SSA building will refine the types.
-    case Instruction::MOVE:
-    case Instruction::MOVE_FROM16:
-    case Instruction::MOVE_16: {
-      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-      UpdateLocal(instruction.VRegA(), value, dex_pc);
-      break;
-    }
-
-    // Note that the SSA building will refine the types.
-    case Instruction::MOVE_WIDE:
-    case Instruction::MOVE_WIDE_FROM16:
-    case Instruction::MOVE_WIDE_16: {
-      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimLong, dex_pc);
-      UpdateLocal(instruction.VRegA(), value, dex_pc);
-      break;
-    }
-
-    case Instruction::MOVE_OBJECT:
-    case Instruction::MOVE_OBJECT_16:
-    case Instruction::MOVE_OBJECT_FROM16: {
-      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot, dex_pc);
-      UpdateLocal(instruction.VRegA(), value, dex_pc);
-      break;
-    }
-
-    case Instruction::RETURN_VOID_NO_BARRIER:
-    case Instruction::RETURN_VOID: {
-      BuildReturn(instruction, Primitive::kPrimVoid, dex_pc);
-      break;
-    }
-
-#define IF_XX(comparison, cond) \
-    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \
-    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break
-
-    IF_XX(HEqual, EQ);
-    IF_XX(HNotEqual, NE);
-    IF_XX(HLessThan, LT);
-    IF_XX(HLessThanOrEqual, LE);
-    IF_XX(HGreaterThan, GT);
-    IF_XX(HGreaterThanOrEqual, GE);
-
-    case Instruction::GOTO:
-    case Instruction::GOTO_16:
-    case Instruction::GOTO_32: {
-      int32_t offset = instruction.GetTargetOffset();
-      HBasicBlock* target = FindBlockStartingAt(offset + dex_pc);
-      DCHECK(target != nullptr);
-      current_block_->AddInstruction(new (arena_) HGoto(dex_pc));
-      current_block_->AddSuccessor(target);
-      current_block_ = nullptr;
-      break;
-    }
-
-    case Instruction::RETURN: {
-      BuildReturn(instruction, return_type_, dex_pc);
-      break;
-    }
-
-    case Instruction::RETURN_OBJECT: {
-      BuildReturn(instruction, return_type_, dex_pc);
-      break;
-    }
-
-    case Instruction::RETURN_WIDE: {
-      BuildReturn(instruction, return_type_, dex_pc);
-      break;
-    }
-
-    case Instruction::INVOKE_DIRECT:
-    case Instruction::INVOKE_INTERFACE:
-    case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_SUPER:
-    case Instruction::INVOKE_VIRTUAL:
-    case Instruction::INVOKE_VIRTUAL_QUICK: {
-      uint16_t method_idx;
-      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) {
-        if (!CanDecodeQuickenedInfo()) {
-          return false;
-        }
-        method_idx = LookupQuickenedInfo(dex_pc);
-      } else {
-        method_idx = instruction.VRegB_35c();
-      }
-      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
-      uint32_t args[5];
-      instruction.GetVarArgs(args);
-      if (!BuildInvoke(instruction, dex_pc, method_idx,
-                       number_of_vreg_arguments, false, args, -1)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::INVOKE_DIRECT_RANGE:
-    case Instruction::INVOKE_INTERFACE_RANGE:
-    case Instruction::INVOKE_STATIC_RANGE:
-    case Instruction::INVOKE_SUPER_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
-      uint16_t method_idx;
-      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) {
-        if (!CanDecodeQuickenedInfo()) {
-          return false;
-        }
-        method_idx = LookupQuickenedInfo(dex_pc);
-      } else {
-        method_idx = instruction.VRegB_3rc();
-      }
-      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
-      uint32_t register_index = instruction.VRegC();
-      if (!BuildInvoke(instruction, dex_pc, method_idx,
-                       number_of_vreg_arguments, true, nullptr, register_index)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::NEG_INT: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::NEG_LONG: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::NEG_FLOAT: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::NEG_DOUBLE: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::NOT_INT: {
-      Unop_12x<HNot>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::NOT_LONG: {
-      Unop_12x<HNot>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::LONG_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::LONG_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::LONG_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::FLOAT_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::FLOAT_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::FLOAT_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::DOUBLE_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::DOUBLE_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::DOUBLE_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_BYTE: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_SHORT: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_CHAR: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_LONG: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_DOUBLE: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_FLOAT: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_INT: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_LONG: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_FLOAT: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_DOUBLE: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_LONG: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_FLOAT: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_DOUBLE: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_INT: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, false, true);
-      break;
-    }
-
-    case Instruction::DIV_LONG: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimLong, false, true);
-      break;
-    }
-
-    case Instruction::DIV_FLOAT: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_DOUBLE: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::REM_INT: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, false, false);
-      break;
-    }
-
-    case Instruction::REM_LONG: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimLong, false, false);
-      break;
-    }
-
-    case Instruction::REM_FLOAT: {
-      Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::REM_DOUBLE: {
-      Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT: {
-      Binop_23x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_LONG: {
-      Binop_23x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_INT: {
-      Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_LONG: {
-      Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_INT: {
-      Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_LONG: {
-      Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_INT: {
-      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_LONG: {
-      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT: {
-      Binop_23x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_LONG: {
-      Binop_23x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT: {
-      Binop_23x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_LONG: {
-      Binop_23x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_LONG_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_DOUBLE_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_FLOAT_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_INT_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_LONG_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_FLOAT_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_DOUBLE_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_LONG_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_FLOAT_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_DOUBLE_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_INT_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimInt, false, true);
-      break;
-    }
-
-    case Instruction::DIV_LONG_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimLong, false, true);
-      break;
-    }
-
-    case Instruction::REM_INT_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimInt, false, false);
-      break;
-    }
-
-    case Instruction::REM_LONG_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimLong, false, false);
-      break;
-    }
-
-    case Instruction::REM_FLOAT_2ADDR: {
-      Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::REM_DOUBLE_2ADDR: {
-      Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_INT_2ADDR: {
-      Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_LONG_2ADDR: {
-      Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_INT_2ADDR: {
-      Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_LONG_2ADDR: {
-      Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_INT_2ADDR: {
-      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_LONG_2ADDR: {
-      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_FLOAT_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_DOUBLE_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT_2ADDR: {
-      Binop_12x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_LONG_2ADDR: {
-      Binop_12x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT_2ADDR: {
-      Binop_12x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_LONG_2ADDR: {
-      Binop_12x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT_2ADDR: {
-      Binop_12x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_LONG_2ADDR: {
-      Binop_12x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT_LIT16: {
-      Binop_22s<HAdd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT_LIT16: {
-      Binop_22s<HAnd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT_LIT16: {
-      Binop_22s<HOr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT_LIT16: {
-      Binop_22s<HXor>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::RSUB_INT: {
-      Binop_22s<HSub>(instruction, true, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT_LIT16: {
-      Binop_22s<HMul>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT_LIT8: {
-      Binop_22b<HAdd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT_LIT8: {
-      Binop_22b<HAnd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT_LIT8: {
-      Binop_22b<HOr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT_LIT8: {
-      Binop_22b<HXor>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::RSUB_INT_LIT8: {
-      Binop_22b<HSub>(instruction, true, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT_LIT8: {
-      Binop_22b<HMul>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_INT_LIT16:
-    case Instruction::DIV_INT_LIT8: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, true, true);
-      break;
-    }
-
-    case Instruction::REM_INT_LIT16:
-    case Instruction::REM_INT_LIT8: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, true, false);
-      break;
-    }
-
-    case Instruction::SHL_INT_LIT8: {
-      Binop_22b<HShl>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_INT_LIT8: {
-      Binop_22b<HShr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_INT_LIT8: {
-      Binop_22b<HUShr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::NEW_INSTANCE: {
-      if (!BuildNewInstance(instruction.VRegB_21c(), dex_pc)) {
-        return false;
-      }
-      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::NEW_ARRAY: {
-      uint16_t type_index = instruction.VRegC_22c();
-      HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt, dex_pc);
-      bool finalizable;
-      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
-          ? kQuickAllocArrayWithAccessCheck
-          : kQuickAllocArray;
-      current_block_->AddInstruction(new (arena_) HNewArray(length,
-                                                            graph_->GetCurrentMethod(),
-                                                            dex_pc,
-                                                            type_index,
-                                                            *dex_compilation_unit_->GetDexFile(),
-                                                            entrypoint));
-      UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::FILLED_NEW_ARRAY: {
-      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
-      uint32_t type_index = instruction.VRegB_35c();
-      uint32_t args[5];
-      instruction.GetVarArgs(args);
-      BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
-      break;
-    }
-
-    case Instruction::FILLED_NEW_ARRAY_RANGE: {
-      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
-      uint32_t type_index = instruction.VRegB_3rc();
-      uint32_t register_index = instruction.VRegC_3rc();
-      BuildFilledNewArray(
-          dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
-      break;
-    }
-
-    case Instruction::FILL_ARRAY_DATA: {
-      BuildFillArrayData(instruction, dex_pc);
-      break;
-    }
-
-    case Instruction::MOVE_RESULT:
-    case Instruction::MOVE_RESULT_WIDE:
-    case Instruction::MOVE_RESULT_OBJECT: {
-      if (latest_result_ == nullptr) {
-        // A move-result without a pending result can only occur in dead
-        // code, which the verifier does not reject.
-      } else {
-        // An Invoke/FilledNewArray and its MoveResult could have landed in
-        // different blocks if there was a try/catch block boundary between
-        // them. For Invoke, we insert a StoreLocal after the instruction. For
-        // FilledNewArray, the local needs to be updated after the array was
-        // filled, otherwise we might overwrite an input vreg.
-        HStoreLocal* update_local =
-            new (arena_) HStoreLocal(GetLocalAt(instruction.VRegA()), latest_result_, dex_pc);
-        HBasicBlock* block = latest_result_->GetBlock();
-        if (block == current_block_) {
-          // MoveResult and the previous instruction are in the same block.
-          current_block_->AddInstruction(update_local);
-        } else {
-          // The two instructions are in different blocks. Insert the MoveResult
-          // before the final control-flow instruction of the previous block.
-          DCHECK(block->EndsWithControlFlowInstruction());
-          DCHECK(current_block_->GetInstructions().IsEmpty());
-          block->InsertInstructionBefore(update_local, block->GetLastInstruction());
-        }
-        latest_result_ = nullptr;
-      }
-      break;
-    }
-
-    case Instruction::CMP_LONG: {
-      Binop_23x_cmp(instruction, Primitive::kPrimLong, ComparisonBias::kNoBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPG_FLOAT: {
-      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kGtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPG_DOUBLE: {
-      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kGtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPL_FLOAT: {
-      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kLtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPL_DOUBLE: {
-      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kLtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::NOP:
-      break;
-
-    case Instruction::IGET:
-    case Instruction::IGET_QUICK:
-    case Instruction::IGET_WIDE:
-    case Instruction::IGET_WIDE_QUICK:
-    case Instruction::IGET_OBJECT:
-    case Instruction::IGET_OBJECT_QUICK:
-    case Instruction::IGET_BOOLEAN:
-    case Instruction::IGET_BOOLEAN_QUICK:
-    case Instruction::IGET_BYTE:
-    case Instruction::IGET_BYTE_QUICK:
-    case Instruction::IGET_CHAR:
-    case Instruction::IGET_CHAR_QUICK:
-    case Instruction::IGET_SHORT:
-    case Instruction::IGET_SHORT_QUICK: {
-      if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::IPUT:
-    case Instruction::IPUT_QUICK:
-    case Instruction::IPUT_WIDE:
-    case Instruction::IPUT_WIDE_QUICK:
-    case Instruction::IPUT_OBJECT:
-    case Instruction::IPUT_OBJECT_QUICK:
-    case Instruction::IPUT_BOOLEAN:
-    case Instruction::IPUT_BOOLEAN_QUICK:
-    case Instruction::IPUT_BYTE:
-    case Instruction::IPUT_BYTE_QUICK:
-    case Instruction::IPUT_CHAR:
-    case Instruction::IPUT_CHAR_QUICK:
-    case Instruction::IPUT_SHORT:
-    case Instruction::IPUT_SHORT_QUICK: {
-      if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::SGET:
-    case Instruction::SGET_WIDE:
-    case Instruction::SGET_OBJECT:
-    case Instruction::SGET_BOOLEAN:
-    case Instruction::SGET_BYTE:
-    case Instruction::SGET_CHAR:
-    case Instruction::SGET_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_pc, false)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::SPUT:
-    case Instruction::SPUT_WIDE:
-    case Instruction::SPUT_OBJECT:
-    case Instruction::SPUT_BOOLEAN:
-    case Instruction::SPUT_BYTE:
-    case Instruction::SPUT_CHAR:
-    case Instruction::SPUT_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_pc, true)) {
-        return false;
-      }
-      break;
-    }
-
-#define ARRAY_XX(kind, anticipated_type)                                          \
-    case Instruction::AGET##kind: {                                               \
-      BuildArrayAccess(instruction, dex_pc, false, anticipated_type);             \
-      break;                                                                      \
-    }                                                                             \
-    case Instruction::APUT##kind: {                                               \
-      BuildArrayAccess(instruction, dex_pc, true, anticipated_type);              \
-      break;                                                                      \
-    }
-
-    ARRAY_XX(, Primitive::kPrimInt);
-    ARRAY_XX(_WIDE, Primitive::kPrimLong);
-    ARRAY_XX(_OBJECT, Primitive::kPrimNot);
-    ARRAY_XX(_BOOLEAN, Primitive::kPrimBoolean);
-    ARRAY_XX(_BYTE, Primitive::kPrimByte);
-    ARRAY_XX(_CHAR, Primitive::kPrimChar);
-    ARRAY_XX(_SHORT, Primitive::kPrimShort);
-
-    case Instruction::ARRAY_LENGTH: {
-      HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot, dex_pc);
-      object = new (arena_) HNullCheck(object, dex_pc);
-      current_block_->AddInstruction(object);
-      current_block_->AddInstruction(new (arena_) HArrayLength(object, dex_pc));
-      UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_STRING: {
-      uint32_t string_index = instruction.VRegB_21c();
-      bool in_dex_cache = compiler_driver_->CanAssumeStringIsPresentInDexCache(
-          *dex_file_, string_index);
-      current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, dex_pc, in_dex_cache));
-      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_STRING_JUMBO: {
-      uint32_t string_index = instruction.VRegB_31c();
-      bool in_dex_cache = compiler_driver_->CanAssumeStringIsPresentInDexCache(
-          *dex_file_, string_index);
-      current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, dex_pc, in_dex_cache));
-      UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_CLASS: {
-      uint16_t type_index = instruction.VRegB_21c();
-      bool type_known_final;
-      bool type_known_abstract;
-      bool dont_use_is_referrers_class;
-      // `CanAccessTypeWithoutChecks` will tell whether the method being
-      // built is trying to access its own class, so that the generated
-      // code can optimize for this case. However, the optimization does not
-      // work for inlining, so we use `IsOutermostCompilingClass` instead.
-      bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
-          dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index,
-          &type_known_final, &type_known_abstract, &dont_use_is_referrers_class);
-      current_block_->AddInstruction(new (arena_) HLoadClass(
-          graph_->GetCurrentMethod(),
-          type_index,
-          *dex_file_,
-          IsOutermostCompilingClass(type_index),
-          dex_pc,
-          !can_access,
-          compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index)));
-      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::MOVE_EXCEPTION: {
-      current_block_->AddInstruction(new (arena_) HLoadException(dex_pc));
-      UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction(), dex_pc);
-      current_block_->AddInstruction(new (arena_) HClearException(dex_pc));
-      break;
-    }
-
-    case Instruction::THROW: {
-      HInstruction* exception = LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot, dex_pc);
-      current_block_->AddInstruction(new (arena_) HThrow(exception, dex_pc));
-      // A throw instruction must branch to the exit block.
-      current_block_->AddSuccessor(exit_block_);
-      // We finished building this block. Set the current block to null to avoid
-      // adding dead instructions to it.
-      current_block_ = nullptr;
-      break;
-    }
-
-    case Instruction::INSTANCE_OF: {
-      uint8_t destination = instruction.VRegA_22c();
-      uint8_t reference = instruction.VRegB_22c();
-      uint16_t type_index = instruction.VRegC_22c();
-      BuildTypeCheck(instruction, destination, reference, type_index, dex_pc);
-      break;
-    }
-
-    case Instruction::CHECK_CAST: {
-      uint8_t reference = instruction.VRegA_21c();
-      uint16_t type_index = instruction.VRegB_21c();
-      BuildTypeCheck(instruction, -1, reference, type_index, dex_pc);
-      break;
-    }
-
-    case Instruction::MONITOR_ENTER: {
-      current_block_->AddInstruction(new (arena_) HMonitorOperation(
-          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot, dex_pc),
-          HMonitorOperation::OperationKind::kEnter,
-          dex_pc));
-      break;
-    }
-
-    case Instruction::MONITOR_EXIT: {
-      current_block_->AddInstruction(new (arena_) HMonitorOperation(
-          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot, dex_pc),
-          HMonitorOperation::OperationKind::kExit,
-          dex_pc));
-      break;
-    }
-
-    case Instruction::PACKED_SWITCH: {
-      BuildPackedSwitch(instruction, dex_pc);
-      break;
-    }
-
-    case Instruction::SPARSE_SWITCH: {
-      BuildSparseSwitch(instruction, dex_pc);
-      break;
-    }
-
-    default:
-      VLOG(compiler) << "Did not compile "
-                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                     << " because of unhandled instruction "
-                     << instruction.Name();
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
-      return false;
-  }
-  return true;
-}  // NOLINT(readability/fn_size)
-
-HLocal* HGraphBuilder::GetLocalAt(uint32_t register_index) const {
-  return locals_[register_index];
-}
-
-void HGraphBuilder::UpdateLocal(uint32_t register_index,
-                                HInstruction* instruction,
-                                uint32_t dex_pc) const {
-  HLocal* local = GetLocalAt(register_index);
-  current_block_->AddInstruction(new (arena_) HStoreLocal(local, instruction, dex_pc));
-}
-
-HInstruction* HGraphBuilder::LoadLocal(uint32_t register_index,
-                                       Primitive::Type type,
-                                       uint32_t dex_pc) const {
-  HLocal* local = GetLocalAt(register_index);
-  current_block_->AddInstruction(new (arena_) HLoadLocal(local, type, dex_pc));
-  return current_block_->GetLastInstruction();
+  // 5) Type the graph and eliminate dead/redundant phis.
+  return ssa_builder_.BuildSsa();
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 48f5316..580ef72 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -19,320 +19,90 @@
 
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
+#include "block_builder.h"
 #include "dex_file.h"
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
+#include "instruction_builder.h"
 #include "optimizing_compiler_stats.h"
 #include "primitive.h"
 #include "nodes.h"
+#include "ssa_builder.h"
 
 namespace art {
 
-class Instruction;
-
 class HGraphBuilder : public ValueObject {
  public:
   HGraphBuilder(HGraph* graph,
                 DexCompilationUnit* dex_compilation_unit,
                 const DexCompilationUnit* const outer_compilation_unit,
                 const DexFile* dex_file,
+                const DexFile::CodeItem& code_item,
                 CompilerDriver* driver,
                 OptimizingCompilerStats* compiler_stats,
                 const uint8_t* interpreter_metadata,
-                Handle<mirror::DexCache> dex_cache)
-      : arena_(graph->GetArena()),
-        branch_targets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        entry_block_(nullptr),
-        exit_block_(nullptr),
-        current_block_(nullptr),
-        graph_(graph),
+                Handle<mirror::DexCache> dex_cache,
+                StackHandleScopeCollection* handles)
+      : graph_(graph),
         dex_file_(dex_file),
+        code_item_(code_item),
         dex_compilation_unit_(dex_compilation_unit),
         compiler_driver_(driver),
-        outer_compilation_unit_(outer_compilation_unit),
-        return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])),
-        code_start_(nullptr),
-        latest_result_(nullptr),
         compilation_stats_(compiler_stats),
-        interpreter_metadata_(interpreter_metadata),
-        dex_cache_(dex_cache) {}
+        block_builder_(graph, dex_file, code_item),
+        ssa_builder_(graph, dex_compilation_unit->GetDexCache(), handles),
+        instruction_builder_(graph,
+                             &block_builder_,
+                             &ssa_builder_,
+                             dex_file,
+                             code_item_,
+                             Primitive::GetType(dex_compilation_unit_->GetShorty()[0]),
+                             dex_compilation_unit,
+                             outer_compilation_unit,
+                             driver,
+                             interpreter_metadata,
+                             compiler_stats,
+                             dex_cache) {}
 
   // Only for unit testing.
-  HGraphBuilder(HGraph* graph, Primitive::Type return_type = Primitive::kPrimInt)
-      : arena_(graph->GetArena()),
-        branch_targets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        entry_block_(nullptr),
-        exit_block_(nullptr),
-        current_block_(nullptr),
-        graph_(graph),
+  HGraphBuilder(HGraph* graph,
+                const DexFile::CodeItem& code_item,
+                StackHandleScopeCollection* handles,
+                Primitive::Type return_type = Primitive::kPrimInt)
+      : graph_(graph),
         dex_file_(nullptr),
+        code_item_(code_item),
         dex_compilation_unit_(nullptr),
         compiler_driver_(nullptr),
-        outer_compilation_unit_(nullptr),
-        return_type_(return_type),
-        code_start_(nullptr),
-        latest_result_(nullptr),
-        compilation_stats_(nullptr),
-        interpreter_metadata_(nullptr),
         null_dex_cache_(),
-        dex_cache_(null_dex_cache_) {}
+        compilation_stats_(nullptr),
+        block_builder_(graph, nullptr, code_item),
+        ssa_builder_(graph, null_dex_cache_, handles),
+        instruction_builder_(graph,
+                             &block_builder_,
+                             &ssa_builder_,
+                             /* dex_file */ nullptr,
+                             code_item_,
+                             return_type,
+                             /* dex_compilation_unit */ nullptr,
+                             /* outer_compilation_unit */ nullptr,
+                             /* compiler_driver */ nullptr,
+                             /* interpreter_metadata */ nullptr,
+                             /* compiler_stats */ nullptr,
+                             null_dex_cache_) {}
 
-  GraphAnalysisResult BuildGraph(const DexFile::CodeItem& code,
-                                 StackHandleScopeCollection* handles);
+  GraphAnalysisResult BuildGraph();
 
   static constexpr const char* kBuilderPassName = "builder";
 
-  // The number of entries in a packed switch above which a jump table is
-  // used instead of a chained compare/jump series.
-  static constexpr uint16_t kSmallSwitchThreshold = 3;
-
  private:
-  // Analyzes the dex instruction and adds HInstructions to the graph
-  // to execute that instruction. Returns whether the instruction can
-  // be handled.
-  bool AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc);
-
-  // Finds all instructions that start a new block, and populates branch_targets_ with
-  // the newly created blocks.
-  // As a side effect, also computes the number of dex instructions, blocks,
-  // and branches.
-  // Returns true if all branches fall inside the method code, false otherwise.
-  // (In normal cases this should always return true, but a code unit can be
-  // artificially crafted so that branches fall through past its end.)
-  bool ComputeBranchTargets(const uint16_t* start,
-                            const uint16_t* end,
-                            size_t* number_of_branches);
-  void MaybeUpdateCurrentBlock(size_t dex_pc);
-  void FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item, ArenaBitVector* locations);
-  HBasicBlock* FindBlockStartingAt(int32_t dex_pc) const;
-  HBasicBlock* FindOrCreateBlockStartingAt(int32_t dex_pc);
-
-  // Adds new blocks to `branch_targets_` starting at the limits of TryItems and
-  // their exception handlers.
-  void CreateBlocksForTryCatch(const DexFile::CodeItem& code_item);
-
-  // Splits edges which cross the boundaries of TryItems, inserts TryBoundary
-  // instructions and links them to the corresponding catch blocks.
-  void InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item);
-
-  // Iterates over the exception handlers of `try_item`, finds the corresponding
-  // catch blocks and makes them successors of `try_boundary`. The order of
-  // successors matches the order in which runtime exception delivery searches
-  // for a handler.
-  void LinkToCatchBlocks(HTryBoundary* try_boundary,
-                         const DexFile::CodeItem& code_item,
-                         const DexFile::TryItem* try_item);
-
-  bool CanDecodeQuickenedInfo() const;
-  uint16_t LookupQuickenedInfo(uint32_t dex_pc);
-
-  void InitializeLocals(uint16_t count);
-  HLocal* GetLocalAt(uint32_t register_index) const;
-  void UpdateLocal(uint32_t register_index, HInstruction* instruction, uint32_t dex_pc) const;
-  HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const;
-  void InitializeParameters(uint16_t number_of_parameters);
-
-  // Returns whether the current method needs an access check for the type.
-  // The output parameter `finalizable` is set to whether the type is finalizable.
-  bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
-
-  template<typename T>
-  void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_23x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  void Binop_23x_cmp(const Instruction& instruction,
-                     Primitive::Type type,
-                     ComparisonBias bias,
-                     uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_12x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc);
-
-  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc);
-  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc);
-
-  void Conversion_12x(const Instruction& instruction,
-                      Primitive::Type input_type,
-                      Primitive::Type result_type,
-                      uint32_t dex_pc);
-
-  void BuildCheckedDivRem(uint16_t out_reg,
-                          uint16_t first_reg,
-                          int64_t second_reg_or_constant,
-                          uint32_t dex_pc,
-                          Primitive::Type type,
-                          bool second_is_lit,
-                          bool is_div);
-
-  void BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  // Builds an instance field access node and returns whether the instruction is supported.
-  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
-
-  void BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
-                                        uint32_t dex_pc,
-                                        bool is_put,
-                                        Primitive::Type field_type);
-  // Builds a static field access node and returns whether the instruction is supported.
-  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
-
-  void BuildArrayAccess(const Instruction& instruction,
-                        uint32_t dex_pc,
-                        bool is_get,
-                        Primitive::Type anticipated_type);
-
-  // Builds an invocation node and returns whether the instruction is supported.
-  bool BuildInvoke(const Instruction& instruction,
-                   uint32_t dex_pc,
-                   uint32_t method_idx,
-                   uint32_t number_of_vreg_arguments,
-                   bool is_range,
-                   uint32_t* args,
-                   uint32_t register_index);
-
-  // Builds a new array node and the instructions that fill it.
-  void BuildFilledNewArray(uint32_t dex_pc,
-                           uint32_t type_index,
-                           uint32_t number_of_vreg_arguments,
-                           bool is_range,
-                           uint32_t* args,
-                           uint32_t register_index);
-
-  void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
-
-  // Fills the given object with data as specified in the fill-array-data
-  // instruction. Currently only used for non-reference and non-floating point
-  // arrays.
-  template <typename T>
-  void BuildFillArrayData(HInstruction* object,
-                          const T* data,
-                          uint32_t element_count,
-                          Primitive::Type anticipated_type,
-                          uint32_t dex_pc);
-
-  // Fills the given object with data as specified in the fill-array-data
-  // instruction. The data must be for long and double arrays.
-  void BuildFillWideArrayData(HInstruction* object,
-                              const int64_t* data,
-                              uint32_t element_count,
-                              uint32_t dex_pc);
-
-  // Builds an `HInstanceOf` or an `HCheckCast` instruction.
-  void BuildTypeCheck(const Instruction& instruction,
-                      uint8_t destination,
-                      uint8_t reference,
-                      uint16_t type_index,
-                      uint32_t dex_pc);
-
-  // Builds an instruction sequence for a packed switch statement.
-  void BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc);
-
-  // Build a switch instruction from a packed switch statement.
-  void BuildSwitchJumpTable(const SwitchTable& table,
-                            const Instruction& instruction,
-                            HInstruction* value,
-                            uint32_t dex_pc);
-
-  // Builds an instruction sequence for a sparse switch statement.
-  void BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc);
-
-  void BuildSwitchCaseHelper(const Instruction& instruction, size_t index,
-                             bool is_last_case, const SwitchTable& table,
-                             HInstruction* value, int32_t case_value_int,
-                             int32_t target_offset, uint32_t dex_pc);
-
-  bool SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches);
-
   void MaybeRecordStat(MethodCompilationStat compilation_stat);
+  bool SkipCompilation(size_t number_of_branches);
 
-  // Returns the outer-most compiling method's class.
-  mirror::Class* GetOutermostCompilingClass() const;
-
-  // Returns the class whose method is being compiled.
-  mirror::Class* GetCompilingClass() const;
-
-  // Returns whether `type_index` points to the outer-most compiling method's class.
-  bool IsOutermostCompilingClass(uint16_t type_index) const;
-
-  void PotentiallySimplifyFakeString(uint16_t original_dex_register,
-                                     uint32_t dex_pc,
-                                     HInvoke* invoke);
-
-  bool SetupInvokeArguments(HInvoke* invoke,
-                            uint32_t number_of_vreg_arguments,
-                            uint32_t* args,
-                            uint32_t register_index,
-                            bool is_range,
-                            const char* descriptor,
-                            size_t start_index,
-                            size_t* argument_index);
-
-  bool HandleInvoke(HInvoke* invoke,
-                    uint32_t number_of_vreg_arguments,
-                    uint32_t* args,
-                    uint32_t register_index,
-                    bool is_range,
-                    const char* descriptor,
-                    HClinitCheck* clinit_check);
-
-  bool HandleStringInit(HInvoke* invoke,
-                        uint32_t number_of_vreg_arguments,
-                        uint32_t* args,
-                        uint32_t register_index,
-                        bool is_range,
-                        const char* descriptor);
-
-  HClinitCheck* ProcessClinitCheckForInvoke(
-      uint32_t dex_pc,
-      ArtMethod* method,
-      uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Build a HNewInstance instruction.
-  bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
-
-  // Return whether the compiler can assume `cls` is initialized.
-  bool IsInitialized(Handle<mirror::Class> cls) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Try to resolve a method using the class linker. Return null if a method could
-  // not be resolved.
-  ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
-
-  ArenaAllocator* const arena_;
-
-  // A list with one entry per code unit of the method's dex code, holding
-  // block information: if an entry contains a block, the dex instruction
-  // starting at that entry is the first instruction of a new block.
-  ArenaVector<HBasicBlock*> branch_targets_;
-
-  ArenaVector<HLocal*> locals_;
-
-  HBasicBlock* entry_block_;
-  HBasicBlock* exit_block_;
-  HBasicBlock* current_block_;
   HGraph* const graph_;
-
-  // The dex file where the method being compiled is.
   const DexFile* const dex_file_;
+  const DexFile::CodeItem& code_item_;
 
   // The compilation unit of the current method being compiled. Note that
   // it can be an inlined method.
@@ -340,29 +110,13 @@
 
   CompilerDriver* const compiler_driver_;
 
-  // The compilation unit of the outermost method being compiled. That is the
-  // method being compiled (and not inlined), and potentially inlining other
-  // methods.
-  const DexCompilationUnit* const outer_compilation_unit_;
-
-  // The return type of the method being compiled.
-  const Primitive::Type return_type_;
-
-  // The pointer in the dex file where the instructions of the code item
-  // currently being compiled start.
-  const uint16_t* code_start_;
-
-  // The last invoke or fill-new-array being built. Only to be
-  // used by move-result instructions.
-  HInstruction* latest_result_;
+  ScopedNullHandle<mirror::DexCache> null_dex_cache_;
 
   OptimizingCompilerStats* compilation_stats_;
 
-  const uint8_t* interpreter_metadata_;
-
-  // Dex cache for dex_file_.
-  ScopedNullHandle<mirror::DexCache> null_dex_cache_;
-  Handle<mirror::DexCache> dex_cache_;
+  HBasicBlockBuilder block_builder_;
+  SsaBuilder ssa_builder_;
+  HInstructionBuilder instruction_builder_;
 
   DISALLOW_COPY_AND_ASSIGN(HGraphBuilder);
 };
diff --git a/compiler/optimizing/bytecode_utils.h b/compiler/optimizing/bytecode_utils.h
new file mode 100644
index 0000000..6dfffce
--- /dev/null
+++ b/compiler/optimizing/bytecode_utils.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
+#define ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
+
+#include "base/arena_object.h"
+#include "dex_file.h"
+#include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
+
+namespace art {
+
+class CodeItemIterator : public ValueObject {
+ public:
+  CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc = 0u)
+      : code_ptr_(code_item.insns_ + start_dex_pc),
+        code_end_(code_item.insns_ + code_item.insns_size_in_code_units_),
+        dex_pc_(start_dex_pc) {}
+
+  bool Done() const { return code_ptr_ >= code_end_; }
+  bool IsLast() const { return code_ptr_ + CurrentInstruction().SizeInCodeUnits() >= code_end_; }
+
+  const Instruction& CurrentInstruction() const { return *Instruction::At(code_ptr_); }
+  uint32_t CurrentDexPc() const { return dex_pc_; }
+
+  void Advance() {
+    DCHECK(!Done());
+    size_t instruction_size = CurrentInstruction().SizeInCodeUnits();
+    code_ptr_ += instruction_size;
+    dex_pc_ += instruction_size;
+  }
+
+ private:
+  const uint16_t* code_ptr_;
+  const uint16_t* const code_end_;
+  uint32_t dex_pc_;
+
+  DISALLOW_COPY_AND_ASSIGN(CodeItemIterator);
+};
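+
+// Usage sketch (illustrative only): walking all instructions of a code item.
+// `ProcessInstruction` is a hypothetical callback, not part of this file.
+//
+//   for (CodeItemIterator it(code_item); !it.Done(); it.Advance()) {
+//     ProcessInstruction(it.CurrentInstruction(), it.CurrentDexPc());
+//   }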
+
+class DexSwitchTable : public ValueObject {
+ public:
+  DexSwitchTable(const Instruction& instruction, uint32_t dex_pc)
+      : instruction_(instruction),
+        dex_pc_(dex_pc),
+        sparse_(instruction.Opcode() == Instruction::SPARSE_SWITCH) {
+    int32_t table_offset = instruction.VRegB_31t();
+    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
+    DCHECK_EQ(table[0], sparse_ ? static_cast<uint16_t>(Instruction::kSparseSwitchSignature)
+                                : static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
+    num_entries_ = table[1];
+    values_ = reinterpret_cast<const int32_t*>(&table[2]);
+  }
+
+  uint16_t GetNumEntries() const {
+    return num_entries_;
+  }
+
+  void CheckIndex(size_t index) const {
+    if (sparse_) {
+      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+      DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
+    } else {
+      // In a packed table, we have the starting key and num_entries_ values.
+      DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
+    }
+  }
+
+  int32_t GetEntryAt(size_t index) const {
+    CheckIndex(index);
+    return values_[index];
+  }
+
+  uint32_t GetDexPcForIndex(size_t index) const {
+    CheckIndex(index);
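+    // Dex pcs are measured in 16-bit code units, hence the int16_t* casts when
+    // computing the distance from the switch instruction below.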
+    return dex_pc_ +
+        (reinterpret_cast<const int16_t*>(values_ + index) -
+         reinterpret_cast<const int16_t*>(&instruction_));
+  }
+
+  // Index of the first value in the table.
+  size_t GetFirstValueIndex() const {
+    if (sparse_) {
+      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+      return num_entries_;
+    } else {
+      // In a packed table, we have the starting key and num_entries_ values.
+      return 1;
+    }
+  }
+
+  bool IsSparse() const { return sparse_; }
+
+  bool ShouldBuildDecisionTree() const {
+    return IsSparse() || GetNumEntries() <= kSmallSwitchThreshold;
+  }
+
+ private:
+  const Instruction& instruction_;
+  const uint32_t dex_pc_;
+
+  // Whether this is a sparse-switch table (or a packed-switch one).
+  const bool sparse_;
+
+  // This cannot be const: it must be computed from the given instruction, and a
+  // complicated expression in the initializer list would hurt readability.
+  uint16_t num_entries_;
+
+  const int32_t* values_;
+
+  // The maximum number of entries in a packed switch for which we build a
+  // decision tree (compare/jump series) instead of a switch table.
+  static constexpr uint16_t kSmallSwitchThreshold = 3;
+
+  DISALLOW_COPY_AND_ASSIGN(DexSwitchTable);
+};
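+
+// Payload layout (per the dex bytecode format): a packed-switch table is
+// { ident, size, first_key, targets[size] }, while a sparse-switch table is
+// { ident, size, keys[size], targets[size] }. This is why GetFirstValueIndex()
+// returns 1 for packed tables and num_entries_ for sparse ones.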
+
+class DexSwitchTableIterator {
+ public:
+  explicit DexSwitchTableIterator(const DexSwitchTable& table)
+      : table_(table),
+        num_entries_(static_cast<size_t>(table_.GetNumEntries())),
+        first_target_offset_(table_.GetFirstValueIndex()),
+        index_(0u) {}
+
+  bool Done() const { return index_ >= num_entries_; }
+  bool IsLast() const { return index_ == num_entries_ - 1; }
+
+  void Advance() {
+    DCHECK(!Done());
+    index_++;
+  }
+
+  int32_t CurrentKey() const {
+    return table_.IsSparse() ? table_.GetEntryAt(index_) : table_.GetEntryAt(0) + index_;
+  }
+
+  int32_t CurrentTargetOffset() const {
+    return table_.GetEntryAt(index_ + first_target_offset_);
+  }
+
+  uint32_t GetDexPcForCurrentIndex() const { return table_.GetDexPcForIndex(index_); }
+
+ private:
+  const DexSwitchTable& table_;
+  const size_t num_entries_;
+  const size_t first_target_offset_;
+
+  size_t index_;
+};
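+
+// Usage sketch (illustrative only): visiting every (key, target) pair of a
+// switch; `HandleCase` is a hypothetical callback.
+//
+//   DexSwitchTable table(instruction, dex_pc);
+//   for (DexSwitchTableIterator it(table); !it.Done(); it.Advance()) {
+//     HandleCase(it.CurrentKey(), dex_pc + it.CurrentTargetOffset());
+//   }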
+
+inline const Instruction& GetDexInstructionAt(const DexFile::CodeItem& code_item, uint32_t dex_pc) {
+  return CodeItemIterator(code_item, dex_pc).CurrentInstruction();
+}
+
+inline bool IsThrowingDexInstruction(const Instruction& instruction) {
+  // Special-case MONITOR_EXIT which is a throwing instruction but the verifier
+  // guarantees that it will never throw. This is necessary to avoid rejecting
+  // 'synchronized' blocks/methods.
+  return instruction.IsThrow() && instruction.Opcode() != Instruction::MONITOR_EXIT;
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 32869ec..51fbaea 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -40,22 +40,20 @@
 #include "code_generator_mips64.h"
 #endif
 
+#include "bytecode_utils.h"
 #include "compiled_method.h"
 #include "dex/verified_method.h"
 #include "driver/compiler_driver.h"
-#include "gc_map_builder.h"
 #include "graph_visualizer.h"
 #include "intrinsics.h"
 #include "leb128.h"
-#include "mapping_table.h"
 #include "mirror/array-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object_reference.h"
+#include "mirror/string.h"
 #include "parallel_move_resolver.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
-#include "verifier/dex_gc_map.h"
-#include "vmap_table.h"
 
 namespace art {
 
@@ -142,6 +140,12 @@
   return pointer_size * index;
 }
 
+uint32_t CodeGenerator::GetArrayLengthOffset(HArrayLength* array_length) {
+  return array_length->IsStringLength()
+      ? mirror::String::CountOffset().Uint32Value()
+      : mirror::Array::LengthOffset().Uint32Value();
+}
+
 bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
   DCHECK_EQ((*block_order_)[current_block_index_], current);
   return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
@@ -190,7 +194,8 @@
 
 void CodeGenerator::GenerateSlowPaths() {
   size_t code_start = 0;
-  for (SlowPathCode* slow_path : slow_paths_) {
+  for (const std::unique_ptr<SlowPathCode>& slow_path_unique_ptr : slow_paths_) {
+    SlowPathCode* slow_path = slow_path_unique_ptr.get();
     current_slow_path_ = slow_path;
     if (disasm_info_ != nullptr) {
       code_start = GetAssembler()->CodeSize();
@@ -299,23 +304,6 @@
   }
 }
 
-int32_t CodeGenerator::GetStackSlot(HLocal* local) const {
-  uint16_t reg_number = local->GetRegNumber();
-  uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs();
-  if (reg_number >= number_of_locals) {
-    // Local is a parameter of the method. It is stored in the caller's frame.
-    // TODO: Share this logic with StackVisitor::GetVRegOffsetFromQuickCode.
-    return GetFrameSize() + InstructionSetPointerSize(GetInstructionSet())  // ART method
-                          + (reg_number - number_of_locals) * kVRegSize;
-  } else {
-    // Local is a temporary in this method. It is stored in this method's frame.
-    return GetFrameSize() - FrameEntrySpillSize()
-                          - kVRegSize  // filler.
-                          - (number_of_locals * kVRegSize)
-                          + (reg_number * kVRegSize);
-  }
-}
-
 void CodeGenerator::CreateCommonInvokeLocationSummary(
     HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor) {
   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
@@ -572,59 +560,66 @@
   }
 }
 
-CodeGenerator* CodeGenerator::Create(HGraph* graph,
-                                     InstructionSet instruction_set,
-                                     const InstructionSetFeatures& isa_features,
-                                     const CompilerOptions& compiler_options,
-                                     OptimizingCompilerStats* stats) {
+std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph,
+                                                     InstructionSet instruction_set,
+                                                     const InstructionSetFeatures& isa_features,
+                                                     const CompilerOptions& compiler_options,
+                                                     OptimizingCompilerStats* stats) {
+  ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2: {
-      return new arm::CodeGeneratorARM(graph,
-                                      *isa_features.AsArmInstructionSetFeatures(),
-                                      compiler_options,
-                                      stats);
+      return std::unique_ptr<CodeGenerator>(
+          new (arena) arm::CodeGeneratorARM(graph,
+                                            *isa_features.AsArmInstructionSetFeatures(),
+                                            compiler_options,
+                                            stats));
     }
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
-      return new arm64::CodeGeneratorARM64(graph,
-                                          *isa_features.AsArm64InstructionSetFeatures(),
-                                          compiler_options,
-                                          stats);
+      return std::unique_ptr<CodeGenerator>(
+          new (arena) arm64::CodeGeneratorARM64(graph,
+                                                *isa_features.AsArm64InstructionSetFeatures(),
+                                                compiler_options,
+                                                stats));
     }
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips: {
-      return new mips::CodeGeneratorMIPS(graph,
-                                         *isa_features.AsMipsInstructionSetFeatures(),
-                                         compiler_options,
-                                         stats);
+      return std::unique_ptr<CodeGenerator>(
+          new (arena) mips::CodeGeneratorMIPS(graph,
+                                              *isa_features.AsMipsInstructionSetFeatures(),
+                                              compiler_options,
+                                              stats));
     }
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64: {
-      return new mips64::CodeGeneratorMIPS64(graph,
-                                            *isa_features.AsMips64InstructionSetFeatures(),
-                                            compiler_options,
-                                            stats);
+      return std::unique_ptr<CodeGenerator>(
+          new (arena) mips64::CodeGeneratorMIPS64(graph,
+                                                  *isa_features.AsMips64InstructionSetFeatures(),
+                                                  compiler_options,
+                                                  stats));
     }
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86: {
-      return new x86::CodeGeneratorX86(graph,
-                                      *isa_features.AsX86InstructionSetFeatures(),
-                                      compiler_options,
-                                      stats);
+      return std::unique_ptr<CodeGenerator>(
+          new (arena) x86::CodeGeneratorX86(graph,
+                                            *isa_features.AsX86InstructionSetFeatures(),
+                                            compiler_options,
+                                            stats));
     }
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64: {
-      return new x86_64::CodeGeneratorX86_64(graph,
-                                            *isa_features.AsX86_64InstructionSetFeatures(),
-                                            compiler_options,
-                                            stats);
+      return std::unique_ptr<CodeGenerator>(
+          new (arena) x86_64::CodeGeneratorX86_64(graph,
+                                                  *isa_features.AsX86_64InstructionSetFeatures(),
+                                                  compiler_options,
+                                                  stats));
     }
 #endif
     default:
@@ -641,7 +636,7 @@
                         const CodeInfo& code_info,
                         const ArenaVector<HSuspendCheck*>& loop_headers,
                         ArenaVector<size_t>* covered) {
-  StackMapEncoding encoding = code_info.ExtractEncoding();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
   for (size_t i = 0; i < loop_headers.size(); ++i) {
     if (loop_headers[i]->GetDexPc() == dex_pc) {
       if (graph.IsCompilingOsr()) {
@@ -681,7 +676,7 @@
       uint32_t target = dex_pc + instruction.GetTargetOffset();
       CheckCovers(target, graph, code_info, loop_headers, &covered);
     } else if (instruction.IsSwitch()) {
-      SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
+      DexSwitchTable table(instruction, dex_pc);
       uint16_t num_entries = table.GetNumEntries();
       size_t offset = table.GetFirstValueIndex();
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index e56323f..6e75e3b 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -67,7 +67,7 @@
   DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
 };
 
-class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
+class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
  public:
   explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
     for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
@@ -166,15 +166,15 @@
   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
 };
 
-class CodeGenerator {
+class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
  public:
   // Compiles the graph to executable instructions.
   void Compile(CodeAllocator* allocator);
-  static CodeGenerator* Create(HGraph* graph,
-                               InstructionSet instruction_set,
-                               const InstructionSetFeatures& isa_features,
-                               const CompilerOptions& compiler_options,
-                               OptimizingCompilerStats* stats = nullptr);
+  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
+                                               InstructionSet instruction_set,
+                                               const InstructionSetFeatures& isa_features,
+                                               const CompilerOptions& compiler_options,
+                                               OptimizingCompilerStats* stats = nullptr);
   virtual ~CodeGenerator() {}
 
   // Get the graph. This is the outermost graph, never the graph of a method being inlined.
@@ -205,13 +205,12 @@
   virtual const Assembler& GetAssembler() const = 0;
   virtual size_t GetWordSize() const = 0;
   virtual size_t GetFloatingPointSpillSlotSize() const = 0;
-  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
+  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
   void InitializeCodeGeneration(size_t number_of_spill_slots,
                                 size_t maximum_number_of_live_core_registers,
                                 size_t maximum_number_of_live_fpu_registers,
                                 size_t number_of_out_slots,
                                 const ArenaVector<HBasicBlock*>& block_order);
-  int32_t GetStackSlot(HLocal* local) const;
 
   uint32_t GetFrameSize() const { return frame_size_; }
   void SetFrameSize(uint32_t size) { frame_size_ = size; }
@@ -299,8 +298,9 @@
   // save live registers, which may be needed by the runtime to set catch phis.
   bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;
 
+  // TODO: Avoid creating the `std::unique_ptr` here.
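+  // Note: SlowPathCode now derives from DeletableArenaObject, so deleting it
+  // through the std::unique_ptr runs the destructor while the arena retains
+  // ownership of the backing memory.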
   void AddSlowPath(SlowPathCode* slow_path) {
-    slow_paths_.push_back(slow_path);
+    slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
   }
 
   void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
@@ -340,6 +340,11 @@
   // Pointer variant for ArtMethod and ArtField arrays.
   size_t GetCachePointerOffset(uint32_t index);
 
+  // Helper that returns the offset of the array's length field.
+  // Note: Besides normal arrays, HArrayLength is also used to access the
+  // String's `count` field in String intrinsics.
+  static uint32_t GetArrayLengthOffset(HArrayLength* array_length);
+
   void EmitParallelMoves(Location from1,
                          Location to1,
                          Primitive::Type type1,
@@ -443,6 +448,11 @@
                              uint32_t dex_pc,
                              SlowPathCode* slow_path) = 0;
 
+  // Check if the desired_string_load_kind is supported. If it is, return it;
+  // otherwise return a fall-back kind that should be used instead.
+  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) = 0;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -471,6 +481,18 @@
     LabelType label;
   };
 
+  // String patch info used for recording locations of required linker patches and
+  // target strings. The actual string address can be absolute or PC-relative.
+  template <typename LabelType>
+  struct StringPatchInfo {
+    StringPatchInfo(const DexFile& df, uint32_t index)
+        : dex_file(df), string_index(index), label() { }
+
+    const DexFile& dex_file;
+    uint32_t string_index;
+    LabelType label;
+  };
+
   CodeGenerator(HGraph* graph,
                 size_t number_of_core_registers,
                 size_t number_of_fpu_registers,
@@ -508,8 +530,6 @@
     slow_paths_.reserve(8);
   }
 
-  virtual Location GetStackLocation(HLoadLocal* load) const = 0;
-
   virtual HGraphVisitor* GetLocationBuilder() = 0;
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
 
@@ -603,7 +623,7 @@
   HGraph* const graph_;
   const CompilerOptions& compiler_options_;
 
-  ArenaVector<SlowPathCode*> slow_paths_;
+  ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;
 
   // The current slow-path that we're generating code for.
   SlowPathCode* current_slow_path_;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3a18a0d..e010662 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -777,15 +777,21 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
-      assembler_(),
+      assembler_(graph->GetArena()),
       isa_features_(isa_features),
+      uint32_literals_(std::less<uint32_t>(),
+                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       method_patches_(MethodReferenceComparator(),
                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      dex_cache_arrays_base_labels_(std::less<HArmDexCacheArraysBase*>(),
-                                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_string_patches_(StringReferenceValueComparator(),
+                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_address_patches_(std::less<uint32_t>(),
+                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
 }
@@ -948,30 +954,6 @@
   __ BindTrackedLabel(label);
 }
 
-Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
-  switch (load->GetType()) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << load->GetType();
-      UNREACHABLE();
-  }
-
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-}
-
 Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -1718,49 +1700,6 @@
   HandleCondition(comp);
 }
 
-void LocationsBuilderARM::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
-void LocationsBuilderARM::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
-void LocationsBuilderARM::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall);
-  switch (store->InputAt(1)->GetType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unexpected local type " << store->InputAt(1)->GetType();
-  }
-}
-
-void InstructionCodeGeneratorARM::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -4803,7 +4742,7 @@
 
 void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
@@ -5221,12 +5160,57 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind(
+    HLoadString::LoadKind desired_string_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_string_load_kind) {
+      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadString::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadString::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // We disable pc-relative load when there is an irreducible loop, as the optimization
+      // is incompatible with it.
+      // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      if (GetGraph()->HasIrreducibleLoops()) {
+        return HLoadString::LoadKind::kDexCacheViaMethod;
+      }
+      break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_string_load_kind;
+}
+
 void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
+  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod ||
+      load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -5234,16 +5218,73 @@
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
 
-  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-  GenerateGcRootFieldLoad(
-      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
-  __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  // /* GcRoot<mirror::String> */ out = out[string_index]
-  GenerateGcRootFieldLoad(
-      load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+  switch (load->GetLoadKind()) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
+                                                                      load->GetStringIndex()));
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorARM::PcRelativePatchInfo* labels =
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(out, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(out, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(out, out, ShifterOperand(PC));
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      // A 16-bit LDR immediate has a 5-bit offset that is scaled by the access
+      // size, giving a 128B range. To reduce the number of literals when we
+      // load multiple strings, split the dex cache address into a 128B-aligned
+      // base loaded from a literal and the remaining offset embedded in the load.
+      static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(load->GetAddress(), 4u);
+      constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2;
+      uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits);
+      uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
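+      // Example with a hypothetical address: 0x12345678 splits into
+      // base_address = 0x12345600 and offset = 0x78, so nearby dex cache
+      // entries share a single base literal.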
+      __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
+      GenerateGcRootFieldLoad(load, out_loc, out, offset);
+      break;
+    }
+    case HLoadString::LoadKind::kDexCachePcRelative: {
+      Register base_reg = locations->InAt(0).AsRegister<Register>();
+      HArmDexCacheArraysBase* base = load->InputAt(0)->AsArmDexCacheArraysBase();
+      int32_t offset = load->GetDexCacheElementOffset() - base->GetElementOffset();
+      GenerateGcRootFieldLoad(load, out_loc, base_reg, offset);
+      break;
+    }
+    case HLoadString::LoadKind::kDexCacheViaMethod: {
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      GenerateGcRootFieldLoad(
+          load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+      // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
+      __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
+      // /* GcRoot<mirror::String> */ out = out[string_index]
+      GenerateGcRootFieldLoad(
+          load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
+      UNREACHABLE();
+  }
 
   if (!load->IsInDexCache()) {
     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
@@ -6220,6 +6261,8 @@
   HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
   // We disable pc-relative load when there is an irreducible loop, as the optimization
   // is incompatible with it.
+  // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
+  // with irreducible loops.
   if (GetGraph()->HasIrreducibleLoops() &&
       (dispatch_info.method_load_kind ==
           HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
@@ -6330,8 +6373,9 @@
                         reg,
                         method_reg,
                         ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
       break;
     }
@@ -6399,13 +6443,49 @@
   __ blx(LR);
 }
 
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch(
+    const DexFile& dex_file, uint32_t string_index) {
+  return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+}
+
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch(
+    const DexFile& dex_file, uint32_t element_offset) {
+  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
+}
+
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativePatch(
+    const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
+  patches->emplace_back(dex_file, offset_or_index);
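+  // Returning a pointer into the container is safe here: ArenaDeque<> retains
+  // element addresses on push/emplace_back().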
+  return &patches->back();
+}
+
+Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                                             uint32_t string_index) {
+  return boot_image_string_patches_.GetOrCreate(
+      StringReference(&dex_file, string_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorARM::DeduplicateBootImageAddressLiteral(uint32_t address) {
+  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
+  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
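+  // Patchable addresses are kept in a separate map so that EmitLinkerPatches()
+  // can record their literal positions.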
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+}
+
+Literal* CodeGeneratorARM::DeduplicateDexCacheAddressLiteral(uint32_t address) {
+  return DeduplicateUint32Literal(address, &uint32_literals_);
+}
+
 void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
       method_patches_.size() +
       call_patches_.size() +
       relative_call_patches_.size() +
-      /* MOVW+MOVT for each base */ 2u * dex_cache_arrays_base_labels_.size();
+      /* MOVW+MOVT for each base */ 2u * pc_relative_dex_cache_patches_.size() +
+      boot_image_string_patches_.size() +
+      /* MOVW+MOVT for each base */ 2u * pc_relative_string_patches_.size() +
+      boot_image_address_patches_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -6431,41 +6511,75 @@
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
-  for (const auto& pair : dex_cache_arrays_base_labels_) {
-    HArmDexCacheArraysBase* base = pair.first;
-    const DexCacheArraysBaseLabels* labels = &pair.second;
-    const DexFile& dex_file = base->GetDexFile();
-    size_t base_element_offset = base->GetElementOffset();
-    DCHECK(labels->add_pc_label.IsBound());
-    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(labels->add_pc_label.Position());
+  for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t base_element_offset = info.offset_or_index;
+    DCHECK(info.add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position());
     // Add MOVW patch.
-    DCHECK(labels->movw_label.IsBound());
-    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(labels->movw_label.Position());
+    DCHECK(info.movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position());
     linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset,
                                                               &dex_file,
                                                               add_pc_offset,
                                                               base_element_offset));
     // Add MOVT patch.
-    DCHECK(labels->movt_label.IsBound());
-    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(labels->movt_label.Position());
+    DCHECK(info.movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position());
     linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset,
                                                               &dex_file,
                                                               add_pc_offset,
                                                               base_element_offset));
   }
+  for (const auto& entry : boot_image_string_patches_) {
+    const StringReference& target_string = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = literal->GetLabel()->Position();
+    linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
+                                                       target_string.dex_file,
+                                                       target_string.string_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    uint32_t string_index = info.offset_or_index;
+    DCHECK(info.add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position());
+    // Add MOVW patch.
+    DCHECK(info.movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position());
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(movw_offset,
+                                                               &dex_file,
+                                                               add_pc_offset,
+                                                               string_index));
+    // Add MOVT patch.
+    DCHECK(info.movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position());
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(movt_offset,
+                                                               &dex_file,
+                                                               add_pc_offset,
+                                                               string_index));
+  }
+  for (const auto& entry : boot_image_address_patches_) {
+    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = literal->GetLabel()->Position();
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
+  }
+}
+
+Literal* CodeGeneratorARM::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) {
+  return map->GetOrCreate(
+      value,
+      [this, value]() { return __ NewLiteral<uint32_t>(value); });
 }
 
 Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method,
                                                     MethodToLiteralMap* map) {
-  // Look up the literal for target_method.
-  auto lb = map->lower_bound(target_method);
-  if (lb != map->end() && !map->key_comp()(target_method, lb->first)) {
-    return lb->second;
-  }
-  // We don't have a literal for this method yet, insert a new one.
-  Literal* literal = __ NewLiteral<uint32_t>(0u);
-  map->PutBefore(lb, target_method, literal);
-  return literal;
+  return map->GetOrCreate(
+      target_method,
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
 }
 
 Literal* CodeGeneratorARM::DeduplicateMethodAddressLiteral(MethodReference target_method) {
@@ -6600,16 +6714,16 @@
 void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
   locations->SetOut(Location::RequiresRegister());
-  codegen_->AddDexCacheArraysBase(base);
 }
 
 void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
   Register base_reg = base->GetLocations()->Out().AsRegister<Register>();
-  CodeGeneratorARM::DexCacheArraysBaseLabels* labels = codegen_->GetDexCacheArraysBaseLabels(base);
+  CodeGeneratorARM::PcRelativePatchInfo* labels =
+      codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
   __ BindTrackedLabel(&labels->movw_label);
-  __ movw(base_reg, 0u);
+  __ movw(base_reg, /* placeholder */ 0u);
   __ BindTrackedLabel(&labels->movt_label);
-  __ movt(base_reg, 0u);
+  __ movt(base_reg, /* placeholder */ 0u);
   __ BindTrackedLabel(&labels->add_pc_label);
   __ add(base_reg, base_reg, ShifterOperand(PC));
 }
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index cc4aa14..0020f7b 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -23,6 +23,7 @@
 #include "nodes.h"
 #include "parallel_move_resolver.h"
 #include "utils/arm/assembler_thumb2.h"
+#include "utils/string_reference.h"
 
 namespace art {
 namespace arm {
@@ -338,14 +339,12 @@
     return assembler_;
   }
 
-  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
     return GetLabelOf(block)->Position();
   }
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
@@ -403,6 +402,11 @@
 
   Label* GetFrameEntryLabel() { return &frame_entry_label_; }
 
+  // Check if the desired_string_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -414,32 +418,34 @@
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
-  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
-
-  // The PC-relative base address is loaded with three instructions, MOVW+MOVT
+  // PcRelativePatchInfo is used for PC-relative addressing of both dex cache
+  // arrays and boot image strings; the two uses differ only in how
+  // offset_or_index is interpreted.
+  // The PC-relative address is loaded with three instructions, MOVW+MOVT
   // to load the offset to base_reg and then ADD base_reg, PC. The offset is
   // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
   // currently emit these 3 instructions together, instruction scheduling could
   // split this sequence apart, so we keep separate labels for each of them.
-  struct DexCacheArraysBaseLabels {
-    DexCacheArraysBaseLabels() = default;
-    DexCacheArraysBaseLabels(DexCacheArraysBaseLabels&& other) = default;
+  struct PcRelativePatchInfo {
+    PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx)
+        : target_dex_file(dex_file), offset_or_index(off_or_idx) { }
+    PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
 
+    const DexFile& target_dex_file;
+    // Either the dex cache array element offset or the string index.
+    uint32_t offset_or_index;
     Label movw_label;
     Label movt_label;
     Label add_pc_label;
   };
 
-  void AddDexCacheArraysBase(HArmDexCacheArraysBase* base) {
-    DexCacheArraysBaseLabels labels;
-    dex_cache_arrays_base_labels_.Put(base, std::move(labels));
-  }
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                                       uint32_t element_offset);
+  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
+  Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
+  Literal* DeduplicateDexCacheAddressLiteral(uint32_t address);
 
-  DexCacheArraysBaseLabels* GetDexCacheArraysBaseLabels(HArmDexCacheArraysBase* base) {
-    auto it = dex_cache_arrays_base_labels_.find(base);
-    DCHECK(it != dex_cache_arrays_base_labels_.end());
-    return &it->second;
-  }
+  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
@@ -525,14 +531,19 @@
 
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
+  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
-  using DexCacheArraysBaseToLabelsMap = ArenaSafeMap<HArmDexCacheArraysBase*,
-                                                     DexCacheArraysBaseLabels,
-                                                     std::less<HArmDexCacheArraysBase*>>;
+  using BootStringToLiteralMap = ArenaSafeMap<StringReference,
+                                              Literal*,
+                                              StringReferenceValueComparator>;
 
+  Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
   Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
+  PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
+                                          uint32_t offset_or_index,
+                                          ArenaDeque<PcRelativePatchInfo>* patches);
 
   // Labels for each block that will be compiled.
   Label* block_labels_;  // Indexed by block id.
@@ -543,14 +554,22 @@
   Thumb2Assembler assembler_;
   const ArmInstructionSetFeatures& isa_features_;
 
+  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
+  Uint32ToLiteralMap uint32_literals_;
   // Method patch info, map MethodReference to a literal for method address and method code.
   MethodToLiteralMap method_patches_;
   MethodToLiteralMap call_patches_;
   // Relative call patch info.
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
-
-  DexCacheArraysBaseToLabelsMap dex_cache_arrays_base_labels_;
+  // PC-relative patch info for each HArmDexCacheArraysBase.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
+  BootStringToLiteralMap boot_image_string_patches_;
+  // PC-relative String patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for patchable boot image addresses.
+  Uint32ToLiteralMap boot_image_address_patches_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 1f577b3..261c04f 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -899,12 +899,15 @@
                     callee_saved_fp_registers.list(),
                     compiler_options,
                     stats),
-      block_labels_(nullptr),
+      block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
+      assembler_(graph->GetArena()),
       isa_features_(isa_features),
+      uint32_literals_(std::less<uint32_t>(),
+                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       uint64_literals_(std::less<uint64_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       method_patches_(MethodReferenceComparator(),
@@ -912,7 +915,12 @@
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_string_patches_(StringReferenceValueComparator(),
+                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_address_patches_(std::less<uint32_t>(),
+                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
@@ -920,7 +928,7 @@
 #define __ GetVIXLAssembler()->
 
 void CodeGeneratorARM64::EmitJumpTables() {
-  for (auto jump_table : jump_tables_) {
+  for (auto&& jump_table : jump_tables_) {
     jump_table->EmitTable(this);
   }
 }
@@ -1065,31 +1073,6 @@
   }
 }
 
-Location CodeGeneratorARM64::GetStackLocation(HLoadLocal* load) const {
-  Primitive::Type type = load->GetType();
-
-  switch (type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << type;
-  }
-
-  LOG(FATAL) << "Unreachable";
-  return Location::NoLocation();
-}
-
 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register card = temps.AcquireX();
@@ -2135,9 +2118,9 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   BlockPoolsScope block_pools(GetVIXLAssembler());
-  __ Ldr(OutputRegister(instruction),
-         HeapOperand(InputRegisterAt(instruction, 0), mirror::Array::LengthOffset()));
+  __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), offset));
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 }
 
@@ -3662,23 +3645,21 @@
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
       // Add ADRP with its PC-relative DexCache access patch.
-      pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
-                                                  invoke->GetDexCacheArrayOffset());
-      vixl::Label* pc_insn_label = &pc_relative_dex_cache_patches_.back().label;
+      const DexFile& dex_file = *invoke->GetTargetMethod().dex_file;
+      uint32_t element_offset = invoke->GetDexCacheArrayOffset();
+      vixl::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
       {
         vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
-        __ Bind(pc_insn_label);
-        __ adrp(XRegisterFrom(temp), 0);
+        __ Bind(adrp_label);
+        __ adrp(XRegisterFrom(temp), /* offset placeholder */ 0);
       }
-      pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
       // Add LDR with its PC-relative DexCache access patch.
-      pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
-                                                  invoke->GetDexCacheArrayOffset());
+      vixl::Label* ldr_label =
+          NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
       {
         vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
-        __ Bind(&pc_relative_dex_cache_patches_.back().label);
-        __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), 0));
-        pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
+        __ Bind(ldr_label);
+        __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), /* offset placeholder */ 0));
       }
       break;
     }
@@ -3700,7 +3681,8 @@
              MemOperand(method_reg.X(),
                         ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value()));
       // temp = temp[index_in_cache];
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache)));
       break;
     }
@@ -3772,13 +3754,58 @@
   __ Blr(lr);
 }
 
+vixl::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                          uint32_t string_index,
+                                                          vixl::Label* adrp_label) {
+  return NewPcRelativePatch(dex_file, string_index, adrp_label, &pc_relative_string_patches_);
+}
+
+vixl::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                                                 uint32_t element_offset,
+                                                                 vixl::Label* adrp_label) {
+  return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
+}
+
+vixl::Label* CodeGeneratorARM64::NewPcRelativePatch(const DexFile& dex_file,
+                                                    uint32_t offset_or_index,
+                                                    vixl::Label* adrp_label,
+                                                    ArenaDeque<PcRelativePatchInfo>* patches) {
+  // Add a patch entry and return the label.
+  patches->emplace_back(dex_file, offset_or_index);
+  PcRelativePatchInfo* info = &patches->back();
+  vixl::Label* label = &info->label;
+  // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
+  info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
+  return label;
+}
+
+vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
+    const DexFile& dex_file, uint32_t string_index) {
+  return boot_image_string_patches_.GetOrCreate(
+      StringReference(&dex_file, string_index),
+      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
+}
+
+vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(uint64_t address) {
+  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
+  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+}
+
+vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddressLiteral(uint64_t address) {
+  return DeduplicateUint64Literal(address);
+}
+
 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
       method_patches_.size() +
       call_patches_.size() +
       relative_call_patches_.size() +
-      pc_relative_dex_cache_patches_.size();
+      pc_relative_dex_cache_patches_.size() +
+      boot_image_string_patches_.size() +
+      pc_relative_string_patches_.size() +
+      boot_image_address_patches_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -3799,38 +3826,51 @@
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
-  for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
+  for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
     linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location(),
                                                               &info.target_dex_file,
                                                               info.pc_insn_label->location(),
-                                                              info.element_offset));
+                                                              info.offset_or_index));
+  }
+  for (const auto& entry : boot_image_string_patches_) {
+    const StringReference& target_string = entry.first;
+    vixl::Literal<uint32_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::StringPatch(literal->offset(),
+                                                       target_string.dex_file,
+                                                       target_string.string_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(info.label.location(),
+                                                               &info.target_dex_file,
+                                                               info.pc_insn_label->location(),
+                                                               info.offset_or_index));
+  }
+  for (const auto& entry : boot_image_address_patches_) {
+    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
+    vixl::Literal<uint32_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal->offset()));
   }
 }
 
+vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
+                                                                      Uint32ToLiteralMap* map) {
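+  // GetOrCreate() invokes the lambda only on a cache miss, replacing the manual
+  // lower_bound()/PutBefore() pattern used previously (see DeduplicateUint64Literal
+  // below for the shape of the old code).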
+  return map->GetOrCreate(
+      value,
+      [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
+}
+
 vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
-  // Look up the literal for value.
-  auto lb = uint64_literals_.lower_bound(value);
-  if (lb != uint64_literals_.end() && !uint64_literals_.key_comp()(value, lb->first)) {
-    return lb->second;
-  }
-  // We don't have a literal for this value, insert a new one.
-  vixl::Literal<uint64_t>* literal = __ CreateLiteralDestroyedWithPool<uint64_t>(value);
-  uint64_literals_.PutBefore(lb, value, literal);
-  return literal;
+  return uint64_literals_.GetOrCreate(
+      value,
+      [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
 }
 
 vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
     MethodReference target_method,
     MethodToLiteralMap* map) {
-  // Look up the literal for target_method.
-  auto lb = map->lower_bound(target_method);
-  if (lb != map->end() && !map->key_comp()(target_method, lb->first)) {
-    return lb->second;
-  }
-  // We don't have a literal for this method yet, insert a new one.
-  vixl::Literal<uint64_t>* literal = __ CreateLiteralDestroyedWithPool<uint64_t>(0u);
-  map->PutBefore(lb, target_method, literal);
-  return literal;
+  return map->GetOrCreate(
+      target_method,
+      [this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); });
 }
 
 vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodAddressLiteral(
@@ -3947,36 +3987,135 @@
   __ Str(wzr, GetExceptionTlsAddress());
 }
 
-void LocationsBuilderARM64::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
+HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
+    HLoadString::LoadKind desired_string_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_string_load_kind) {
+      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadString::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadString::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_string_load_kind;
 }
 
 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
+  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
   Location out_loc = load->GetLocations()->Out();
   Register out = OutputRegister(load);
-  Register current_method = InputRegisterAt(load, 0);
 
-  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-  GenerateGcRootFieldLoad(
-      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
-  __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
-  // /* GcRoot<mirror::String> */ out = out[string_index]
-  GenerateGcRootFieldLoad(
-      load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+  switch (load->GetLoadKind()) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
+                                                              load->GetStringIndex()));
+      return;  // No dex cache slow path.
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      // Add ADRP with its PC-relative String patch.
+      const DexFile& dex_file = load->GetDexFile();
+      uint32_t string_index = load->GetStringIndex();
+      vixl::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
+      {
+        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(adrp_label);
+        __ adrp(out.X(), /* offset placeholder */ 0);
+      }
+      // Add ADD with its PC-relative String patch.
+      vixl::Label* add_label =
+          codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
+      {
+        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(add_label);
+        __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0));
+      }
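+      // The linker resolves the two patches above into an ADRP/ADD pair that
+      // produces the address of the String in the boot image.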
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK(load->GetAddress() != 0u && IsUint<32>(load->GetAddress()));
+      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress()));
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(load->GetAddress(), 0u);
+      // LDR immediate has a 12-bit offset multiplied by the size and for 32-bit loads
+      // that gives a 16KiB range. To reduce the number of literals if we load
+      // multiple strings, simply split the dex cache address to a 16KiB aligned base
+      // loaded from a literal and the remaining offset embedded in the load.
+      static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(load->GetAddress(), 4u);
+      constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2;
+      uint64_t base_address = load->GetAddress() & ~MaxInt<uint64_t>(offset_bits);
+      uint32_t offset = load->GetAddress() & MaxInt<uint64_t>(offset_bits);
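+      // For example, with an illustrative address of 0x712345678: base_address
+      // becomes 0x712344000 and offset becomes 0x1678, so strings whose GC roots
+      // fall in the same 16KiB-aligned window share a single base literal.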
+      __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address));
+      GenerateGcRootFieldLoad(load, out_loc, out.X(), offset);
+      break;
+    }
+    case HLoadString::LoadKind::kDexCachePcRelative: {
+      // Add ADRP with its PC-relative DexCache access patch.
+      const DexFile& dex_file = load->GetDexFile();
+      uint32_t element_offset = load->GetDexCacheElementOffset();
+      vixl::Label* adrp_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
+      {
+        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(adrp_label);
+        __ adrp(out.X(), /* offset placeholder */ 0);
+      }
+      // Add LDR with its PC-relative DexCache access patch.
+      vixl::Label* ldr_label =
+          codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
+      GenerateGcRootFieldLoad(load, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
+      break;
+    }
+    case HLoadString::LoadKind::kDexCacheViaMethod: {
+      Register current_method = InputRegisterAt(load, 0);
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      GenerateGcRootFieldLoad(
+          load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+      // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
+      __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
+      // /* GcRoot<mirror::String> */ out = out[string_index]
+      GenerateGcRootFieldLoad(
+          load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
+      UNREACHABLE();
+  }
 
   if (!load->IsInDexCache()) {
     SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
@@ -3986,14 +4125,6 @@
   }
 }
 
-void LocationsBuilderARM64::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM64::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -4386,34 +4517,6 @@
   HandleShift(shr);
 }
 
-void LocationsBuilderARM64::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  Primitive::Type field_type = store->InputAt(1)->GetType();
-  switch (field_type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented local type " << field_type;
-      UNREACHABLE();
-  }
-}
-
-void InstructionCodeGeneratorARM64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
   HandleBinaryOp(instruction);
 }
@@ -4681,8 +4784,7 @@
       __ B(codegen_->GetLabelOf(default_block));
     }
   } else {
-    JumpTableARM64* jump_table = new (GetGraph()->GetArena()) JumpTableARM64(switch_instr);
-    codegen_->AddJumpTable(jump_table);
+    JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
 
     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
 
@@ -4791,7 +4893,8 @@
 void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction,
                                                             Location root,
                                                             vixl::Register obj,
-                                                            uint32_t offset) {
+                                                            uint32_t offset,
+                                                            vixl::Label* fixup_label) {
   Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
@@ -4804,7 +4907,13 @@
       //   }
 
       // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-      __ Ldr(root_reg, MemOperand(obj, offset));
+      if (fixup_label == nullptr) {
+        __ Ldr(root_reg, MemOperand(obj, offset));
+      } else {
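+        // Use the raw `ldr` rather than the `Ldr` macro: the
+        // SingleEmissionCheckScope guarantees that exactly one instruction is
+        // emitted at the bound label, which is what the linker patch expects.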
+        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(fixup_label);
+        __ ldr(root_reg, MemOperand(obj, offset));
+      }
       static_assert(
           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
@@ -4829,14 +4938,26 @@
       // GC root loaded through a slow path for read barriers other
       // than Baker's.
       // /* GcRoot<mirror::Object>* */ root = obj + offset
-      __ Add(root_reg.X(), obj.X(), offset);
+      if (fixup_label == nullptr) {
+        __ Add(root_reg.X(), obj.X(), offset);
+      } else {
+        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(fixup_label);
+        __ add(root_reg.X(), obj.X(), offset);
+      }
       // /* mirror::Object* */ root = root->Read()
       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
     }
   } else {
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-    __ Ldr(root_reg, MemOperand(obj, offset));
+    if (fixup_label == nullptr) {
+      __ Ldr(root_reg, MemOperand(obj, offset));
+    } else {
+      vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+      __ Bind(fixup_label);
+      __ ldr(root_reg, MemOperand(obj, offset));
+    }
     // Note that GC roots are not affected by heap poisoning, thus we
     // do not have to unpoison `root_reg` here.
   }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index cf9dc1b..422963e 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
 
+#include "arch/arm64/quick_method_frame_info_arm64.h"
 #include "code_generator.h"
 #include "common_arm64.h"
 #include "dex/compiler_enums.h"
@@ -24,9 +25,9 @@
 #include "nodes.h"
 #include "parallel_move_resolver.h"
 #include "utils/arm64/assembler_arm64.h"
+#include "utils/string_reference.h"
 #include "vixl/a64/disasm-a64.h"
 #include "vixl/a64/macro-assembler-a64.h"
-#include "arch/arm64/quick_method_frame_info_arm64.h"
 
 namespace art {
 namespace arm64 {
@@ -82,7 +83,7 @@
   DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64);
 };
 
-class JumpTableARM64 : public ArenaObject<kArenaAllocSwitchTable> {
+class JumpTableARM64 : public DeletableArenaObject<kArenaAllocSwitchTable> {
  public:
   explicit JumpTableARM64(HPackedSwitch* switch_instr)
     : switch_instr_(switch_instr), table_start_() {}
@@ -255,7 +256,8 @@
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                vixl::Register obj,
-                               uint32_t offset);
+                               uint32_t offset,
+                               vixl::Label* fixup_label = nullptr);
 
   // Generate a floating-point comparison.
   void GenerateFcmp(HInstruction* instruction);
@@ -350,8 +352,9 @@
 
   void Bind(HBasicBlock* block) OVERRIDE;
 
-  vixl::Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<vixl::Label>(block_labels_, block);
+  vixl::Label* GetLabelOf(HBasicBlock* block) {
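+    // Blocks containing only a jump emit no code of their own, so resolve to
+    // the first block that does and share its label.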
+    block = FirstNonEmptyBlock(block);
+    return &(block_labels_[block->GetBlockId()]);
   }
 
   size_t GetWordSize() const OVERRIDE {
@@ -363,7 +366,7 @@
     return kArm64WordSize;
   }
 
-  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
     vixl::Label* block_entry_label = GetLabelOf(block);
     DCHECK(block_entry_label->IsBound());
     return block_entry_label->location();
@@ -384,8 +387,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
@@ -413,11 +414,12 @@
   }
 
   void Initialize() OVERRIDE {
-    block_labels_ = CommonInitializeLabels<vixl::Label>();
+    block_labels_.resize(GetGraph()->GetBlocks().size());
   }
 
-  void AddJumpTable(JumpTableARM64* jump_table) {
-    jump_tables_.push_back(jump_table);
+  JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) {
+    jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARM64(switch_instr));
+    return jump_tables_.back().get();
   }
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -453,6 +455,11 @@
     return false;
   }
 
+  // Check if the desired_string_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -467,6 +474,27 @@
     UNIMPLEMENTED(FATAL);
   }
 
+  // Add a new PC-relative string patch for an instruction and return the label
+  // to be bound before the instruction. The instruction will be either the
+  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
+  // to the associated ADRP patch label).
+  vixl::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                        uint32_t string_index,
+                                        vixl::Label* adrp_label = nullptr);
+
+  // Add a new PC-relative dex cache array patch for an instruction and return
+  // the label to be bound before the instruction. The instruction will be
+  // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
+  // pointing to the associated ADRP patch label).
+  vixl::Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                               uint32_t element_offset,
+                                               vixl::Label* adrp_label = nullptr);
+
+  vixl::Literal<uint32_t>* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                                             uint32_t string_index);
+  vixl::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
+  vixl::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
+
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
   // Fast path implementation of ReadBarrier::Barrier for a heap
@@ -554,32 +582,46 @@
                                                  bool use_load_acquire);
 
   using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
+  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::Literal<uint32_t>*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
                                           vixl::Literal<uint64_t>*,
                                           MethodReferenceComparator>;
+  using BootStringToLiteralMap = ArenaSafeMap<StringReference,
+                                              vixl::Literal<uint32_t>*,
+                                              StringReferenceValueComparator>;
 
+  vixl::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
   vixl::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
   vixl::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method,
                                                     MethodToLiteralMap* map);
   vixl::Literal<uint64_t>* DeduplicateMethodAddressLiteral(MethodReference target_method);
   vixl::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method);
 
-  struct PcRelativeDexCacheAccessInfo {
-    PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
-        : target_dex_file(dex_file), element_offset(element_off), label(), pc_insn_label() { }
+  // PcRelativePatchInfo is used for PC-relative addressing of both dex cache
+  // arrays and boot image strings; the two uses differ only in how
+  // offset_or_index is interpreted.
+  struct PcRelativePatchInfo {
+    PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx)
+        : target_dex_file(dex_file), offset_or_index(off_or_idx), label(), pc_insn_label() { }
 
     const DexFile& target_dex_file;
-    uint32_t element_offset;
+    // Either the dex cache array element offset or the string index.
+    uint32_t offset_or_index;
     vixl::Label label;
     vixl::Label* pc_insn_label;
   };
 
+  vixl::Label* NewPcRelativePatch(const DexFile& dex_file,
+                                  uint32_t offset_or_index,
+                                  vixl::Label* adrp_label,
+                                  ArenaDeque<PcRelativePatchInfo>* patches);
+
   void EmitJumpTables();
 
   // Labels for each block that will be compiled.
-  vixl::Label* block_labels_;  // Indexed by block id.
+  // We use a deque so that the `vixl::Label` objects do not move in memory.
+  ArenaDeque<vixl::Label> block_labels_;  // Indexed by block id.
   vixl::Label frame_entry_label_;
-  ArenaVector<JumpTableARM64*> jump_tables_;
+  ArenaVector<std::unique_ptr<JumpTableARM64>> jump_tables_;
 
   LocationsBuilderARM64 location_builder_;
   InstructionCodeGeneratorARM64 instruction_visitor_;
@@ -587,7 +629,10 @@
   Arm64Assembler assembler_;
   const Arm64InstructionSetFeatures& isa_features_;
 
-  // Deduplication map for 64-bit literals, used for non-patchable method address and method code.
+  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
+  Uint32ToLiteralMap uint32_literals_;
+  // Deduplication map for 64-bit literals, used for non-patchable method address, method code
+  // or string dex cache address.
   Uint64ToLiteralMap uint64_literals_;
   // Method patch info, map MethodReference to a literal for method address and method code.
   MethodToLiteralMap method_patches_;
@@ -596,7 +641,13 @@
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<vixl::Label>> relative_call_patches_;
   // PC-relative DexCache access info.
-  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
+  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
+  BootStringToLiteralMap boot_image_string_patches_;
+  // PC-relative String patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for patchable boot image addresses.
+  Uint32ToLiteralMap boot_image_address_patches_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
 };
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index a29d839..fb50680 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -471,7 +471,7 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
-      assembler_(&isa_features),
+      assembler_(graph->GetArena(), &isa_features),
       isa_features_(isa_features) {
   // Save RA (containing the return address) to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(RA));
@@ -974,31 +974,6 @@
   }
 }
 
-Location CodeGeneratorMIPS::GetStackLocation(HLoadLocal* load) const {
-  Primitive::Type type = load->GetType();
-
-  switch (type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << type;
-  }
-
-  LOG(FATAL) << "Unreachable";
-  return Location::NoLocation();
-}
-
 void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
   MipsLabel done;
   Register card = AT;
@@ -1828,7 +1803,7 @@
 
 void InstructionCodeGeneratorMIPS::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
@@ -3816,6 +3791,12 @@
   return false;
 }
 
+HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
+    HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) {
+  // TODO: Implement other kinds.
+  return HLoadString::LoadKind::kDexCacheViaMethod;
+}
+
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
@@ -3890,8 +3871,9 @@
                         reg,
                         method_reg,
                         ArtMethod::DexCacheResolvedMethodsOffset(kMipsPointerSize).Int32Value());
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ LoadFromOffset(kLoadWord,
                         reg,
                         reg,
@@ -4057,18 +4039,10 @@
   __ StoreToOffset(kStoreWord, ZERO, TR, GetExceptionTlsOffset());
 }
 
-void LocationsBuilderMIPS::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = load->IsInDexCache()
-      ? LocationSummary::kNoCall
-      : LocationSummary::kCallOnSlowPath;
+  LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
@@ -4090,14 +4064,6 @@
   }
 }
 
-void LocationsBuilderMIPS::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
 void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -4605,33 +4571,6 @@
   HandleShift(shr);
 }
 
-void LocationsBuilderMIPS::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  Primitive::Type field_type = store->InputAt(1)->GetType();
-  switch (field_type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented local type " << field_type;
-  }
-}
-
-void InstructionCodeGeneratorMIPS::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderMIPS::VisitSub(HSub* instruction) {
   HandleBinaryOp(instruction);
 }
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index b720573..435a869 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -275,7 +275,7 @@
 
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMipsDoublewordSize; }
 
-  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
     return assembler_.GetLabelLocation(GetLabelOf(block));
   }
 
@@ -290,8 +290,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
@@ -345,6 +343,11 @@
     return type == Primitive::kPrimLong;
   }
 
+  // Check if the desired_string_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 72ef499..e67d8d0 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -417,6 +417,7 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
+      assembler_(graph->GetArena()),
       isa_features_(isa_features) {
   // Save RA (containing the return address) to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(RA));
@@ -869,31 +870,6 @@
   }
 }
 
-Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const {
-  Primitive::Type type = load->GetType();
-
-  switch (type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << type;
-  }
-
-  LOG(FATAL) << "Unreachable";
-  return Location::NoLocation();
-}
-
 void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object,
                                      GpuRegister value,
                                      bool value_can_be_null) {
@@ -1450,7 +1426,7 @@
 
 void InstructionCodeGeneratorMIPS64::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
@@ -3030,6 +3006,12 @@
   return false;
 }
 
+HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
+    HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) {
+  // TODO: Implement other kinds.
+  return HLoadString::LoadKind::kDexCacheViaMethod;
+}
+
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
@@ -3104,8 +3086,9 @@
                         reg,
                         method_reg,
                         ArtMethod::DexCacheResolvedMethodsOffset(kMips64PointerSize).Int32Value());
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ LoadFromOffset(kLoadDoubleword,
                         reg,
                         reg,
@@ -3275,18 +3258,10 @@
   __ StoreToOffset(kStoreWord, ZERO, TR, GetExceptionTlsOffset());
 }
 
-void LocationsBuilderMIPS64::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = load->IsInDexCache()
-      ? LocationSummary::kNoCall
-      : LocationSummary::kCallOnSlowPath;
+  LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
@@ -3311,14 +3286,6 @@
   }
 }
 
-void LocationsBuilderMIPS64::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS64::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
 void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -3739,33 +3706,6 @@
   HandleShift(shr);
 }
 
-void LocationsBuilderMIPS64::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  Primitive::Type field_type = store->InputAt(1)->GetType();
-  switch (field_type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented local type " << field_type;
-  }
-}
-
-void InstructionCodeGeneratorMIPS64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderMIPS64::VisitSub(HSub* instruction) {
   HandleBinaryOp(instruction);
 }
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 9464a14..9785a2e 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -271,7 +271,7 @@
 
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64DoublewordSize; }
 
-  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
     return assembler_.GetLabelLocation(GetLabelOf(block));
   }
 
@@ -286,8 +286,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
@@ -337,6 +335,11 @@
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const { return false; }
 
+  // Check if the desired_string_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 394f4ee..50892a9 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -795,11 +795,16 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
+      assembler_(graph->GetArena()),
       isa_features_(isa_features),
       method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      constant_area_start_(-1),
+      fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_address_offset_(-1) {
   // Use a fake return address register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
@@ -890,30 +895,6 @@
   __ Bind(GetLabelOf(block));
 }
 
-Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
-  switch (load->GetType()) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << load->GetType();
-      UNREACHABLE();
-  }
-
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-}
-
 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(Primitive::Type type) const {
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -1644,49 +1625,6 @@
   __ nop();
 }
 
-void LocationsBuilderX86::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
-void LocationsBuilderX86::VisitLoadLocal(HLoadLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
-void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall);
-  switch (store->InputAt(1)->GetType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unknown local type " << store->InputAt(1)->GetType();
-  }
-}
-
-void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
@@ -3365,17 +3303,6 @@
   int shift;
   CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
 
-  NearLabel ndiv;
-  NearLabel end;
-  // If numerator is 0, the result is 0, no computation needed.
-  __ testl(eax, eax);
-  __ j(kNotEqual, &ndiv);
-
-  __ xorl(out, out);
-  __ jmp(&end);
-
-  __ Bind(&ndiv);
-
   // Save the numerator.
   __ movl(num, eax);
 
@@ -3410,7 +3337,6 @@
   } else {
     __ movl(eax, edx);
   }
-  __ Bind(&end);
 }
 
 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
@@ -4328,8 +4254,10 @@
       // nop
       break;
     }
-    default:
-      LOG(FATAL) << "Unexpected memory barrier " << kind;
+    case MemBarrierKind::kNTStoreStore:
+      // Non-Temporal Store/Store needs an explicit fence.
+      MemoryFence(/* non-temporal */ true);
+      break;
   }
 }
 
@@ -4340,6 +4268,8 @@
 
   // We disable pc-relative load when there is an irreducible loop, as the optimization
   // is incompatible with it.
+  // TODO: Create as many X86ComputeBaseMethodAddress instructions
+  // as needed for methods with irreducible loops.
   if (GetGraph()->HasIrreducibleLoops() &&
       (dispatch_info.method_load_kind ==
           HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
@@ -4401,18 +4331,17 @@
       __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
-      __ movl(temp.AsRegister<Register>(), Immediate(0));  // Placeholder.
+      __ movl(temp.AsRegister<Register>(), Immediate(/* placeholder */ 0));
       method_patches_.emplace_back(invoke->GetTargetMethod());
       __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
       Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
                                                                 temp.AsRegister<Register>());
-      uint32_t offset = invoke->GetDexCacheArrayOffset();
       __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
-      // Add the patch entry and bind its label at the end of the instruction.
-      pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, offset);
-      __ Bind(&pc_relative_dex_cache_patches_.back().label);
+      // Bind a new fixup label at the end of the "movl" insn.
+      uint32_t offset = invoke->GetDexCacheArrayOffset();
+      __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
@@ -4430,8 +4359,9 @@
       // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       __ movl(reg, Address(method_reg,
                            ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value()));
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ movl(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
       break;
     }
@@ -4494,12 +4424,33 @@
       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
 }
 
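+// Records the position of the immediate that was just emitted so that, when
+// GetIncludePatchInformation() is set, the linker can relocate patchable boot
+// image addresses embedded in the code.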
+void CodeGeneratorX86::RecordSimplePatch() {
+  if (GetCompilerOptions().GetIncludePatchInformation()) {
+    simple_patches_.emplace_back();
+    __ Bind(&simple_patches_.back());
+  }
+}
+
+void CodeGeneratorX86::RecordStringPatch(HLoadString* load_string) {
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  __ Bind(&string_patches_.back().label);
+}
+
+Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                                         uint32_t element_offset) {
+  // Add the patch entry and return its label, to be bound by the caller at the
+  // end of the instruction.
+  pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
+  return &pc_relative_dex_cache_patches_.back().label;
+}
+
 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
       method_patches_.size() +
       relative_call_patches_.size() +
-      pc_relative_dex_cache_patches_.size();
+      pc_relative_dex_cache_patches_.size() +
+      simple_patches_.size() +
+      string_patches_.size();
   linker_patches->reserve(size);
   // The label points to the end of the "movl" insn but the literal offset for method
   // patch needs to point to the embedded constant which occupies the last 4 bytes.
@@ -4523,6 +4474,26 @@
                                                               GetMethodAddressOffset(),
                                                               info.element_offset));
   }
+  for (const Label& label : simple_patches_) {
+    uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
+  }
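+  // With PIC, string references were emitted PC-relative to the cached method
+  // address, so emit RelativeStringPatch; without PIC the "movl" embeds an
+  // absolute address and a plain StringPatch suffices.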
+  if (GetCompilerOptions().GetCompilePic()) {
+    for (const StringPatchInfo<Label>& info : string_patches_) {
+      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+      linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
+                                                                 &info.dex_file,
+                                                                 GetMethodAddressOffset(),
+                                                                 info.string_index));
+    }
+  } else {
+    for (const StringPatchInfo<Label>& info : string_patches_) {
+      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+      linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
+                                                         &info.dex_file,
+                                                         info.string_index));
+    }
+  }
 }
 
 void CodeGeneratorX86::MarkGCCard(Register temp,
@@ -5509,7 +5480,7 @@
 
 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   __ movl(out, Address(obj, offset));
@@ -5916,14 +5887,15 @@
     DCHECK(!cls->MustGenerateClinitCheck());
     // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
     GenerateGcRootFieldLoad(
-        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+        cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
   } else {
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ movl(out, Address(current_method,
                          ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
     // /* GcRoot<mirror::Class> */ out = out[type_index]
-    GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+    GenerateGcRootFieldLoad(
+        cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
 
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -5972,12 +5944,58 @@
   // No need for memory fence, thanks to the X86 memory model.
 }
 
+HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
+    HLoadString::LoadKind desired_string_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_string_load_kind) {
+      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadString::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadString::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      FALLTHROUGH_INTENDED;
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());  // Note: boot image is also non-JIT.
+      // We disable pc-relative load when there is an irreducible loop, as the optimization
+      // is incompatible with it.
+      // TODO: Create as many X86ComputeBaseMethodAddress instructions as needed for methods
+      // with irreducible loops.
+      if (GetGraph()->HasIrreducibleLoops()) {
+        return HLoadString::LoadKind::kDexCacheViaMethod;
+      }
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_string_load_kind;
+}
+
 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
+  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod ||
+      load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
+      load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -5985,16 +6003,61 @@
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
 
-  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-  GenerateGcRootFieldLoad(
-      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
-  __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
-  // /* GcRoot<mirror::String> */ out = out[string_index]
-  GenerateGcRootFieldLoad(
-      load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+  switch (load->GetLoadKind()) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ movl(out, Immediate(/* placeholder */ 0));
+      codegen_->RecordStringPatch(load);
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      Register method_address = locations->InAt(0).AsRegister<Register>();
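+      // kDummy32BitOffset is a placeholder displacement; RecordStringPatch()
+      // below marks it so the linker can rewrite it relative to the method
+      // address anchor.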
+      __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
+      codegen_->RecordStringPatch(load);
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      __ movl(out, Immediate(address));
+      codegen_->RecordSimplePatch();
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      GenerateGcRootFieldLoad(load, out_loc, Address::Absolute(address));
+      break;
+    }
+    case HLoadString::LoadKind::kDexCachePcRelative: {
+      Register base_reg = locations->InAt(0).AsRegister<Register>();
+      uint32_t offset = load->GetDexCacheElementOffset();
+      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
+      GenerateGcRootFieldLoad(
+          load, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label);
+      break;
+    }
+    case HLoadString::LoadKind::kDexCacheViaMethod: {
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      GenerateGcRootFieldLoad(
+          load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+      // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
+      __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
+      // /* GcRoot<mirror::String> */ out = out[string_index]
+      GenerateGcRootFieldLoad(
+          load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
+      UNREACHABLE();
+  }
 
   if (!load->IsInDexCache()) {
     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
@@ -6692,21 +6755,24 @@
 
 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruction,
                                                           Location root,
-                                                          Register obj,
-                                                          uint32_t offset) {
+                                                          const Address& address,
+                                                          Label* fixup_label) {
   Register root_reg = root.AsRegister<Register>();
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barrier are used:
       //
-      //   root = obj.field;
+      //   root = *address;
       //   if (Thread::Current()->GetIsGcMarking()) {
       //     root = ReadBarrier::Mark(root)
       //   }
 
-      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-      __ movl(root_reg, Address(obj, offset));
+      // /* GcRoot<mirror::Object> */ root = *address
+      __ movl(root_reg, address);
+      if (fixup_label != nullptr) {
+        __ Bind(fixup_label);
+      }
       static_assert(
           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
@@ -6727,15 +6793,21 @@
     } else {
       // GC root loaded through a slow path for read barriers other
       // than Baker's.
-      // /* GcRoot<mirror::Object>* */ root = obj + offset
-      __ leal(root_reg, Address(obj, offset));
+      // /* GcRoot<mirror::Object>* */ root = address
+      __ leal(root_reg, address);
+      if (fixup_label != nullptr) {
+        __ Bind(fixup_label);
+      }
       // /* mirror::Object* */ root = root->Read()
       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
     }
   } else {
     // Plain GC root load with no read barrier.
-    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-    __ movl(root_reg, Address(obj, offset));
+    // /* GcRoot<mirror::Object> */ root = *address
+    __ movl(root_reg, address);
+    if (fixup_label != nullptr) {
+      __ Bind(fixup_label);
+    }
     // Note that GC roots are not affected by heap poisoning, thus we
     // do not have to unpoison `root_reg` here.
   }
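
The pattern above — emit kDummy32BitOffset, bind a Label immediately after the instruction, and let EmitLinkerPatches() later convert the label position into a literal offset on the last 4 bytes — generalizes beyond ART. A minimal standalone sketch of the idea (toy buffer and names, not ART's Assembler or LinkerPatch API):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Toy stand-in for the placeholder-and-patch scheme: emit a dummy
    // 32-bit immediate, record the position just past the instruction
    // (where a Label would be bound), and later rewrite the last 4 bytes.
    class ToyAssembler {
     public:
      static constexpr int32_t kDummy32BitOffset = 256;  // placeholder

      // Emits one fake instruction: 1 opcode byte + 4 placeholder bytes.
      size_t EmitLoadWithPlaceholder() {
        buffer_.push_back(0x8b);  // pretend opcode
        int32_t dummy = kDummy32BitOffset;
        const uint8_t* bytes = reinterpret_cast<const uint8_t*>(&dummy);
        buffer_.insert(buffer_.end(), bytes, bytes + sizeof(dummy));
        return buffer_.size();  // the "label", bound after the insn
      }

      // The label points past the instruction; the immediate occupies its
      // last 4 bytes, so the patch site is label_position - 4.
      void PatchAt(size_t label_position, int32_t real_value) {
        std::memcpy(&buffer_[label_position - sizeof(real_value)],
                    &real_value, sizeof(real_value));
      }

     private:
      std::vector<uint8_t> buffer_;
    };

    int main() {
      ToyAssembler assembler;
      size_t label = assembler.EmitLoadWithPlaceholder();
      assembler.PatchAt(label, 0x1234);  // "linker" fills in the real offset
      return 0;
    }
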
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index c397899..fe7d3ed 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -258,13 +258,13 @@
                                          Location maybe_temp);
   // Generate a GC root reference load:
   //
-  //   root <- *(obj + offset)
+  //   root <- *address
   //
   // while honoring read barriers (if any).
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
-                               Register obj,
-                               uint32_t offset);
+                               const Address& address,
+                               Label* fixup_label = nullptr);
 
   // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
   // `is_wide` specifies whether it is long/double or not.
@@ -361,14 +361,12 @@
     return assembler_;
   }
 
-  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
     return GetLabelOf(block)->Position();
   }
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
@@ -388,6 +386,11 @@
   // Helper method to move a 64bits value between two locations.
   void Move64(Location destination, Location source);
 
+  // Check if the desired_string_load_kind is supported. If it is, return it;
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it;
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -399,6 +402,10 @@
   // Generate a call to a virtual method.
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
+  void RecordSimplePatch();
+  void RecordStringPatch(HLoadString* load_string);
+  Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
+
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   // Emit linker patches.
@@ -531,7 +538,7 @@
   // touch (but not change) the top of the stack.
   // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
   void MemoryFence(bool non_temporal = false) {
-    if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) {
+    if (!non_temporal) {
       assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
     } else {
       assembler_.mfence();
@@ -542,6 +549,10 @@
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
+  // When we don't know the proper offset for the value, we use kDummy32BitOffset.
+  // The correct value will be inserted when processing Assembler fixups.
+  static constexpr int32_t kDummy32BitOffset = 256;
+
  private:
   // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
@@ -578,6 +589,10 @@
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
   // PC-relative DexCache access info.
   ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
+  // Patch locations for patchoat where the linker doesn't do any other work.
+  ArenaDeque<Label> simple_patches_;
+  // String patch locations.
+  ArenaDeque<StringPatchInfo<Label>> string_patches_;
 
   // Offset to the start of the constant area in the assembled code.
   // Used for fixups to the constant area.
@@ -592,10 +607,6 @@
   // instruction gives the address of the start of this method.
   int32_t method_address_offset_;
 
-  // When we don't know the proper offset for the value, we use kDummy32BitOffset.
-  // The correct value will be inserted when processing Assembler fixups.
-  static constexpr int32_t kDummy32BitOffset = 256;
-
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
 };
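
For context on the MemoryFence() change above, which now always uses a locked add unless an mfence is forced: both fence idioms can be written directly with inline assembly. A hedged, compiler-specific sketch (assumes GCC or Clang on x86-64; an illustration only, not how ART emits the fences into compiled code):

    // Two ways to order prior stores before subsequent loads on x86-64.
    inline void FenceWithLockedAdd() {
      // `lock addl $0, (%rsp)`: a no-op read-modify-write of the stack top.
      // It touches (but does not change) the stack and is typically cheaper
      // than mfence, which is why MemoryFence() prefers it.
      __asm__ __volatile__("lock addl $0, (%%rsp)" ::: "memory", "cc");
    }

    inline void FenceWithMfence() {
      // Required when non-temporal (streaming) stores must be ordered;
      // the locked add does not order those.
      __asm__ __volatile__("mfence" ::: "memory");
    }

    int main() {
      FenceWithLockedAdd();
      FenceWithMfence();
      return 0;
    }
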
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index d24b5bb..56c5b06 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -784,14 +784,14 @@
       method_patches_.emplace_back(invoke->GetTargetMethod());
       __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
-                                                  invoke->GetDexCacheArrayOffset());
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
       __ movq(temp.AsRegister<CpuRegister>(),
               Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
-      // Bind the label at the end of the "movl" insn.
-      __ Bind(&pc_relative_dex_cache_patches_.back().label);
+      // Bind a new fixup label at the end of the "movq" insn.
+      uint32_t offset = invoke->GetDexCacheArrayOffset();
+      __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
       break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
@@ -808,8 +808,9 @@
       __ movq(reg,
               Address(CpuRegister(method_reg),
                       ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
       break;
     }
@@ -873,12 +874,33 @@
       kX86_64WordSize).SizeValue()));
 }
 
+void CodeGeneratorX86_64::RecordSimplePatch() {
+  if (GetCompilerOptions().GetIncludePatchInformation()) {
+    simple_patches_.emplace_back();
+    __ Bind(&simple_patches_.back());
+  }
+}
+
+void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) {
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  __ Bind(&string_patches_.back().label);
+}
+
+Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                                            uint32_t element_offset) {
+  // Add a patch entry and return the label.
+  pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
+  return &pc_relative_dex_cache_patches_.back().label;
+}
+
 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
       method_patches_.size() +
       relative_call_patches_.size() +
-      pc_relative_dex_cache_patches_.size();
+      pc_relative_dex_cache_patches_.size() +
+      simple_patches_.size() +
+      string_patches_.size();
   linker_patches->reserve(size);
   // The label points to the end of the "movl" insn but the literal offset for a method
   // patch needs to point to the embedded constant which occupies the last 4 bytes.
@@ -902,6 +924,18 @@
                                                               info.label.Position(),
                                                               info.element_offset));
   }
+  for (const Label& label : simple_patches_) {
+    uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
+  }
+  for (const StringPatchInfo<Label>& info : string_patches_) {
+    // These are always PC-relative, see GetSupportedLoadStringKind().
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
+                                                               &info.dex_file,
+                                                               info.label.Position(),
+                                                               info.string_index));
+  }
 }
 
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -973,11 +1007,14 @@
         location_builder_(graph, this),
         instruction_visitor_(graph, this),
         move_resolver_(graph->GetArena(), this),
+        assembler_(graph->GetArena()),
         isa_features_(isa_features),
         constant_area_start_(0),
         method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
@@ -1083,30 +1120,6 @@
   __ Bind(GetLabelOf(block));
 }
 
-Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
-  switch (load->GetType()) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << load->GetType();
-      UNREACHABLE();
-  }
-
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-}
-
 void CodeGeneratorX86_64::Move(Location destination, Location source) {
   if (source.Equals(destination)) {
     return;
@@ -1625,49 +1638,6 @@
   __ nop();
 }
 
-void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86_64::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
-void LocationsBuilderX86_64::VisitLoadLocal(HLoadLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86_64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
-void LocationsBuilderX86_64::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall);
-  switch (store->InputAt(1)->GetType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unexpected local type " << store->InputAt(1)->GetType();
-  }
-}
-
-void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
@@ -2799,7 +2769,7 @@
           } else if (in.IsConstant()) {
             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
-            codegen_->Load64BitValue(dest, static_cast<double>(v));
+            codegen_->Load32BitValue(dest, static_cast<float>(v));
           } else {
             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
@@ -3420,16 +3390,6 @@
 
     __ movl(numerator, eax);
 
-    NearLabel no_div;
-    NearLabel end;
-    __ testl(eax, eax);
-    __ j(kNotEqual, &no_div);
-
-    __ xorl(out, out);
-    __ jmp(&end);
-
-    __ Bind(&no_div);
-
     __ movl(eax, Immediate(magic));
     __ imull(numerator);
 
@@ -3455,7 +3415,6 @@
     } else {
       __ movl(eax, edx);
     }
-    __ Bind(&end);
   } else {
     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
 
@@ -4089,8 +4048,10 @@
       // nop
       break;
     }
-    default:
-      LOG(FATAL) << "Unexpected memory barier " << kind;
+    case MemBarrierKind::kNTStoreStore:
+      // Non-temporal store/store barriers need an explicit fence.
+      MemoryFence(/* force_mfence */ true);
+      break;
   }
 }
 
@@ -4995,7 +4956,7 @@
 
 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   __ movl(out, Address(obj, offset));
@@ -5249,6 +5210,12 @@
           CpuRegister(ensure_scratch.GetRegister()));
 }
 
+void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
+  __ movq(CpuRegister(TMP), reg1);
+  __ movq(reg1, reg2);
+  __ movq(reg2, CpuRegister(TMP));
+}
+
 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   __ movq(Address(CpuRegister(RSP), mem), reg);
@@ -5286,7 +5253,7 @@
   Location destination = move->GetDestination();
 
   if (source.IsRegister() && destination.IsRegister()) {
-    __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+    Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
   } else if (source.IsRegister() && destination.IsStackSlot()) {
     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
@@ -5365,14 +5332,15 @@
     DCHECK(!cls->MustGenerateClinitCheck());
     // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
     GenerateGcRootFieldLoad(
-        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+        cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
   } else {
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ movq(out, Address(current_method,
                          ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
     // /* GcRoot<mirror::Class> */ out = out[type_index]
-    GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+    GenerateGcRootFieldLoad(
+        cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
 
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -5410,12 +5378,49 @@
                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
 }
 
+HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
+    HLoadString::LoadKind desired_string_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_string_load_kind) {
+      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadString::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadString::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      // We prefer the always-available RIP-relative address for the x86-64 boot image.
+      return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_string_load_kind;
+}
+
 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
+  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -5423,16 +5428,59 @@
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
-  CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
 
-  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-  GenerateGcRootFieldLoad(
-      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
-  __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
-  // /* GcRoot<mirror::String> */ out = out[string_index]
-  GenerateGcRootFieldLoad(
-      load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+  switch (load->GetLoadKind()) {
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
+      codegen_->RecordStringPatch(load);
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      __ movl(out, Immediate(address));  // Zero-extended.
+      codegen_->RecordSimplePatch();
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(load->GetAddress(), 0u);
+      if (IsUint<32>(load->GetAddress())) {
+        Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
+        GenerateGcRootFieldLoad(load, out_loc, address);
+      } else {
+        // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
+        __ movq(out, Immediate(load->GetAddress()));
+        GenerateGcRootFieldLoad(load, out_loc, Address(out, 0));
+      }
+      break;
+    }
+    case HLoadString::LoadKind::kDexCachePcRelative: {
+      uint32_t offset = load->GetDexCacheElementOffset();
+      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
+      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
+                                          /* no_rip */ false);
+      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
+      break;
+    }
+    case HLoadString::LoadKind::kDexCacheViaMethod: {
+      CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
+
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      GenerateGcRootFieldLoad(
+          load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+      // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
+      __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
+      // /* GcRoot<mirror::String> */ out = out[string_index]
+      GenerateGcRootFieldLoad(
+          load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
+      UNREACHABLE();
+  }
 
   if (!load->IsInDexCache()) {
     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
@@ -6171,21 +6219,24 @@
 
 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
                                                              Location root,
-                                                             CpuRegister obj,
-                                                             uint32_t offset) {
+                                                             const Address& address,
+                                                             Label* fixup_label) {
   CpuRegister root_reg = root.AsRegister<CpuRegister>();
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barriers are used:
       //
-      //   root = obj.field;
+      //   root = *address;
       //   if (Thread::Current()->GetIsGcMarking()) {
       //     root = ReadBarrier::Mark(root)
       //   }
 
-      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-      __ movl(root_reg, Address(obj, offset));
+      // /* GcRoot<mirror::Object> */ root = *address
+      __ movl(root_reg, address);
+      if (fixup_label != nullptr) {
+        __ Bind(fixup_label);
+      }
       static_assert(
           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
@@ -6207,15 +6258,21 @@
     } else {
       // GC root loaded through a slow path for read barriers other
       // than Baker's.
-      // /* GcRoot<mirror::Object>* */ root = obj + offset
-      __ leaq(root_reg, Address(obj, offset));
+      // /* GcRoot<mirror::Object>* */ root = address
+      __ leaq(root_reg, address);
+      if (fixup_label != nullptr) {
+        __ Bind(fixup_label);
+      }
       // /* mirror::Object* */ root = root->Read()
       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
     }
   } else {
     // Plain GC root load with no read barrier.
-    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-    __ movl(root_reg, Address(obj, offset));
+    // /* GcRoot<mirror::Object> */ root = *address
+    __ movl(root_reg, address);
+    if (fixup_label != nullptr) {
+      __ Bind(fixup_label);
+    }
     // Note that GC roots are not affected by heap poisoning, thus we
     // do not have to unpoison `root_reg` here.
   }
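
GetSupportedLoadStringKind() above follows a validate-or-downgrade pattern: the desired load kind is returned unchanged when the backend supports it, and otherwise replaced by a kind the backend can always handle. A self-contained sketch of that shape (simplified enum and flag, not the ART types):

    #include <cassert>

    enum class LoadKind {
      kBootImageLinkTimeAddress,
      kBootImageLinkTimePcRelative,
      kBootImageAddress,
      kDexCacheAddress,
      kDexCachePcRelative,
      kDexCacheViaMethod,  // always-supported fallback
    };

    LoadKind GetSupportedLoadKind(LoadKind desired, bool emit_read_barrier) {
      if (emit_read_barrier) {
        switch (desired) {
          case LoadKind::kBootImageLinkTimeAddress:
          case LoadKind::kBootImageLinkTimePcRelative:
          case LoadKind::kBootImageAddress:
            // Boot image kinds are not implemented for read barriers yet,
            // so fall back to the method-based dex cache load.
            return LoadKind::kDexCacheViaMethod;
          default:
            break;
        }
      }
      if (desired == LoadKind::kBootImageLinkTimeAddress) {
        // Mirrors the x86-64 preference for the always-available
        // RIP-relative form.
        return LoadKind::kBootImageLinkTimePcRelative;
      }
      return desired;  // Everything else is supported as requested.
    }

    int main() {
      assert(GetSupportedLoadKind(LoadKind::kBootImageAddress,
                                  /* emit_read_barrier */ true) ==
             LoadKind::kDexCacheViaMethod);
      assert(GetSupportedLoadKind(LoadKind::kDexCachePcRelative,
                                  /* emit_read_barrier */ false) ==
             LoadKind::kDexCachePcRelative);
      return 0;
    }
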
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index c3fce6e..d9908bb 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -142,6 +142,7 @@
   void Exchange32(CpuRegister reg, int mem);
   void Exchange32(XmmRegister reg, int mem);
   void Exchange32(int mem1, int mem2);
+  void Exchange64(CpuRegister reg1, CpuRegister reg2);
   void Exchange64(CpuRegister reg, int mem);
   void Exchange64(XmmRegister reg, int mem);
   void Exchange64(int mem1, int mem2);
@@ -252,13 +253,13 @@
                                          Location maybe_temp);
   // Generate a GC root reference load:
   //
-  //   root <- *(obj + offset)
+  //   root <- *address
   //
   // while honoring read barriers (if any).
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
-                               CpuRegister obj,
-                               uint32_t offset);
+                               const Address& address,
+                               Label* fixup_label = nullptr);
 
   void PushOntoFPStack(Location source, uint32_t temp_offset,
                        uint32_t stack_adjustment, bool is_float);
@@ -345,12 +346,10 @@
     return &move_resolver_;
   }
 
-  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
     return GetLabelOf(block)->Position();
   }
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   void SetupBlockedRegisters() const OVERRIDE;
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -384,6 +383,11 @@
     return false;
   }
 
+  // Check if the desired_string_load_kind is supported. If it is, return it;
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it;
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -393,6 +397,10 @@
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
+  void RecordSimplePatch();
+  void RecordStringPatch(HLoadString* load_string);
+  Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
+
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
@@ -501,10 +509,10 @@
 
   // Ensure that prior stores complete to memory before subsequent loads.
   // The locked add implementation will avoid serializing device memory, but will
-  // touch (but not change) the top of the stack. The locked add should not be used for
-  // ordering non-temporal stores.
+  // touch (but not change) the top of the stack.
+  // The 'force_mfence' parameter should be used to ensure ordering of non-temporal stores.
   void MemoryFence(bool force_mfence = false) {
-    if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) {
+    if (!force_mfence) {
       assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
     } else {
       assembler_.mfence();
@@ -515,6 +523,10 @@
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
+  // When we don't know the proper offset for the value, we use kDummy32BitOffset.
+  // The linker will later fix this up with the correct value.
+  static constexpr int32_t kDummy32BitOffset = 256;
+
  private:
   // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
@@ -552,10 +564,10 @@
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
   // PC-relative DexCache access info.
   ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
-
-  // When we don't know the proper offset for the value, we use kDummy32BitOffset.
-  // We will fix this up in the linker later to have the right value.
-  static constexpr int32_t kDummy32BitOffset = 256;
+  // Patch locations for patchoat where the linker doesn't do any other work.
+  ArenaDeque<Label> simple_patches_;
+  // String patch locations.
+  ArenaDeque<StringPatchInfo<Label>> string_patches_;
 
   // Fixups for jump tables need to be handled specially.
   ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
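
The new Exchange64(CpuRegister, CpuRegister) overload declared above swaps two registers through the scratch register TMP with three moves, replacing the xchgq in EmitSwap(). A trivial value-level sketch of the same three-move swap (plain C++, not register allocation):

    #include <cassert>
    #include <cstdint>

    // Swap two values through a scratch slot, the pattern the parallel
    // move resolver now uses with movq and the TMP register.
    void Exchange64(uint64_t* reg1, uint64_t* reg2, uint64_t* tmp) {
      *tmp = *reg1;   // movq TMP, reg1
      *reg1 = *reg2;  // movq reg1, reg2
      *reg2 = *tmp;   // movq reg2, TMP
    }

    int main() {
      uint64_t a = 1, b = 2, tmp = 0;
      Exchange64(&a, &b, &tmp);
      assert(a == 2 && b == 1);
      return 0;
    }
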
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 6c55194..a849448 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -53,17 +53,17 @@
 }
 
 static inline vixl::Register XRegisterFrom(Location location) {
-  DCHECK(location.IsRegister());
+  DCHECK(location.IsRegister()) << location;
   return vixl::Register::XRegFromCode(VIXLRegCodeFromART(location.reg()));
 }
 
 static inline vixl::Register WRegisterFrom(Location location) {
-  DCHECK(location.IsRegister());
+  DCHECK(location.IsRegister()) << location;
   return vixl::Register::WRegFromCode(VIXLRegCodeFromART(location.reg()));
 }
 
 static inline vixl::Register RegisterFrom(Location location, Primitive::Type type) {
-  DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type));
+  DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type;
   return type == Primitive::kPrimLong ? XRegisterFrom(location) : WRegisterFrom(location);
 }
 
@@ -77,17 +77,17 @@
 }
 
 static inline vixl::FPRegister DRegisterFrom(Location location) {
-  DCHECK(location.IsFpuRegister());
+  DCHECK(location.IsFpuRegister()) << location;
   return vixl::FPRegister::DRegFromCode(location.reg());
 }
 
 static inline vixl::FPRegister SRegisterFrom(Location location) {
-  DCHECK(location.IsFpuRegister());
+  DCHECK(location.IsFpuRegister()) << location;
   return vixl::FPRegister::SRegFromCode(location.reg());
 }
 
 static inline vixl::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
-  DCHECK(Primitive::IsFloatingPointType(type));
+  DCHECK(Primitive::IsFloatingPointType(type)) << type;
   return type == Primitive::kPrimDouble ? DRegisterFrom(location) : SRegisterFrom(location);
 }
 
@@ -124,7 +124,7 @@
   } else if (instr->IsNullConstant()) {
     return 0;
   } else {
-    DCHECK(instr->IsLongConstant());
+    DCHECK(instr->IsLongConstant()) << instr->DebugName();
     return instr->AsLongConstant()->GetValue();
   }
 }
@@ -199,7 +199,7 @@
 
   // For single uses we let VIXL handle the constant generation since it will
   // use registers that are not managed by the register allocator (wip0, wip1).
-  if (constant->GetUses().HasOnlyOneUse()) {
+  if (constant->GetUses().HasExactlyOneElement()) {
     return true;
   }
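
The common_arm64.h hunks above stream the offending operand (`<< location`, `<< type`, `<< instr->DebugName()`) into each DCHECK so a failure identifies the bad value instead of just aborting. A minimal stand-in showing why the streaming form helps (toy macro; the real one is ART's DCHECK in base/logging.h):

    #include <cstdlib>
    #include <iostream>

    // If the condition fails, everything streamed after the macro is
    // printed before the process aborts, so `SIMPLE_DCHECK(ok) << value`
    // reports the value that tripped the check.
    #define SIMPLE_DCHECK(cond)                    \
      for (bool ok_ = (cond); !ok_; std::abort())  \
        std::cerr << "Check failed: " #cond " "

    int main() {
      int reg_code = 7;
      // Passes silently; on failure it would print the register code.
      SIMPLE_DCHECK(reg_code < 32) << "reg_code=" << reg_code << "\n";
      return 0;
    }
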
 
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 1e54a0a..d1a2a26 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -111,22 +111,21 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  2: IntConstant [5]\n"
-      "  10: SuspendCheck\n"
-      "  11: Goto 1\n"
+      "  2: IntConstant [3]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  5: Neg(2) [8]\n"
-      "  8: Return(5)\n"
+      "  3: Neg(2) [4]\n"
+      "  4: Return(3)\n"
       "BasicBlock 2, pred: 1\n"
-      "  9: Exit\n";
+      "  5: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  2: IntConstant [5]\n", "  2: IntConstant\n" },
-    { "  10: SuspendCheck\n",   "  10: SuspendCheck\n"
-                                "  12: IntConstant [8]\n" },
-    { "  5: Neg(2) [8]\n",      removed },
-    { "  8: Return(5)\n",       "  8: Return(12)\n" }
+    { "  2: IntConstant [3]\n", "  2: IntConstant\n"
+                                "  6: IntConstant [4]\n" },
+    { "  3: Neg(2) [4]\n",      removed },
+    { "  4: Return(3)\n",       "  4: Return(6)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -173,22 +172,21 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  4: LongConstant [7]\n"
-      "  12: SuspendCheck\n"
-      "  13: Goto 1\n"
+      "  2: LongConstant [3]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  7: Neg(4) [10]\n"
-      "  10: Return(7)\n"
+      "  3: Neg(2) [4]\n"
+      "  4: Return(3)\n"
       "BasicBlock 2, pred: 1\n"
-      "  11: Exit\n";
+      "  5: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  4: LongConstant [7]\n", "  4: LongConstant\n" },
-    { "  12: SuspendCheck\n",    "  12: SuspendCheck\n"
-                                 "  14: LongConstant [10]\n" },
-    { "  7: Neg(4) [10]\n",      removed },
-    { "  10: Return(7)\n",       "  10: Return(14)\n" }
+    { "  2: LongConstant [3]\n", "  2: LongConstant\n"
+                                 "  6: LongConstant [4]\n" },
+    { "  3: Neg(2) [4]\n",       removed },
+    { "  4: Return(3)\n",        "  4: Return(6)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -201,7 +199,7 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  4: LongConstant\n", removed },
+    { "  2: LongConstant\n", removed },
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -232,25 +230,24 @@
     Instruction::RETURN | 2 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  14: SuspendCheck\n"
-    "  15: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  9: Add(3, 5) [12]\n"
-    "  12: Return(9)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  13: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Add(2, 3) [5]\n"
+      "  5: Return(4)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [9]\n", "  3: IntConstant\n" },
-    { "  5: IntConstant [9]\n", "  5: IntConstant\n" },
-    { "  14: SuspendCheck\n",   "  14: SuspendCheck\n"
-                                "  16: IntConstant [12]\n" },
-    { "  9: Add(3, 5) [12]\n",  removed },
-    { "  12: Return(9)\n",      "  12: Return(16)\n" }
+    { "  2: IntConstant [4]\n", "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n", "  3: IntConstant\n"
+                                "  7: IntConstant [5]\n" },
+    { "  4: Add(2, 3) [5]\n",   removed },
+    { "  5: Return(4)\n",       "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -263,8 +260,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  3: IntConstant\n", removed },
-    { "  5: IntConstant\n", removed }
+    { "  2: IntConstant\n", removed },
+    { "  3: IntConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -302,35 +299,34 @@
     Instruction::RETURN | 2 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  11: IntConstant [17]\n"
-    "  13: IntConstant [17]\n"
-    "  26: SuspendCheck\n"
-    "  27: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  9: Add(3, 5) [21]\n"
-    "  17: Add(11, 13) [21]\n"
-    "  21: Add(9, 17) [24]\n"
-    "  24: Return(21)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  25: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  5: IntConstant [7]\n"
+      "  6: IntConstant [7]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Add(2, 3) [8]\n"
+      "  7: Add(5, 6) [8]\n"
+      "  8: Add(4, 7) [9]\n"
+      "  9: Return(8)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  10: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [9]\n",   "  3: IntConstant\n" },
-    { "  5: IntConstant [9]\n",   "  5: IntConstant\n" },
-    { "  11: IntConstant [17]\n", "  11: IntConstant\n" },
-    { "  13: IntConstant [17]\n", "  13: IntConstant\n" },
-    { "  26: SuspendCheck\n",     "  26: SuspendCheck\n"
-                                  "  28: IntConstant\n"
-                                  "  29: IntConstant\n"
-                                  "  30: IntConstant [24]\n" },
-    { "  9: Add(3, 5) [21]\n",    removed },
-    { "  17: Add(11, 13) [21]\n", removed },
-    { "  21: Add(9, 17) [24]\n",  removed  },
-    { "  24: Return(21)\n",       "  24: Return(30)\n" }
+    { "  2: IntConstant [4]\n",  "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n",  "  3: IntConstant\n" },
+    { "  5: IntConstant [7]\n",  "  5: IntConstant\n" },
+    { "  6: IntConstant [7]\n",  "  6: IntConstant\n"
+                                 "  11: IntConstant\n"
+                                 "  12: IntConstant\n"
+                                 "  13: IntConstant [9]\n" },
+    { "  4: Add(2, 3) [8]\n",    removed },
+    { "  7: Add(5, 6) [8]\n",    removed },
+    { "  8: Add(4, 7) [9]\n",    removed  },
+    { "  9: Return(8)\n",        "  9: Return(13)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -349,12 +345,12 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
+    { "  2: IntConstant\n",  removed },
     { "  3: IntConstant\n",  removed },
     { "  5: IntConstant\n",  removed },
+    { "  6: IntConstant\n",  removed },
     { "  11: IntConstant\n", removed },
-    { "  13: IntConstant\n", removed },
-    { "  28: IntConstant\n", removed },
-    { "  29: IntConstant\n", removed }
+    { "  12: IntConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -384,25 +380,24 @@
     Instruction::RETURN | 2 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  14: SuspendCheck\n"
-    "  15: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  9: Sub(3, 5) [12]\n"
-    "  12: Return(9)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  13: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Sub(2, 3) [5]\n"
+      "  5: Return(4)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [9]\n", "  3: IntConstant\n" },
-    { "  5: IntConstant [9]\n", "  5: IntConstant\n" },
-    { "  14: SuspendCheck\n",   "  14: SuspendCheck\n"
-                                "  16: IntConstant [12]\n" },
-    { "  9: Sub(3, 5) [12]\n",  removed },
-    { "  12: Return(9)\n",      "  12: Return(16)\n" }
+    { "  2: IntConstant [4]\n",  "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n",  "  3: IntConstant\n"
+                                 "  7: IntConstant [5]\n" },
+    { "  4: Sub(2, 3) [5]\n",    removed },
+    { "  5: Return(4)\n",        "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -415,8 +410,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  3: IntConstant\n", removed },
-    { "  5: IntConstant\n", removed }
+    { "  2: IntConstant\n", removed },
+    { "  3: IntConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -448,25 +443,24 @@
     Instruction::RETURN_WIDE | 4 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  6: LongConstant [12]\n"
-    "  8: LongConstant [12]\n"
-    "  17: SuspendCheck\n"
-    "  18: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  12: Add(6, 8) [15]\n"
-    "  15: Return(12)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  16: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: LongConstant [4]\n"
+      "  3: LongConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Add(2, 3) [5]\n"
+      "  5: Return(4)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  6: LongConstant [12]\n", "  6: LongConstant\n" },
-    { "  8: LongConstant [12]\n", "  8: LongConstant\n" },
-    { "  17: SuspendCheck\n",     "  17: SuspendCheck\n"
-                                  "  19: LongConstant [15]\n" },
-    { "  12: Add(6, 8) [15]\n",   removed },
-    { "  15: Return(12)\n",       "  15: Return(19)\n" }
+    { "  2: LongConstant [4]\n",  "  2: LongConstant\n" },
+    { "  3: LongConstant [4]\n",  "  3: LongConstant\n"
+                                  "  7: LongConstant [5]\n" },
+    { "  4: Add(2, 3) [5]\n",     removed },
+    { "  5: Return(4)\n",         "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -479,8 +473,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  6: LongConstant\n", removed },
-    { "  8: LongConstant\n", removed }
+    { "  2: LongConstant\n", removed },
+    { "  3: LongConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -513,25 +507,24 @@
     Instruction::RETURN_WIDE | 4 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  6: LongConstant [12]\n"
-    "  8: LongConstant [12]\n"
-    "  17: SuspendCheck\n"
-    "  18: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  12: Sub(6, 8) [15]\n"
-    "  15: Return(12)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  16: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: LongConstant [4]\n"
+      "  3: LongConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Sub(2, 3) [5]\n"
+      "  5: Return(4)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  6: LongConstant [12]\n", "  6: LongConstant\n" },
-    { "  8: LongConstant [12]\n", "  8: LongConstant\n" },
-    { "  17: SuspendCheck\n",     "  17: SuspendCheck\n"
-                                  "  19: LongConstant [15]\n" },
-    { "  12: Sub(6, 8) [15]\n",   removed },
-    { "  15: Return(12)\n",       "  15: Return(19)\n" }
+    { "  2: LongConstant [4]\n",  "  2: LongConstant\n" },
+    { "  3: LongConstant [4]\n",  "  3: LongConstant\n"
+                                  "  7: LongConstant [5]\n" },
+    { "  4: Sub(2, 3) [5]\n",     removed },
+    { "  5: Return(4)\n",         "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -544,8 +537,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  6: LongConstant\n", removed },
-    { "  8: LongConstant\n", removed }
+    { "  2: LongConstant\n", removed },
+    { "  3: LongConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -593,46 +586,45 @@
     Instruction::RETURN | 2 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"            // v0 <- 1
-    "  5: IntConstant [9]\n"            // v1 <- 2
-    "  13: IntConstant [14]\n"          // const 5
-    "  18: IntConstant [19]\n"          // const 4
-    "  23: IntConstant [24]\n"          // const 8
-    "  29: SuspendCheck\n"
-    "  30: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3\n"
-    "  9: Add(3, 5) [19]\n"             // v2 <- v0 + v1 = 1 + 2 = 3
-    "  11: Goto 3\n"                    // goto L2
-    "BasicBlock 2, pred: 3, succ: 4\n"  // L1:
-    "  14: Add(19, 13) [24]\n"          // v1 <- v0 + 3 = 7 + 5 = 12
-    "  16: Goto 4\n"                    // goto L3
-    "BasicBlock 3, pred: 1, succ: 2\n"  // L2:
-    "  19: Add(9, 18) [14]\n"           // v0 <- v2 + 2 = 3 + 4 = 7
-    "  21: Goto 2\n"                    // goto L1
-    "BasicBlock 4, pred: 2, succ: 5\n"  // L3:
-    "  24: Add(14, 23) [27]\n"          // v2 <- v1 + 4 = 12 + 8 = 20
-    "  27: Return(24)\n"                // return v2
-    "BasicBlock 5, pred: 4\n"
-    "  28: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"             // v0 <- 1
+      "  3: IntConstant [4]\n"             // v1 <- 2
+      "  6: IntConstant [7]\n"             // const 5
+      "  9: IntConstant [10]\n"            // const 4
+      "  12: IntConstant [13]\n"           // const 8
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 3\n"
+      "  4: Add(2, 3) [7]\n"               // v2 <- v0 + v1 = 1 + 2 = 3
+      "  5: Goto 3\n"                      // goto L2
+      "BasicBlock 2, pred: 3, succ: 4\n"   // L1:
+      "  10: Add(7, 9) [13]\n"             // v1 <- v0 + 3 = 7 + 5 = 12
+      "  11: Goto 4\n"                     // goto L3
+      "BasicBlock 3, pred: 1, succ: 2\n"   // L2:
+      "  7: Add(4, 6) [10]\n"              // v0 <- v2 + 2 = 3 + 4 = 7
+      "  8: Goto 2\n"                      // goto L1
+      "BasicBlock 4, pred: 2, succ: 5\n"   // L3:
+      "  13: Add(10, 12) [14]\n"           // v2 <- v1 + 4 = 12 + 8 = 20
+      "  14: Return(13)\n"                 // return v2
+      "BasicBlock 5, pred: 4\n"
+      "  15: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [9]\n",   "  3: IntConstant\n" },
-    { "  5: IntConstant [9]\n",   "  5: IntConstant\n" },
-    { "  13: IntConstant [14]\n", "  13: IntConstant\n" },
-    { "  18: IntConstant [19]\n", "  18: IntConstant\n" },
-    { "  23: IntConstant [24]\n", "  23: IntConstant\n" },
-    { "  29: SuspendCheck\n",     "  29: SuspendCheck\n"
-                                  "  31: IntConstant\n"
-                                  "  32: IntConstant\n"
-                                  "  33: IntConstant\n"
-                                  "  34: IntConstant [27]\n" },
-    { "  9: Add(3, 5) [19]\n",    removed },
-    { "  14: Add(19, 13) [24]\n", removed },
-    { "  19: Add(9, 18) [14]\n",  removed },
-    { "  24: Add(14, 23) [27]\n", removed },
-    { "  27: Return(24)\n",       "  27: Return(34)\n"}
+    { "  2: IntConstant [4]\n",   "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n",   "  3: IntConstant\n" },
+    { "  6: IntConstant [7]\n",   "  6: IntConstant\n" },
+    { "  9: IntConstant [10]\n",  "  9: IntConstant\n" },
+    { "  12: IntConstant [13]\n", "  12: IntConstant\n"
+                                  "  16: IntConstant\n"
+                                  "  17: IntConstant\n"
+                                  "  18: IntConstant\n"
+                                  "  19: IntConstant [14]\n" },
+    { "  4: Add(2, 3) [7]\n",     removed },
+    { "  10: Add(7, 9) [13]\n",   removed },
+    { "  7: Add(4, 6) [10]\n",    removed },
+    { "  13: Add(10, 12) [14]\n", removed },
+    { "  14: Return(13)\n",       "  14: Return(19)\n"}
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -654,14 +646,14 @@
 
   // Expected difference after dead code elimination.
   std::string expected_after_dce =
-    "BasicBlock 0, succ: 1\n"
-    "  29: SuspendCheck\n"
-    "  34: IntConstant [27]\n"
-    "  30: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5\n"
-    "  27: Return(34)\n"
-    "BasicBlock 5, pred: 1\n"
-    "  28: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  19: IntConstant [14]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5\n"
+      "  14: Return(19)\n"
+      "BasicBlock 5, pred: 1\n"
+      "  15: Exit\n";
 
   TestCode(data,
            expected_before,
@@ -693,31 +685,31 @@
     Instruction::RETURN_VOID);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [15, 22, 8]\n"
-    "  5: IntConstant [22, 8]\n"
-    "  19: SuspendCheck\n"
-    "  20: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5, 2\n"
-    "  8: GreaterThanOrEqual(3, 5) [9]\n"
-    "  9: If(8)\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  12: Goto 3\n"
-    "BasicBlock 3, pred: 5, 2, succ: 4\n"
-    "  22: Phi(5, 3) [15]\n"
-    "  15: Add(22, 3)\n"
-    "  17: ReturnVoid\n"
-    "BasicBlock 4, pred: 3\n"
-    "  18: Exit\n"
-    "BasicBlock 5, pred: 1, succ: 3\n"
-    "  21: Goto 3\n";
+      "BasicBlock 0, succ: 1\n"
+      "  3: IntConstant [9, 8, 5]\n"
+      "  4: IntConstant [8, 5]\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 2\n"
+      "  5: GreaterThanOrEqual(3, 4) [6]\n"
+      "  6: If(5)\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  7: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 4\n"
+      "  8: Phi(4, 3) [9]\n"
+      "  9: Add(8, 3)\n"
+      "  10: ReturnVoid\n"
+      "BasicBlock 4, pred: 3\n"
+      "  11: Exit\n"
+      "BasicBlock 5, pred: 1, succ: 3\n"
+      "  0: Goto 3\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [15, 22, 8]\n",      "  3: IntConstant [9, 15, 22]\n" },
-    { "  5: IntConstant [22, 8]\n",          "  5: IntConstant [22]\n" },
-    { "  8: GreaterThanOrEqual(3, 5) [9]\n", removed },
-    { "  9: If(8)\n",                        "  9: If(3)\n" }
+    { "  3: IntConstant [9, 8, 5]\n",        "  3: IntConstant [6, 9, 8]\n" },
+    { "  4: IntConstant [8, 5]\n",           "  4: IntConstant [8]\n" },
+    { "  5: GreaterThanOrEqual(3, 4) [6]\n", removed },
+    { "  6: If(5)\n",                        "  6: If(3)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -730,13 +722,13 @@
 
   // Expected graph after dead code elimination.
   std::string expected_after_dce =
-    "BasicBlock 0, succ: 1\n"
-    "  19: SuspendCheck\n"
-    "  20: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 4\n"
-    "  17: ReturnVoid\n"
-    "BasicBlock 4, pred: 1\n"
-    "  18: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 4\n"
+      "  10: ReturnVoid\n"
+      "BasicBlock 4, pred: 1\n"
+      "  11: Exit\n";
 
   TestCode(data,
            expected_before,
@@ -766,7 +758,10 @@
   HInstruction* parameter = new (&allocator_) HParameterValue(
       graph_->GetDexFile(), 0, 0, Primitive::kPrimInt, true);
   entry_block->AddInstruction(parameter);
+  entry_block->AddInstruction(new (&allocator_) HGoto());
+
   HInstruction* zero = graph_->GetIntConstant(0);
+
   HInstruction* last;
   block->AddInstruction(last = new (&allocator_) HAbove(zero, parameter));
   block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
@@ -784,70 +779,70 @@
   block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
   block->AddInstruction(last = new (&allocator_) HBelowOrEqual(parameter, zero));
   block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
-
-  entry_block->AddInstruction(new (&allocator_) HGoto());
   block->AddInstruction(new (&allocator_) HReturn(zero));
+
   exit_block->AddInstruction(new (&allocator_) HExit());
 
   graph_->BuildDominatorTree();
 
   const std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  0: ParameterValue [17, 17, 16, 15, 15, 14, 13, 13, 12, 11, 11, 10, 9, 9, "
-                           "8, 7, 7, 6, 5, 5, 4, 3, 3, 2]\n"
-      "  1: IntConstant [19, 16, 14, 12, 10, 8, 6, 4, 2]\n"
-      "  18: Goto 1\n"
+      "  0: ParameterValue [18, 18, 17, 16, 16, 15, 14, 14, 13, 12, 12, 11, 10, 10, 9, "
+                            "8, 8, 7, 6, 6, 5, 4, 4, 3]\n"
+      "  2: IntConstant [19, 17, 15, 13, 11, 9, 7, 5, 3]\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  2: Above(1, 0) [3]\n"
-      "  3: Select(0, 0, 2)\n"
-      "  4: Above(0, 1) [5]\n"
-      "  5: Select(0, 0, 4)\n"
-      "  6: AboveOrEqual(1, 0) [7]\n"
-      "  7: Select(0, 0, 6)\n"
-      "  8: AboveOrEqual(0, 1) [9]\n"
-      "  9: Select(0, 0, 8)\n"
-      "  10: Below(1, 0) [11]\n"
-      "  11: Select(0, 0, 10)\n"
-      "  12: Below(0, 1) [13]\n"
-      "  13: Select(0, 0, 12)\n"
-      "  14: BelowOrEqual(1, 0) [15]\n"
-      "  15: Select(0, 0, 14)\n"
-      "  16: BelowOrEqual(0, 1) [17]\n"
-      "  17: Select(0, 0, 16)\n"
-      "  19: Return(1)\n"
+      "  3: Above(2, 0) [4]\n"
+      "  4: Select(0, 0, 3)\n"
+      "  5: Above(0, 2) [6]\n"
+      "  6: Select(0, 0, 5)\n"
+      "  7: AboveOrEqual(2, 0) [8]\n"
+      "  8: Select(0, 0, 7)\n"
+      "  9: AboveOrEqual(0, 2) [10]\n"
+      "  10: Select(0, 0, 9)\n"
+      "  11: Below(2, 0) [12]\n"
+      "  12: Select(0, 0, 11)\n"
+      "  13: Below(0, 2) [14]\n"
+      "  14: Select(0, 0, 13)\n"
+      "  15: BelowOrEqual(2, 0) [16]\n"
+      "  16: Select(0, 0, 15)\n"
+      "  17: BelowOrEqual(0, 2) [18]\n"
+      "  18: Select(0, 0, 17)\n"
+      "  19: Return(2)\n"
       "BasicBlock 2, pred: 1\n"
       "  20: Exit\n";
 
   const std::string expected_after_cf =
       "BasicBlock 0, succ: 1\n"
-      "  0: ParameterValue [17, 17, 16, 15, 15, 13, 13, 11, 11, 10, 9, 9, 7, 7, 6, 5, 5, 4, 3, 3]\n"
-      "  1: IntConstant [13, 3, 19, 16, 10, 6, 4]\n"
-      "  21: IntConstant [15, 9]\n"
-      "  18: Goto 1\n"
+      "  0: ParameterValue [18, 18, 17, 16, 16, 14, 14, 12, 12, 11, 10, 10, "
+                            "8, 8, 7, 6, 6, 5, 4, 4]\n"
+      "  2: IntConstant [14, 4, 19, 17, 11, 7, 5]\n"
+      "  21: IntConstant [16, 10]\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  3: Select(0, 0, 1)\n"
-      "  4: Above(0, 1) [5]\n"
-      "  5: Select(0, 0, 4)\n"
-      "  6: AboveOrEqual(1, 0) [7]\n"
-      "  7: Select(0, 0, 6)\n"
-      "  9: Select(0, 0, 21)\n"
-      "  10: Below(1, 0) [11]\n"
-      "  11: Select(0, 0, 10)\n"
-      "  13: Select(0, 0, 1)\n"
-      "  15: Select(0, 0, 21)\n"
-      "  16: BelowOrEqual(0, 1) [17]\n"
-      "  17: Select(0, 0, 16)\n"
-      "  19: Return(1)\n"
+      "  4: Select(0, 0, 2)\n"
+      "  5: Above(0, 2) [6]\n"
+      "  6: Select(0, 0, 5)\n"
+      "  7: AboveOrEqual(2, 0) [8]\n"
+      "  8: Select(0, 0, 7)\n"
+      "  10: Select(0, 0, 21)\n"
+      "  11: Below(2, 0) [12]\n"
+      "  12: Select(0, 0, 11)\n"
+      "  14: Select(0, 0, 2)\n"
+      "  16: Select(0, 0, 21)\n"
+      "  17: BelowOrEqual(0, 2) [18]\n"
+      "  18: Select(0, 0, 17)\n"
+      "  19: Return(2)\n"
       "BasicBlock 2, pred: 1\n"
       "  20: Exit\n";
 
   const std::string expected_after_dce =
       "BasicBlock 0, succ: 1\n"
       "  0: ParameterValue\n"
-      "  1: IntConstant [19]\n"
-      "  18: Goto 1\n"
+      "  2: IntConstant [19]\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  19: Return(1)\n"
+      "  19: Return(2)\n"
       "BasicBlock 2, pred: 1\n"
       "  20: Exit\n";
 
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 5f11024..49cfff4 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -23,7 +23,7 @@
 namespace art {
 
 static void MarkReachableBlocks(HGraph* graph, ArenaBitVector* visited) {
-  ArenaVector<HBasicBlock*> worklist(graph->GetArena()->Adapter());
+  ArenaVector<HBasicBlock*> worklist(graph->GetArena()->Adapter(kArenaAllocDCE));
   constexpr size_t kDefaultWorlistSize = 8;
   worklist.reserve(kDefaultWorlistSize);
   visited->SetBit(graph->GetEntryBlock()->GetBlockId());
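
Note on the Adapter change above: arena adapters carry an allocation-kind tag purely for memory accounting, so passing kArenaAllocDCE attributes the worklist's memory to the dead code elimination pass in the arena statistics rather than to a generic bucket. A toy model of such a tagging adapter, using a standard allocator shape instead of ART's real ArenaAllocatorAdapter:

    #include <cstddef>
    #include <new>
    #include <vector>

    enum ArenaAllocKind { kArenaAllocSTL, kArenaAllocDCE };

    // Toy tagging allocator: every container constructed from it attributes
    // its memory to one accounting category.
    template <typename T>
    struct TaggedAllocator {
      using value_type = T;

      ArenaAllocKind kind;

      explicit TaggedAllocator(ArenaAllocKind k) : kind(k) {}
      template <typename U>
      TaggedAllocator(const TaggedAllocator<U>& other) : kind(other.kind) {}

      T* allocate(std::size_t n) {
        // A real arena would also do: RecordBytes(kind, n * sizeof(T));
        return static_cast<T*>(::operator new(n * sizeof(T)));
      }
      void deallocate(T* p, std::size_t) { ::operator delete(p); }
    };

    template <typename T, typename U>
    bool operator==(const TaggedAllocator<T>& a, const TaggedAllocator<U>& b) {
      return a.kind == b.kind;
    }
    template <typename T, typename U>
    bool operator!=(const TaggedAllocator<T>& a, const TaggedAllocator<U>& b) {
      return !(a == b);
    }

    int main() {
      std::vector<int, TaggedAllocator<int>> worklist(
          TaggedAllocator<int>(kArenaAllocDCE));
      worklist.reserve(8);  // parallels the default worklist reserve above
      worklist.push_back(42);
      return 0;
    }
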
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 83e724b..fe52aac 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -78,30 +78,30 @@
     Instruction::RETURN_VOID);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [15, 22, 8]\n"
-    "  5: IntConstant [22, 8]\n"
-    "  19: SuspendCheck\n"
-    "  20: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5, 2\n"
-    "  8: GreaterThanOrEqual(3, 5) [9]\n"
-    "  9: If(8)\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  12: Goto 3\n"
-    "BasicBlock 3, pred: 5, 2, succ: 4\n"
-    "  22: Phi(5, 3) [15]\n"
-    "  15: Add(22, 3)\n"
-    "  17: ReturnVoid\n"
-    "BasicBlock 4, pred: 3\n"
-    "  18: Exit\n"
-    "BasicBlock 5, pred: 1, succ: 3\n"
-    "  21: Goto 3\n";
+      "BasicBlock 0, succ: 1\n"
+      "  3: IntConstant [9, 8, 5]\n"
+      "  4: IntConstant [8, 5]\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 2\n"
+      "  5: GreaterThanOrEqual(3, 4) [6]\n"
+      "  6: If(5)\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  7: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 4\n"
+      "  8: Phi(4, 3) [9]\n"
+      "  9: Add(8, 3)\n"
+      "  10: ReturnVoid\n"
+      "BasicBlock 4, pred: 3\n"
+      "  11: Exit\n"
+      "BasicBlock 5, pred: 1, succ: 3\n"
+      "  0: Goto 3\n";
 
   // Expected difference after dead code elimination.
   diff_t expected_diff = {
-    { "  3: IntConstant [15, 22, 8]\n", "  3: IntConstant [22, 8]\n" },
-    { "  22: Phi(5, 3) [15]\n",         "  22: Phi(5, 3)\n" },
-    { "  15: Add(22, 3)\n",             removed }
+    { "  3: IntConstant [9, 8, 5]\n",  "  3: IntConstant [8, 5]\n" },
+    { "  8: Phi(4, 3) [9]\n",          "  8: Phi(4, 3)\n" },
+    { "  9: Add(8, 3)\n",              removed }
   };
   std::string expected_after = Patch(expected_before, expected_diff);
 
@@ -144,37 +144,37 @@
     Instruction::RETURN_VOID);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  13: IntConstant [14]\n"
-    "  18: IntConstant [19]\n"
-    "  23: IntConstant [24]\n"
-    "  28: SuspendCheck\n"
-    "  29: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3\n"
-    "  9: Add(3, 5) [19]\n"
-    "  11: Goto 3\n"
-    "BasicBlock 2, pred: 3, succ: 4\n"
-    "  14: Add(19, 13) [24]\n"
-    "  16: Goto 4\n"
-    "BasicBlock 3, pred: 1, succ: 2\n"
-    "  19: Add(9, 18) [14]\n"
-    "  21: Goto 2\n"
-    "BasicBlock 4, pred: 2, succ: 5\n"
-    "  24: Add(14, 23)\n"
-    "  26: ReturnVoid\n"
-    "BasicBlock 5, pred: 4\n"
-    "  27: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  6: IntConstant [7]\n"
+      "  9: IntConstant [10]\n"
+      "  12: IntConstant [13]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 3\n"
+      "  4: Add(2, 3) [7]\n"
+      "  5: Goto 3\n"
+      "BasicBlock 2, pred: 3, succ: 4\n"
+      "  10: Add(7, 9) [13]\n"
+      "  11: Goto 4\n"
+      "BasicBlock 3, pred: 1, succ: 2\n"
+      "  7: Add(4, 6) [10]\n"
+      "  8: Goto 2\n"
+      "BasicBlock 4, pred: 2, succ: 5\n"
+      "  13: Add(10, 12)\n"
+      "  14: ReturnVoid\n"
+      "BasicBlock 5, pred: 4\n"
+      "  15: Exit\n";
 
   std::string expected_after =
-    "BasicBlock 0, succ: 1\n"
-    "  28: SuspendCheck\n"
-    "  29: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5\n"
-    "  26: ReturnVoid\n"
-    "BasicBlock 5, pred: 1\n"
-    "  27: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5\n"
+      "  14: ReturnVoid\n"
+      "BasicBlock 5, pred: 1\n"
+      "  15: Exit\n";
 
   TestCode(data, expected_before, expected_after);
 }
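
A note on the harness used throughout this file: expected_after is produced by patching expected_before, so a test only spells out the lines dead code elimination is expected to change or drop. The sketch below re-implements the apparent contract of Patch and the `removed` marker as used here; it is illustrative, not the harness's actual code:

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <utility>
    #include <vector>

    using diff_t = std::vector<std::pair<std::string, std::string>>;
    static const std::string removed = "";  // mapping a line to "" deletes it

    // Applies each (before, after) substitution exactly once, in order.
    std::string Patch(const std::string& original, const diff_t& diff) {
      std::string result = original;
      for (const auto& entry : diff) {
        std::size_t pos = result.find(entry.first);
        assert(pos != std::string::npos);  // every patched line must exist
        result.replace(pos, entry.first.size(), entry.second);
      }
      return result;
    }
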
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
index 3db254a..e9072b9 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -44,6 +44,21 @@
   }
 
  private:
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    // If this is a load with PC-relative access to the dex cache strings array,
+    // we need to add the dex cache arrays base as the special input.
+    if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) {
+      // Initialize base for target dex file if needed.
+      const DexFile& dex_file = load_string->GetDexFile();
+      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kArmPointerSize, &dex_file);
+      base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex()));
+      // Add the special argument base to the load.
+      load_string->AddSpecialInput(base);
+    }
+  }
+
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     // If this is an invoke with PC-relative access to the dex cache methods array,
     // we need to add the dex cache arrays base as the special input.
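
The new VisitLoadString intentionally mirrors the VisitInvokeStaticOrDirect below it: both funnel through GetOrCreateDexCacheArrayBase, so at most one HArmDexCacheArraysBase is materialized per dex file and later users only refresh its element offset. The caching pattern, reduced to a self-contained sketch (the class name is borrowed from the pass; the body is a stand-in, not the real code):

    #include <cstddef>
    #include <memory>
    #include <unordered_map>

    struct DexFile {};

    struct HArmDexCacheArraysBase {
      explicit HArmDexCacheArraysBase(const DexFile* f) : dex_file(f) {}
      const DexFile* dex_file;
      std::size_t element_offset = 0;
      void UpdateElementOffset(std::size_t offset) { element_offset = offset; }
    };

    class DexCacheArrayFixups {
     public:
      // Returns the shared base for `dex_file`, creating it on first request;
      // subsequent callers only update its element offset.
      HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
        auto it = bases_.find(&dex_file);
        if (it != bases_.end()) {
          return it->second.get();
        }
        auto base = std::make_unique<HArmDexCacheArraysBase>(&dex_file);
        HArmDexCacheArraysBase* raw = base.get();
        bases_.emplace(&dex_file, std::move(base));
        return raw;
      }

     private:
      std::unordered_map<const DexFile*,
                         std::unique_ptr<HArmDexCacheArraysBase>> bases_;
    };
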
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 528fe44..968e267 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -27,16 +27,29 @@
 
 namespace art {
 
+static bool IsAllowedToJumpToExitBlock(HInstruction* instruction) {
+  return instruction->IsThrow() || instruction->IsReturn() || instruction->IsReturnVoid();
+}
+
+static bool IsExitTryBoundaryIntoExitBlock(HBasicBlock* block) {
+  if (!block->IsSingleTryBoundary()) {
+    return false;
+  }
+
+  HTryBoundary* boundary = block->GetLastInstruction()->AsTryBoundary();
+  return block->GetPredecessors().size() == 1u &&
+         boundary->GetNormalFlowSuccessor()->IsExitBlock() &&
+         !boundary->IsEntry();
+}
+
 void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
   current_block_ = block;
 
   // Check consistency with respect to predecessors of `block`.
   // Note: Counting duplicates with a sorted vector uses up to 6x less memory
-  // than ArenaSafeMap<HBasicBlock*, size_t>.
-  ArenaVector<HBasicBlock*> sorted_predecessors(
-      block->GetPredecessors().begin(),
-      block->GetPredecessors().end(),
-      GetGraph()->GetArena()->Adapter(kArenaAllocGraphChecker));
+  // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse.
+  ArenaVector<HBasicBlock*>& sorted_predecessors = blocks_storage_;
+  sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end());
   std::sort(sorted_predecessors.begin(), sorted_predecessors.end());
   for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end; ) {
     HBasicBlock* p = *it++;
@@ -57,11 +70,9 @@
 
   // Check consistency with respect to successors of `block`.
   // Note: Counting duplicates with a sorted vector uses up to 6x less memory
-  // than ArenaSafeMap<HBasicBlock*, size_t>.
-  ArenaVector<HBasicBlock*> sorted_successors(
-      block->GetSuccessors().begin(),
-      block->GetSuccessors().end(),
-      GetGraph()->GetArena()->Adapter(kArenaAllocGraphChecker));
+  // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse.
+  ArenaVector<HBasicBlock*>& sorted_successors = blocks_storage_;
+  sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end());
   std::sort(sorted_successors.begin(), sorted_successors.end());
   for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end; ) {
     HBasicBlock* s = *it++;
@@ -89,28 +100,17 @@
                           block->GetBlockId()));
   }
 
-  // Ensure that only Return(Void) and Throw jump to Exit. An exiting
-  // TryBoundary may be between a Throw and the Exit if the Throw is in a try.
+  // Ensure that only Return(Void) and Throw jump to Exit. An exiting TryBoundary
+  // may be between the instructions if the Throw/Return(Void) is in a try block.
   if (block->IsExitBlock()) {
     for (HBasicBlock* predecessor : block->GetPredecessors()) {
-      if (predecessor->IsSingleTryBoundary()
-          && !predecessor->GetLastInstruction()->AsTryBoundary()->IsEntry()) {
-        HBasicBlock* real_predecessor = predecessor->GetSinglePredecessor();
-        HInstruction* last_instruction = real_predecessor->GetLastInstruction();
-        if (!last_instruction->IsThrow()) {
-          AddError(StringPrintf("Unexpected TryBoundary between %s:%d and Exit.",
-                                last_instruction->DebugName(),
-                                last_instruction->GetId()));
-        }
-      } else {
-        HInstruction* last_instruction = predecessor->GetLastInstruction();
-        if (!last_instruction->IsReturn()
-            && !last_instruction->IsReturnVoid()
-            && !last_instruction->IsThrow()) {
-          AddError(StringPrintf("Unexpected instruction %s:%d jumps into the exit block.",
-                                last_instruction->DebugName(),
-                                last_instruction->GetId()));
-        }
+      HInstruction* last_instruction = IsExitTryBoundaryIntoExitBlock(predecessor) ?
+        predecessor->GetSinglePredecessor()->GetLastInstruction() :
+        predecessor->GetLastInstruction();
+      if (!IsAllowedToJumpToExitBlock(last_instruction)) {
+        AddError(StringPrintf("Unexpected instruction %s:%d jumps into the exit block.",
+                              last_instruction->DebugName(),
+                              last_instruction->GetId()));
       }
     }
   }
@@ -180,16 +180,15 @@
   // predecessors). Exceptional edges are synthesized and hence
   // not accounted for.
   if (block->GetSuccessors().size() > 1) {
-    for (HBasicBlock* successor : block->GetNormalSuccessors()) {
-      if (successor->IsExitBlock() &&
-          block->IsSingleTryBoundary() &&
-          block->GetPredecessors().size() == 1u &&
-          block->GetSinglePredecessor()->GetLastInstruction()->IsThrow()) {
-        // Allowed critical edge Throw->TryBoundary->Exit.
-      } else if (successor->GetPredecessors().size() > 1) {
-        AddError(StringPrintf("Critical edge between blocks %d and %d.",
-                              block->GetBlockId(),
-                              successor->GetBlockId()));
+    if (IsExitTryBoundaryIntoExitBlock(block)) {
+      // Allowed critical edge (Throw/Return/ReturnVoid)->TryBoundary->Exit.
+    } else {
+      for (HBasicBlock* successor : block->GetNormalSuccessors()) {
+        if (successor->GetPredecessors().size() > 1) {
+          AddError(StringPrintf("Critical edge between blocks %d and %d.",
+                                block->GetBlockId(),
+                                successor->GetBlockId()));
+        }
       }
     }
   }
@@ -259,6 +258,15 @@
   VisitInstruction(check);
 }
 
+void GraphChecker::VisitDeoptimize(HDeoptimize* deopt) {
+  if (GetGraph()->IsCompilingOsr()) {
+    AddError(StringPrintf("A graph compiled OSR cannot have a HDeoptimize instruction"));
+  }
+
+  // Perform the instruction base checks too.
+  VisitInstruction(deopt);
+}
+
 void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) {
   ArrayRef<HBasicBlock* const> handlers = try_boundary->GetExceptionHandlers();
 
@@ -343,36 +351,34 @@
 
   // Ensure the uses of `instruction` are defined in a block of the graph,
   // and the entry in the use list is consistent.
-  for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
-       !use_it.Done(); use_it.Advance()) {
-    HInstruction* use = use_it.Current()->GetUser();
-    const HInstructionList& list = use->IsPhi()
-        ? use->GetBlock()->GetPhis()
-        : use->GetBlock()->GetInstructions();
-    if (!list.Contains(use)) {
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    HInstruction* user = use.GetUser();
+    const HInstructionList& list = user->IsPhi()
+        ? user->GetBlock()->GetPhis()
+        : user->GetBlock()->GetInstructions();
+    if (!list.Contains(user)) {
       AddError(StringPrintf("User %s:%d of instruction %d is not defined "
                             "in a basic block of the control-flow graph.",
-                            use->DebugName(),
-                            use->GetId(),
+                            user->DebugName(),
+                            user->GetId(),
                             instruction->GetId()));
     }
-    size_t use_index = use_it.Current()->GetIndex();
-    if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) {
+    size_t use_index = use.GetIndex();
+    if ((use_index >= user->InputCount()) || (user->InputAt(use_index) != instruction)) {
       AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong "
                             "UseListNode index.",
-                            use->DebugName(),
-                            use->GetId(),
+                            user->DebugName(),
+                            user->GetId(),
                             instruction->DebugName(),
                             instruction->GetId()));
     }
   }
 
   // Ensure the environment uses entries are consistent.
-  for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses());
-       !use_it.Done(); use_it.Advance()) {
-    HEnvironment* use = use_it.Current()->GetUser();
-    size_t use_index = use_it.Current()->GetIndex();
-    if ((use_index >= use->Size()) || (use->GetInstructionAt(use_index) != instruction)) {
+  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+    HEnvironment* user = use.GetUser();
+    size_t use_index = use.GetIndex();
+    if ((use_index >= user->Size()) || (user->GetInstructionAt(use_index) != instruction)) {
       AddError(StringPrintf("Environment user of %s:%d has a wrong "
                             "UseListNode index.",
                             instruction->DebugName(),
@@ -384,13 +390,11 @@
   for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
     HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i);
     HInstruction* input = input_record.GetInstruction();
-    HUseListNode<HInstruction*>* use_node = input_record.GetUseNode();
-    size_t use_index = use_node->GetIndex();
-    if ((use_node == nullptr)
-        || !input->GetUses().Contains(use_node)
-        || (use_index >= e)
-        || (use_index != i)) {
-      AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry "
+    if ((input_record.GetBeforeUseNode() == input->GetUses().end()) ||
+        (input_record.GetUseNode() == input->GetUses().end()) ||
+        !input->GetUses().ContainsNode(*input_record.GetUseNode()) ||
+        (input_record.GetUseNode()->GetIndex() != i)) {
+      AddError(StringPrintf("Instruction %s:%d has an invalid iterator before use entry "
                             "at input %u (%s:%d).",
                             instruction->DebugName(),
                             instruction->GetId(),
@@ -401,18 +405,17 @@
   }
 
   // Ensure an instruction dominates all its uses.
-  for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
-       !use_it.Done(); use_it.Advance()) {
-    HInstruction* use = use_it.Current()->GetUser();
-    if (!use->IsPhi() && !instruction->StrictlyDominates(use)) {
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if (!user->IsPhi() && !instruction->StrictlyDominates(user)) {
       AddError(StringPrintf("Instruction %s:%d in block %d does not dominate "
                             "use %s:%d in block %d.",
                             instruction->DebugName(),
                             instruction->GetId(),
                             current_block_->GetBlockId(),
-                            use->DebugName(),
-                            use->GetId(),
-                            use->GetBlock()->GetBlockId()));
+                            user->DebugName(),
+                            user->GetId(),
+                            user->GetBlock()->GetBlockId()));
     }
   }
 
@@ -509,7 +512,8 @@
 
 void GraphChecker::VisitReturn(HReturn* ret) {
   VisitInstruction(ret);
-  if (!ret->GetBlock()->GetSingleSuccessor()->IsExitBlock()) {
+  HBasicBlock* successor = ret->GetBlock()->GetSingleSuccessor();
+  if (!successor->IsExitBlock() && !IsExitTryBoundaryIntoExitBlock(successor)) {
     AddError(StringPrintf("%s:%d does not jump to the exit block.",
                           ret->DebugName(),
                           ret->GetId()));
@@ -518,7 +522,8 @@
 
 void GraphChecker::VisitReturnVoid(HReturnVoid* ret) {
   VisitInstruction(ret);
-  if (!ret->GetBlock()->GetSingleSuccessor()->IsExitBlock()) {
+  HBasicBlock* successor = ret->GetBlock()->GetSingleSuccessor();
+  if (!successor->IsExitBlock() && !IsExitTryBoundaryIntoExitBlock(successor)) {
     AddError(StringPrintf("%s:%d does not jump to the exit block.",
                           ret->DebugName(),
                           ret->GetId()));
@@ -811,10 +816,11 @@
               phi->GetRegNumber(),
               type_str.str().c_str()));
         } else {
-          ArenaBitVector visited(GetGraph()->GetArena(),
-                                 0,
-                                 /* expandable */ true,
-                                 kArenaAllocGraphChecker);
+          // If we get here, make sure we allocate all the necessary storage at once
+          // because the BitVector reallocation strategy has very bad worst-case behavior.
+          ArenaBitVector& visited = visited_storage_;
+          visited.SetBit(GetGraph()->GetCurrentInstructionId());
+          visited.ClearAllBits();
           if (!IsConstantEquivalent(phi, other_phi, &visited)) {
             AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they "
                                   "are not equivalents of constants.",
@@ -840,17 +846,11 @@
           static_cast<int>(input_index),
           value));
     }
-  } else if (input->GetType() == Primitive::kPrimInt
-             && (input->IsPhi() ||
-                 input->IsAnd() ||
-                 input->IsOr() ||
-                 input->IsXor() ||
-                 input->IsSelect())) {
-    // TODO: We need a data-flow analysis to determine if the Phi or Select or
-    //       binary operation is actually Boolean. Allow for now.
-  } else if (input->GetType() != Primitive::kPrimBoolean) {
+  } else if (Primitive::PrimitiveKind(input->GetType()) != Primitive::kPrimInt) {
+    // TODO: We need a data-flow analysis to determine if an input like Phi,
+    //       Select or a binary operation is actually Boolean. Allow for now.
     AddError(StringPrintf(
-        "%s instruction %d has a non-Boolean input %d whose type is: %s.",
+        "%s instruction %d has a non-integer input %d whose type is: %s.",
         instruction->DebugName(),
         instruction->GetId(),
         static_cast<int>(input_index),
@@ -1014,4 +1014,19 @@
   }
 }
 
+void GraphChecker::VisitTypeConversion(HTypeConversion* instruction) {
+  VisitInstruction(instruction);
+  Primitive::Type result_type = instruction->GetResultType();
+  Primitive::Type input_type = instruction->GetInputType();
+  // Invariant: We should never generate a conversion to a Boolean value.
+  if (result_type == Primitive::kPrimBoolean) {
+    AddError(StringPrintf(
+        "%s %d converts to a %s (from a %s).",
+        instruction->DebugName(),
+        instruction->GetId(),
+        Primitive::PrettyDescriptor(result_type),
+        Primitive::PrettyDescriptor(input_type)));
+  }
+}
+
 }  // namespace art
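
A pattern worth noting across the graph_checker.cc hunks: the old HUseIterator loops become range-based for over GetUses(), with the loop variable renamed from `use` to `user` so the node/user distinction stays readable, and the input-record check now validates iterators (GetBeforeUseNode/GetUseNode against end()) instead of raw node pointers. A minimal use list supporting that iteration style, as an illustration only:

    #include <cstddef>
    #include <cstdio>
    #include <list>

    struct HInstruction;

    // Minimal analogue of HUseListNode: records who uses an instruction
    // and at which input slot.
    struct UseListNode {
      HInstruction* user;
      std::size_t index;
      HInstruction* GetUser() const { return user; }
      std::size_t GetIndex() const { return index; }
    };

    struct HInstruction {
      int id = 0;
      std::list<UseListNode> uses;
      const std::list<UseListNode>& GetUses() const { return uses; }
    };

    void DumpUsers(const HInstruction& instruction) {
      // Range-based iteration over use-list nodes, naming the user
      // explicitly, as in the rewritten checks above.
      for (const UseListNode& use : instruction.GetUses()) {
        HInstruction* user = use.GetUser();
        std::printf("used by %d at input %zu\n", user->id, use.GetIndex());
      }
    }
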
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 27d5621..3060c80 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -33,7 +33,9 @@
       seen_ids_(graph->GetArena(),
                 graph->GetCurrentInstructionId(),
                 false,
-                kArenaAllocGraphChecker) {}
+                kArenaAllocGraphChecker),
+      blocks_storage_(graph->GetArena()->Adapter(kArenaAllocGraphChecker)),
+      visited_storage_(graph->GetArena(), 0u, true, kArenaAllocGraphChecker) {}
 
   // Check the whole graph (in reverse post-order).
   void Run() {
@@ -55,6 +57,7 @@
   void VisitCheckCast(HCheckCast* check) OVERRIDE;
   void VisitCondition(HCondition* op) OVERRIDE;
   void VisitConstant(HConstant* instruction) OVERRIDE;
+  void VisitDeoptimize(HDeoptimize* instruction) OVERRIDE;
   void VisitIf(HIf* instruction) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* check) OVERRIDE;
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
@@ -65,6 +68,7 @@
   void VisitReturnVoid(HReturnVoid* ret) OVERRIDE;
   void VisitSelect(HSelect* instruction) OVERRIDE;
   void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
+  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
 
   void HandleLoop(HBasicBlock* loop_header);
   void HandleBooleanInput(HInstruction* instruction, size_t input_index);
@@ -102,6 +106,10 @@
   const char* const dump_prefix_;
   ArenaBitVector seen_ids_;
 
+  // To reduce the total arena memory allocation, we reuse the same storage.
+  ArenaVector<HBasicBlock*> blocks_storage_;
+  ArenaBitVector visited_storage_;
+
   DISALLOW_COPY_AND_ASSIGN(GraphChecker);
 };
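
The two new members exist purely to cut arena churn: VisitBasicBlock re-fills blocks_storage_ via assign(), and the phi check pre-sizes visited_storage_ once (SetBit up to the maximum instruction id, then ClearAllBits) before reuse. The same reuse pattern with standard containers, as a sketch:

    #include <cstddef>
    #include <vector>

    class Checker {
     public:
      void VisitBlock(const std::vector<int>& predecessors) {
        // assign() reuses the member's high-water-mark capacity instead of
        // allocating a fresh container for every visited block.
        scratch_.assign(predecessors.begin(), predecessors.end());
        // ... sort scratch_ and count duplicates ...
      }

      void CheckPhis(int max_instruction_id) {
        // Size once to the maximum id, then clear: the analogue of the
        // SetBit + ClearAllBits idiom above, sidestepping repeated
        // worst-case BitVector regrowth.
        visited_.assign(static_cast<std::size_t>(max_instruction_id) + 1, false);
      }

     private:
      std::vector<int> scratch_;   // blocks_storage_ analogue
      std::vector<bool> visited_;  // visited_storage_ analogue
    };
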
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 3a9d242..6aec463 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -98,7 +98,9 @@
                                               DisassemblerOptions* options);
 class HGraphVisualizerDisassembler {
  public:
-  HGraphVisualizerDisassembler(InstructionSet instruction_set, const uint8_t* base_address)
+  HGraphVisualizerDisassembler(InstructionSet instruction_set,
+                               const uint8_t* base_address,
+                               const uint8_t* end_address)
       : instruction_set_(instruction_set), disassembler_(nullptr) {
     libart_disassembler_handle_ =
         dlopen(kIsDebugBuild ? "libartd-disassembler.so" : "libart-disassembler.so", RTLD_NOW);
@@ -119,6 +121,7 @@
             instruction_set,
             new DisassemblerOptions(/* absolute_addresses */ false,
                                     base_address,
+                                    end_address,
                                     /* can_read_literals */ true)));
   }
 
@@ -174,45 +177,53 @@
         disassembler_(disasm_info_ != nullptr
                       ? new HGraphVisualizerDisassembler(
                             codegen_.GetInstructionSet(),
-                            codegen_.GetAssembler().CodeBufferBaseAddress())
+                            codegen_.GetAssembler().CodeBufferBaseAddress(),
+                            codegen_.GetAssembler().CodeBufferBaseAddress()
+                                + codegen_.GetAssembler().CodeSize())
                       : nullptr),
         indent_(0) {}
 
+  void Flush() {
+    // We use "\n" instead of std::endl to avoid implicit flushing which
+    // generates too many syscalls during debug-GC tests (b/27826765).
+    output_ << std::flush;
+  }
+
   void StartTag(const char* name) {
     AddIndent();
-    output_ << "begin_" << name << std::endl;
+    output_ << "begin_" << name << "\n";
     indent_++;
   }
 
   void EndTag(const char* name) {
     indent_--;
     AddIndent();
-    output_ << "end_" << name << std::endl;
+    output_ << "end_" << name << "\n";
   }
 
   void PrintProperty(const char* name, const char* property) {
     AddIndent();
-    output_ << name << " \"" << property << "\"" << std::endl;
+    output_ << name << " \"" << property << "\"\n";
   }
 
   void PrintProperty(const char* name, const char* property, int id) {
     AddIndent();
-    output_ << name << " \"" << property << id << "\"" << std::endl;
+    output_ << name << " \"" << property << id << "\"\n";
   }
 
   void PrintEmptyProperty(const char* name) {
     AddIndent();
-    output_ << name << std::endl;
+    output_ << name << "\n";
   }
 
   void PrintTime(const char* name) {
     AddIndent();
-    output_ << name << " " << time(nullptr) << std::endl;
+    output_ << name << " " << time(nullptr) << "\n";
   }
 
   void PrintInt(const char* name, int value) {
     AddIndent();
-    output_ << name << " " << value << std::endl;
+    output_ << name << " " << value << "\n";
   }
 
   void AddIndent() {
@@ -249,7 +260,7 @@
     if (block->IsEntryBlock() && (disasm_info_ != nullptr)) {
       output_ << " \"" << kDisassemblyBlockFrameEntry << "\" ";
     }
-    output_<< std::endl;
+    output_<< "\n";
   }
 
   void PrintSuccessors(HBasicBlock* block) {
@@ -258,7 +269,7 @@
     for (HBasicBlock* successor : block->GetNormalSuccessors()) {
       output_ << " \"B" << successor->GetBlockId() << "\" ";
     }
-    output_<< std::endl;
+    output_<< "\n";
   }
 
   void PrintExceptionHandlers(HBasicBlock* block) {
@@ -272,7 +283,7 @@
         !disasm_info_->GetSlowPathIntervals().empty()) {
       output_ << " \"" << kDisassemblyBlockSlowPaths << "\" ";
     }
-    output_<< std::endl;
+    output_<< "\n";
   }
 
   void DumpLocation(std::ostream& stream, const Location& location) {
@@ -367,6 +378,10 @@
         << load_class->NeedsAccessCheck() << std::noboolalpha;
   }
 
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    StartAttributeStream("load_kind") << load_string->GetLoadKind();
+  }
+
   void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
     StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind();
     StartAttributeStream("must_do_null_check") << std::boolalpha
@@ -379,6 +394,11 @@
         << instance_of->MustDoNullCheck() << std::noboolalpha;
   }
 
+  void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
+    StartAttributeStream("is_string_length") << std::boolalpha
+        << array_length->IsStringLength() << std::noboolalpha;
+  }
+
   void VisitArraySet(HArraySet* array_set) OVERRIDE {
     StartAttributeStream("value_can_be_null") << std::boolalpha
         << array_set->GetValueCanBeNull() << std::noboolalpha;
@@ -418,6 +438,20 @@
     StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
   }
 
+  void VisitInstanceFieldGet(HInstanceFieldGet* iget) OVERRIDE {
+    StartAttributeStream("field_name") << PrettyField(iget->GetFieldInfo().GetFieldIndex(),
+                                                      iget->GetFieldInfo().GetDexFile(),
+                                                      /* with type */ false);
+    StartAttributeStream("field_type") << iget->GetFieldType();
+  }
+
+  void VisitInstanceFieldSet(HInstanceFieldSet* iset) OVERRIDE {
+    StartAttributeStream("field_name") << PrettyField(iset->GetFieldInfo().GetFieldIndex(),
+                                                      iset->GetFieldInfo().GetDexFile(),
+                                                      /* with type */ false);
+    StartAttributeStream("field_type") << iset->GetFieldType();
+  }
+
   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE {
     StartAttributeStream("field_type") << field_access->GetFieldType();
   }
@@ -520,26 +554,19 @@
       }
     }
 
-    if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
-        || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)
-        || IsPass(HDeadCodeElimination::kInitialDeadCodeEliminationPassName)
-        || IsPass(BoundsCheckElimination::kBoundsCheckEliminationPassName)
-        || IsPass(RegisterAllocator::kRegisterAllocatorPassName)
-        || IsPass(HGraphBuilder::kBuilderPassName)) {
-      HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
-      if (info == nullptr) {
-        StartAttributeStream("loop") << "none";
+    HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+    if (loop_info == nullptr) {
+      StartAttributeStream("loop") << "none";
+    } else {
+      StartAttributeStream("loop") << "B" << loop_info->GetHeader()->GetBlockId();
+      HLoopInformation* outer = loop_info->GetPreHeader()->GetLoopInformation();
+      if (outer != nullptr) {
+        StartAttributeStream("outer_loop") << "B" << outer->GetHeader()->GetBlockId();
       } else {
-        StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
-        HLoopInformation* outer = info->GetPreHeader()->GetLoopInformation();
-        if (outer != nullptr) {
-          StartAttributeStream("outer_loop") << "B" << outer->GetHeader()->GetBlockId();
-        } else {
-          StartAttributeStream("outer_loop") << "none";
-        }
-        StartAttributeStream("irreducible")
-            << std::boolalpha << info->IsIrreducible() << std::noboolalpha;
+        StartAttributeStream("outer_loop") << "none";
       }
+      StartAttributeStream("irreducible")
+          << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha;
     }
 
     if ((IsPass(HGraphBuilder::kBuilderPassName)
@@ -574,7 +601,7 @@
       auto it = disasm_info_->GetInstructionIntervals().find(instruction);
       if (it != disasm_info_->GetInstructionIntervals().end()
           && it->second.start != it->second.end) {
-        output_ << std::endl;
+        output_ << "\n";
         disassembler_->Disassemble(output_, it->second.start, it->second.end);
       }
     }
@@ -584,17 +611,12 @@
     for (HInstructionIterator it(list); !it.Done(); it.Advance()) {
       HInstruction* instruction = it.Current();
       int bci = 0;
-      size_t num_uses = 0;
-      for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
-           !use_it.Done();
-           use_it.Advance()) {
-        ++num_uses;
-      }
+      size_t num_uses = instruction->GetUses().SizeSlow();
       AddIndent();
       output_ << bci << " " << num_uses << " "
               << GetTypeId(instruction->GetType()) << instruction->GetId() << " ";
       PrintInstruction(instruction);
-      output_ << " " << kEndInstructionMarker << std::endl;
+      output_ << " " << kEndInstructionMarker << "\n";
     }
   }
 
@@ -638,10 +660,10 @@
     output_ << "    0 0 disasm " << kDisassemblyBlockFrameEntry << " ";
     GeneratedCodeInterval frame_entry = disasm_info_->GetFrameEntryInterval();
     if (frame_entry.start != frame_entry.end) {
-      output_ << std::endl;
+      output_ << "\n";
       disassembler_->Disassemble(output_, frame_entry.start, frame_entry.end);
     }
-    output_ << kEndInstructionMarker << std::endl;
+    output_ << kEndInstructionMarker << "\n";
     DumpEndOfDisassemblyBlock();
   }
 
@@ -657,9 +679,9 @@
         GetGraph()->HasExitBlock() ? GetGraph()->GetExitBlock()->GetBlockId() : -1,
         -1);
     for (SlowPathCodeInfo info : disasm_info_->GetSlowPathIntervals()) {
-      output_ << "    0 0 disasm " << info.slow_path->GetDescription() << std::endl;
+      output_ << "    0 0 disasm " << info.slow_path->GetDescription() << "\n";
       disassembler_->Disassemble(output_, info.code_interval.start, info.code_interval.end);
-      output_ << kEndInstructionMarker << std::endl;
+      output_ << kEndInstructionMarker << "\n";
     }
     DumpEndOfDisassemblyBlock();
   }
@@ -680,6 +702,7 @@
       DumpDisassemblyBlockForSlowPaths();
     }
     EndTag("cfg");
+    Flush();
   }
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -719,7 +742,7 @@
       for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) {
         output_ << inputs.Current()->GetId() << " ";
       }
-      output_ << "]" << std::endl;
+      output_ << "]\n";
     }
     EndTag("locals");
     EndTag("states");
@@ -761,6 +784,7 @@
   printer.PrintProperty("method", method_name);
   printer.PrintTime("date");
   printer.EndTag("compilation");
+  printer.Flush();
 }
 
 void HGraphVisualizer::DumpGraph(const char* pass_name,
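
The std::endl to "\n" sweep plus the explicit Flush() is the classic fix for syscall storms of the b/27826765 kind: std::endl flushes the stream on every line, while "\n" leaves the output buffered and the single flush is paid once at the end of the dump. In miniature:

    #include <fstream>
    #include <ostream>

    void DumpManyLines(std::ofstream& out) {
      for (int i = 0; i < 100000; ++i) {
        out << "line " << i << "\n";  // buffered; no per-line flush
      }
      out << std::flush;  // one explicit flush, as in Flush() above
    }
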
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index f7eb2ad..1e86b75 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -41,7 +41,7 @@
         num_buckets_(kMinimumNumberOfBuckets),
         buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
         buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
-        num_entries_(0) {
+        num_entries_(0u) {
     // ArenaAllocator returns zeroed memory, so no need to set buckets to null.
     DCHECK(IsPowerOfTwo(num_buckets_));
     buckets_owned_.SetInitialBits(num_buckets_);
@@ -49,29 +49,35 @@
 
   // Copy constructor. Depending on the load factor, it will either make a deep
   // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
-  ValueSet(ArenaAllocator* allocator, const ValueSet& to_copy)
+  ValueSet(ArenaAllocator* allocator, const ValueSet& other)
       : allocator_(allocator),
-        num_buckets_(to_copy.IdealBucketCount()),
+        num_buckets_(other.IdealBucketCount()),
         buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
         buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
-        num_entries_(to_copy.num_entries_) {
+        num_entries_(0u) {
     // ArenaAllocator returns zeroed memory, so entries of buckets_ and
     // buckets_owned_ are initialized to null and false, respectively.
     DCHECK(IsPowerOfTwo(num_buckets_));
-    if (num_buckets_ == to_copy.num_buckets_) {
-      // Hash table remains the same size. We copy the bucket pointers and leave
-      // all buckets_owned_ bits false.
-      memcpy(buckets_, to_copy.buckets_, num_buckets_ * sizeof(Node*));
+    PopulateFromInternal(other, /* is_dirty */ false);
+  }
+
+  // Erases all values in this set and populates it with values from `other`.
+  void PopulateFrom(const ValueSet& other) {
+    if (this == &other) {
+      return;
+    }
+    PopulateFromInternal(other, /* is_dirty */ true);
+  }
+
+  // Returns true if `this` has enough buckets so that if `other` is copied into
+  // it, the load factor will not cross the upper threshold.
+  // If `exact_match` is set, true is returned only if `this` has the ideal
+  // number of buckets. Larger number of buckets is allowed otherwise.
+  bool CanHoldCopyOf(const ValueSet& other, bool exact_match) {
+    if (exact_match) {
+      return other.IdealBucketCount() == num_buckets_;
     } else {
-      // Hash table size changes. We copy and rehash all entries, and set all
-      // buckets_owned_ bits to true.
-      for (size_t i = 0; i < to_copy.num_buckets_; ++i) {
-        for (Node* node = to_copy.buckets_[i]; node != nullptr; node = node->GetNext()) {
-          size_t new_index = BucketIndex(node->GetHashCode());
-          buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
-        }
-      }
-      buckets_owned_.SetInitialBits(num_buckets_);
+      return other.IdealBucketCount() <= num_buckets_;
     }
   }
 
@@ -152,6 +158,46 @@
   size_t GetNumberOfEntries() const { return num_entries_; }
 
  private:
+  // Copies all entries from `other` to `this`.
+  // If `is_dirty` is set to true, existing data will be wiped first. It is
+  // assumed that `buckets_` and `buckets_owned_` are zero-allocated otherwise.
+  void PopulateFromInternal(const ValueSet& other, bool is_dirty) {
+    DCHECK_NE(this, &other);
+    DCHECK_GE(num_buckets_, other.IdealBucketCount());
+
+    if (num_buckets_ == other.num_buckets_) {
+      // Hash table remains the same size. We copy the bucket pointers and leave
+      // all buckets_owned_ bits false.
+      if (is_dirty) {
+        buckets_owned_.ClearAllBits();
+      } else {
+        DCHECK_EQ(buckets_owned_.NumSetBits(), 0u);
+      }
+      memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
+    } else {
+      // Hash table size changes. We copy and rehash all entries, and set all
+      // buckets_owned_ bits to true.
+      if (is_dirty) {
+        memset(buckets_, 0, num_buckets_ * sizeof(Node*));
+      } else {
+        if (kIsDebugBuild) {
+          for (size_t i = 0; i < num_buckets_; ++i) {
+            DCHECK(buckets_[i] == nullptr) << i;
+          }
+        }
+      }
+      for (size_t i = 0; i < other.num_buckets_; ++i) {
+        for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
+          size_t new_index = BucketIndex(node->GetHashCode());
+          buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
+        }
+      }
+      buckets_owned_.SetInitialBits(num_buckets_);
+    }
+
+    num_entries_ = other.num_entries_;
+  }
+
   class Node : public ArenaObject<kArenaAllocGvn> {
    public:
     Node(HInstruction* instruction, size_t hash_code, Node* next)
@@ -310,7 +356,9 @@
       : graph_(graph),
         allocator_(allocator),
         side_effects_(side_effects),
-        sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)) {}
+        sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)),
+        visited_blocks_(
+            allocator, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) {}
 
   void Run();
 
@@ -323,11 +371,37 @@
   ArenaAllocator* const allocator_;
   const SideEffectsAnalysis& side_effects_;
 
+  ValueSet* FindSetFor(HBasicBlock* block) const {
+    ValueSet* result = sets_[block->GetBlockId()];
+    DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId();
+    return result;
+  }
+
+  void AbandonSetFor(HBasicBlock* block) {
+    DCHECK(sets_[block->GetBlockId()] != nullptr)
+        << "Block B" << block->GetBlockId() << " expected to have a set";
+    sets_[block->GetBlockId()] = nullptr;
+  }
+
+  // Returns false if the GlobalValueNumberer has already visited all blocks
+  // which may reference `block`.
+  bool WillBeReferencedAgain(HBasicBlock* block) const;
+
+  // Iterates over visited blocks and finds one which has a ValueSet such that:
+  // (a) it will not be referenced in the future, and
+  // (b) it can hold a copy of `reference_set` with a reasonable load factor.
+  HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
+                                                 const ValueSet& reference_set) const;
+
   // ValueSet for blocks. Initially null, but for an individual block they
   // are allocated and populated by the dominator, and updated by all blocks
   // in the path from the dominator to the block.
   ArenaVector<ValueSet*> sets_;
 
+  // BitVector which serves as a fast-access map from block id to
+  // visited/unvisited boolean.
+  ArenaBitVector visited_blocks_;
+
   DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
 };
 
@@ -344,6 +418,7 @@
 
 void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
   ValueSet* set = nullptr;
+
   const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
   if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
     // The entry block should only accumulate constant instructions, and
@@ -352,28 +427,49 @@
     set = new (allocator_) ValueSet(allocator_);
   } else {
     HBasicBlock* dominator = block->GetDominator();
-    ValueSet* dominator_set = sets_[dominator->GetBlockId()];
+    ValueSet* dominator_set = FindSetFor(dominator);
+
     if (dominator->GetSuccessors().size() == 1) {
-      DCHECK_EQ(dominator->GetSuccessors()[0], block);
+      // `block` is a direct successor of its dominator. No need to clone the
+      // dominator's set, `block` can take over its ownership including its buckets.
+      DCHECK_EQ(dominator->GetSingleSuccessor(), block);
+      AbandonSetFor(dominator);
       set = dominator_set;
     } else {
-      // We have to copy if the dominator has other successors, or `block` is not a successor
-      // of the dominator.
-      set = new (allocator_) ValueSet(allocator_, *dominator_set);
+      // Try to find a basic block which will never be referenced again and whose
+      // ValueSet can therefore be recycled. We will need to copy `dominator_set`
+      // into the recycled set, so we pass `dominator_set` as a reference for size.
+      HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
+      if (recyclable == nullptr) {
+        // No block with a suitable ValueSet found. Allocate a new one and
+        // copy `dominator_set` into it.
+        set = new (allocator_) ValueSet(allocator_, *dominator_set);
+      } else {
+        // Block with a recyclable ValueSet found. Clone `dominator_set` into it.
+        set = FindSetFor(recyclable);
+        AbandonSetFor(recyclable);
+        set->PopulateFrom(*dominator_set);
+      }
     }
+
     if (!set->IsEmpty()) {
       if (block->IsLoopHeader()) {
-        if (block->GetLoopInformation()->IsIrreducible()) {
+        if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
           // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
-          // loop header.
+          // loop header. We clear the set at entry of irreducible loops and any loop containing
+          // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction
+          // across the irreducible loop.
+          // Note that, if we're not compiling OSR, we could still do GVN and introduce
+          // phis at irreducible loop headers. We decided it was not worth the complexity.
           set->Clear();
         } else {
+          DCHECK(!block->GetLoopInformation()->IsIrreducible());
           DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
           set->Kill(side_effects_.GetLoopEffects(block));
         }
       } else if (predecessors.size() > 1) {
         for (HBasicBlock* predecessor : predecessors) {
-          set->IntersectWith(sets_[predecessor->GetBlockId()]);
+          set->IntersectWith(FindSetFor(predecessor));
           if (set->IsEmpty()) {
             break;
           }
@@ -413,6 +509,60 @@
     }
     current = next;
   }
+
+  visited_blocks_.SetBit(block->GetBlockId());
+}
+
+bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
+  DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));
+
+  for (auto dominated_block : block->GetDominatedBlocks()) {
+    if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
+      return true;
+    }
+  }
+
+  for (auto successor : block->GetSuccessors()) {
+    if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
+    HBasicBlock* block, const ValueSet& reference_set) const {
+  HBasicBlock* secondary_match = nullptr;
+
+  for (size_t block_id : visited_blocks_.Indexes()) {
+    ValueSet* current_set = sets_[block_id];
+    if (current_set == nullptr) {
+      // Set was already recycled.
+      continue;
+    }
+
+    HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];
+
+    // We test if `current_set` has enough buckets to store a copy of
+    // `reference_set` with a reasonable load factor. If we find a set whose
+    // number of buckets matches perfectly, we return right away. If we find one
+    // that is larger, we return it if no perfectly-matching set is found.
+    // Note that we defer testing WillBeReferencedAgain until all other criteria
+    // have been satisfied because it might be expensive.
+    if (current_set->CanHoldCopyOf(reference_set, /* exact_match */ true)) {
+      if (!WillBeReferencedAgain(current_block)) {
+        return current_block;
+      }
+    } else if (secondary_match == nullptr &&
+               current_set->CanHoldCopyOf(reference_set, /* exact_match */ false)) {
+      if (!WillBeReferencedAgain(current_block)) {
+        secondary_match = current_block;
+      }
+    }
+  }
+
+  return secondary_match;
 }
 
 void GVNOptimization::Run() {
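
Taken together, the recycling machinery turns the copy-on-branch case into: scan the visited blocks for a ValueSet whose owner can never be referenced again and whose bucket count can absorb the dominator's entries, preferring an exact match so that PopulateFromInternal hits the memcpy path with shared, unowned buckets; only allocate a fresh arena copy when no such set exists. Condensed from VisitBasicBlock above, not a separate API:

    // Schematic of the copy path (names as in the hunks above):
    //
    //   HBasicBlock* recyclable =
    //       FindVisitedBlockWithRecyclableSet(block, *dominator_set);
    //   if (recyclable == nullptr) {
    //     set = new (allocator_) ValueSet(allocator_, *dominator_set);  // fresh copy
    //   } else {
    //     set = FindSetFor(recyclable);
    //     AbandonSetFor(recyclable);          // the old owner is dead to the pass
    //     set->PopulateFrom(*dominator_set);  // memcpy fast path when bucket
    //   }                                     // counts match
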
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 56dc088..6abf00e 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -357,8 +357,10 @@
                                                              Primitive::kPrimBoolean);
   entry->AddInstruction(parameter);
   entry->AddInstruction(new (&allocator) HGoto());
+  outer_loop_header->AddInstruction(new (&allocator) HSuspendCheck());
   outer_loop_header->AddInstruction(new (&allocator) HIf(parameter));
   outer_loop_body->AddInstruction(new (&allocator) HGoto());
+  inner_loop_header->AddInstruction(new (&allocator) HSuspendCheck());
   inner_loop_header->AddInstruction(new (&allocator) HIf(parameter));
   inner_loop_body->AddInstruction(new (&allocator) HGoto());
   inner_loop_exit->AddInstruction(new (&allocator) HGoto());
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 266cb10..c06d19d 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -141,7 +141,7 @@
   DCHECK(stack_.empty());
   map_.clear();
 
-  // Determine the loop's trip count.
+  // Determine the loop's trip-count.
   VisitControl(loop);
 }
 
@@ -917,6 +917,7 @@
         info1->induction_class == info2->induction_class &&
         info1->operation       == info2->operation       &&
         info1->fetch           == info2->fetch           &&
+        info1->type            == info2->type            &&
         InductionEqual(info1->op_a, info2->op_a)         &&
         InductionEqual(info1->op_b, info2->op_b);
   }
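
The one-field addition above is load-bearing: two induction infos that differ only in type, such as an int induction and its byte-truncated twin, must no longer compare equal. A self-contained toy version of the structural comparison (field names loosely follow the hunk; the null base case is an assumption, not copied from the source):

    // Toy induction node: equality is structural and, after this change,
    // type-sensitive.
    struct InductionInfo {
      int induction_class;
      int operation;
      const void* fetch;
      int type;  // stand-in for Primitive::Type
      InductionInfo* op_a;
      InductionInfo* op_b;
    };

    bool InductionEqual(const InductionInfo* a, const InductionInfo* b) {
      if (a == nullptr || b == nullptr) {
        return a == b;  // assumed base case: only both-null compares equal
      }
      return a->induction_class == b->induction_class &&
             a->operation == b->operation &&
             a->fetch == b->fetch &&
             a->type == b->type &&  // the new requirement: types must match
             InductionEqual(a->op_a, b->op_a) &&
             InductionEqual(a->op_b, b->op_b);
    }
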
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 0fbb67d..580d24b 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -157,6 +157,13 @@
         iva_->LookupInfo(loop_body_[d]->GetLoopInformation(), instruction));
   }
 
+  // Returns true if instructions have identical induction.
+  bool HaveSameInduction(HInstruction* instruction1, HInstruction* instruction2) {
+    return HInductionVarAnalysis::InductionEqual(
+      iva_->LookupInfo(loop_body_[0]->GetLoopInformation(), instruction1),
+      iva_->LookupInfo(loop_body_[0]->GetLoopInformation(), instruction2));
+  }
+
   // Performs InductionVarAnalysis (after proper set up).
   void PerformInductionVarAnalysis() {
     graph_->BuildDominatorTree();
@@ -228,6 +235,9 @@
   EXPECT_STREQ("((1) * i + (0)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str());
   EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(increment_[0], 0).c_str());
 
+  // Offset matters!
+  EXPECT_FALSE(HaveSameInduction(store->InputAt(1), increment_[0]));
+
   // Trip-count.
   EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))",
                GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
@@ -320,6 +330,10 @@
   PerformInductionVarAnalysis();
 
   EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str());
+
+  // Both increments get same induction.
+  EXPECT_TRUE(HaveSameInduction(store->InputAt(1), inc1));
+  EXPECT_TRUE(HaveSameInduction(store->InputAt(1), inc2));
 }
 
 TEST_F(InductionVarAnalysisTest, FindTwoWayDerivedInduction) {
@@ -570,6 +584,33 @@
   }
 }
 
+TEST_F(InductionVarAnalysisTest, ByteInductionIntLoopControl) {
+  // Setup:
+  // for (int i = 0; i < 100; i++) {
+  //   k = (byte) i;
+  //   a[k] = 0;
+  //   a[i] = 0;
+  // }
+  BuildLoopNest(1);
+  HInstruction* conv = InsertInstruction(
+      new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0);
+  HInstruction* store1 = InsertArrayStore(conv, 0);
+  HInstruction* store2 = InsertArrayStore(basic_[0], 0);
+  PerformInductionVarAnalysis();
+
+  // Regular int induction (i) is "transferred" over conversion into byte induction (k).
+  EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str());
+  EXPECT_STREQ("((1) * i + (0)):PrimInt",  GetInductionInfo(store2->InputAt(1), 0).c_str());
+  EXPECT_STREQ("((1) * i + (1)):PrimInt",  GetInductionInfo(increment_[0], 0).c_str());
+
+  // Type matters!
+  EXPECT_FALSE(HaveSameInduction(store1->InputAt(1), store2->InputAt(1)));
+
+  // Trip-count.
+  EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))",
+               GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+}
+
 TEST_F(InductionVarAnalysisTest, ByteLoopControl1) {
   // Setup:
   // for (byte i = -128; i < 127; i++) {  // just fits!
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 7114dc5..59de895 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -308,7 +308,7 @@
 
   // Check if we can use an inline cache.
   ArtMethod* caller = graph_->GetArtMethod();
-  if (Runtime::Current()->UseJit()) {
+  if (Runtime::Current()->UseJitCompilation()) {
     // Under JIT, we should always know the caller.
     DCHECK(caller != nullptr);
     ScopedProfilingInfoInlineUse spiis(caller, soa.Self());
@@ -322,7 +322,13 @@
         return false;
       } else if (ic.IsMonomorphic()) {
         MaybeRecordStat(kMonomorphicCall);
-        return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic);
+        if (outermost_graph_->IsCompilingOsr()) {
+          // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
+          // interpreter and it may have seen different receiver types.
+          return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
+        } else {
+          return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic);
+        }
       } else if (ic.IsPolymorphic()) {
         MaybeRecordStat(kPolymorphicCall);
         return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
@@ -411,7 +417,10 @@
 
   // Run type propagation to get the guard typed, and eventually propagate the
   // type of the receiver.
-  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false);
+  ReferenceTypePropagation rtp_fixup(graph_,
+                                     outer_compilation_unit_.GetDexCache(),
+                                     handles_,
+                                     /* is_first_run */ false);
   rtp_fixup.Run();
 
   MaybeRecordStat(kInlinedMonomorphicCall);
@@ -507,6 +516,11 @@
       bool deoptimize = all_targets_inlined &&
           (i != InlineCache::kIndividualCacheSize - 1) &&
           (ic.GetTypeAt(i + 1) == nullptr);
+
+      if (outermost_graph_->IsCompilingOsr()) {
+        // We do not support HDeoptimize in OSR methods.
+        deoptimize = false;
+      }
       HInstruction* compare = AddTypeGuard(
           receiver, cursor, bb_cursor, class_index, is_referrer, invoke_instruction, deoptimize);
       if (deoptimize) {
@@ -532,7 +546,10 @@
   MaybeRecordStat(kInlinedPolymorphicCall);
 
   // Run type propagation to get the guards typed.
-  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false);
+  ReferenceTypePropagation rtp_fixup(graph_,
+                                     outer_compilation_unit_.GetDexCache(),
+                                     handles_,
+                                     /* is_first_run */ false);
   rtp_fixup.Run();
   return true;
 }
@@ -617,7 +634,7 @@
                                                     ArtMethod* resolved_method,
                                                     const InlineCache& ic) {
   // This optimization only works under JIT for now.
-  DCHECK(Runtime::Current()->UseJit());
+  DCHECK(Runtime::Current()->UseJitCompilation());
   if (graph_->GetInstructionSet() == kMips64) {
     // TODO: Support HClassTableGet for mips64.
     return false;
@@ -666,7 +683,8 @@
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
-  if (!TryInlineAndReplace(invoke_instruction, actual_method, /* do_rtp */ false)) {
+  HInstruction* return_replacement = nullptr;
+  if (!TryBuildAndInline(invoke_instruction, actual_method, &return_replacement)) {
     return false;
   }
 
@@ -695,9 +713,6 @@
   }
 
   HNotEqual* compare = new (graph_->GetArena()) HNotEqual(class_table_get, constant);
-  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
-      compare, invoke_instruction->GetDexPc());
-  // TODO: Extend reference type propagation to understand the guard.
   if (cursor != nullptr) {
     bb_cursor->InsertInstructionAfter(receiver_class, cursor);
   } else {
@@ -705,11 +720,26 @@
   }
   bb_cursor->InsertInstructionAfter(class_table_get, receiver_class);
   bb_cursor->InsertInstructionAfter(compare, class_table_get);
-  bb_cursor->InsertInstructionAfter(deoptimize, compare);
-  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+
+  if (outermost_graph_->IsCompilingOsr()) {
+    CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
+  } else {
+    // TODO: Extend reference type propagation to understand the guard.
+    HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+        compare, invoke_instruction->GetDexPc());
+    bb_cursor->InsertInstructionAfter(deoptimize, compare);
+    deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+    if (return_replacement != nullptr) {
+      invoke_instruction->ReplaceWith(return_replacement);
+    }
+    invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+  }
 
   // Run type propagation to get the guard typed.
-  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false);
+  ReferenceTypePropagation rtp_fixup(graph_,
+                                     outer_compilation_unit_.GetDexCache(),
+                                     handles_,
+                                     /* is_first_run */ false);
   rtp_fixup.Run();
 
   MaybeRecordStat(kInlinedPolymorphicCall);
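
This branch pairs with the new GraphChecker::VisitDeoptimize rule earlier in the change: OSR graphs must not contain HDeoptimize, so instead of a deoptimizing guard the inliner keeps the original invoke reachable behind a diamond. Schematically (the precise shape is CreateDiamondPatternForPolymorphicInline's business; this only captures the intuition):

    compare = NotEqual(receiver_class, expected_class)

    non-OSR:  Deoptimize(compare)    OSR:         If(compare)
              <inlined body>                     /           \
                                        <original invoke>  <inlined body>
                                                 \           /
                                            Phi(return values, if any)
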
@@ -735,6 +765,12 @@
                                  HInstruction** return_replacement) {
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
 
+  if (method->IsProxyMethod()) {
+    VLOG(compiler) << "Method " << PrettyMethod(method)
+                   << " is not inlined because of unimplemented inline support for proxy methods.";
+    return false;
+  }
+
   // Check whether we're allowed to inline. The outermost compilation unit is the relevant
   // dex file here (though the transitivity of an inline chain would allow checking the caller).
   if (!compiler_driver_->MayInline(method->GetDexFile(),
@@ -786,9 +822,14 @@
     return false;
   }
 
+  if (!method->IsCompilable()) {
+    VLOG(compiler) << "Method " << PrettyMethod(method)
+                   << " has soft failures un-handled by the compiler, so it cannot be inlined";
+  }
+
   if (!method->GetDeclaringClass()->IsVerified()) {
     uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
-    if (Runtime::Current()->UseJit() ||
+    if (Runtime::Current()->UseJitCompilation() ||
         !compiler_driver_->IsMethodVerifiedWithoutFailures(
             method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
       VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
@@ -971,7 +1012,8 @@
       // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
       /* dex_pc */ 0);
   if (iget->GetType() == Primitive::kPrimNot) {
-    ReferenceTypePropagation rtp(graph_, handles_, /* is_first_run */ false);
+    // Use the same dex_cache that we used for field lookup as the hint_dex_cache.
+    ReferenceTypePropagation rtp(graph_, dex_cache, handles_, /* is_first_run */ false);
     rtp.Visit(iget);
   }
   return iget;
@@ -1012,7 +1054,6 @@
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
   DexCompilationUnit dex_compilation_unit(
-      nullptr,
       caller_compilation_unit_.GetClassLoader(),
       class_linker,
       callee_dex_file,
@@ -1062,17 +1103,24 @@
       caller_instruction_counter);
   callee_graph->SetArtMethod(resolved_method);
 
-  OptimizingCompilerStats inline_stats;
+  // When needed, allocate `inline_stats` on the heap instead of on the
+  // stack, as Clang might otherwise produce a stack frame too large for
+  // this function to satisfy the `-Wframe-larger-than` limit.
+  std::unique_ptr<OptimizingCompilerStats> inline_stats =
+      (stats_ == nullptr) ? nullptr : MakeUnique<OptimizingCompilerStats>();
   HGraphBuilder builder(callee_graph,
                         &dex_compilation_unit,
                         &outer_compilation_unit_,
                         resolved_method->GetDexFile(),
+                        *code_item,
                         compiler_driver_,
-                        &inline_stats,
+                        inline_stats.get(),
                         resolved_method->GetQuickenedInfo(),
-                        dex_cache);
+                        dex_cache,
+                        handles_);
 
-  if (builder.BuildGraph(*code_item, handles_) != kAnalysisSuccess) {
+  if (builder.BuildGraph() != kAnalysisSuccess) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be built, so cannot be inlined";
     return false;
@@ -1111,41 +1159,10 @@
     }
   }
 
-  // Run simple optimizations on the graph.
-  HDeadCodeElimination dce(callee_graph, stats_);
-  HConstantFolding fold(callee_graph);
-  HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
-  InstructionSimplifier simplify(callee_graph, stats_);
-  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_);
-
-  HOptimization* optimizations[] = {
-    &intrinsics,
-    &sharpening,
-    &simplify,
-    &fold,
-    &dce,
-  };
-
-  for (size_t i = 0; i < arraysize(optimizations); ++i) {
-    HOptimization* optimization = optimizations[i];
-    optimization->Run();
-  }
-
   size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
-  if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
-    HInliner inliner(callee_graph,
-                     outermost_graph_,
-                     codegen_,
-                     outer_compilation_unit_,
-                     dex_compilation_unit,
-                     compiler_driver_,
-                     handles_,
-                     stats_,
-                     total_number_of_dex_registers_ + code_item->registers_size_,
-                     depth_ + 1);
-    inliner.Run();
-    number_of_instructions_budget += inliner.number_of_inlined_instructions_;
-  }
+  size_t number_of_inlined_instructions =
+      RunOptimizations(callee_graph, code_item, dex_compilation_unit);
+  number_of_instructions_budget += number_of_inlined_instructions;
 
   // TODO: We should abort only if all predecessors throw. However,
   // HGraph::InlineInto currently does not handle an exit block with
@@ -1191,7 +1208,7 @@
     for (HInstructionIterator instr_it(block->GetInstructions());
          !instr_it.Done();
          instr_it.Advance()) {
-      if (number_of_instructions++ ==  number_of_instructions_budget) {
+      if (number_of_instructions++ == number_of_instructions_budget) {
         VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                        << " is not inlined because its caller has reached"
                        << " its instruction budget limit.";
@@ -1272,6 +1289,49 @@
   return true;
 }
 
+size_t HInliner::RunOptimizations(HGraph* callee_graph,
+                                  const DexFile::CodeItem* code_item,
+                                  const DexCompilationUnit& dex_compilation_unit) {
+  // Note: if the outermost_graph_ is being compiled for OSR, we should not run any
+  // optimization that could lead to an HDeoptimize. The following optimizations do not.
+  HDeadCodeElimination dce(callee_graph, stats_);
+  HConstantFolding fold(callee_graph);
+  HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
+  InstructionSimplifier simplify(callee_graph, stats_);
+  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_);
+
+  HOptimization* optimizations[] = {
+    &intrinsics,
+    &sharpening,
+    &simplify,
+    &fold,
+    &dce,
+  };
+
+  for (size_t i = 0; i < arraysize(optimizations); ++i) {
+    HOptimization* optimization = optimizations[i];
+    optimization->Run();
+  }
+
+  size_t number_of_inlined_instructions = 0u;
+  if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
+    HInliner inliner(callee_graph,
+                     outermost_graph_,
+                     codegen_,
+                     outer_compilation_unit_,
+                     dex_compilation_unit,
+                     compiler_driver_,
+                     handles_,
+                     stats_,
+                     total_number_of_dex_registers_ + code_item->registers_size_,
+                     depth_ + 1);
+    inliner.Run();
+    number_of_inlined_instructions += inliner.number_of_inlined_instructions_;
+  }
+
+  return number_of_inlined_instructions;
+}
+
 void HInliner::FixUpReturnReferenceType(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
                                         HInstruction* return_replacement,
@@ -1287,11 +1347,12 @@
         DCHECK(return_replacement->IsPhi());
         size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
         mirror::Class* cls = resolved_method->GetReturnType(false /* resolve */, pointer_size);
-        if (cls != nullptr) {
+        if (cls != nullptr && !cls->IsErroneous()) {
           ReferenceTypeInfo::TypeHandle return_handle = handles_->NewHandle(cls);
           return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
               return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
         } else {
+          // Return inexact object type on failures.
           return_replacement->SetReferenceTypeInfo(graph_->GetInexactObjectRti());
         }
       }
@@ -1303,13 +1364,19 @@
         if (invoke_rti.IsStrictSupertypeOf(return_rti)
             || (return_rti.IsExact() && !invoke_rti.IsExact())
             || !return_replacement->CanBeNull()) {
-          ReferenceTypePropagation(graph_, handles_, /* is_first_run */ false).Run();
+          ReferenceTypePropagation(graph_,
+                                   outer_compilation_unit_.GetDexCache(),
+                                   handles_,
+                                   /* is_first_run */ false).Run();
         }
       }
     } else if (return_replacement->IsInstanceOf()) {
       if (do_rtp) {
         // Inlining InstanceOf into an If may put a tighter bound on reference types.
-        ReferenceTypePropagation(graph_, handles_, /* is_first_run */ false).Run();
+        ReferenceTypePropagation(graph_,
+                                 outer_compilation_unit_.GetDexCache(),
+                                 handles_,
+                                 /* is_first_run */ false).Run();
       }
     }
   }
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index cdb2167..7cf1424 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -76,6 +76,12 @@
                                bool same_dex_file,
                                HInstruction** return_replacement);
 
+  // Run simple optimizations on `callee_graph`.
+  // Returns the number of inlined instructions.
+  size_t RunOptimizations(HGraph* callee_graph,
+                          const DexFile::CodeItem* code_item,
+                          const DexCompilationUnit& dex_compilation_unit);
+
   // Try to recognize known simple patterns and replace invoke call with appropriate instructions.
   bool TryPatternSubstitution(HInvoke* invoke_instruction,
                               ArtMethod* resolved_method,
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
new file mode 100644
index 0000000..aaddc01
--- /dev/null
+++ b/compiler/optimizing/instruction_builder.cc
@@ -0,0 +1,2691 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_builder.h"
+
+#include "bytecode_utils.h"
+#include "class_linker.h"
+#include "driver/compiler_options.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+void HInstructionBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
+  if (compilation_stats_ != nullptr) {
+    compilation_stats_->RecordStat(compilation_stat);
+  }
+}
+
+HBasicBlock* HInstructionBuilder::FindBlockStartingAt(uint32_t dex_pc) const {
+  return block_builder_->GetBlockAt(dex_pc);
+}
+
+ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) {
+  ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
+  const size_t vregs = graph_->GetNumberOfVRegs();
+  if (locals->size() != vregs) {
+    locals->resize(vregs, nullptr);
+
+    if (block->IsCatchBlock()) {
+      // We record incoming inputs of catch phis at throwing instructions and
+      // must therefore eagerly create the phis. Phis for undefined vregs will
+      // be deleted when the first throwing instruction with the vreg undefined
+      // is encountered. Unused phis will be removed by dead phi analysis.
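+      // For example (illustrative sketch, not part of the original change): if
+      // v0 is defined but v1 is not at the first throwing instruction, only v0
+      // gets a catch phi here; should a later throwing instruction leave v0
+      // undefined, v0's phi is deleted as well.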
+      for (size_t i = 0; i < vregs; ++i) {
+        // No point in creating the catch phi if it is already undefined at
+        // the first throwing instruction.
+        HInstruction* current_local_value = (*current_locals_)[i];
+        if (current_local_value != nullptr) {
+          HPhi* phi = new (arena_) HPhi(
+              arena_,
+              i,
+              0,
+              current_local_value->GetType());
+          block->AddPhi(phi);
+          (*locals)[i] = phi;
+        }
+      }
+    }
+  }
+  return locals;
+}
+
+HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) {
+  ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
+  return (*locals)[local];
+}
+
+void HInstructionBuilder::InitializeBlockLocals() {
+  current_locals_ = GetLocalsFor(current_block_);
+
+  if (current_block_->IsCatchBlock()) {
+    // Catch phis were already created and inputs collected from throwing sites.
+    if (kIsDebugBuild) {
+      // Make sure there was at least one throwing instruction which initialized
+      // locals (guaranteed by HGraphBuilder) and that all try blocks have been
+      // visited already (from HTryBoundary scoping and reverse post order).
+      bool catch_block_visited = false;
+      for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+        HBasicBlock* current = it.Current();
+        if (current == current_block_) {
+          catch_block_visited = true;
+        } else if (current->IsTryBlock()) {
+          const HTryBoundary& try_entry = current->GetTryCatchInformation()->GetTryEntry();
+          if (try_entry.HasExceptionHandler(*current_block_)) {
+            DCHECK(!catch_block_visited) << "Catch block visited before its try block.";
+          }
+        }
+      }
+      DCHECK_EQ(current_locals_->size(), graph_->GetNumberOfVRegs())
+          << "No instructions throwing into a live catch block.";
+    }
+  } else if (current_block_->IsLoopHeader()) {
+    // If the block is a loop header, we know we have only visited the pre header
+    // because we are visiting in reverse post order. We create phis for all initialized
+    // locals from the pre header. Their inputs will be populated at the end of
+    // the analysis.
+    for (size_t local = 0; local < current_locals_->size(); ++local) {
+      HInstruction* incoming =
+          ValueOfLocalAt(current_block_->GetLoopInformation()->GetPreHeader(), local);
+      if (incoming != nullptr) {
+        HPhi* phi = new (arena_) HPhi(
+            arena_,
+            local,
+            0,
+            incoming->GetType());
+        current_block_->AddPhi(phi);
+        (*current_locals_)[local] = phi;
+      }
+    }
+
+    // Save the loop header so that the last phase of the analysis knows which
+    // blocks need to be updated.
+    loop_headers_.push_back(current_block_);
+  } else if (current_block_->GetPredecessors().size() > 0) {
+    // All predecessors have already been visited because we are visiting in reverse post order.
+    // We merge the values of all locals, creating phis if those values differ.
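+    // For example (illustrative sketch): if predecessor P1 leaves v0 = a and
+    // predecessor P2 leaves v0 = b, we create v0 = Phi(a, b); if both leave
+    // v0 = a, then v0 simply stays a.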
+    for (size_t local = 0; local < current_locals_->size(); ++local) {
+      bool one_predecessor_has_no_value = false;
+      bool is_different = false;
+      HInstruction* value = ValueOfLocalAt(current_block_->GetPredecessors()[0], local);
+
+      for (HBasicBlock* predecessor : current_block_->GetPredecessors()) {
+        HInstruction* current = ValueOfLocalAt(predecessor, local);
+        if (current == nullptr) {
+          one_predecessor_has_no_value = true;
+          break;
+        } else if (current != value) {
+          is_different = true;
+        }
+      }
+
+      if (one_predecessor_has_no_value) {
+        // If one predecessor has no value for this local, we trust the verifier has
+        // successfully checked that there is a store dominating any read after this block.
+        continue;
+      }
+
+      if (is_different) {
+        HInstruction* first_input = ValueOfLocalAt(current_block_->GetPredecessors()[0], local);
+        HPhi* phi = new (arena_) HPhi(
+            arena_,
+            local,
+            current_block_->GetPredecessors().size(),
+            first_input->GetType());
+        for (size_t i = 0; i < current_block_->GetPredecessors().size(); i++) {
+          HInstruction* pred_value = ValueOfLocalAt(current_block_->GetPredecessors()[i], local);
+          phi->SetRawInputAt(i, pred_value);
+        }
+        current_block_->AddPhi(phi);
+        value = phi;
+      }
+      (*current_locals_)[local] = value;
+    }
+  }
+}
+
+void HInstructionBuilder::PropagateLocalsToCatchBlocks() {
+  const HTryBoundary& try_entry = current_block_->GetTryCatchInformation()->GetTryEntry();
+  for (HBasicBlock* catch_block : try_entry.GetExceptionHandlers()) {
+    ArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block);
+    DCHECK_EQ(handler_locals->size(), current_locals_->size());
+    for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
+      HInstruction* handler_value = (*handler_locals)[vreg];
+      if (handler_value == nullptr) {
+        // Vreg was undefined at a previously encountered throwing instruction
+        // and the catch phi was deleted. Do not record the local value.
+        continue;
+      }
+      DCHECK(handler_value->IsPhi());
+
+      HInstruction* local_value = (*current_locals_)[vreg];
+      if (local_value == nullptr) {
+        // This is the first instruction throwing into `catch_block` where
+        // `vreg` is undefined. Delete the catch phi.
+        catch_block->RemovePhi(handler_value->AsPhi());
+        (*handler_locals)[vreg] = nullptr;
+      } else {
+        // Vreg has been defined at all instructions throwing into `catch_block`
+        // encountered so far. Record the local value in the catch phi.
+        handler_value->AsPhi()->AddInput(local_value);
+      }
+    }
+  }
+}
+
+void HInstructionBuilder::AppendInstruction(HInstruction* instruction) {
+  current_block_->AddInstruction(instruction);
+  InitializeInstruction(instruction);
+}
+
+void HInstructionBuilder::InsertInstructionAtTop(HInstruction* instruction) {
+  if (current_block_->GetInstructions().IsEmpty()) {
+    current_block_->AddInstruction(instruction);
+  } else {
+    current_block_->InsertInstructionBefore(instruction, current_block_->GetFirstInstruction());
+  }
+  InitializeInstruction(instruction);
+}
+
+void HInstructionBuilder::InitializeInstruction(HInstruction* instruction) {
+  if (instruction->NeedsEnvironment()) {
+    HEnvironment* environment = new (arena_) HEnvironment(
+        arena_,
+        current_locals_->size(),
+        graph_->GetDexFile(),
+        graph_->GetMethodIdx(),
+        instruction->GetDexPc(),
+        graph_->GetInvokeType(),
+        instruction);
+    environment->CopyFrom(*current_locals_);
+    instruction->SetRawEnvironment(environment);
+  }
+}
+
+HInstruction* HInstructionBuilder::LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc) {
+  HInstruction* ref = LoadLocal(register_index, Primitive::kPrimNot);
+  if (!ref->CanBeNull()) {
+    return ref;
+  }
+
+  HNullCheck* null_check = new (arena_) HNullCheck(ref, dex_pc);
+  AppendInstruction(null_check);
+  return null_check;
+}
+
+void HInstructionBuilder::SetLoopHeaderPhiInputs() {
+  for (size_t i = loop_headers_.size(); i > 0; --i) {
+    HBasicBlock* block = loop_headers_[i - 1];
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* phi = it.Current()->AsPhi();
+      size_t vreg = phi->GetRegNumber();
+      for (HBasicBlock* predecessor : block->GetPredecessors()) {
+        HInstruction* value = ValueOfLocalAt(predecessor, vreg);
+        if (value == nullptr) {
+          // Vreg is undefined at this predecessor. Mark the phi dead and leave it
+          // with fewer inputs than predecessors; SsaChecker will fail unless it is removed.
+          phi->SetDead();
+          break;
+        } else {
+          phi->AddInput(value);
+        }
+      }
+    }
+  }
+}
+
+static bool IsBlockPopulated(HBasicBlock* block) {
+  if (block->IsLoopHeader()) {
+    // Suspend checks were inserted into loop headers during building of dominator tree.
+    DCHECK(block->GetFirstInstruction()->IsSuspendCheck());
+    return block->GetFirstInstruction() != block->GetLastInstruction();
+  } else {
+    return !block->GetInstructions().IsEmpty();
+  }
+}
+
+bool HInstructionBuilder::Build() {
+  locals_for_.resize(graph_->GetBlocks().size(),
+                     ArenaVector<HInstruction*>(arena_->Adapter(kArenaAllocGraphBuilder)));
+
+  // Find locations where we want to generate extra stackmaps for native debugging.
+  // This allows us to generate the info only at interesting points (for example,
+  // at the start of a Java statement) rather than before every dex instruction.
+  const bool native_debuggable = compiler_driver_ != nullptr &&
+                                 compiler_driver_->GetCompilerOptions().GetNativeDebuggable();
+  ArenaBitVector* native_debug_info_locations = nullptr;
+  if (native_debuggable) {
+    const uint32_t num_instructions = code_item_.insns_size_in_code_units_;
+    native_debug_info_locations = new (arena_) ArenaBitVector(arena_, num_instructions, false);
+    FindNativeDebugInfoLocations(native_debug_info_locations);
+  }
+
+  for (HReversePostOrderIterator block_it(*graph_); !block_it.Done(); block_it.Advance()) {
+    current_block_ = block_it.Current();
+    uint32_t block_dex_pc = current_block_->GetDexPc();
+
+    InitializeBlockLocals();
+
+    if (current_block_->IsEntryBlock()) {
+      InitializeParameters();
+      AppendInstruction(new (arena_) HSuspendCheck(0u));
+      AppendInstruction(new (arena_) HGoto(0u));
+      continue;
+    } else if (current_block_->IsExitBlock()) {
+      AppendInstruction(new (arena_) HExit());
+      continue;
+    } else if (current_block_->IsLoopHeader()) {
+      HSuspendCheck* suspend_check = new (arena_) HSuspendCheck(current_block_->GetDexPc());
+      current_block_->GetLoopInformation()->SetSuspendCheck(suspend_check);
+      // This is slightly odd because the loop header might not be empty (TryBoundary).
+      // But we're still creating the environment with locals from the top of the block.
+      InsertInstructionAtTop(suspend_check);
+    }
+
+    if (block_dex_pc == kNoDexPc || current_block_ != block_builder_->GetBlockAt(block_dex_pc)) {
+      // Synthetic block that does not need to be populated.
+      DCHECK(IsBlockPopulated(current_block_));
+      continue;
+    }
+
+    DCHECK(!IsBlockPopulated(current_block_));
+
+    for (CodeItemIterator it(code_item_, block_dex_pc); !it.Done(); it.Advance()) {
+      if (current_block_ == nullptr) {
+        // The previous instruction ended this block.
+        break;
+      }
+
+      uint32_t dex_pc = it.CurrentDexPc();
+      if (dex_pc != block_dex_pc && FindBlockStartingAt(dex_pc) != nullptr) {
+        // This dex_pc starts a new basic block.
+        break;
+      }
+
+      if (current_block_->IsTryBlock() && IsThrowingDexInstruction(it.CurrentInstruction())) {
+        PropagateLocalsToCatchBlocks();
+      }
+
+      if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) {
+        AppendInstruction(new (arena_) HNativeDebugInfo(dex_pc));
+      }
+
+      if (!ProcessDexInstruction(it.CurrentInstruction(), dex_pc)) {
+        return false;
+      }
+    }
+
+    if (current_block_ != nullptr) {
+      // Branching instructions clear current_block_, so we know the last
+      // instruction of the current block is not a branching instruction.
+      // We add an unconditional Goto to the next block.
+      DCHECK_EQ(current_block_->GetSuccessors().size(), 1u);
+      AppendInstruction(new (arena_) HGoto());
+    }
+  }
+
+  SetLoopHeaderPhiInputs();
+
+  return true;
+}
+
+void HInstructionBuilder::FindNativeDebugInfoLocations(ArenaBitVector* locations) {
+  // The callback gets called when the line number changes.
+  // In other words, it marks the start of a new Java statement.
+  struct Callback {
+    static bool Position(void* ctx, const DexFile::PositionInfo& entry) {
+      static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_);
+      return false;
+    }
+  };
+  dex_file_->DecodeDebugPositionInfo(&code_item_, Callback::Position, locations);
+  // Instruction-specific tweaks.
+  const Instruction* const begin = Instruction::At(code_item_.insns_);
+  const Instruction* const end = begin->RelativeAt(code_item_.insns_size_in_code_units_);
+  for (const Instruction* inst = begin; inst < end; inst = inst->Next()) {
+    switch (inst->Opcode()) {
+      case Instruction::MOVE_EXCEPTION: {
+        // Stop in the native debugger after the exception has been moved.
+        // The compiler also expects the move at the start of the basic block, so
+        // we do not want to interfere by inserting native-debug-info before it.
+        locations->ClearBit(inst->GetDexPc(code_item_.insns_));
+        const Instruction* next = inst->Next();
+        if (next < end) {
+          locations->SetBit(next->GetDexPc(code_item_.insns_));
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  }
+}
+
+HInstruction* HInstructionBuilder::LoadLocal(uint32_t reg_number, Primitive::Type type) const {
+  HInstruction* value = (*current_locals_)[reg_number];
+  DCHECK(value != nullptr);
+
+  // If the operation requests a specific type, we make sure its input is of that type.
+  if (type != value->GetType()) {
+    if (Primitive::IsFloatingPointType(type)) {
+      return ssa_builder_->GetFloatOrDoubleEquivalent(value, type);
+    } else if (type == Primitive::kPrimNot) {
+      return ssa_builder_->GetReferenceTypeEquivalent(value);
+    }
+  }
+
+  return value;
+}
+
+void HInstructionBuilder::UpdateLocal(uint32_t reg_number, HInstruction* stored_value) {
+  Primitive::Type stored_type = stored_value->GetType();
+  DCHECK_NE(stored_type, Primitive::kPrimVoid);
+
+  // Storing into vreg `reg_number` may implicitly invalidate the surrounding
+  // registers. Consider the following cases:
+  // (1) Storing a wide value must overwrite previous values in both `reg_number`
+  //     and `reg_number+1`. We store `nullptr` in `reg_number+1`.
+  // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number`
+  //     must invalidate it. We store `nullptr` in `reg_number-1`.
+  // Consequently, storing a wide value into the high vreg of another wide value
+  // will invalidate both `reg_number-1` and `reg_number+1`.
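+  //
+  // For example (illustrative sketch): if (v0, v1) currently hold a wide pair,
+  // storing an int into v1 clears v0 (case 2), while storing another wide
+  // value into v1 clears both v0 (case 2) and v2 (case 1).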
+
+  if (reg_number != 0) {
+    HInstruction* local_low = (*current_locals_)[reg_number - 1];
+    if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) {
+      // The vreg we are storing into was previously the high vreg of a pair.
+      // We need to invalidate its low vreg.
+      DCHECK((*current_locals_)[reg_number] == nullptr);
+      (*current_locals_)[reg_number - 1] = nullptr;
+    }
+  }
+
+  (*current_locals_)[reg_number] = stored_value;
+  if (Primitive::Is64BitType(stored_type)) {
+    // We are storing a pair. Invalidate the instruction in the high vreg.
+    (*current_locals_)[reg_number + 1] = nullptr;
+  }
+}
+
+void HInstructionBuilder::InitializeParameters() {
+  DCHECK(current_block_->IsEntryBlock());
+
+  // dex_compilation_unit_ is null only when unit testing.
+  if (dex_compilation_unit_ == nullptr) {
+    return;
+  }
+
+  const char* shorty = dex_compilation_unit_->GetShorty();
+  uint16_t number_of_parameters = graph_->GetNumberOfInVRegs();
+  uint16_t locals_index = graph_->GetNumberOfLocalVRegs();
+  uint16_t parameter_index = 0;
+
+  const DexFile::MethodId& referrer_method_id =
+      dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
+  if (!dex_compilation_unit_->IsStatic()) {
+    // Add the implicit 'this' argument, not expressed in the signature.
+    HParameterValue* parameter = new (arena_) HParameterValue(*dex_file_,
+                                                              referrer_method_id.class_idx_,
+                                                              parameter_index++,
+                                                              Primitive::kPrimNot,
+                                                              true);
+    AppendInstruction(parameter);
+    UpdateLocal(locals_index++, parameter);
+    number_of_parameters--;
+  }
+
+  const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
+  const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto);
+  for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) {
+    HParameterValue* parameter = new (arena_) HParameterValue(
+        *dex_file_,
+        arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
+        parameter_index++,
+        Primitive::GetType(shorty[shorty_pos]),
+        false);
+    ++shorty_pos;
+    AppendInstruction(parameter);
+    // Store the parameter value in the local that the dex code will use
+    // to reference that parameter.
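+    // For example (illustrative sketch, hypothetical layout): for a virtual
+    // method (J)V with registers_size=4 and ins=3, 'this' lands in v1 and the
+    // wide argument occupies v2/v3, leaving v0 as a pure local.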
+    UpdateLocal(locals_index++, parameter);
+    if (Primitive::Is64BitType(parameter->GetType())) {
+      i++;
+      locals_index++;
+      parameter_index++;
+    }
+  }
+}
+
+template<typename T>
+void HInstructionBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  T* comparison = new (arena_) T(first, second, dex_pc);
+  AppendInstruction(comparison);
+  AppendInstruction(new (arena_) HIf(comparison, dex_pc));
+  current_block_ = nullptr;
+}
+
+template<typename T>
+void HInstructionBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  T* comparison = new (arena_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc);
+  AppendInstruction(comparison);
+  AppendInstruction(new (arena_) HIf(comparison, dex_pc));
+  current_block_ = nullptr;
+}
+
+template<typename T>
+void HInstructionBuilder::Unop_12x(const Instruction& instruction,
+                                   Primitive::Type type,
+                                   uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  AppendInstruction(new (arena_) T(type, first, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+void HInstructionBuilder::Conversion_12x(const Instruction& instruction,
+                                         Primitive::Type input_type,
+                                         Primitive::Type result_type,
+                                         uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), input_type);
+  AppendInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_23x(const Instruction& instruction,
+                                    Primitive::Type type,
+                                    uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_23x_shift(const Instruction& instruction,
+                                          Primitive::Type type,
+                                          uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+void HInstructionBuilder::Binop_23x_cmp(const Instruction& instruction,
+                                        Primitive::Type type,
+                                        ComparisonBias bias,
+                                        uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  AppendInstruction(new (arena_) HCompare(type, first, second, bias, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_12x_shift(const Instruction& instruction,
+                                          Primitive::Type type,
+                                          uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_12x(const Instruction& instruction,
+                                    Primitive::Type type,
+                                    uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), type);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22s(), dex_pc);
+  if (reverse) {
+    std::swap(first, second);
+  }
+  AppendInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22b(), dex_pc);
+  if (reverse) {
+    std::swap(first, second);
+  }
+  AppendInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) {
+  Thread* self = Thread::Current();
+  return cu->IsConstructor()
+      && driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
+}
+
+// Returns true if `block` has only one successor which starts at the next
+// dex_pc after `instruction` at `dex_pc`.
+static bool IsFallthroughInstruction(const Instruction& instruction,
+                                     uint32_t dex_pc,
+                                     HBasicBlock* block) {
+  uint32_t next_dex_pc = dex_pc + instruction.SizeInCodeUnits();
+  return block->GetSingleSuccessor()->GetDexPc() == next_dex_pc;
+}
+
+void HInstructionBuilder::BuildSwitch(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  DexSwitchTable table(instruction, dex_pc);
+
+  if (table.GetNumEntries() == 0) {
+    // Empty switch. Code falls through to the next block.
+    DCHECK(IsFallthroughInstruction(instruction, dex_pc, current_block_));
+    AppendInstruction(new (arena_) HGoto(dex_pc));
+  } else if (table.ShouldBuildDecisionTree()) {
+    for (DexSwitchTableIterator it(table); !it.Done(); it.Advance()) {
+      HInstruction* case_value = graph_->GetIntConstant(it.CurrentKey(), dex_pc);
+      HEqual* comparison = new (arena_) HEqual(value, case_value, dex_pc);
+      AppendInstruction(comparison);
+      AppendInstruction(new (arena_) HIf(comparison, dex_pc));
+
+      if (!it.IsLast()) {
+        current_block_ = FindBlockStartingAt(it.GetDexPcForCurrentIndex());
+      }
+    }
+  } else {
+    AppendInstruction(
+        new (arena_) HPackedSwitch(table.GetEntryAt(0), table.GetNumEntries(), value, dex_pc));
+  }
+
+  current_block_ = nullptr;
+}
+
+void HInstructionBuilder::BuildReturn(const Instruction& instruction,
+                                      Primitive::Type type,
+                                      uint32_t dex_pc) {
+  if (type == Primitive::kPrimVoid) {
+    if (graph_->ShouldGenerateConstructorBarrier()) {
+      // The compilation unit is null during testing.
+      if (dex_compilation_unit_ != nullptr) {
+        DCHECK(RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_))
+          << "Inconsistent use of ShouldGenerateConstructorBarrier. Should not generate a barrier.";
+      }
+      AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
+    }
+    AppendInstruction(new (arena_) HReturnVoid(dex_pc));
+  } else {
+    HInstruction* value = LoadLocal(instruction.VRegA(), type);
+    AppendInstruction(new (arena_) HReturn(value, dex_pc));
+  }
+  current_block_ = nullptr;
+}
+
+static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) {
+  switch (opcode) {
+    case Instruction::INVOKE_STATIC:
+    case Instruction::INVOKE_STATIC_RANGE:
+      return kStatic;
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_DIRECT_RANGE:
+      return kDirect;
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_QUICK:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
+      return kVirtual;
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+      return kInterface;
+    case Instruction::INVOKE_SUPER_RANGE:
+    case Instruction::INVOKE_SUPER:
+      return kSuper;
+    default:
+      LOG(FATAL) << "Unexpected invoke opcode: " << opcode;
+      UNREACHABLE();
+  }
+}
+
+ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<3> hs(soa.Self());
+
+  ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
+  // We fetch the referenced class eagerly (that is, the class pointed to by the MethodId
+  // at method_idx), as `CanAccessResolvedMethod` expects it to be in the dex cache.
+  Handle<mirror::Class> methods_class(hs.NewHandle(class_linker->ResolveReferencedClassOfMethod(
+      method_idx, dex_compilation_unit_->GetDexCache(), class_loader)));
+
+  if (UNLIKELY(methods_class.Get() == nullptr)) {
+    // Clean up any exception left by type resolution.
+    soa.Self()->ClearException();
+    return nullptr;
+  }
+
+  ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>(
+      *dex_compilation_unit_->GetDexFile(),
+      method_idx,
+      dex_compilation_unit_->GetDexCache(),
+      class_loader,
+      /* referrer */ nullptr,
+      invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    // Clean up any exception left by method resolution.
+    soa.Self()->ClearException();
+    return nullptr;
+  }
+
+  // Check access. The class linker has a fast path for looking into the dex cache
+  // and does not check the access if it hits it.
+  if (compiling_class.Get() == nullptr) {
+    if (!resolved_method->IsPublic()) {
+      return nullptr;
+    }
+  } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
+                                                       resolved_method,
+                                                       dex_compilation_unit_->GetDexCache().Get(),
+                                                       method_idx)) {
+    return nullptr;
+  }
+
+  // We have to special case invoke-super, as ClassLinker::ResolveMethod does not.
+  // We need to look at the referrer's super class vtable to decide whether to turn this
+  // into an invoke-unresolved, which handles cross-dex invokes and abstract super methods,
+  // both of which require runtime handling.
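+  //
+  // For example (illustrative sketch): an invoke-super whose actual target is
+  // declared in a different dex file is rejected below; ResolveMethod then
+  // returns nullptr and BuildInvoke falls back to an invoke-unresolved that is
+  // resolved at runtime.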
+  if (invoke_type == kSuper) {
+    if (compiling_class.Get() == nullptr) {
+      // We could not determine the method's class, so we need to wait until runtime.
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      return nullptr;
+    }
+    if (!methods_class->IsAssignableFrom(compiling_class.Get())) {
+      // We cannot statically determine the target method. The runtime will throw a
+      // NoSuchMethodError on this one.
+      return nullptr;
+    }
+    ArtMethod* actual_method;
+    if (methods_class->IsInterface()) {
+      actual_method = methods_class->FindVirtualMethodForInterfaceSuper(
+          resolved_method, class_linker->GetImagePointerSize());
+    } else {
+      uint16_t vtable_index = resolved_method->GetMethodIndex();
+      actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
+          vtable_index, class_linker->GetImagePointerSize());
+    }
+    if (actual_method != resolved_method &&
+        !IsSameDexFile(*actual_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+      // The back-end code generator relies on this check in order to ensure that it will not
+      // attempt to read the dex_cache with a dex_method_index that is not from the correct
+      // dex_file. If we didn't do this check then the dex_method_index will not be updated in the
+      // builder, which means that the code-generator (and compiler driver during sharpening and
+      // inliner, maybe) might invoke an incorrect method.
+      // TODO: The actual method could still be referenced in the current dex file, so we
+      //       could try locating it.
+      // TODO: Remove the dex_file restriction.
+      return nullptr;
+    }
+    if (!actual_method->IsInvokable()) {
+      // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
+      // could resolve the callee to the wrong method.
+      return nullptr;
+    }
+    resolved_method = actual_method;
+  }
+
+  // Check for incompatible class changes. The class linker has a fast path for
+  // looking into the dex cache and does not check incompatible class changes if it hits it.
+  if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
+    return nullptr;
+  }
+
+  return resolved_method;
+}
+
+bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
+                                      uint32_t dex_pc,
+                                      uint32_t method_idx,
+                                      uint32_t number_of_vreg_arguments,
+                                      bool is_range,
+                                      uint32_t* args,
+                                      uint32_t register_index) {
+  InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
+  const char* descriptor = dex_file_->GetMethodShorty(method_idx);
+  Primitive::Type return_type = Primitive::GetType(descriptor[0]);
+
+  // Remove the return type from the 'proto'.
+  size_t number_of_arguments = strlen(descriptor) - 1;
+  if (invoke_type != kStatic) {  // instance call
+    // One extra argument for 'this'.
+    number_of_arguments++;
+  }
+
+  MethodReference target_method(dex_file_, method_idx);
+
+  // Special handling for string init.
+  int32_t string_init_offset = 0;
+  bool is_string_init = compiler_driver_->IsStringInit(method_idx,
+                                                       dex_file_,
+                                                       &string_init_offset);
+  // Replace calls to String.<init> with StringFactory.
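+  // For example (illustrative sketch, hypothetical bytecode):
+  //   new-instance v0, Ljava/lang/String;
+  //   invoke-direct {v0, v1}, Ljava/lang/String;-><init>([B)V
+  // is compiled as a static StringFactory call whose result replaces the
+  // pre-allocated string in v0 (see HandleStringInit).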
+  if (is_string_init) {
+    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
+        HInvokeStaticOrDirect::MethodLoadKind::kStringInit,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        dchecked_integral_cast<uint64_t>(string_init_offset),
+        0U
+    };
+    HInvoke* invoke = new (arena_) HInvokeStaticOrDirect(
+        arena_,
+        number_of_arguments - 1,
+        Primitive::kPrimNot /* return_type */,
+        dex_pc,
+        method_idx,
+        target_method,
+        dispatch_info,
+        invoke_type,
+        kStatic /* optimized_invoke_type */,
+        HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+    return HandleStringInit(invoke,
+                            number_of_vreg_arguments,
+                            args,
+                            register_index,
+                            is_range,
+                            descriptor);
+  }
+
+  ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
+    HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
+                                                     number_of_arguments,
+                                                     return_type,
+                                                     dex_pc,
+                                                     method_idx,
+                                                     invoke_type);
+    return HandleInvoke(invoke,
+                        number_of_vreg_arguments,
+                        args,
+                        register_index,
+                        is_range,
+                        descriptor,
+                        nullptr /* clinit_check */);
+  }
+
+  // Potential class initialization check, in the case of a static method call.
+  HClinitCheck* clinit_check = nullptr;
+  HInvoke* invoke = nullptr;
+  if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
+    // By default, consider that the called method implicitly requires
+    // an initialization check of its declaring class.
+    HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
+        = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
+    ScopedObjectAccess soa(Thread::Current());
+    if (invoke_type == kStatic) {
+      clinit_check = ProcessClinitCheckForInvoke(
+          dex_pc, resolved_method, method_idx, &clinit_check_requirement);
+    } else if (invoke_type == kSuper) {
+      if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+        // Update the target method to the one resolved. Note that this may be a no-op if
+        // we resolved to the method referenced by the instruction.
+        method_idx = resolved_method->GetDexMethodIndex();
+        target_method = MethodReference(dex_file_, method_idx);
+      }
+    }
+
+    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
+        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        0u,
+        0U
+    };
+    invoke = new (arena_) HInvokeStaticOrDirect(arena_,
+                                                number_of_arguments,
+                                                return_type,
+                                                dex_pc,
+                                                method_idx,
+                                                target_method,
+                                                dispatch_info,
+                                                invoke_type,
+                                                invoke_type,
+                                                clinit_check_requirement);
+  } else if (invoke_type == kVirtual) {
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
+    invoke = new (arena_) HInvokeVirtual(arena_,
+                                         number_of_arguments,
+                                         return_type,
+                                         dex_pc,
+                                         method_idx,
+                                         resolved_method->GetMethodIndex());
+  } else {
+    DCHECK_EQ(invoke_type, kInterface);
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
+    invoke = new (arena_) HInvokeInterface(arena_,
+                                           number_of_arguments,
+                                           return_type,
+                                           dex_pc,
+                                           method_idx,
+                                           resolved_method->GetDexMethodIndex());
+  }
+
+  return HandleInvoke(invoke,
+                      number_of_vreg_arguments,
+                      args,
+                      register_index,
+                      is_range,
+                      descriptor,
+                      clinit_check);
+}
+
+bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
+
+  bool finalizable;
+  bool needs_access_check = NeedsAccessCheck(type_index, dex_cache, &finalizable);
+
+  // Only the non-resolved entrypoint handles the finalizable class case. If we
+  // need access checks, then the class has not been resolved and may still turn
+  // out to be finalizable, so we use that entrypoint as well.
+  QuickEntrypointEnum entrypoint = (finalizable || needs_access_check)
+      ? kQuickAllocObject
+      : kQuickAllocObjectInitialized;
+
+  if (outer_dex_cache.Get() != dex_cache.Get()) {
+    // We currently do not support inlining allocations across dex files.
+    return false;
+  }
+
+  HLoadClass* load_class = new (arena_) HLoadClass(
+      graph_->GetCurrentMethod(),
+      type_index,
+      outer_dex_file,
+      IsOutermostCompilingClass(type_index),
+      dex_pc,
+      needs_access_check,
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, type_index));
+
+  AppendInstruction(load_class);
+  HInstruction* cls = load_class;
+  if (!IsInitialized(resolved_class)) {
+    cls = new (arena_) HClinitCheck(load_class, dex_pc);
+    AppendInstruction(cls);
+  }
+
+  AppendInstruction(new (arena_) HNewInstance(
+      cls,
+      graph_->GetCurrentMethod(),
+      dex_pc,
+      type_index,
+      *dex_compilation_unit_->GetDexFile(),
+      needs_access_check,
+      finalizable,
+      entrypoint));
+  return true;
+}
+
+static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
+}
+
+bool HInstructionBuilder::IsInitialized(Handle<mirror::Class> cls) const {
+  if (cls.Get() == nullptr) {
+    return false;
+  }
+
+  // `CanAssumeClassIsLoaded` will return true if we're JITting, or will
+  // check whether the class is in an image for AOT compilation.
+  if (cls->IsInitialized() &&
+      compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) {
+    return true;
+  }
+
+  if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  // TODO: We should walk over the inlined methods, but we don't pass
+  //       that information to the builder.
+  if (IsSubClass(GetCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  return false;
+}
+
+HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke(
+      uint32_t dex_pc,
+      ArtMethod* resolved_method,
+      uint32_t method_idx,
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Thread* self = Thread::Current();
+  StackHandleScope<2> hs(self);
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+  Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
+
+  // The index at which the method's class is stored in the DexCache's type array.
+  uint32_t storage_index = DexFile::kDexNoIndex;
+  bool is_outer_class = (resolved_method->GetDeclaringClass() == outer_class.Get());
+  if (is_outer_class) {
+    storage_index = outer_class->GetDexTypeIndex();
+  } else if (outer_dex_cache.Get() == dex_cache.Get()) {
+    // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer.
+    compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(),
+                                                               GetCompilingClass(),
+                                                               resolved_method,
+                                                               method_idx,
+                                                               &storage_index);
+  }
+
+  HClinitCheck* clinit_check = nullptr;
+
+  if (IsInitialized(resolved_method_class)) {
+    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
+  } else if (storage_index != DexFile::kDexNoIndex) {
+    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
+    HLoadClass* load_class = new (arena_) HLoadClass(
+        graph_->GetCurrentMethod(),
+        storage_index,
+        outer_dex_file,
+        is_outer_class,
+        dex_pc,
+        /*needs_access_check*/ false,
+        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, storage_index));
+    AppendInstruction(load_class);
+    clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
+    AppendInstruction(clinit_check);
+  }
+  return clinit_check;
+}
+
+bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke,
+                                               uint32_t number_of_vreg_arguments,
+                                               uint32_t* args,
+                                               uint32_t register_index,
+                                               bool is_range,
+                                               const char* descriptor,
+                                               size_t start_index,
+                                               size_t* argument_index) {
+  uint32_t descriptor_index = 1;  // Skip the return type.
+
+  for (size_t i = start_index;
+       // Make sure we don't go over the expected arguments or over the number of
+       // dex registers given. If the instruction was seen as dead by the verifier,
+       // it hasn't been properly checked.
+       (i < number_of_vreg_arguments) && (*argument_index < invoke->GetNumberOfArguments());
+       i++, (*argument_index)++) {
+    Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
+    bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
+    if (!is_range
+        && is_wide
+        && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) {
+      // Longs and doubles should be in pairs, that is, sequential registers. The verifier should
+      // reject any class where this is violated. However, the verifier only performs these checks
+      // on instructions that are not trivially dead, so we simply bail out of the compilation.
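+      // For example (illustrative sketch): a long argument passed as {v1, v3}
+      // rather than the sequential pair {v1, v2} triggers this bailout.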
+      VLOG(compiler) << "Did not compile "
+                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                     << " because of non-sequential dex register pair in wide argument";
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+      return false;
+    }
+    HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
+    invoke->SetArgumentAt(*argument_index, arg);
+    if (is_wide) {
+      i++;
+    }
+  }
+
+  if (*argument_index != invoke->GetNumberOfArguments()) {
+    VLOG(compiler) << "Did not compile "
+                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                   << " because of wrong number of arguments in invoke instruction";
+    MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+    return false;
+  }
+
+  if (invoke->IsInvokeStaticOrDirect() &&
+      HInvokeStaticOrDirect::NeedsCurrentMethodInput(
+          invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) {
+    invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod());
+    (*argument_index)++;
+  }
+
+  return true;
+}
+
+bool HInstructionBuilder::HandleInvoke(HInvoke* invoke,
+                                       uint32_t number_of_vreg_arguments,
+                                       uint32_t* args,
+                                       uint32_t register_index,
+                                       bool is_range,
+                                       const char* descriptor,
+                                       HClinitCheck* clinit_check) {
+  DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit());
+
+  size_t start_index = 0;
+  size_t argument_index = 0;
+  if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) {  // Instance call.
+    HInstruction* arg = LoadNullCheckedLocal(is_range ? register_index : args[0],
+                                             invoke->GetDexPc());
+    invoke->SetArgumentAt(0, arg);
+    start_index = 1;
+    argument_index = 1;
+  }
+
+  if (!SetupInvokeArguments(invoke,
+                            number_of_vreg_arguments,
+                            args,
+                            register_index,
+                            is_range,
+                            descriptor,
+                            start_index,
+                            &argument_index)) {
+    return false;
+  }
+
+  if (clinit_check != nullptr) {
+    // Add the class initialization check as last input of `invoke`.
+    DCHECK(invoke->IsInvokeStaticOrDirect());
+    DCHECK(invoke->AsInvokeStaticOrDirect()->GetClinitCheckRequirement()
+        == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit);
+    invoke->SetArgumentAt(argument_index, clinit_check);
+    argument_index++;
+  }
+
+  AppendInstruction(invoke);
+  latest_result_ = invoke;
+
+  return true;
+}
+
+bool HInstructionBuilder::HandleStringInit(HInvoke* invoke,
+                                           uint32_t number_of_vreg_arguments,
+                                           uint32_t* args,
+                                           uint32_t register_index,
+                                           bool is_range,
+                                           const char* descriptor) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  DCHECK(invoke->AsInvokeStaticOrDirect()->IsStringInit());
+
+  size_t start_index = 1;
+  size_t argument_index = 0;
+  if (!SetupInvokeArguments(invoke,
+                            number_of_vreg_arguments,
+                            args,
+                            register_index,
+                            is_range,
+                            descriptor,
+                            start_index,
+                            &argument_index)) {
+    return false;
+  }
+
+  AppendInstruction(invoke);
+
+  // This is a StringFactory call, not an actual String constructor. Its result
+  // replaces the empty String pre-allocated by NewInstance.
+  uint32_t orig_this_reg = is_range ? register_index : args[0];
+  HInstruction* arg_this = LoadLocal(orig_this_reg, Primitive::kPrimNot);
+
+  // Replacing the NewInstance might render it redundant. Keep a list of these
+  // to be visited once it is clear whether it has remaining uses.
+  if (arg_this->IsNewInstance()) {
+    ssa_builder_->AddUninitializedString(arg_this->AsNewInstance());
+  } else {
+    DCHECK(arg_this->IsPhi());
+    // NewInstance is not the direct input of the StringFactory call. It might
+    // be redundant but optimizing this case is not worth the effort.
+  }
+
+  // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
+  for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
+    if ((*current_locals_)[vreg] == arg_this) {
+      (*current_locals_)[vreg] = invoke;
+    }
+  }
+
+  return true;
+}
+
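+// The leading character of the field type descriptor is enough to derive the
+// primitive type; class ('L') and array ('[') descriptors both map to the
+// reference type.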
+static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) {
+  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
+  const char* type = dex_file.GetFieldTypeDescriptor(field_id);
+  return Primitive::GetType(type[0]);
+}
+
+bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
+                                                   uint32_t dex_pc,
+                                                   bool is_put) {
+  uint32_t source_or_dest_reg = instruction.VRegA_22c();
+  uint32_t obj_reg = instruction.VRegB_22c();
+  uint16_t field_index;
+  if (instruction.IsQuickened()) {
+    if (!CanDecodeQuickenedInfo()) {
+      return false;
+    }
+    field_index = LookupQuickenedInfo(dex_pc);
+  } else {
+    field_index = instruction.VRegC_22c();
+  }
+
+  ScopedObjectAccess soa(Thread::Current());
+  ArtField* resolved_field =
+      compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
+
+  HInstruction* object = LoadNullCheckedLocal(obj_reg, dex_pc);
+
+  Primitive::Type field_type = (resolved_field == nullptr)
+      ? GetFieldAccessType(*dex_file_, field_index)
+      : resolved_field->GetTypeAsPrimitiveType();
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    HInstruction* field_set = nullptr;
+    if (resolved_field == nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      field_set = new (arena_) HUnresolvedInstanceFieldSet(object,
+                                                           value,
+                                                           field_type,
+                                                           field_index,
+                                                           dex_pc);
+    } else {
+      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
+      field_set = new (arena_) HInstanceFieldSet(object,
+                                                 value,
+                                                 field_type,
+                                                 resolved_field->GetOffset(),
+                                                 resolved_field->IsVolatile(),
+                                                 field_index,
+                                                 class_def_index,
+                                                 *dex_file_,
+                                                 dex_compilation_unit_->GetDexCache(),
+                                                 dex_pc);
+    }
+    AppendInstruction(field_set);
+  } else {
+    HInstruction* field_get = nullptr;
+    if (resolved_field == nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      field_get = new (arena_) HUnresolvedInstanceFieldGet(object,
+                                                           field_type,
+                                                           field_index,
+                                                           dex_pc);
+    } else {
+      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
+      field_get = new (arena_) HInstanceFieldGet(object,
+                                                 field_type,
+                                                 resolved_field->GetOffset(),
+                                                 resolved_field->IsVolatile(),
+                                                 field_index,
+                                                 class_def_index,
+                                                 *dex_file_,
+                                                 dex_compilation_unit_->GetDexCache(),
+                                                 dex_pc);
+    }
+    AppendInstruction(field_get);
+    UpdateLocal(source_or_dest_reg, field_get);
+  }
+
+  return true;
+}
+
+static mirror::Class* GetClassFrom(CompilerDriver* driver,
+                                   const DexCompilationUnit& compilation_unit) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
+  Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache();
+
+  return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
+}
+
+mirror::Class* HInstructionBuilder::GetOutermostCompilingClass() const {
+  return GetClassFrom(compiler_driver_, *outer_compilation_unit_);
+}
+
+mirror::Class* HInstructionBuilder::GetCompilingClass() const {
+  return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
+}
+
+bool HInstructionBuilder::IsOutermostCompilingClass(uint16_t type_index) const {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
+      soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+
+  // GetOutermostCompilingClass returns null when the class is unresolved
+  // (e.g. if it derives from an unresolved class), which is bogus given that
+  // we are compiling it.
+  // When this happens we cannot establish a direct relation between the
+  // current class and the outer class, so we return false.
+  // (Note that this is only used for optimizing invokes and field accesses.)
+  return (cls.Get() != nullptr) && (outer_class.Get() == cls.Get());
+}
+
+void HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
+                                                           uint32_t dex_pc,
+                                                           bool is_put,
+                                                           Primitive::Type field_type) {
+  uint32_t source_or_dest_reg = instruction.VRegA_21c();
+  uint16_t field_index = instruction.VRegB_21c();
+
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    AppendInstruction(
+        new (arena_) HUnresolvedStaticFieldSet(value, field_type, field_index, dex_pc));
+  } else {
+    AppendInstruction(new (arena_) HUnresolvedStaticFieldGet(field_type, field_index, dex_pc));
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+}
+
+bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
+                                                 uint32_t dex_pc,
+                                                 bool is_put) {
+  uint32_t source_or_dest_reg = instruction.VRegA_21c();
+  uint16_t field_index = instruction.VRegB_21c();
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  ArtField* resolved_field = compiler_driver_->ResolveField(
+      soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true);
+
+  if (resolved_field == nullptr) {
+    MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+    Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index);
+    BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
+    return true;
+  }
+
+  Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+
+  // The index at which the field's class is stored in the DexCache's type array.
+  uint32_t storage_index;
+  bool is_outer_class = (outer_class.Get() == resolved_field->GetDeclaringClass());
+  if (is_outer_class) {
+    storage_index = outer_class->GetDexTypeIndex();
+  } else if (outer_dex_cache.Get() != dex_cache.Get()) {
+    // The compiler driver currently cannot handle multiple dex caches being
+    // involved, so just bail out.
+    return false;
+  } else {
+    // TODO: This is rather expensive. Profile it and cache the results if needed.
+    std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
+        outer_dex_cache.Get(),
+        GetCompilingClass(),
+        resolved_field,
+        field_index,
+        &storage_index);
+    bool can_easily_access = is_put ? pair.second : pair.first;
+    if (!can_easily_access) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
+      BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
+      return true;
+    }
+  }
+
+  bool is_in_cache =
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, storage_index);
+  HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
+                                                 storage_index,
+                                                 outer_dex_file,
+                                                 is_outer_class,
+                                                 dex_pc,
+                                                 /*needs_access_check*/ false,
+                                                 is_in_cache);
+  AppendInstruction(constant);
+
+  HInstruction* cls = constant;
+
+  Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
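+  // If the declaring class may be uninitialized at this point, guard the
+  // access with an explicit HClinitCheck on the loaded class.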
+  if (!IsInitialized(klass)) {
+    cls = new (arena_) HClinitCheck(constant, dex_pc);
+    AppendInstruction(cls);
+  }
+
+  uint16_t class_def_index = klass->GetDexClassDefIndex();
+  if (is_put) {
+    // We need to keep the class alive before loading the value.
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    DCHECK_EQ(HPhi::ToPhiType(value->GetType()), HPhi::ToPhiType(field_type));
+    AppendInstruction(new (arena_) HStaticFieldSet(cls,
+                                                   value,
+                                                   field_type,
+                                                   resolved_field->GetOffset(),
+                                                   resolved_field->IsVolatile(),
+                                                   field_index,
+                                                   class_def_index,
+                                                   *dex_file_,
+                                                   dex_cache_,
+                                                   dex_pc));
+  } else {
+    AppendInstruction(new (arena_) HStaticFieldGet(cls,
+                                                   field_type,
+                                                   resolved_field->GetOffset(),
+                                                   resolved_field->IsVolatile(),
+                                                   field_index,
+                                                   class_def_index,
+                                                   *dex_file_,
+                                                   dex_cache_,
+                                                   dex_pc));
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+  return true;
+}
+
+void HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg,
+                                             uint16_t first_vreg,
+                                             int64_t second_vreg_or_constant,
+                                             uint32_t dex_pc,
+                                             Primitive::Type type,
+                                             bool second_is_constant,
+                                             bool isDiv) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  HInstruction* first = LoadLocal(first_vreg, type);
+  HInstruction* second = nullptr;
+  if (second_is_constant) {
+    if (type == Primitive::kPrimInt) {
+      second = graph_->GetIntConstant(second_vreg_or_constant, dex_pc);
+    } else {
+      second = graph_->GetLongConstant(second_vreg_or_constant, dex_pc);
+    }
+  } else {
+    second = LoadLocal(second_vreg_or_constant, type);
+  }
+
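+  // A runtime zero check is needed unless the divisor is a constant that is
+  // known to be non-zero.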
+  if (!second_is_constant
+      || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0)
+      || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) {
+    second = new (arena_) HDivZeroCheck(second, dex_pc);
+    AppendInstruction(second);
+  }
+
+  if (isDiv) {
+    AppendInstruction(new (arena_) HDiv(type, first, second, dex_pc));
+  } else {
+    AppendInstruction(new (arena_) HRem(type, first, second, dex_pc));
+  }
+  UpdateLocal(out_vreg, current_block_->GetLastInstruction());
+}
+
+void HInstructionBuilder::BuildArrayAccess(const Instruction& instruction,
+                                           uint32_t dex_pc,
+                                           bool is_put,
+                                           Primitive::Type anticipated_type) {
+  uint8_t source_or_dest_reg = instruction.VRegA_23x();
+  uint8_t array_reg = instruction.VRegB_23x();
+  uint8_t index_reg = instruction.VRegC_23x();
+
+  HInstruction* object = LoadNullCheckedLocal(array_reg, dex_pc);
+  HInstruction* length = new (arena_) HArrayLength(object, dex_pc);
+  AppendInstruction(length);
+  HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt);
+  index = new (arena_) HBoundsCheck(index, length, dex_pc);
+  AppendInstruction(index);
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type);
+    // TODO: Insert a type check node if the type is Object.
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, anticipated_type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  } else {
+    HArrayGet* aget = new (arena_) HArrayGet(object, index, anticipated_type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArrayGet(aget);
+    AppendInstruction(aget);
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+  graph_->SetHasBoundsChecks(true);
+}
+
+void HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
+                                              uint32_t type_index,
+                                              uint32_t number_of_vreg_arguments,
+                                              bool is_range,
+                                              uint32_t* args,
+                                              uint32_t register_index) {
+  HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
+  bool finalizable;
+  QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
+      ? kQuickAllocArrayWithAccessCheck
+      : kQuickAllocArray;
+  HInstruction* object = new (arena_) HNewArray(length,
+                                                graph_->GetCurrentMethod(),
+                                                dex_pc,
+                                                type_index,
+                                                *dex_compilation_unit_->GetDexFile(),
+                                                entrypoint);
+  AppendInstruction(object);
+
+  const char* descriptor = dex_file_->StringByTypeIdx(type_index);
+  DCHECK_EQ(descriptor[0], '[') << descriptor;
+  char primitive = descriptor[1];
+  DCHECK(primitive == 'I'
+      || primitive == 'L'
+      || primitive == '[') << descriptor;
+  bool is_reference_array = (primitive == 'L') || (primitive == '[');
+  Primitive::Type type = is_reference_array ? Primitive::kPrimNot : Primitive::kPrimInt;
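+  // Only int and reference arrays are expected here (see the DCHECK above);
+  // other element types presumably never reach this code in practice.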
+
+  for (size_t i = 0; i < number_of_vreg_arguments; ++i) {
+    HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type);
+    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  }
+  latest_result_ = object;
+}
+
+template <typename T>
+void HInstructionBuilder::BuildFillArrayData(HInstruction* object,
+                                             const T* data,
+                                             uint32_t element_count,
+                                             Primitive::Type anticipated_type,
+                                             uint32_t dex_pc) {
+  for (uint32_t i = 0; i < element_count; ++i) {
+    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
+    HInstruction* value = graph_->GetIntConstant(data[i], dex_pc);
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, anticipated_type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  }
+}
+
+void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* array = LoadNullCheckedLocal(instruction.VRegA_31t(), dex_pc);
+  HInstruction* length = new (arena_) HArrayLength(array, dex_pc);
+  AppendInstruction(length);
+
+  int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
+  const Instruction::ArrayDataPayload* payload =
+      reinterpret_cast<const Instruction::ArrayDataPayload*>(code_item_.insns_ + payload_offset);
+  const uint8_t* data = payload->data;
+  uint32_t element_count = payload->element_count;
+
+  // The implementation of this DEX instruction seems to do the bounds check
+  // before any of the stores.
+  HInstruction* last_index = graph_->GetIntConstant(payload->element_count - 1, dex_pc);
+  AppendInstruction(new (arena_) HBoundsCheck(last_index, length, dex_pc));
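+  // Checking the last index against the length subsumes the checks for all
+  // smaller indices, so the stores emitted below carry no bounds checks.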
+
+  switch (payload->element_width) {
+    case 1:
+      BuildFillArrayData(array,
+                         reinterpret_cast<const int8_t*>(data),
+                         element_count,
+                         Primitive::kPrimByte,
+                         dex_pc);
+      break;
+    case 2:
+      BuildFillArrayData(array,
+                         reinterpret_cast<const int16_t*>(data),
+                         element_count,
+                         Primitive::kPrimShort,
+                         dex_pc);
+      break;
+    case 4:
+      BuildFillArrayData(array,
+                         reinterpret_cast<const int32_t*>(data),
+                         element_count,
+                         Primitive::kPrimInt,
+                         dex_pc);
+      break;
+    case 8:
+      BuildFillWideArrayData(array,
+                             reinterpret_cast<const int64_t*>(data),
+                             element_count,
+                             dex_pc);
+      break;
+    default:
+      LOG(FATAL) << "Unknown element width: " << payload->element_width;
+  }
+  graph_->SetHasBoundsChecks(true);
+}
+
+void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object,
+                                                 const int64_t* data,
+                                                 uint32_t element_count,
+                                                 uint32_t dex_pc) {
+  for (uint32_t i = 0; i < element_count; ++i) {
+    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
+    HInstruction* value = graph_->GetLongConstant(data[i], dex_pc);
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, Primitive::kPrimLong, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  }
+}
+
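+// Picks the cheapest type-check strategy that the statically known class
+// allows: exact checks for final classes (and for arrays that cannot be
+// assigned from other types), dedicated paths for interfaces and arrays, a
+// class-hierarchy walk otherwise, and a fully dynamic check when the class
+// is unresolved.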
+static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (cls.Get() == nullptr) {
+    return TypeCheckKind::kUnresolvedCheck;
+  } else if (cls->IsInterface()) {
+    return TypeCheckKind::kInterfaceCheck;
+  } else if (cls->IsArrayClass()) {
+    if (cls->GetComponentType()->IsObjectClass()) {
+      return TypeCheckKind::kArrayObjectCheck;
+    } else if (cls->CannotBeAssignedFromOtherTypes()) {
+      return TypeCheckKind::kExactCheck;
+    } else {
+      return TypeCheckKind::kArrayCheck;
+    }
+  } else if (cls->IsFinal()) {
+    return TypeCheckKind::kExactCheck;
+  } else if (cls->IsAbstract()) {
+    return TypeCheckKind::kAbstractClassCheck;
+  } else {
+    return TypeCheckKind::kClassHierarchyCheck;
+  }
+}
+
+void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
+                                         uint8_t destination,
+                                         uint8_t reference,
+                                         uint16_t type_index,
+                                         uint32_t dex_pc) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+
+  bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
+      dex_compilation_unit_->GetDexMethodIndex(),
+      dex_cache,
+      type_index);
+
+  HInstruction* object = LoadLocal(reference, Primitive::kPrimNot);
+  HLoadClass* cls = new (arena_) HLoadClass(
+      graph_->GetCurrentMethod(),
+      type_index,
+      dex_file,
+      IsOutermostCompilingClass(type_index),
+      dex_pc,
+      !can_access,
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_cache, type_index));
+  AppendInstruction(cls);
+
+  TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
+  if (instruction.Opcode() == Instruction::INSTANCE_OF) {
+    AppendInstruction(new (arena_) HInstanceOf(object, cls, check_kind, dex_pc));
+    UpdateLocal(destination, current_block_->GetLastInstruction());
+  } else {
+    DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
+    // We emit a CheckCast followed by a BoundType. CheckCast is a statement
+    // which may throw. If it succeeds BoundType sets the new type of `object`
+    // for all subsequent uses.
+    AppendInstruction(new (arena_) HCheckCast(object, cls, check_kind, dex_pc));
+    AppendInstruction(new (arena_) HBoundType(object, dex_pc));
+    UpdateLocal(reference, current_block_->GetLastInstruction());
+  }
+}
+
+bool HInstructionBuilder::NeedsAccessCheck(uint32_t type_index,
+                                           Handle<mirror::DexCache> dex_cache,
+                                           bool* finalizable) const {
+  return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
+      dex_compilation_unit_->GetDexMethodIndex(), dex_cache, type_index, finalizable);
+}
+
+bool HInstructionBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
+  ScopedObjectAccess soa(Thread::Current());
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  return NeedsAccessCheck(type_index, dex_cache, finalizable);
+}
+
+bool HInstructionBuilder::CanDecodeQuickenedInfo() const {
+  return interpreter_metadata_ != nullptr;
+}
+
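+// `interpreter_metadata_` appears to be a stream of (dex_pc, value) pairs,
+// both unsigned-LEB128-encoded and ordered by dex_pc (see the DCHECK_LE
+// below). Lookups consume the stream lazily and stash any pairs skipped on
+// the way in `skipped_interpreter_metadata_` for later queries.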
+uint16_t HInstructionBuilder::LookupQuickenedInfo(uint32_t dex_pc) {
+  DCHECK(interpreter_metadata_ != nullptr);
+
+  // First check if the info has already been decoded from `interpreter_metadata_`.
+  auto it = skipped_interpreter_metadata_.find(dex_pc);
+  if (it != skipped_interpreter_metadata_.end()) {
+    // Remove the entry from the map and return the parsed info.
+    uint16_t value_in_map = it->second;
+    skipped_interpreter_metadata_.erase(it);
+    return value_in_map;
+  }
+
+  // Otherwise start parsing `interpreter_metadata_` until the slot for `dex_pc`
+  // is found. Store skipped values in the `skipped_interpreter_metadata_` map.
+  while (true) {
+    uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
+    uint16_t value_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
+    DCHECK_LE(dex_pc_in_map, dex_pc);
+
+    if (dex_pc_in_map == dex_pc) {
+      return value_in_map;
+    } else {
+      skipped_interpreter_metadata_.Put(dex_pc_in_map, value_in_map);
+    }
+  }
+}
+
+bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
+  switch (instruction.Opcode()) {
+    case Instruction::CONST_4: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_11n(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_16: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21s(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_31i(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_HIGH16: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21h() << 16, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_16: {
+      int32_t register_index = instruction.VRegA();
+      // Get 16 bits of constant value, sign extended to 64 bits.
+      int64_t value = instruction.VRegB_21s();
+      value <<= 48;
+      value >>= 48;
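+      // For example, 0xFFFF (-1 as a signed 16-bit value) becomes
+      // 0xFFFFFFFFFFFFFFFF after this arithmetic shift pair.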
+      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_32: {
+      int32_t register_index = instruction.VRegA();
+      // Get 32 bits of constant value, sign extended to 64 bits.
+      int64_t value = instruction.VRegB_31i();
+      value <<= 32;
+      value >>= 32;
+      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE: {
+      int32_t register_index = instruction.VRegA();
+      HLongConstant* constant = graph_->GetLongConstant(instruction.VRegB_51l(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_HIGH16: {
+      int32_t register_index = instruction.VRegA();
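+      // VRegB_21h() holds the most significant 16 bits of the constant; the
+      // low 48 bits are zero.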
+      int64_t value = static_cast<int64_t>(instruction.VRegB_21h()) << 48;
+      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    // Note that the SSA building will refine the types.
+    case Instruction::MOVE:
+    case Instruction::MOVE_FROM16:
+    case Instruction::MOVE_16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    // Note that the SSA building will refine the types.
+    case Instruction::MOVE_WIDE:
+    case Instruction::MOVE_WIDE_FROM16:
+    case Instruction::MOVE_WIDE_16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimLong);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    case Instruction::MOVE_OBJECT:
+    case Instruction::MOVE_OBJECT_16:
+    case Instruction::MOVE_OBJECT_FROM16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    case Instruction::RETURN_VOID_NO_BARRIER:
+    case Instruction::RETURN_VOID: {
+      BuildReturn(instruction, Primitive::kPrimVoid, dex_pc);
+      break;
+    }
+
+#define IF_XX(comparison, cond) \
+    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \
+    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break
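+    // Each IF_XX expands to the two-register form (if-XX, format 22t) and
+    // the compare-with-zero form (if-XXz, format 21t).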
+
+    IF_XX(HEqual, EQ);
+    IF_XX(HNotEqual, NE);
+    IF_XX(HLessThan, LT);
+    IF_XX(HLessThanOrEqual, LE);
+    IF_XX(HGreaterThan, GT);
+    IF_XX(HGreaterThanOrEqual, GE);
+
+    case Instruction::GOTO:
+    case Instruction::GOTO_16:
+    case Instruction::GOTO_32: {
+      AppendInstruction(new (arena_) HGoto(dex_pc));
+      current_block_ = nullptr;
+      break;
+    }
+
+    case Instruction::RETURN: {
+      BuildReturn(instruction, return_type_, dex_pc);
+      break;
+    }
+
+    case Instruction::RETURN_OBJECT: {
+      BuildReturn(instruction, return_type_, dex_pc);
+      break;
+    }
+
+    case Instruction::RETURN_WIDE: {
+      BuildReturn(instruction, return_type_, dex_pc);
+      break;
+    }
+
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_STATIC:
+    case Instruction::INVOKE_SUPER:
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_QUICK: {
+      uint16_t method_idx;
+      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) {
+        if (!CanDecodeQuickenedInfo()) {
+          return false;
+        }
+        method_idx = LookupQuickenedInfo(dex_pc);
+      } else {
+        method_idx = instruction.VRegB_35c();
+      }
+      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
+      uint32_t args[5];
+      instruction.GetVarArgs(args);
+      if (!BuildInvoke(instruction, dex_pc, method_idx,
+                       number_of_vreg_arguments, false, args, -1)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::INVOKE_DIRECT_RANGE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+    case Instruction::INVOKE_STATIC_RANGE:
+    case Instruction::INVOKE_SUPER_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+      uint16_t method_idx;
+      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) {
+        if (!CanDecodeQuickenedInfo()) {
+          return false;
+        }
+        method_idx = LookupQuickenedInfo(dex_pc);
+      } else {
+        method_idx = instruction.VRegB_3rc();
+      }
+      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
+      uint32_t register_index = instruction.VRegC();
+      if (!BuildInvoke(instruction, dex_pc, method_idx,
+                       number_of_vreg_arguments, true, nullptr, register_index)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::NEG_INT: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::NEG_LONG: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::NEG_FLOAT: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::NEG_DOUBLE: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::NOT_INT: {
+      Unop_12x<HNot>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::NOT_LONG: {
+      Unop_12x<HNot>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::LONG_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::LONG_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::LONG_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_BYTE: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_SHORT: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_CHAR: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_LONG: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_DOUBLE: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_FLOAT: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_INT: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_LONG: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_FLOAT: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_DOUBLE: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_LONG: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_FLOAT: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_DOUBLE: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_INT: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, false, true);
+      break;
+    }
+
+    case Instruction::DIV_LONG: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimLong, false, true);
+      break;
+    }
+
+    case Instruction::DIV_FLOAT: {
+      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_DOUBLE: {
+      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_INT: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, false, false);
+      break;
+    }
+
+    case Instruction::REM_LONG: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimLong, false, false);
+      break;
+    }
+
+    case Instruction::REM_FLOAT: {
+      Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_DOUBLE: {
+      Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT: {
+      Binop_23x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_LONG: {
+      Binop_23x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_INT: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_LONG: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_INT: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_LONG: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_INT: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_LONG: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT: {
+      Binop_23x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_LONG: {
+      Binop_23x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT: {
+      Binop_23x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_LONG: {
+      Binop_23x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_LONG_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_DOUBLE_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_FLOAT_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_INT_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_LONG_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_FLOAT_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_DOUBLE_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_LONG_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_FLOAT_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_DOUBLE_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_INT_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimInt, false, true);
+      break;
+    }
+
+    case Instruction::DIV_LONG_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimLong, false, true);
+      break;
+    }
+
+    case Instruction::REM_INT_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimInt, false, false);
+      break;
+    }
+
+    case Instruction::REM_LONG_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimLong, false, false);
+      break;
+    }
+
+    case Instruction::REM_FLOAT_2ADDR: {
+      Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_DOUBLE_2ADDR: {
+      Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_INT_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_LONG_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_INT_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_LONG_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_INT_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_LONG_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_FLOAT_2ADDR: {
+      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_DOUBLE_2ADDR: {
+      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT_2ADDR: {
+      Binop_12x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_LONG_2ADDR: {
+      Binop_12x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT_2ADDR: {
+      Binop_12x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_LONG_2ADDR: {
+      Binop_12x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT_2ADDR: {
+      Binop_12x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_LONG_2ADDR: {
+      Binop_12x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT_LIT16: {
+      Binop_22s<HAdd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT_LIT16: {
+      Binop_22s<HAnd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT_LIT16: {
+      Binop_22s<HOr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT_LIT16: {
+      Binop_22s<HXor>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::RSUB_INT: {
+      Binop_22s<HSub>(instruction, true, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT_LIT16: {
+      Binop_22s<HMul>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT_LIT8: {
+      Binop_22b<HAdd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT_LIT8: {
+      Binop_22b<HAnd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT_LIT8: {
+      Binop_22b<HOr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT_LIT8: {
+      Binop_22b<HXor>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::RSUB_INT_LIT8: {
+      Binop_22b<HSub>(instruction, true, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT_LIT8: {
+      Binop_22b<HMul>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_INT_LIT16:
+    case Instruction::DIV_INT_LIT8: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, true, true);
+      break;
+    }
+
+    case Instruction::REM_INT_LIT16:
+    case Instruction::REM_INT_LIT8: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, true, false);
+      break;
+    }
+
+    case Instruction::SHL_INT_LIT8: {
+      Binop_22b<HShl>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_INT_LIT8: {
+      Binop_22b<HShr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_INT_LIT8: {
+      Binop_22b<HUShr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::NEW_INSTANCE: {
+      if (!BuildNewInstance(instruction.VRegB_21c(), dex_pc)) {
+        return false;
+      }
+      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::NEW_ARRAY: {
+      uint16_t type_index = instruction.VRegC_22c();
+      HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt);
+      bool finalizable;
+      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
+          ? kQuickAllocArrayWithAccessCheck
+          : kQuickAllocArray;
+      AppendInstruction(new (arena_) HNewArray(length,
+                                               graph_->GetCurrentMethod(),
+                                               dex_pc,
+                                               type_index,
+                                               *dex_compilation_unit_->GetDexFile(),
+                                               entrypoint));
+      UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::FILLED_NEW_ARRAY: {
+      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
+      uint32_t type_index = instruction.VRegB_35c();
+      uint32_t args[5];
+      instruction.GetVarArgs(args);
+      BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
+      break;
+    }
+
+    case Instruction::FILLED_NEW_ARRAY_RANGE: {
+      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
+      uint32_t type_index = instruction.VRegB_3rc();
+      uint32_t register_index = instruction.VRegC_3rc();
+      BuildFilledNewArray(
+          dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
+      break;
+    }
+
+    case Instruction::FILL_ARRAY_DATA: {
+      BuildFillArrayData(instruction, dex_pc);
+      break;
+    }
+
+    case Instruction::MOVE_RESULT:
+    case Instruction::MOVE_RESULT_WIDE:
+    case Instruction::MOVE_RESULT_OBJECT: {
+      DCHECK(latest_result_ != nullptr);
+      UpdateLocal(instruction.VRegA(), latest_result_);
+      latest_result_ = nullptr;
+      break;
+    }
+
+    case Instruction::CMP_LONG: {
+      Binop_23x_cmp(instruction, Primitive::kPrimLong, ComparisonBias::kNoBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPG_FLOAT: {
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kGtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPG_DOUBLE: {
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kGtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPL_FLOAT: {
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kLtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPL_DOUBLE: {
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kLtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::NOP:
+      break;
+
+    case Instruction::IGET:
+    case Instruction::IGET_QUICK:
+    case Instruction::IGET_WIDE:
+    case Instruction::IGET_WIDE_QUICK:
+    case Instruction::IGET_OBJECT:
+    case Instruction::IGET_OBJECT_QUICK:
+    case Instruction::IGET_BOOLEAN:
+    case Instruction::IGET_BOOLEAN_QUICK:
+    case Instruction::IGET_BYTE:
+    case Instruction::IGET_BYTE_QUICK:
+    case Instruction::IGET_CHAR:
+    case Instruction::IGET_CHAR_QUICK:
+    case Instruction::IGET_SHORT:
+    case Instruction::IGET_SHORT_QUICK: {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::IPUT:
+    case Instruction::IPUT_QUICK:
+    case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_WIDE_QUICK:
+    case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_OBJECT_QUICK:
+    case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BOOLEAN_QUICK:
+    case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_BYTE_QUICK:
+    case Instruction::IPUT_CHAR:
+    case Instruction::IPUT_CHAR_QUICK:
+    case Instruction::IPUT_SHORT:
+    case Instruction::IPUT_SHORT_QUICK: {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::SGET:
+    case Instruction::SGET_WIDE:
+    case Instruction::SGET_OBJECT:
+    case Instruction::SGET_BOOLEAN:
+    case Instruction::SGET_BYTE:
+    case Instruction::SGET_CHAR:
+    case Instruction::SGET_SHORT: {
+      if (!BuildStaticFieldAccess(instruction, dex_pc, false)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::SPUT:
+    case Instruction::SPUT_WIDE:
+    case Instruction::SPUT_OBJECT:
+    case Instruction::SPUT_BOOLEAN:
+    case Instruction::SPUT_BYTE:
+    case Instruction::SPUT_CHAR:
+    case Instruction::SPUT_SHORT: {
+      if (!BuildStaticFieldAccess(instruction, dex_pc, true)) {
+        return false;
+      }
+      break;
+    }
+
+#define ARRAY_XX(kind, anticipated_type)                                          \
+    case Instruction::AGET##kind: {                                               \
+      BuildArrayAccess(instruction, dex_pc, false, anticipated_type);             \
+      break;                                                                      \
+    }                                                                             \
+    case Instruction::APUT##kind: {                                               \
+      BuildArrayAccess(instruction, dex_pc, true, anticipated_type);              \
+      break;                                                                      \
+    }
+
+    ARRAY_XX(, Primitive::kPrimInt);
+    ARRAY_XX(_WIDE, Primitive::kPrimLong);
+    ARRAY_XX(_OBJECT, Primitive::kPrimNot);
+    ARRAY_XX(_BOOLEAN, Primitive::kPrimBoolean);
+    ARRAY_XX(_BYTE, Primitive::kPrimByte);
+    ARRAY_XX(_CHAR, Primitive::kPrimChar);
+    ARRAY_XX(_SHORT, Primitive::kPrimShort);
+
+    case Instruction::ARRAY_LENGTH: {
+      HInstruction* object = LoadNullCheckedLocal(instruction.VRegB_12x(), dex_pc);
+      AppendInstruction(new (arena_) HArrayLength(object, dex_pc));
+      UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CONST_STRING: {
+      uint32_t string_index = instruction.VRegB_21c();
+      AppendInstruction(
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
+      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CONST_STRING_JUMBO: {
+      uint32_t string_index = instruction.VRegB_31c();
+      AppendInstruction(
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
+      UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CONST_CLASS: {
+      uint16_t type_index = instruction.VRegB_21c();
+      // `CanAccessTypeWithoutChecks` will tell whether the method being
+      // built is trying to access its own class, so that the generated
+      // code can optimize for this case. However, the optimization does not
+      // work for inlining, so we use `IsOutermostCompilingClass` instead.
+      ScopedObjectAccess soa(Thread::Current());
+      Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+      bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
+          dex_compilation_unit_->GetDexMethodIndex(), dex_cache, type_index);
+      bool is_in_dex_cache =
+          compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_cache, type_index);
+      AppendInstruction(new (arena_) HLoadClass(
+          graph_->GetCurrentMethod(),
+          type_index,
+          *dex_file_,
+          IsOutermostCompilingClass(type_index),
+          dex_pc,
+          !can_access,
+          is_in_dex_cache));
+      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::MOVE_EXCEPTION: {
+      AppendInstruction(new (arena_) HLoadException(dex_pc));
+      UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction());
+      AppendInstruction(new (arena_) HClearException(dex_pc));
+      break;
+    }
+
+    case Instruction::THROW: {
+      HInstruction* exception = LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot);
+      AppendInstruction(new (arena_) HThrow(exception, dex_pc));
+      // We finished building this block. Set the current block to null to avoid
+      // adding dead instructions to it.
+      current_block_ = nullptr;
+      break;
+    }
+
+    case Instruction::INSTANCE_OF: {
+      uint8_t destination = instruction.VRegA_22c();
+      uint8_t reference = instruction.VRegB_22c();
+      uint16_t type_index = instruction.VRegC_22c();
+      BuildTypeCheck(instruction, destination, reference, type_index, dex_pc);
+      break;
+    }
+
+    case Instruction::CHECK_CAST: {
+      uint8_t reference = instruction.VRegA_21c();
+      uint16_t type_index = instruction.VRegB_21c();
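+      // CHECK_CAST has no destination register; BuildTypeCheck ignores its
+      // `destination` argument for this opcode, so -1 is fine here.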
+      BuildTypeCheck(instruction, -1, reference, type_index, dex_pc);
+      break;
+    }
+
+    case Instruction::MONITOR_ENTER: {
+      AppendInstruction(new (arena_) HMonitorOperation(
+          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot),
+          HMonitorOperation::OperationKind::kEnter,
+          dex_pc));
+      break;
+    }
+
+    case Instruction::MONITOR_EXIT: {
+      AppendInstruction(new (arena_) HMonitorOperation(
+          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot),
+          HMonitorOperation::OperationKind::kExit,
+          dex_pc));
+      break;
+    }
+
+    case Instruction::SPARSE_SWITCH:
+    case Instruction::PACKED_SWITCH: {
+      BuildSwitch(instruction, dex_pc);
+      break;
+    }
+
+    default:
+      VLOG(compiler) << "Did not compile "
+                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                     << " because of unhandled instruction "
+                     << instruction.Name();
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
+      return false;
+  }
+  return true;
+}  // NOLINT(readability/fn_size)
+
+}  // namespace art
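
For orientation, a sketch of what the switch above emits for a short, hypothetical
bytecode sequence; the node and helper names are the ones used in the cases shown:

    // const-string v0, "x"  ->  HLoadString(GetCurrentMethod(), idx, dex_file, pc)
    //                            UpdateLocal(v0, <last instruction>)
    // monitor-enter v0      ->  HMonitorOperation(LoadLocal(v0, kPrimNot), kEnter, pc)
    // monitor-exit v0       ->  HMonitorOperation(LoadLocal(v0, kPrimNot), kExit, pc)
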
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
new file mode 100644
index 0000000..0e3e5a7
--- /dev/null
+++ b/compiler/optimizing/instruction_builder.h
@@ -0,0 +1,318 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
+
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "block_builder.h"
+#include "driver/compiler_driver.h"
+#include "driver/compiler_driver-inl.h"
+#include "driver/dex_compilation_unit.h"
+#include "mirror/dex_cache.h"
+#include "nodes.h"
+#include "optimizing_compiler_stats.h"
+#include "ssa_builder.h"
+
+namespace art {
+
+class HInstructionBuilder : public ValueObject {
+ public:
+  HInstructionBuilder(HGraph* graph,
+                      HBasicBlockBuilder* block_builder,
+                      SsaBuilder* ssa_builder,
+                      const DexFile* dex_file,
+                      const DexFile::CodeItem& code_item,
+                      Primitive::Type return_type,
+                      DexCompilationUnit* dex_compilation_unit,
+                      const DexCompilationUnit* const outer_compilation_unit,
+                      CompilerDriver* driver,
+                      const uint8_t* interpreter_metadata,
+                      OptimizingCompilerStats* compiler_stats,
+                      Handle<mirror::DexCache> dex_cache)
+      : arena_(graph->GetArena()),
+        graph_(graph),
+        dex_file_(dex_file),
+        code_item_(code_item),
+        return_type_(return_type),
+        block_builder_(block_builder),
+        ssa_builder_(ssa_builder),
+        locals_for_(arena_->Adapter(kArenaAllocGraphBuilder)),
+        current_block_(nullptr),
+        current_locals_(nullptr),
+        latest_result_(nullptr),
+        compiler_driver_(driver),
+        dex_compilation_unit_(dex_compilation_unit),
+        outer_compilation_unit_(outer_compilation_unit),
+        interpreter_metadata_(interpreter_metadata),
+        skipped_interpreter_metadata_(std::less<uint32_t>(),
+                                      arena_->Adapter(kArenaAllocGraphBuilder)),
+        compilation_stats_(compiler_stats),
+        dex_cache_(dex_cache),
+        loop_headers_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)) {
+    loop_headers_.reserve(kDefaultNumberOfLoops);
+  }
+
+  bool Build();
+
+ private:
+  void MaybeRecordStat(MethodCompilationStat compilation_stat);
+
+  void InitializeBlockLocals();
+  void PropagateLocalsToCatchBlocks();
+  void SetLoopHeaderPhiInputs();
+
+  bool ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc);
+  void FindNativeDebugInfoLocations(ArenaBitVector* locations);
+
+  bool CanDecodeQuickenedInfo() const;
+  uint16_t LookupQuickenedInfo(uint32_t dex_pc);
+
+  HBasicBlock* FindBlockStartingAt(uint32_t dex_pc) const;
+
+  ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
+  HInstruction* ValueOfLocalAt(HBasicBlock* block, size_t local);
+  HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type) const;
+  HInstruction* LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc);
+  void UpdateLocal(uint32_t register_index, HInstruction* instruction);
+
+  void AppendInstruction(HInstruction* instruction);
+  void InsertInstructionAtTop(HInstruction* instruction);
+  void InitializeInstruction(HInstruction* instruction);
+
+  void InitializeParameters();
+
+  // Returns whether the current method needs access check for the type.
+  // Output parameter finalizable is set to whether the type is finalizable.
+  bool NeedsAccessCheck(uint32_t type_index,
+                        Handle<mirror::DexCache> dex_cache,
+                        /*out*/bool* finalizable) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
+
+  template<typename T>
+  void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_23x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  void Binop_23x_cmp(const Instruction& instruction,
+                     Primitive::Type type,
+                     ComparisonBias bias,
+                     uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_12x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc);
+
+  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc);
+  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc);
+
+  void Conversion_12x(const Instruction& instruction,
+                      Primitive::Type input_type,
+                      Primitive::Type result_type,
+                      uint32_t dex_pc);
+
+  void BuildCheckedDivRem(uint16_t out_reg,
+                          uint16_t first_reg,
+                          int64_t second_reg_or_constant,
+                          uint32_t dex_pc,
+                          Primitive::Type type,
+                          bool second_is_lit,
+                          bool is_div);
+
+  void BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  // Builds an instance field access node and returns whether the instruction is supported.
+  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
+
+  void BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
+                                        uint32_t dex_pc,
+                                        bool is_put,
+                                        Primitive::Type field_type);
+  // Builds a static field access node and returns whether the instruction is supported.
+  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
+
+  void BuildArrayAccess(const Instruction& instruction,
+                        uint32_t dex_pc,
+                        bool is_get,
+                        Primitive::Type anticipated_type);
+
+  // Builds an invocation node and returns whether the instruction is supported.
+  bool BuildInvoke(const Instruction& instruction,
+                   uint32_t dex_pc,
+                   uint32_t method_idx,
+                   uint32_t number_of_vreg_arguments,
+                   bool is_range,
+                   uint32_t* args,
+                   uint32_t register_index);
+
+  // Builds a new array node and the instructions that fill it.
+  void BuildFilledNewArray(uint32_t dex_pc,
+                           uint32_t type_index,
+                           uint32_t number_of_vreg_arguments,
+                           bool is_range,
+                           uint32_t* args,
+                           uint32_t register_index);
+
+  void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
+
+  // Fills the given object with data as specified in the fill-array-data
+  // instruction. Currently only used for non-reference and non-floating point
+  // arrays.
+  template <typename T>
+  void BuildFillArrayData(HInstruction* object,
+                          const T* data,
+                          uint32_t element_count,
+                          Primitive::Type anticipated_type,
+                          uint32_t dex_pc);
+
+  // Fills the given object with data as specified in the fill-array-data
+  // instruction. The data must be for long and double arrays.
+  void BuildFillWideArrayData(HInstruction* object,
+                              const int64_t* data,
+                              uint32_t element_count,
+                              uint32_t dex_pc);
+
+  // Builds a `HInstanceOf`, or a `HCheckCast` instruction.
+  void BuildTypeCheck(const Instruction& instruction,
+                      uint8_t destination,
+                      uint8_t reference,
+                      uint16_t type_index,
+                      uint32_t dex_pc);
+
+  // Builds an instruction sequence for a switch statement.
+  void BuildSwitch(const Instruction& instruction, uint32_t dex_pc);
+
+  // Returns the outer-most compiling method's class.
+  mirror::Class* GetOutermostCompilingClass() const;
+
+  // Returns the class whose method is being compiled.
+  mirror::Class* GetCompilingClass() const;
+
+  // Returns whether `type_index` points to the outer-most compiling method's class.
+  bool IsOutermostCompilingClass(uint16_t type_index) const;
+
+  void PotentiallySimplifyFakeString(uint16_t original_dex_register,
+                                     uint32_t dex_pc,
+                                     HInvoke* invoke);
+
+  bool SetupInvokeArguments(HInvoke* invoke,
+                            uint32_t number_of_vreg_arguments,
+                            uint32_t* args,
+                            uint32_t register_index,
+                            bool is_range,
+                            const char* descriptor,
+                            size_t start_index,
+                            size_t* argument_index);
+
+  bool HandleInvoke(HInvoke* invoke,
+                    uint32_t number_of_vreg_arguments,
+                    uint32_t* args,
+                    uint32_t register_index,
+                    bool is_range,
+                    const char* descriptor,
+                    HClinitCheck* clinit_check);
+
+  bool HandleStringInit(HInvoke* invoke,
+                        uint32_t number_of_vreg_arguments,
+                        uint32_t* args,
+                        uint32_t register_index,
+                        bool is_range,
+                        const char* descriptor);
+  void HandleStringInitResult(HInvokeStaticOrDirect* invoke);
+
+  HClinitCheck* ProcessClinitCheckForInvoke(
+      uint32_t dex_pc,
+      ArtMethod* method,
+      uint32_t method_idx,
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Builds a `HNewInstance` instruction.
+  bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
+
+  // Returns whether the compiler can assume `cls` is initialized.
+  bool IsInitialized(Handle<mirror::Class> cls) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Tries to resolve a method using the class linker. Returns null if the
+  // method could not be resolved.
+  ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
+
+  ArenaAllocator* const arena_;
+  HGraph* const graph_;
+
+  // The dex file where the method being compiled is, and the bytecode data.
+  const DexFile* const dex_file_;
+  const DexFile::CodeItem& code_item_;
+
+  // The return type of the method being compiled.
+  const Primitive::Type return_type_;
+
+  HBasicBlockBuilder* block_builder_;
+  SsaBuilder* ssa_builder_;
+
+  ArenaVector<ArenaVector<HInstruction*>> locals_for_;
+  HBasicBlock* current_block_;
+  ArenaVector<HInstruction*>* current_locals_;
+  HInstruction* latest_result_;
+
+  CompilerDriver* const compiler_driver_;
+
+  // The compilation unit of the current method being compiled. Note that
+  // it can be an inlined method.
+  DexCompilationUnit* const dex_compilation_unit_;
+
+  // The compilation unit of the outermost method being compiled. That is the
+  // method being compiled (and not inlined), and potentially inlining other
+  // methods.
+  const DexCompilationUnit* const outer_compilation_unit_;
+
+  // Original values kept after instruction quickening. This is a data buffer
+  // of Leb128-encoded (dex_pc, value) pairs sorted by dex_pc.
+  const uint8_t* interpreter_metadata_;
+
+  // InstructionBuilder does not parse instructions in dex_pc order. Quickening
+  // info for out-of-order dex_pcs is stored in a map until the positions
+  // are eventually visited.
+  ArenaSafeMap<uint32_t, uint16_t> skipped_interpreter_metadata_;
+
+  OptimizingCompilerStats* compilation_stats_;
+  Handle<mirror::DexCache> dex_cache_;
+
+  ArenaVector<HBasicBlock*> loop_headers_;
+
+  static constexpr int kDefaultNumberOfLoops = 2;
+
+  DISALLOW_COPY_AND_ASSIGN(HInstructionBuilder);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
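
The `interpreter_metadata_`/`skipped_interpreter_metadata_` pair implies a two-level
lookup. A rough sketch of the strategy the comments above describe, not the committed
`LookupQuickenedInfo` body: the buffer pointer stands in for `interpreter_metadata_`,
`std::map` stands in for `ArenaSafeMap`, and `DecodeUleb128` is a hypothetical stand-in
for ART's `DecodeUnsignedLeb128` (base/leb128.h).

    #include <cstdint>
    #include <map>

    // Reads one LEB128-encoded value and advances *data (assumed helper).
    uint32_t DecodeUleb128(const uint8_t** data);

    uint16_t LookupQuickenedInfoSketch(const uint8_t** metadata,
                                       std::map<uint32_t, uint16_t>* skipped,
                                       uint32_t dex_pc) {
      // First consult entries decoded earlier while searching for another dex_pc.
      auto it = skipped->find(dex_pc);
      if (it != skipped->end()) {
        uint16_t value = it->second;
        skipped->erase(it);
        return value;
      }
      // Otherwise decode forward through the sorted buffer, stashing pairs that
      // belong to dex_pcs not visited yet.
      while (true) {
        uint32_t item_pc = DecodeUleb128(metadata);
        uint16_t value = static_cast<uint16_t>(DecodeUleb128(metadata));
        if (item_pc == dex_pc) {
          return value;
        }
        skipped->emplace(item_pc, value);
      }
    }
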
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 1249b48..fd79901 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -101,6 +101,7 @@
   void SimplifyCompare(HInvoke* invoke, bool is_signum, Primitive::Type type);
   void SimplifyIsNaN(HInvoke* invoke);
   void SimplifyFP2Int(HInvoke* invoke);
+  void SimplifyStringIsEmptyOrLength(HInvoke* invoke);
   void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
   OptimizingCompilerStats* stats_;
@@ -409,9 +410,9 @@
     return true;
   }
 
-  for (HUseIterator<HInstruction*> it(input->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* use = it.Current()->GetUser();
-    if (use->IsNullCheck() && use->StrictlyDominates(at)) {
+  for (const HUseListNode<HInstruction*>& use : input->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if (user->IsNullCheck() && user->StrictlyDominates(at)) {
       return true;
     }
   }
@@ -786,14 +787,21 @@
 }
 
 static bool IsTypeConversionImplicit(Primitive::Type input_type, Primitive::Type result_type) {
+  // Invariant: We should never generate a conversion to a Boolean value.
+  DCHECK_NE(Primitive::kPrimBoolean, result_type);
+
   // Besides conversion to the same type, widening integral conversions are implicit,
   // excluding conversions to long and the byte->char conversion where we need to
   // clear the high 16 bits of the 32-bit sign-extended representation of byte.
   return result_type == input_type ||
-      (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimByte) ||
-      (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimShort) ||
-      (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimChar) ||
-      (result_type == Primitive::kPrimShort && input_type == Primitive::kPrimByte);
+      (result_type == Primitive::kPrimInt && (input_type == Primitive::kPrimBoolean ||
+                                              input_type == Primitive::kPrimByte ||
+                                              input_type == Primitive::kPrimShort ||
+                                              input_type == Primitive::kPrimChar)) ||
+      (result_type == Primitive::kPrimChar && input_type == Primitive::kPrimBoolean) ||
+      (result_type == Primitive::kPrimShort && (input_type == Primitive::kPrimBoolean ||
+                                                input_type == Primitive::kPrimByte)) ||
+      (result_type == Primitive::kPrimByte && input_type == Primitive::kPrimBoolean);
 }
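
A few spot checks of the widened table (assuming the usual 0/1 in-register
representation of booleans):

    // IsTypeConversionImplicit(kPrimBoolean, kPrimInt)  -> true   (0/1 is already a valid int)
    // IsTypeConversionImplicit(kPrimByte,    kPrimChar) -> false  (must clear the high 16 bits)
    // IsTypeConversionImplicit(kPrimInt,     kPrimLong) -> false  (needs sign extension)
    // IsTypeConversionImplicit(kPrimShort,   kPrimShort) -> true  (same type)
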
 
 static bool IsTypeConversionLossless(Primitive::Type input_type, Primitive::Type result_type) {
@@ -1063,12 +1071,12 @@
   }
 
   // Is the Compare only used for this purpose?
-  if (!left->GetUses().HasOnlyOneUse()) {
+  if (!left->GetUses().HasExactlyOneElement()) {
     // Someone else also wants the result of the compare.
     return;
   }
 
-  if (!left->GetEnvUses().IsEmpty()) {
+  if (!left->GetEnvUses().empty()) {
     // There is a reference to the compare result in an environment. Do we really need it?
     if (GetGraph()->IsDebuggable()) {
       return;
@@ -1666,6 +1674,27 @@
   invoke->ReplaceWithExceptInReplacementAtIndex(select, 0);  // false at index 0
 }
 
+void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke) {
+  HInstruction* str = invoke->InputAt(0);
+  uint32_t dex_pc = invoke->GetDexPc();
+  // We treat String as an array to allow DCE and BCE to seamlessly work on strings,
+  // so create the HArrayLength.
+  HArrayLength* length = new (GetGraph()->GetArena()) HArrayLength(str, dex_pc);
+  length->MarkAsStringLength();
+  HInstruction* replacement;
+  if (invoke->GetIntrinsic() == Intrinsics::kStringIsEmpty) {
+    // For String.isEmpty(), create the `HEqual` representing the `length == 0`.
+    invoke->GetBlock()->InsertInstructionBefore(length, invoke);
+    HIntConstant* zero = GetGraph()->GetIntConstant(0);
+    HEqual* equal = new (GetGraph()->GetArena()) HEqual(length, zero, dex_pc);
+    replacement = equal;
+  } else {
+    DCHECK_EQ(invoke->GetIntrinsic(), Intrinsics::kStringLength);
+    replacement = length;
+  }
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, replacement);
+}
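
In HIR terms, the rewrite is (sketch; `s` is the String receiver):

    // before:  r = InvokeVirtual [intrinsic StringIsEmpty] (s)
    // after:   l = HArrayLength(s)   // MarkAsStringLength()
    //          r = HEqual(l, IntConstant(0))
    // For StringLength, the invoke is replaced by the HArrayLength alone.
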
+
 void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind) {
   uint32_t dex_pc = invoke->GetDexPc();
   HMemoryBarrier* mem_barrier = new (GetGraph()->GetArena()) HMemoryBarrier(barrier_kind, dex_pc);
@@ -1712,6 +1741,10 @@
     case Intrinsics::kDoubleDoubleToLongBits:
       SimplifyFP2Int(instruction);
       break;
+    case Intrinsics::kStringIsEmpty:
+    case Intrinsics::kStringLength:
+      SimplifyStringIsEmptyOrLength(instruction);
+      break;
     case Intrinsics::kUnsafeLoadFence:
       SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny);
       break;
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index f00d960..e4a711e 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -140,7 +140,7 @@
                                                                  shift_amount,
                                                                  use->GetDexPc());
     use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op);
-    if (bitfield_op->GetUses().IsEmpty()) {
+    if (bitfield_op->GetUses().empty()) {
       bitfield_op->GetBlock()->RemoveInstruction(bitfield_op);
     }
     RecordSimplification();
@@ -160,20 +160,22 @@
   const HUseList<HInstruction*>& uses = bitfield_op->GetUses();
 
   // Check whether we can merge the instruction in all its users' shifter operand.
-  for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) {
-    HInstruction* use = it_use.Current()->GetUser();
-    if (!HasShifterOperand(use)) {
+  for (const HUseListNode<HInstruction*>& use : uses) {
+    HInstruction* user = use.GetUser();
+    if (!HasShifterOperand(user)) {
       return false;
     }
-    if (!CanMergeIntoShifterOperand(use, bitfield_op)) {
+    if (!CanMergeIntoShifterOperand(user, bitfield_op)) {
       return false;
     }
   }
 
   // Merge the instruction into its uses.
-  for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) {
-    HInstruction* use = it_use.Current()->GetUser();
-    bool merged = MergeIntoShifterOperand(use, bitfield_op);
+  for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+    HInstruction* user = it->GetUser();
+    // Increment `it` now because `*it` will disappear thanks to MergeIntoShifterOperand().
+    ++it;
+    bool merged = MergeIntoShifterOperand(user, bitfield_op);
     DCHECK(merged);
   }
 
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 338120b..da26998 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -51,6 +51,22 @@
     return TryMergeIntoShifterOperand(use, bitfield_op, true);
   }
 
+  /**
+   * This simplifier uses a special-purpose BB visitor.
+   * (1) No need to visit Phi nodes.
+   * (2) Since statements can be removed in a "forward" fashion,
+   *     the visitor should test if each statement is still there.
+   */
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+    // TODO: fragile iteration, provide more robust iterators?
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      if (instruction->IsInBlock()) {
+        instruction->Accept(this);
+      }
+    }
+  }
+
   // HInstruction visitors, sorted alphabetically.
   void VisitAnd(HAnd* instruction) OVERRIDE;
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index a11b5bd..dab1ebc 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -103,13 +103,10 @@
       return false;
   }
 
-  HInstruction* use = mul->HasNonEnvironmentUses()
-      ? mul->GetUses().GetFirst()->GetUser()
-      : nullptr;
-
   ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
 
   if (mul->HasOnlyOneNonEnvironmentUse()) {
+    HInstruction* use = mul->GetUses().front().GetUser();
     if (use->IsAdd() || use->IsSub()) {
       // Replace code looking like
       //    MUL tmp, x, y
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 5d4c4e2..418d59c 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -388,10 +388,8 @@
     case kIntrinsicGetCharsNoCheck:
       return Intrinsics::kStringGetCharsNoCheck;
     case kIntrinsicIsEmptyOrLength:
-      // The inliner can handle these two cases - and this is the preferred approach
-      // since after inlining the call is no longer visible (as opposed to waiting
-      // until codegen to handle intrinsic).
-      return Intrinsics::kNone;
+      return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ?
+          Intrinsics::kStringLength : Intrinsics::kStringIsEmpty;
     case kIntrinsicIndexOf:
       return ((method.d.data & kIntrinsicFlagBase0) == 0) ?
           Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf;
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 3da8285..214250f 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -30,6 +30,10 @@
 // Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751
 static constexpr bool kRoundIsPlusPointFive = false;
 
+// Positive floating-point infinities.
+static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
+static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);
+
 // Recognize intrinsics from HInvoke nodes.
 class IntrinsicsRecognizer : public HOptimization {
  public:
@@ -124,11 +128,12 @@
 void Set##name() { SetBit(k##name); }                  \
 bool Get##name() const { return IsBitSet(k##name); }   \
 private:                                               \
-static constexpr int k##name = bit
+static constexpr size_t k##name = bit
 
 class IntrinsicOptimizations : public ValueObject {
  public:
-  explicit IntrinsicOptimizations(HInvoke* invoke) : value_(invoke->GetIntrinsicOptimizations()) {}
+  explicit IntrinsicOptimizations(HInvoke* invoke)
+      : value_(invoke->GetIntrinsicOptimizations()) {}
   explicit IntrinsicOptimizations(const HInvoke& invoke)
       : value_(invoke.GetIntrinsicOptimizations()) {}
 
@@ -138,15 +143,17 @@
 
  protected:
   bool IsBitSet(uint32_t bit) const {
+    DCHECK_LT(bit, sizeof(uint32_t) * kBitsPerByte);
     return (*value_ & (1 << bit)) != 0u;
   }
 
   void SetBit(uint32_t bit) {
-    *(const_cast<uint32_t*>(value_)) |= (1 << bit);
+    DCHECK_LT(bit, sizeof(uint32_t) * kBitsPerByte);
+    *(const_cast<uint32_t* const>(value_)) |= (1 << bit);
   }
 
  private:
-  const uint32_t *value_;
+  const uint32_t* const value_;
 
   DISALLOW_COPY_AND_ASSIGN(IntrinsicOptimizations);
 };
@@ -158,7 +165,7 @@
 void Set##name() { SetBit(k##name); }                                 \
 bool Get##name() const { return IsBitSet(k##name); }                  \
 private:                                                              \
-static constexpr int k##name = bit + kNumberOfGenericOptimizations
+static constexpr size_t k##name = bit + kNumberOfGenericOptimizations
 
 class StringEqualsOptimizations : public IntrinsicOptimizations {
  public:
@@ -232,6 +239,8 @@
 UNREACHABLE_INTRINSIC(Arch, LongCompare)            \
 UNREACHABLE_INTRINSIC(Arch, IntegerSignum)          \
 UNREACHABLE_INTRINSIC(Arch, LongSignum)             \
+UNREACHABLE_INTRINSIC(Arch, StringIsEmpty)          \
+UNREACHABLE_INTRINSIC(Arch, StringLength)           \
 UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence)        \
 UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence)       \
 UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence)
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 4b94c94..4e3ace4 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1115,15 +1115,15 @@
                                        ArenaAllocator* allocator,
                                        bool start_at_zero) {
   LocationSummary* locations = invoke->GetLocations();
-  Register tmp_reg = locations->GetTemp(0).AsRegister<Register>();
 
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
-  // or directly dispatch if we have a constant.
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCode* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
     if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
         std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
@@ -1134,16 +1134,18 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     Register char_reg = locations->InAt(1).AsRegister<Register>();
-    __ LoadImmediate(tmp_reg, std::numeric_limits<uint16_t>::max());
-    __ cmp(char_reg, ShifterOperand(tmp_reg));
+    // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
+    __ cmp(char_reg,
+           ShifterOperand(static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1));
     slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
     codegen->AddSlowPath(slow_path);
-    __ b(slow_path->GetEntryLabel(), HI);
+    __ b(slow_path->GetEntryLabel(), HS);
   }
 
   if (start_at_zero) {
+    Register tmp_reg = locations->GetTemp(0).AsRegister<Register>();
     DCHECK_EQ(tmp_reg, R2);
     // Start-index = 0.
     __ LoadImmediate(tmp_reg, 0);
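
The `0x10000` trick above relies on the A32 "modified immediate" encoding: an 8-bit
value rotated right by an even amount. A stand-alone checker illustrating that rule;
`IsArmModifiedImmediate` is a made-up name for this sketch, not an ART helper:

    #include <cstdint>

    // Returns true if `value` can be encoded as an A32 data-processing
    // immediate (imm8 rotated right by an even amount).
    bool IsArmModifiedImmediate(uint32_t value) {
      for (unsigned rot = 0; rot < 32; rot += 2) {
        // Rotating left by `rot` undoes a rotate-right-by-`rot` encoding.
        uint32_t unrotated = (value << rot) | (value >> ((32 - rot) & 31));
        if (unrotated <= 0xffu) {
          return true;
        }
      }
      return false;
    }
    // IsArmModifiedImmediate(0x10000) == true  (0x01 ror 16)
    // IsArmModifiedImmediate(0xffff)  == false (16 set bits never fit in imm8)
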
@@ -1170,7 +1172,7 @@
   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(Location::RegisterLocation(R0));
 
-  // Need a temp for slow-path codepoint compare, and need to send start-index=0.
+  // Need to send start-index=0.
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
 }
 
@@ -1190,9 +1192,6 @@
   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
   locations->SetOut(Location::RegisterLocation(R0));
-
-  // Need a temp for slow-path codepoint compare.
-  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) {
@@ -1394,15 +1393,13 @@
   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
   codegen_->AddSlowPath(slow_path);
 
-  Label ok;
+  Label conditions_on_positions_validated;
   SystemArrayCopyOptimizations optimizations(invoke);
 
-  if (!optimizations.GetDestinationIsSource()) {
-    if (!src_pos.IsConstant() || !dest_pos.IsConstant()) {
-      __ cmp(src, ShifterOperand(dest));
-    }
+  if (!optimizations.GetDestinationIsSource() &&
+      (!src_pos.IsConstant() || !dest_pos.IsConstant())) {
+    __ cmp(src, ShifterOperand(dest));
   }
-
   // If source and destination are the same, we go to slow path if we need to do
   // forward copying.
   if (src_pos.IsConstant()) {
@@ -1413,14 +1410,14 @@
              || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
     } else {
       if (!optimizations.GetDestinationIsSource()) {
-        __ b(&ok, NE);
+        __ b(&conditions_on_positions_validated, NE);
       }
       __ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
       __ b(slow_path->GetEntryLabel(), GT);
     }
   } else {
     if (!optimizations.GetDestinationIsSource()) {
-      __ b(&ok, NE);
+      __ b(&conditions_on_positions_validated, NE);
     }
     if (dest_pos.IsConstant()) {
       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
@@ -1431,7 +1428,7 @@
     __ b(slow_path->GetEntryLabel(), LT);
   }
 
-  __ Bind(&ok);
+  __ Bind(&conditions_on_positions_validated);
 
   if (!optimizations.GetSourceIsNotNull()) {
     // Bail out if the source is null.
@@ -1482,7 +1479,7 @@
     bool did_unpoison = false;
     if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
         !optimizations.GetSourceIsNonPrimitiveArray()) {
-      // One or two of the references need to be unpoisoned. Unpoisoned them
+      // One or two of the references need to be unpoisoned. Unpoison them
       // both to make the identity check valid.
       __ MaybeUnpoisonHeapReference(temp1);
       __ MaybeUnpoisonHeapReference(temp2);
@@ -1491,6 +1488,7 @@
 
     if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
       // Bail out if the destination is not a non primitive array.
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
       __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
       __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
       __ MaybeUnpoisonHeapReference(temp3);
@@ -1501,7 +1499,7 @@
 
     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
       // Bail out if the source is not a non primitive array.
-      // Bail out if the destination is not a non primitive array.
+      // /* HeapReference<Class> */ temp3 = temp2->component_type_
       __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
       __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
       __ MaybeUnpoisonHeapReference(temp3);
@@ -1518,8 +1516,10 @@
       if (!did_unpoison) {
         __ MaybeUnpoisonHeapReference(temp1);
       }
+      // /* HeapReference<Class> */ temp1 = temp1->component_type_
       __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
       __ MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp1 = temp1->super_class_
       __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
       // No need to unpoison the result, we're comparing against null.
       __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
@@ -1530,8 +1530,10 @@
   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
     // Bail out if the source is not a non primitive array.
+    // /* HeapReference<Class> */ temp1 = src->klass_
     __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
     __ MaybeUnpoisonHeapReference(temp1);
+    // /* HeapReference<Class> */ temp3 = temp1->component_type_
     __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
     __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
     __ MaybeUnpoisonHeapReference(temp3);
@@ -1585,7 +1587,7 @@
                        temp2,
                        dest,
                        Register(kNoRegister),
-                       /* can_be_null */ false);
+                       /* value_can_be_null */ false);
 
   __ Bind(slow_path->GetExitLabel());
 }
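
Roughly, the bail-out ladder above (mirrored by the ARM64 version further down)
decides, in pseudo-C++ over `mirror::Class` fields:

    // if (dest->klass_ == src->klass_)                  -> fast copy (same component type)
    // else if (dest is Object[], i.e. dest->klass_->
    //          component_type_->super_class_ == null)   -> fast copy (component is j.l.Object)
    // else                                              -> slow path
    // Both sides must also be non-primitive arrays (component_type_ != null and its
    // primitive_type_ == kPrimNot), which is what the component-type checks verify.
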
@@ -1982,6 +1984,56 @@
   __ Bind(&done);
 }
 
+void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) {
+  ArmAssembler* const assembler = GetAssembler();
+  LocationSummary* const locations = invoke->GetLocations();
+  const Register out = locations->Out().AsRegister<Register>();
+  // Shifting left by 1 bit makes the value encodable as an immediate operand;
+  // we don't care about the sign bit anyway.
+  constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
+
+  __ vmovrs(out, locations->InAt(0).AsFpuRegister<SRegister>());
+  // We don't care about the sign bit, so shift left.
+  __ Lsl(out, out, 1);
+  __ eor(out, out, ShifterOperand(infinity));
+  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
+  __ clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
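
A minimal C++ model of the sequence above (vmovrs / Lsl / eor / clz / Lsr), assuming
GCC/Clang builtins; `__builtin_clz(0)` is undefined in C++, so the ARM semantics
clz(0) == 32 is spelled out explicitly:

    #include <cstdint>
    #include <cstring>

    // Returns 1 iff f is +/- infinity.
    int FloatIsInfiniteModel(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));
      // Shift out the sign bit and compare against the similarly shifted +inf bits.
      uint32_t x = (bits << 1) ^ (0x7f800000u << 1);   // kPositiveInfinityFloat << 1
      uint32_t leading_zeros = (x == 0) ? 32u : static_cast<uint32_t>(__builtin_clz(x));
      return static_cast<int>(leading_zeros >> 5);     // 32 -> 1, anything less -> 0
    }
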
+
+void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) {
+  ArmAssembler* const assembler = GetAssembler();
+  LocationSummary* const locations = invoke->GetLocations();
+  const Register out = locations->Out().AsRegister<Register>();
+  // The highest 32 bits of double precision positive infinity separated into
+  // two constants encodable as immediate operands.
+  constexpr uint32_t infinity_high  = 0x7f000000U;
+  constexpr uint32_t infinity_high2 = 0x00f00000U;
+
+  static_assert((infinity_high | infinity_high2) ==
+                    static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
+                "The constants do not add up to the high 32 bits of double "
+                "precision positive infinity.");
+  __ vmovrrd(IP, out, FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
+  __ eor(out, out, ShifterOperand(infinity_high));
+  __ eor(out, out, ShifterOperand(infinity_high2));
+  // We don't care about the sign bit, so shift left.
+  __ orr(out, IP, ShifterOperand(out, LSL, 1));
+  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
+  __ clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
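
A quick check that the two `eor`s above amount to a single xor with the high word of
`kPositiveInfinityDouble`; the constants are disjoint, and each one is encodable as a
modified immediate:

    static_assert((0x7f000000u ^ 0x00f00000u) == 0x7ff00000u,
                  "disjoint split of the infinity high word");
    // The final `orr out, IP, out LSL 1` then folds in the low word, with the left
    // shift dropping the sign bit, so `out` is zero iff the input is +/- infinity.
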
+
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerBitCount)
 UNIMPLEMENTED_INTRINSIC(ARM, LongBitCount)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
@@ -1998,8 +2050,6 @@
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARM, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(ARM, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 5de2306..cc5fd65 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -47,6 +47,7 @@
 using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
 using helpers::InputRegisterAt;
+using helpers::OutputRegister;
 
 namespace {
 
@@ -368,17 +369,16 @@
   GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
 }
 
-static void GenBitCount(HInvoke* instr, bool is_long, vixl::MacroAssembler* masm) {
-  DCHECK(instr->GetType() == Primitive::kPrimInt);
-  DCHECK((is_long && instr->InputAt(0)->GetType() == Primitive::kPrimLong) ||
-         (!is_long && instr->InputAt(0)->GetType() == Primitive::kPrimInt));
+static void GenBitCount(HInvoke* instr, Primitive::Type type, vixl::MacroAssembler* masm) {
+  DCHECK(Primitive::IsIntOrLongType(type)) << type;
+  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
+  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
 
-  Location out = instr->GetLocations()->Out();
   UseScratchRegisterScope temps(masm);
 
   Register src = InputRegisterAt(instr, 0);
-  Register dst = is_long ? XRegisterFrom(out) : WRegisterFrom(out);
-  FPRegister fpr = is_long ? temps.AcquireD() : temps.AcquireS();
+  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
+  FPRegister fpr = (type == Primitive::kPrimLong) ? temps.AcquireD() : temps.AcquireS();
 
   __ Fmov(fpr, src);
   __ Cnt(fpr.V8B(), fpr.V8B());
@@ -391,7 +391,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
-  GenBitCount(invoke, /* is_long */ true, GetVIXLAssembler());
+  GenBitCount(invoke, Primitive::kPrimLong, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
@@ -399,7 +399,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
-  GenBitCount(invoke, /* is_long */ false, GetVIXLAssembler());
+  GenBitCount(invoke, Primitive::kPrimInt, GetVIXLAssembler());
 }
 
 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1174,31 +1174,118 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            invoke->InputAt(1)->CanBeNull()
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall,
                                                             kIntrinsified);
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
-  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
   vixl::MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
+  Register str = XRegisterFrom(locations->InAt(0));
+  Register arg = XRegisterFrom(locations->InAt(1));
+  Register out = OutputRegister(invoke);
+
+  Register temp0 = WRegisterFrom(locations->GetTemp(0));
+  Register temp1 = WRegisterFrom(locations->GetTemp(1));
+  Register temp2 = WRegisterFrom(locations->GetTemp(2));
+
+  vixl::Label loop;
+  vixl::Label find_char_diff;
+  vixl::Label end;
+
+  // Get offsets of count and value fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  Register argument = WRegisterFrom(locations->InAt(1));
-  __ Cmp(argument, 0);
-  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
-  codegen_->AddSlowPath(slow_path);
-  __ B(eq, slow_path->GetEntryLabel());
+  // Take slow path and throw if input can be and is null.
+  SlowPathCodeARM64* slow_path = nullptr;
+  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+  if (can_slow_path) {
+    slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+    codegen_->AddSlowPath(slow_path);
+    __ Cbz(arg, slow_path->GetEntryLabel());
+  }
 
-  __ Ldr(
-      lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pStringCompareTo).Int32Value()));
-  __ Blr(lr);
-  __ Bind(slow_path->GetExitLabel());
+  // Reference equality check, return 0 if same reference.
+  __ Subs(out, str, arg);
+  __ B(&end, eq);
+  // Load lengths of this and argument strings.
+  __ Ldr(temp0, MemOperand(str.X(), count_offset));
+  __ Ldr(temp1, MemOperand(arg.X(), count_offset));
+  // Return zero if both strings are empty.
+  __ Orr(out, temp0, temp1);
+  __ Cbz(out, &end);
+  // out = length diff.
+  __ Subs(out, temp0, temp1);
+  // temp2 = min(len(str), len(arg)).
+  __ Csel(temp2, temp1, temp0, ge);
+  // Shorter string is empty?
+  __ Cbz(temp2, &end);
+
+  // Store offset of string value in preparation for comparison loop.
+  __ Mov(temp1, value_offset);
+
+  UseScratchRegisterScope scratch_scope(masm);
+  Register temp4 = scratch_scope.AcquireX();
+
+  // Assertions that must hold in order to compare strings 4 characters at a time.
+  DCHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Promote temp0 to an X reg, ready for LDR.
+  temp0 = temp0.X();
+
+  // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
+  __ Bind(&loop);
+  __ Ldr(temp4, MemOperand(str.X(), temp1));
+  __ Ldr(temp0, MemOperand(arg.X(), temp1));
+  __ Cmp(temp4, temp0);
+  __ B(ne, &find_char_diff);
+  __ Add(temp1, temp1, char_size * 4);
+  __ Subs(temp2, temp2, 4);
+  __ B(gt, &loop);
+  __ B(&end);
+
+  // Promote temp1 to an X reg, ready for EOR.
+  temp1 = temp1.X();
+
+  // Find the single 16-bit character difference.
+  __ Bind(&find_char_diff);
+  // Get the bit position of the first character that differs.
+  __ Eor(temp1, temp0, temp4);
+  __ Rbit(temp1, temp1);
+  __ Clz(temp1, temp1);
+  __ Bic(temp1, temp1, 0xf);
+  // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
+  // the difference occurs outside the remaining string data, so just return length diff (out).
+  __ Cmp(temp2, Operand(temp1, LSR, 4));
+  __ B(le, &end);
+  // Extract the characters and calculate the difference.
+  __ Lsr(temp0, temp0, temp1);
+  __ Lsr(temp4, temp4, temp1);
+  __ And(temp4, temp4, 0xffff);
+  __ Sub(out, temp4, Operand(temp0, UXTH));
+
+  __ Bind(&end);
+
+  if (can_slow_path) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
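
A C++ model of the `find_char_diff` sequence above (Eor / Rbit / Clz / Bic, then the
shifted extraction), assuming little-endian 64-bit chunks of four UTF-16 units and
`lhs != rhs`; the real code's bounds check against the remaining length is omitted here:

    #include <cstdint>

    // Returns this_char - arg_char at the first differing 16-bit lane.
    int32_t CharDiffModel(uint64_t lhs, uint64_t rhs) {
      uint64_t x = lhs ^ rhs;                        // Eor
      unsigned bit = __builtin_ctzll(x);             // Rbit + Clz == count trailing zeros
      bit &= ~0xfu;                                  // Bic: round down to a 16-bit lane
      uint32_t l = static_cast<uint32_t>(lhs >> bit) & 0xffffu;  // Lsr + And
      uint32_t r = static_cast<uint32_t>(rhs >> bit) & 0xffffu;  // Lsr + UXTH
      return static_cast<int32_t>(l) - static_cast<int32_t>(r);  // Sub
    }
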
 
 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
@@ -1303,15 +1390,15 @@
                                        ArenaAllocator* allocator,
                                        bool start_at_zero) {
   LocationSummary* locations = invoke->GetLocations();
-  Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
 
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
-  // or directly dispatch if we have a constant.
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCodeARM64* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
     if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > 0xFFFFU) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
@@ -1321,17 +1408,17 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     Register char_reg = WRegisterFrom(locations->InAt(1));
-    __ Mov(tmp_reg, 0xFFFF);
-    __ Cmp(char_reg, Operand(tmp_reg));
+    __ Tst(char_reg, 0xFFFF0000);
     slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
     codegen->AddSlowPath(slow_path);
-    __ B(hi, slow_path->GetEntryLabel());
+    __ B(ne, slow_path->GetEntryLabel());
   }
 
   if (start_at_zero) {
     // Start-index = 0.
+    Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
     __ Mov(tmp_reg, 0);
   }
 
@@ -1355,7 +1442,7 @@
   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
 
-  // Need a temp for slow-path codepoint compare, and need to send start_index=0.
+  // Need to send start_index=0.
   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
 }
 
@@ -1375,9 +1462,6 @@
   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
-
-  // Need a temp for slow-path codepoint compare.
-  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
@@ -1709,7 +1793,7 @@
 
 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
 // implementation there for longer copy lengths.
-static constexpr int32_t kSystemArrayCopyThreshold = 32;
+static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
 
 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
                                                uint32_t at,
@@ -1740,7 +1824,7 @@
   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
   if (length != nullptr) {
     int32_t len = length->GetValue();
-    if (len < 0 || len > kSystemArrayCopyThreshold) {
+    if (len < 0 || len > kSystemArrayCopyCharThreshold) {
       // Just call as normal.
       return;
     }
@@ -1820,39 +1904,32 @@
                                         const Register& dst_base,
                                         const Register& src_end) {
   DCHECK(type == Primitive::kPrimNot || type == Primitive::kPrimChar)
-         << "Unexpected element type: "
-         << type;
-  const int32_t char_size = Primitive::ComponentSize(type);
-  const int32_t char_size_shift = Primitive::ComponentSizeShift(type);
+      << "Unexpected element type: " << type;
+  const int32_t element_size = Primitive::ComponentSize(type);
+  const int32_t element_size_shift = Primitive::ComponentSizeShift(type);
 
-  uint32_t offset = mirror::Array::DataOffset(char_size).Uint32Value();
+  uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
   if (src_pos.IsConstant()) {
     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
-    __ Add(src_base, src, char_size * constant + offset);
+    __ Add(src_base, src, element_size * constant + data_offset);
   } else {
-    __ Add(src_base, src, offset);
-    __ Add(src_base,
-           src_base,
-           Operand(XRegisterFrom(src_pos), LSL, char_size_shift));
+    __ Add(src_base, src, data_offset);
+    __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
   }
 
   if (dst_pos.IsConstant()) {
     int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
-    __ Add(dst_base, dst, char_size * constant + offset);
+    __ Add(dst_base, dst, element_size * constant + data_offset);
   } else {
-    __ Add(dst_base, dst, offset);
-    __ Add(dst_base,
-           dst_base,
-           Operand(XRegisterFrom(dst_pos), LSL, char_size_shift));
+    __ Add(dst_base, dst, data_offset);
+    __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
   }
 
   if (copy_length.IsConstant()) {
     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
-    __ Add(src_end, src_base, char_size * constant);
+    __ Add(src_end, src_base, element_size * constant);
   } else {
-    __ Add(src_end,
-           src_base,
-           Operand(XRegisterFrom(copy_length), LSL, char_size_shift));
+    __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
   }
 }
 
@@ -1883,7 +1960,7 @@
     // If the length is negative, bail out.
     __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
     // If the length > 32 then (currently) prefer libcore's native implementation.
-    __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
+    __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
     __ B(slow_path->GetEntryLabel(), gt);
   } else {
     // We have already checked in the LocationsBuilder for the constant case.
@@ -1944,10 +2021,311 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
-UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopy)
+// We can choose to use libcore's native implementation for longer copy lengths.
+static constexpr int32_t kSystemArrayCopyThreshold = 128;
+
+// CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
+// We want to use two temporary registers in order to reduce the register pressure on arm64,
+// so we don't use CodeGenerator::CreateSystemArrayCopyLocationSummary.
+void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // Check to see if we have known failures that will cause us to have to bail out
+  // to the runtime, and just generate the runtime call directly.
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+
+  // The positions must be non-negative.
+  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
+    // We will have to fail anyway.
+    return;
+  }
+
+  // The length must be >= 0.
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+  if (length != nullptr) {
+    int32_t len = length->GetValue();
+    if (len < 0 || len >= kSystemArrayCopyThreshold) {
+      // Just call as normal.
+      return;
+    }
+  }
+
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  if (optimizations.GetDestinationIsSource()) {
+    if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
+      // We only support backward copying if source and destination are the same.
+      return;
+    }
+  }
+
+  if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
+    // We currently don't intrinsify primitive copying.
+    return;
+  }
+
+  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnSlowPath,
+                                                               kIntrinsified);
+  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
+  locations->SetInAt(0, Location::RequiresRegister());
+  SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
+  locations->SetInAt(2, Location::RequiresRegister());
+  SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
+  SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
+
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
+  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+
+  Register src = XRegisterFrom(locations->InAt(0));
+  Location src_pos = locations->InAt(1);
+  Register dest = XRegisterFrom(locations->InAt(2));
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+  Register temp1 = WRegisterFrom(locations->GetTemp(0));
+  Register temp2 = WRegisterFrom(locations->GetTemp(1));
+
+  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  vixl::Label conditions_on_positions_validated;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  if (!optimizations.GetDestinationIsSource() &&
+      (!src_pos.IsConstant() || !dest_pos.IsConstant())) {
+    __ Cmp(src, dest);
+  }
+  // If source and destination are the same, we go to slow path if we need to do
+  // forward copying.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    if (dest_pos.IsConstant()) {
+      // Checked when building locations.
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ B(&conditions_on_positions_validated, ne);
+      }
+      __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
+      __ B(slow_path->GetEntryLabel(), gt);
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ B(&conditions_on_positions_validated, ne);
+    }
+    __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
+           OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
+    __ B(slow_path->GetEntryLabel(), lt);
+  }
+
+  __ Bind(&conditions_on_positions_validated);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ Cbz(src, slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ Cbz(dest, slow_path->GetEntryLabel());
+  }
+
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    // If the length is negative, bail out.
+    __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
+    // If the length >= 128 then (currently) prefer the native implementation.
+    __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
+    __ B(slow_path->GetEntryLabel(), ge);
+  }
+  // Validity checks: source.
+  CheckSystemArrayCopyPosition(masm,
+                               src_pos,
+                               src,
+                               length,
+                               slow_path,
+                               temp1,
+                               temp2,
+                               optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckSystemArrayCopyPosition(masm,
+                               dest_pos,
+                               dest,
+                               length,
+                               slow_path,
+                               temp1,
+                               temp2,
+                               optimizations.GetCountIsDestinationLength());
+  {
+    // We use a block to end the scratch scope before the write barrier, thus
+    // freeing the temporary registers so they can be used in `MarkGCCard`.
+    UseScratchRegisterScope temps(masm);
+    Register temp3 = temps.AcquireW();
+    if (!optimizations.GetDoesNotNeedTypeCheck()) {
+      // Check whether all elements of the source array are assignable to the component
+      // type of the destination array. We do two checks: the classes are the same,
+      // or the destination is Object[]. If none of these checks succeed, we go to the
+      // slow path.
+      __ Ldr(temp1, MemOperand(dest, class_offset));
+      __ Ldr(temp2, MemOperand(src, class_offset));
+      bool did_unpoison = false;
+      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+          !optimizations.GetSourceIsNonPrimitiveArray()) {
+        // One or two of the references need to be unpoisoned. Unpoison them
+        // both to make the identity check valid.
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+        did_unpoison = true;
+      }
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp1->component_type_
+        __ Ldr(temp3, HeapOperand(temp1, component_offset));
+        __ Cbz(temp3, slow_path->GetEntryLabel());
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+        __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ Cbnz(temp3, slow_path->GetEntryLabel());
+      }
+
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp2->component_type_
+        __ Ldr(temp3, HeapOperand(temp2, component_offset));
+        __ Cbz(temp3, slow_path->GetEntryLabel());
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+        __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ Cbnz(temp3, slow_path->GetEntryLabel());
+      }
+
+      __ Cmp(temp1, temp2);
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        vixl::Label do_copy;
+        __ B(&do_copy, eq);
+        if (!did_unpoison) {
+          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+        }
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ Ldr(temp1, HeapOperand(temp1, component_offset));
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+        // /* HeapReference<Class> */ temp1 = temp1->super_class_
+        __ Ldr(temp1, HeapOperand(temp1, super_offset));
+        // No need to unpoison the result, we're comparing against null.
+        __ Cbnz(temp1, slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ B(slow_path->GetEntryLabel(), ne);
+      }
+    } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+      // Bail out if the source is not a non primitive array.
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      __ Ldr(temp1, HeapOperand(src.W(), class_offset));
+      codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
+      __ Ldr(temp3, HeapOperand(temp1, component_offset));
+      __ Cbz(temp3, slow_path->GetEntryLabel());
+      codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+      __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ Cbnz(temp3, slow_path->GetEntryLabel());
+    }
+
+    Register src_curr_addr = temp1.X();
+    Register dst_curr_addr = temp2.X();
+    Register src_stop_addr = temp3.X();
+
+    GenSystemArrayCopyAddresses(masm,
+                                Primitive::kPrimNot,
+                                src,
+                                src_pos,
+                                dest,
+                                dest_pos,
+                                length,
+                                src_curr_addr,
+                                dst_curr_addr,
+                                src_stop_addr);
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison, nor do any read barrier as the next uses of the destination
+    // array will do it.
+    vixl::Label loop, done;
+    const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+    __ Bind(&loop);
+    __ Cmp(src_curr_addr, src_stop_addr);
+    __ B(&done, eq);
+    {
+      Register tmp = temps.AcquireW();
+      __ Ldr(tmp, MemOperand(src_curr_addr, element_size, vixl::PostIndex));
+      __ Str(tmp, MemOperand(dst_curr_addr, element_size, vixl::PostIndex));
+    }
+    __ B(&loop);
+    __ Bind(&done);
+  }
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
+static void GenIsInfinite(LocationSummary* locations,
+                          bool is64bit,
+                          vixl::MacroAssembler* masm) {
+  Operand infinity;
+  Register out;
+
+  if (is64bit) {
+    infinity = kPositiveInfinityDouble;
+    out = XRegisterFrom(locations->Out());
+  } else {
+    infinity = kPositiveInfinityFloat;
+    out = WRegisterFrom(locations->Out());
+  }
+
+  const Register zero = vixl::Assembler::AppropriateZeroRegFor(out);
+
+  MoveFPToInt(locations, is64bit, masm);
+  __ Eor(out, out, infinity);
+  // We don't care about the sign bit, so shift left.
+  __ Cmp(zero, Operand(out, LSL, 1));
+  __ Cset(out, eq);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARM64, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(ARM64, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
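
A minimal standalone sketch (not ART code; names and values are illustrative)
of why the ARM64 SystemArrayCopy fast path above bails out to the slow path
when source and destination are the same array and src_pos < dest_pos: the
intrinsic copies forward, and a forward copy over such an overlap reads slots
it has already overwritten.

#include <cassert>

// Forward element-by-element copy, as the intrinsic's copy loop does.
static void ForwardCopy(int* array, int src_pos, int dest_pos, int length) {
  for (int i = 0; i < length; ++i) {
    array[dest_pos + i] = array[src_pos + i];
  }
}

int main() {
  int a[5] = {1, 2, 3, 4, 5};
  ForwardCopy(a, /* src_pos */ 0, /* dest_pos */ 1, /* length */ 4);
  // System.arraycopy semantics require {1, 1, 2, 3, 4}; the raw forward copy
  // instead yields {1, 1, 1, 1, 1}, which is why the fast path requires
  // src_pos >= dest_pos whenever the two arrays may be the same.
  assert(a[2] == 1 && a[4] == 1);
  return 0;
}
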
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index dd9294d..db60238 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -107,6 +107,8 @@
   V(StringGetCharsNoCheck, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
   V(StringIndexOf, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
   V(StringIndexOfAfter, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringIsEmpty, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \
+  V(StringLength, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \
   V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
   V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
   V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 1280587..20b61f8 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -614,8 +614,6 @@
                         Primitive::Type type,
                         bool isR6,
                         MipsAssembler* assembler) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
   Register out = locations->Out().AsRegister<Register>();
 
   // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
@@ -663,7 +661,8 @@
       __ MulR2(out, out, TMP);
     }
     __ Srl(out, out, 24);
-  } else if (type == Primitive::kPrimLong) {
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimLong);
     Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
     Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
     Register tmp_hi = locations->GetTemp(0).AsRegister<Register>();
@@ -2068,10 +2067,11 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check for code points > 0xFFFF. Either a slow-path check when we
-  // don't know statically, or directly dispatch if we have a constant.
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCodeMIPS* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
     if (!IsUint<16>(invoke->InputAt(1)->AsIntConstant()->GetValue())) {
       // Always needs the slow-path. We could directly dispatch to it,
       // but this case should be rare, so for simplicity just put the
@@ -2082,7 +2082,7 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     Register char_reg = locations->InAt(1).AsRegister<Register>();
     // The "bltu" conditional branch tests to see if the character value
     // fits in a valid 16-bit (MIPS halfword) value. If it doesn't then
@@ -2284,10 +2284,10 @@
     // If one, or more, of the exponent bits is zero, then the number can't be infinite.
     if (type == Primitive::kPrimDouble) {
       __ MoveFromFpuHigh(TMP, in);
-      __ LoadConst32(AT, 0x7FF00000);
+      __ LoadConst32(AT, High32Bits(kPositiveInfinityDouble));
     } else {
       __ Mfc1(TMP, in);
-      __ LoadConst32(AT, 0x7F800000);
+      __ LoadConst32(AT, kPositiveInfinityFloat);
     }
     __ Xor(TMP, TMP, AT);
 
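
Both the ARM64 GenIsInfinite above and this MIPS change rely on the same bit
trick: XOR the raw floating-point bits with the positive-infinity pattern,
then shift out the sign bit; the result is zero exactly for +/-infinity. A
minimal standalone sketch (not ART code; kPosInf mirrors kPositiveInfinityFloat):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <limits>

static bool IsInfiniteFloat(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));  // the MoveFPToInt step
  const uint32_t kPosInf = 0x7F800000u;      // exponent all ones, mantissa zero
  // Eor plus "Cmp(zero, Operand(out, LSL, 1))" from the ARM64 code: the
  // left shift discards the sign bit, so -infinity matches as well.
  return ((bits ^ kPosInf) << 1) == 0u;
}

int main() {
  assert(IsInfiniteFloat(std::numeric_limits<float>::infinity()));
  assert(IsInfiniteFloat(-std::numeric_limits<float>::infinity()));
  assert(!IsInfiniteFloat(0.0f));
  assert(!IsInfiniteFloat(std::numeric_limits<float>::quiet_NaN()));
  return 0;
}
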
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index cf973aa..7188e1c 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -385,6 +385,92 @@
   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
 }
 
+static void GenBitCount(LocationSummary* locations,
+                        const Primitive::Type type,
+                        Mips64Assembler* assembler) {
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  //
+  // A generalization of the best bit counting method to integers of
+  // bit-widths up to 128 (parameterized by type T) is this:
+  //
+  // v = v - ((v >> 1) & (T)~(T)0/3);                           // temp
+  // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);      // temp
+  // v = (v + (v >> 4)) & (T)~(T)0/255*15;                      // temp
+  // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; // count
+  //
+  // For comparison, for 32-bit quantities, this algorithm can be executed
+  // using 20 MIPS instructions (the calls to LoadConst32() generate two
+  // machine instructions each for the values being used in this algorithm).
+  // An (unrolled) loop-based algorithm requires 25 instructions.
+  //
+  // For a 64-bit operand this can be performed in 24 instructions, compared
+  // to an (unrolled) loop-based algorithm, which requires 38 instructions.
+  //
+  // There are algorithms which are faster in the cases where very few
+  // bits are set but the algorithm here attempts to minimize the total
+  // number of instructions executed even when a large number of bits
+  // are set.
+
+  if (type == Primitive::kPrimInt) {
+    __ Srl(TMP, in, 1);
+    __ LoadConst32(AT, 0x55555555);
+    __ And(TMP, TMP, AT);
+    __ Subu(TMP, in, TMP);
+    __ LoadConst32(AT, 0x33333333);
+    __ And(out, TMP, AT);
+    __ Srl(TMP, TMP, 2);
+    __ And(TMP, TMP, AT);
+    __ Addu(TMP, out, TMP);
+    __ Srl(out, TMP, 4);
+    __ Addu(out, out, TMP);
+    __ LoadConst32(AT, 0x0F0F0F0F);
+    __ And(out, out, AT);
+    __ LoadConst32(TMP, 0x01010101);
+    __ MulR6(out, out, TMP);
+    __ Srl(out, out, 24);
+  } else if (type == Primitive::kPrimLong) {
+    __ Dsrl(TMP, in, 1);
+    __ LoadConst64(AT, 0x5555555555555555L);
+    __ And(TMP, TMP, AT);
+    __ Dsubu(TMP, in, TMP);
+    __ LoadConst64(AT, 0x3333333333333333L);
+    __ And(out, TMP, AT);
+    __ Dsrl(TMP, TMP, 2);
+    __ And(TMP, TMP, AT);
+    __ Daddu(TMP, out, TMP);
+    __ Dsrl(out, TMP, 4);
+    __ Daddu(out, out, TMP);
+    __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);
+    __ And(out, out, AT);
+    __ LoadConst64(TMP, 0x0101010101010101L);
+    __ Dmul(out, out, TMP);
+    __ Dsrl32(out, out, 24);
+  }
+}
+
+// int java.lang.Integer.bitCount(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// int java.lang.Long.bitCount(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
 static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
   FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
   FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
@@ -1477,10 +1563,11 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check for code points > 0xFFFF. Either a slow-path check when we
-  // don't know statically, or directly dispatch if we have a constant.
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCodeMIPS64* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
     if (!IsUint<16>(invoke->InputAt(1)->AsIntConstant()->GetValue())) {
       // Always needs the slow-path. We could directly dispatch to it,
       // but this case should be rare, so for simplicity just put the
@@ -1491,7 +1578,7 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     GpuRegister char_reg = locations->InAt(1).AsRegister<GpuRegister>();
     __ LoadConst32(tmp_reg, std::numeric_limits<uint16_t>::max());
     slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke);
@@ -1693,9 +1780,6 @@
   GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(MIPS64, LongBitCount)
-
 UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundFloat)
 
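
A minimal standalone sketch (not ART code) of the parallel bit count the new
MIPS64 GenBitCount emits, following the Stanford bithacks recipe cited in its
comments; the final multiply-and-shift corresponds to the Dmul/Dsrl32 pair:

#include <cassert>
#include <cstdint>

static uint32_t BitCount64(uint64_t v) {
  v = v - ((v >> 1) & 0x5555555555555555ULL);                            // 2-bit sums
  v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);  // 4-bit sums
  v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                            // byte sums
  // The multiply accumulates all byte counts into the top byte; shifting
  // right by 56 (Dsrl32 out, out, 24 in the intrinsic) extracts it.
  return static_cast<uint32_t>((v * 0x0101010101010101ULL) >> 56);
}

int main() {
  assert(BitCount64(0u) == 0u);
  assert(BitCount64(0xFFFFFFFFFFFFFFFFULL) == 64u);
  assert(BitCount64(0x8000000000000001ULL) == 2u);
  return 0;
}
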
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 95fdb9b..d0edeca 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1418,9 +1418,10 @@
   DCHECK_EQ(out, EDI);
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
-  // or directly dispatch if we have a constant.
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCode* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
     if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
     std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
@@ -1431,7 +1432,7 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
     slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
     codegen->AddSlowPath(slow_path);
@@ -2387,10 +2388,10 @@
   if (invoke->InputAt(0)->IsConstant()) {
     // Evaluate this at compile time.
     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
-    value = is_long
+    int32_t result = is_long
         ? POPCOUNT(static_cast<uint64_t>(value))
         : POPCOUNT(static_cast<uint32_t>(value));
-    codegen->Load32BitValue(out, value);
+    codegen->Load32BitValue(out, result);
     return;
   }
 
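
The StringIndexOf change repeated across the back ends boils down to a
three-way dispatch on the code-point argument. A minimal standalone sketch
(not ART code; the enum and function names are illustrative):

#include <cstdint>

enum class Dispatch { kFastPathOnly, kSlowPathAlways, kRuntimeCheck };

static Dispatch ClassifyCodePoint(bool is_constant,
                                  uint32_t constant_value,
                                  bool is_char_typed) {
  if (is_constant) {
    // A large constant always needs the supplementary-code-point slow path;
    // a small constant never does.
    return constant_value > 0xFFFFu ? Dispatch::kSlowPathAlways
                                    : Dispatch::kFastPathOnly;
  }
  // A char value cannot exceed 0xFFFF by construction, so only a non-char,
  // non-constant argument needs the runtime comparison and slow path.
  return is_char_typed ? Dispatch::kFastPathOnly : Dispatch::kRuntimeCheck;
}
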
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 9e568f7..4ee2368 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1150,15 +1150,13 @@
   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
   codegen_->AddSlowPath(slow_path);
 
-  NearLabel ok;
+  NearLabel conditions_on_positions_validated;
   SystemArrayCopyOptimizations optimizations(invoke);
 
-  if (!optimizations.GetDestinationIsSource()) {
-    if (!src_pos.IsConstant() || !dest_pos.IsConstant()) {
-      __ cmpl(src, dest);
-    }
+  if (!optimizations.GetDestinationIsSource() &&
+      (!src_pos.IsConstant() || !dest_pos.IsConstant())) {
+    __ cmpl(src, dest);
   }
-
   // If source and destination are the same, we go to slow path if we need to do
   // forward copying.
   if (src_pos.IsConstant()) {
@@ -1169,14 +1167,14 @@
              || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
     } else {
       if (!optimizations.GetDestinationIsSource()) {
-        __ j(kNotEqual, &ok);
+        __ j(kNotEqual, &conditions_on_positions_validated);
       }
       __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
       __ j(kGreater, slow_path->GetEntryLabel());
     }
   } else {
     if (!optimizations.GetDestinationIsSource()) {
-      __ j(kNotEqual, &ok);
+      __ j(kNotEqual, &conditions_on_positions_validated);
     }
     if (dest_pos.IsConstant()) {
       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
@@ -1188,7 +1186,7 @@
     }
   }
 
-  __ Bind(&ok);
+  __ Bind(&conditions_on_positions_validated);
 
   if (!optimizations.GetSourceIsNotNull()) {
     // Bail out if the source is null.
@@ -1241,7 +1239,7 @@
     bool did_unpoison = false;
     if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
         !optimizations.GetSourceIsNonPrimitiveArray()) {
-      // One or two of the references need to be unpoisoned. Unpoisoned them
+      // One or two of the references need to be unpoisoned. Unpoison them
       // both to make the identity check valid.
       __ MaybeUnpoisonHeapReference(temp1);
       __ MaybeUnpoisonHeapReference(temp2);
@@ -1250,6 +1248,7 @@
 
     if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
       // Bail out if the destination is not a non primitive array.
+      // /* HeapReference<Class> */ TMP = temp1->component_type_
       __ movl(CpuRegister(TMP), Address(temp1, component_offset));
       __ testl(CpuRegister(TMP), CpuRegister(TMP));
       __ j(kEqual, slow_path->GetEntryLabel());
@@ -1260,6 +1259,7 @@
 
     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
       // Bail out if the source is not a non primitive array.
+      // /* HeapReference<Class> */ TMP = temp2->component_type_
       __ movl(CpuRegister(TMP), Address(temp2, component_offset));
       __ testl(CpuRegister(TMP), CpuRegister(TMP));
       __ j(kEqual, slow_path->GetEntryLabel());
@@ -1276,8 +1276,10 @@
       if (!did_unpoison) {
         __ MaybeUnpoisonHeapReference(temp1);
       }
+      // /* HeapReference<Class> */ temp1 = temp1->component_type_
       __ movl(temp1, Address(temp1, component_offset));
       __ MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp1 = temp1->super_class_
       __ movl(temp1, Address(temp1, super_offset));
       // No need to unpoison the result, we're comparing against null.
       __ testl(temp1, temp1);
@@ -1289,8 +1291,10 @@
   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
     // Bail out if the source is not a non primitive array.
+    // /* HeapReference<Class> */ temp1 = src->klass_
     __ movl(temp1, Address(src, class_offset));
     __ MaybeUnpoisonHeapReference(temp1);
+    // /* HeapReference<Class> */ TMP = temp1->component_type_
     __ movl(CpuRegister(TMP), Address(temp1, component_offset));
     __ testl(CpuRegister(TMP), CpuRegister(TMP));
     __ j(kEqual, slow_path->GetEntryLabel());
@@ -1513,9 +1517,10 @@
   DCHECK_EQ(out.AsRegister(), RDI);
 
   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
-  // or directly dispatch if we have a constant.
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
   SlowPathCode* slow_path = nullptr;
-  if (invoke->InputAt(1)->IsIntConstant()) {
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
     if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
     std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
@@ -1526,7 +1531,7 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
-  } else {
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
     slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
     codegen->AddSlowPath(slow_path);
@@ -2402,10 +2407,10 @@
   if (invoke->InputAt(0)->IsConstant()) {
     // Evaluate this at compile time.
     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
-    value = is_long
+    int32_t result = is_long
         ? POPCOUNT(static_cast<uint64_t>(value))
         : POPCOUNT(static_cast<uint32_t>(value));
-    codegen->Load32BitValue(out, value);
+    codegen->Load32BitValue(out, result);
     return;
   }
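
The x86 and x86-64 hunks fix the same constant-folding slip: the 32-bit
population count was written back into the 64-bit `value` and then passed to
Load32BitValue. A minimal standalone sketch of the corrected shape (not ART
code; the __builtin_popcount* builtins stand in for ART's POPCOUNT macro):

#include <cstdint>

static int32_t FoldBitCount(int64_t value, bool is_long) {
  // The count is at most 64, so it belongs in a 32-bit result variable
  // rather than overwriting the 64-bit input.
  int32_t result = is_long
      ? __builtin_popcountll(static_cast<uint64_t>(value))
      : __builtin_popcount(static_cast<uint32_t>(value));
  return result;
}
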
 
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 7a1e06b..7543cd6 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -79,8 +79,15 @@
 
 void LICM::Run() {
   DCHECK(side_effects_.HasRun());
+
   // Only used during debug.
-  ArenaBitVector visited(graph_->GetArena(), graph_->GetBlocks().size(), false, kArenaAllocLICM);
+  ArenaBitVector* visited = nullptr;
+  if (kIsDebugBuild) {
+    visited = new (graph_->GetArena()) ArenaBitVector(graph_->GetArena(),
+                                                      graph_->GetBlocks().size(),
+                                                      false,
+                                                      kArenaAllocLICM);
+  }
 
   // Post order visit to visit inner loops before outer loops.
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
@@ -94,31 +101,24 @@
     SideEffects loop_effects = side_effects_.GetLoopEffects(block);
     HBasicBlock* pre_header = loop_info->GetPreHeader();
 
-    bool contains_irreducible_loop = false;
-    if (graph_->HasIrreducibleLoops()) {
-      for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
-        if (it_loop.Current()->GetLoopInformation()->IsIrreducible()) {
-          contains_irreducible_loop = true;
-          break;
-        }
-      }
-    }
-
     for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
       HBasicBlock* inner = it_loop.Current();
       DCHECK(inner->IsInLoop());
       if (inner->GetLoopInformation() != loop_info) {
         // Thanks to post order visit, inner loops were already visited.
-        DCHECK(visited.IsBitSet(inner->GetBlockId()));
+        DCHECK(visited->IsBitSet(inner->GetBlockId()));
         continue;
       }
-      visited.SetBit(inner->GetBlockId());
+      if (kIsDebugBuild) {
+        visited->SetBit(inner->GetBlockId());
+      }
 
-      if (contains_irreducible_loop) {
+      if (loop_info->ContainsIrreducibleLoop()) {
         // We cannot licm in an irreducible loop, or in a natural loop containing an
         // irreducible loop.
         continue;
       }
+      DCHECK(!loop_info->IsIrreducible());
 
       // We can move an instruction that can throw only if it is the first
       // throwing instruction in the loop. Note that the first potentially
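
The LICM change moves a debug-only bit vector behind kIsDebugBuild so release
builds skip the arena allocation entirely. A minimal standalone sketch of the
pattern (not ART code; std::vector<bool> stands in for ArenaBitVector):

#include <cstddef>
#include <memory>
#include <vector>

#ifdef NDEBUG
constexpr bool kIsDebugBuild = false;
#else
constexpr bool kIsDebugBuild = true;
#endif

void Run(size_t num_blocks) {
  std::unique_ptr<std::vector<bool>> visited;
  if (kIsDebugBuild) {
    // Allocated only when the DCHECKs that read it are compiled in.
    visited = std::make_unique<std::vector<bool>>(num_blocks, false);
  }
  // ... every later access is guarded the same way, mirroring the diff:
  if (kIsDebugBuild) {
    (*visited)[0] = true;
  }
}
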
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index d446539..2a62643 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -169,13 +169,11 @@
   BuildLoop();
 
   // Populate the loop with instructions: set/get array with different types.
-  // ArrayGet is typed as kPrimByte and ArraySet given a float value in order to
-  // avoid SsaBuilder's typing of ambiguous array operations from reference type info.
   HInstruction* get_array = new (&allocator_) HArrayGet(
-      parameter_, int_constant_, Primitive::kPrimByte, 0);
+      parameter_, int_constant_, Primitive::kPrimInt, 0);
   loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
   HInstruction* set_array = new (&allocator_) HArraySet(
-      parameter_, int_constant_, float_constant_, Primitive::kPrimShort, 0);
+      parameter_, int_constant_, float_constant_, Primitive::kPrimFloat, 0);
   loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_array->GetBlock(), loop_body_);
@@ -189,13 +187,11 @@
   BuildLoop();
 
   // Populate the loop with instructions: set/get array with same types.
-  // ArrayGet is typed as kPrimByte and ArraySet given a float value in order to
-  // avoid SsaBuilder's typing of ambiguous array operations from reference type info.
   HInstruction* get_array = new (&allocator_) HArrayGet(
-      parameter_, int_constant_, Primitive::kPrimByte, 0);
+      parameter_, int_constant_, Primitive::kPrimFloat, 0);
   loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
   HInstruction* set_array = new (&allocator_) HArraySet(
-      parameter_, get_array, float_constant_, Primitive::kPrimByte, 0);
+      parameter_, get_array, float_constant_, Primitive::kPrimFloat, 0);
   loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_array->GetBlock(), loop_body_);
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 3202493..f9a955f 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -211,8 +211,8 @@
    *
    * Which becomes the following graph (numbered by lifetime position):
    *       2: constant0
-   *       4: constant4
-   *       6: constant5
+   *       4: constant5
+   *       6: constant4
    *       8: goto
    *           |
    *       12: goto
@@ -247,7 +247,7 @@
   liveness.Analyze();
 
   // Test for the 0 constant.
-  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  LiveInterval* interval = graph->GetIntConstant(0)->GetLiveInterval();
   LiveRange* range = interval->GetFirstRange();
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the loop phi so instruction is live until
@@ -256,18 +256,18 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the 4 constant.
-  interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval();
+  interval = graph->GetIntConstant(4)->GetLiveInterval();
   range = interval->GetFirstRange();
   // The instruction is live until the end of the loop.
-  ASSERT_EQ(4u, range->GetStart());
+  ASSERT_EQ(6u, range->GetStart());
   ASSERT_EQ(24u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the 5 constant.
-  interval = liveness.GetInstructionFromSsaIndex(2)->GetLiveInterval();
+  interval = graph->GetIntConstant(5)->GetLiveInterval();
   range = interval->GetFirstRange();
   // The instruction is live until the return instruction after the loop.
-  ASSERT_EQ(6u, range->GetStart());
+  ASSERT_EQ(4u, range->GetStart());
   ASSERT_EQ(26u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
@@ -441,7 +441,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   HPhi* phi = liveness.GetInstructionFromSsaIndex(4)->AsPhi();
-  ASSERT_TRUE(phi->GetUses().HasOnlyOneUse());
+  ASSERT_TRUE(phi->GetUses().HasExactlyOneElement());
   interval = phi->GetLiveInterval();
   range = interval->GetFirstRange();
   ASSERT_EQ(26u, range->GetStart());
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 92a987c..bd74368 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -154,7 +154,7 @@
   // return a;
   //
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi)
+  // (constant0, constant5, constant4, phi)
   const char* expected =
     "Block 0\n"  // entry block
     "  live in: (0000)\n"
@@ -165,11 +165,11 @@
     "  live out: (0110)\n"
     "  kill: (0000)\n"
     "Block 2\n"  // else block
-    "  live in: (0100)\n"
+    "  live in: (0010)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 3\n"  // then block
-    "  live in: (0010)\n"
+    "  live in: (0100)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 4\n"  // return block
@@ -291,7 +291,7 @@
   // }
   // return 5;
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi)
+  // (constant0, constant5, constant4, phi)
   const char* expected =
     "Block 0\n"
     "  live in: (0000)\n"
@@ -310,7 +310,7 @@
     "  live out: (0110)\n"
     "  kill: (0000)\n"
     "Block 4\n"  // return block
-    "  live in: (0010)\n"
+    "  live in: (0100)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 5\n"  // exit block
@@ -386,7 +386,7 @@
   // Make sure we create a preheader of a loop when a header originally has two
   // incoming blocks and one back edge.
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi in block 8)
+  // (constant0, constant5, constant4, phi in block 8)
   const char* expected =
     "Block 0\n"
     "  live in: (0000)\n"
@@ -397,11 +397,11 @@
     "  live out: (0110)\n"
     "  kill: (0000)\n"
     "Block 2\n"
-    "  live in: (0100)\n"
+    "  live in: (0010)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 3\n"
-    "  live in: (0010)\n"
+    "  live in: (0100)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 4\n"  // loop header
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index e1977b1..8a75a90 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -43,31 +43,29 @@
 
     // Visit all uses to determine if this reference can spread into the heap,
     // a method call, etc.
-    for (HUseIterator<HInstruction*> use_it(reference_->GetUses());
-         !use_it.Done();
-         use_it.Advance()) {
-      HInstruction* use = use_it.Current()->GetUser();
-      DCHECK(!use->IsNullCheck()) << "NullCheck should have been eliminated";
-      if (use->IsBoundType()) {
+    for (const HUseListNode<HInstruction*>& use : reference_->GetUses()) {
+      HInstruction* user = use.GetUser();
+      DCHECK(!user->IsNullCheck()) << "NullCheck should have been eliminated";
+      if (user->IsBoundType()) {
         // BoundType shouldn't normally be necessary for a NewInstance.
         // Just be conservative for the uncommon cases.
         is_singleton_ = false;
         is_singleton_and_not_returned_ = false;
         return;
       }
-      if (use->IsPhi() || use->IsSelect() || use->IsInvoke() ||
-          (use->IsInstanceFieldSet() && (reference_ == use->InputAt(1))) ||
-          (use->IsUnresolvedInstanceFieldSet() && (reference_ == use->InputAt(1))) ||
-          (use->IsStaticFieldSet() && (reference_ == use->InputAt(1))) ||
-          (use->IsUnresolvedStaticFieldSet() && (reference_ == use->InputAt(0))) ||
-          (use->IsArraySet() && (reference_ == use->InputAt(2)))) {
+      if (user->IsPhi() || user->IsSelect() || user->IsInvoke() ||
+          (user->IsInstanceFieldSet() && (reference_ == user->InputAt(1))) ||
+          (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(1))) ||
+          (user->IsStaticFieldSet() && (reference_ == user->InputAt(1))) ||
+          (user->IsUnresolvedStaticFieldSet() && (reference_ == user->InputAt(0))) ||
+          (user->IsArraySet() && (reference_ == user->InputAt(2)))) {
         // reference_ is merged to HPhi/HSelect, passed to a callee, or stored to heap.
         // reference_ isn't the only name that can refer to its value anymore.
         is_singleton_ = false;
         is_singleton_and_not_returned_ = false;
         return;
       }
-      if (use->IsReturn()) {
+      if (user->IsReturn()) {
         is_singleton_and_not_returned_ = false;
       }
     }
@@ -480,7 +478,7 @@
                             // alias analysis and won't be as effective.
   bool has_volatile_;       // If there are volatile field accesses.
   bool has_monitor_operations_;    // If there are monitor operations.
-  bool may_deoptimize_;
+  bool may_deoptimize_;     // Only true for HDeoptimize with single-frame deoptimization.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
 };
@@ -551,19 +549,20 @@
     }
 
     // At this point, stores in possibly_removed_stores_ can be safely removed.
-    size = possibly_removed_stores_.size();
-    for (size_t i = 0; i < size; i++) {
+    for (size_t i = 0, e = possibly_removed_stores_.size(); i < e; i++) {
       HInstruction* store = possibly_removed_stores_[i];
       DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet());
       store->GetBlock()->RemoveInstruction(store);
     }
 
-    // TODO: remove unnecessary allocations.
-    // Eliminate instructions in singleton_new_instances_ that:
-    // - don't have uses,
-    // - don't have finalizers,
-    // - are instantiable and accessible,
-    // - have no/separate clinit check.
+    // Eliminate allocations that are not used.
+    for (size_t i = 0, e = singleton_new_instances_.size(); i < e; i++) {
+      HInstruction* new_instance = singleton_new_instances_[i];
+      if (!new_instance->HasNonEnvironmentUses()) {
+        new_instance->RemoveEnvironmentUsers();
+        new_instance->GetBlock()->RemoveInstruction(new_instance);
+      }
+    }
   }
 
  private:
@@ -734,19 +733,14 @@
       if (Primitive::PrimitiveKind(heap_value->GetType())
               != Primitive::PrimitiveKind(instruction->GetType())) {
         // The only situation where the same heap location has different type is when
-        // we do an array get from a null constant. In order to stay properly typed
-        // we do not merge the array gets.
+        // we do an array get on an instruction that originates from the null constant
+        // (the null could be behind a field access, an array access, a null check or
+        // a bound type).
+        // In order to stay properly typed on primitive types, we do not eliminate
+        // the array gets.
         if (kIsDebugBuild) {
           DCHECK(heap_value->IsArrayGet()) << heap_value->DebugName();
           DCHECK(instruction->IsArrayGet()) << instruction->DebugName();
-          HInstruction* array = instruction->AsArrayGet()->GetArray();
-          DCHECK(array->IsNullCheck()) << array->DebugName();
-          HInstruction* input = HuntForOriginalReference(array->InputAt(0));
-          DCHECK(input->IsNullConstant()) << input->DebugName();
-          array = heap_value->AsArrayGet()->GetArray();
-          DCHECK(array->IsNullCheck()) << array->DebugName();
-          input = HuntForOriginalReference(array->InputAt(0));
-          DCHECK(input->IsNullConstant()) << input->DebugName();
         }
         return;
       }
@@ -969,8 +963,8 @@
     if (!heap_location_collector_.MayDeoptimize() &&
         ref_info->IsSingletonAndNotReturned() &&
         !new_instance->IsFinalizable() &&
-        !new_instance->CanThrow()) {
-      // TODO: add new_instance to singleton_new_instances_ and enable allocation elimination.
+        !new_instance->NeedsAccessCheck()) {
+      singleton_new_instances_.push_back(new_instance);
     }
     ArenaVector<HInstruction*>& heap_values =
         heap_values_for_[new_instance->GetBlock()->GetBlockId()];
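
The rewritten use loop above implements a small escape analysis: a fresh
allocation stays a "singleton" only while no use can publish it, and "not
returned" only while no use returns it. A minimal standalone sketch (not ART
code; UseKind abstracts the HInstruction user checks):

#include <vector>

struct UseKind {
  bool publishes;  // merged into a phi/select, passed to a call, stored to heap
  bool returns;    // flows into a return
};

struct SingletonInfo {
  bool is_singleton = true;
  bool is_singleton_and_not_returned = true;
};

static SingletonInfo Classify(const std::vector<UseKind>& uses) {
  SingletonInfo info;
  for (const UseKind& use : uses) {
    if (use.publishes) {
      // Another name can now refer to the value; give up on both properties.
      return {false, false};
    }
    if (use.returns) {
      info.is_singleton_and_not_returned = false;
    }
  }
  return info;
}

int main() {
  std::vector<UseKind> uses = {{false, true}};  // a single returning use
  SingletonInfo info = Classify(uses);
  return (info.is_singleton && !info.is_singleton_and_not_returned) ? 0 : 1;
}
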
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 05bb901..60329cc 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -56,9 +56,11 @@
   // Nodes that we're currently visiting, indexed by block id.
   ArenaBitVector visiting(arena_, blocks_.size(), false, kArenaAllocGraphBuilder);
   // Number of successors visited from a given node, indexed by block id.
-  ArenaVector<size_t> successors_visited(blocks_.size(), 0u, arena_->Adapter());
+  ArenaVector<size_t> successors_visited(blocks_.size(),
+                                         0u,
+                                         arena_->Adapter(kArenaAllocGraphBuilder));
   // Stack of nodes that we're currently visiting (same as marked in "visiting" above).
-  ArenaVector<HBasicBlock*> worklist(arena_->Adapter());
+  ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder));
   constexpr size_t kDefaultWorklistSize = 8;
   worklist.reserve(kDefaultWorklistSize);
   visited->SetBit(entry_block_->GetBlockId());
@@ -86,11 +88,7 @@
   }
 }
 
-static void RemoveAsUser(HInstruction* instruction) {
-  for (size_t i = 0; i < instruction->InputCount(); i++) {
-    instruction->RemoveAsUserOfInput(i);
-  }
-
+static void RemoveEnvironmentUses(HInstruction* instruction) {
   for (HEnvironment* environment = instruction->GetEnvironment();
        environment != nullptr;
        environment = environment->GetParent()) {
@@ -102,6 +100,14 @@
   }
 }
 
+static void RemoveAsUser(HInstruction* instruction) {
+  for (size_t i = 0; i < instruction->InputCount(); i++) {
+    instruction->RemoveAsUserOfInput(i);
+  }
+
+  RemoveEnvironmentUses(instruction);
+}
+
 void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const {
   for (size_t i = 0; i < blocks_.size(); ++i) {
     if (!visited.IsBitSet(i)) {
@@ -130,46 +136,44 @@
       if (block->IsExitBlock()) {
         SetExitBlock(nullptr);
       }
+      // Mark the block as removed. This is used by the HGraphBuilder to discard
+      // the block as a branch target.
+      block->SetGraph(nullptr);
     }
   }
 }
 
 GraphAnalysisResult HGraph::BuildDominatorTree() {
-  // (1) Simplify the CFG so that catch blocks have only exceptional incoming
-  //     edges. This invariant simplifies building SSA form because Phis cannot
-  //     collect both normal- and exceptional-flow values at the same time.
-  SimplifyCatchBlocks();
-
   ArenaBitVector visited(arena_, blocks_.size(), false, kArenaAllocGraphBuilder);
 
-  // (2) Find the back edges in the graph doing a DFS traversal.
+  // (1) Find the back edges in the graph doing a DFS traversal.
   FindBackEdges(&visited);
 
-  // (3) Remove instructions and phis from blocks not visited during
+  // (2) Remove instructions and phis from blocks not visited during
   //     the initial DFS as users from other instructions, so that
   //     users can be safely removed before uses later.
   RemoveInstructionsAsUsersFromDeadBlocks(visited);
 
-  // (4) Remove blocks not visited during the initial DFS.
+  // (3) Remove blocks not visited during the initial DFS.
   //     Step (5) requires dead blocks to be removed from the
   //     predecessors list of live blocks.
   RemoveDeadBlocks(visited);
 
-  // (5) Simplify the CFG now, so that we don't need to recompute
+  // (4) Simplify the CFG now, so that we don't need to recompute
   //     dominators and the reverse post order.
   SimplifyCFG();
 
-  // (6) Compute the dominance information and the reverse post order.
+  // (5) Compute the dominance information and the reverse post order.
   ComputeDominanceInformation();
 
-  // (7) Analyze loops discovered through back edge analysis, and
+  // (6) Analyze loops discovered through back edge analysis, and
   //     set the loop information on each block.
   GraphAnalysisResult result = AnalyzeLoops();
   if (result != kAnalysisSuccess) {
     return result;
   }
 
-  // (8) Precompute per-block try membership before entering the SSA builder,
+  // (7) Precompute per-block try membership before entering the SSA builder,
   //     which needs the information to build catch block phis from values of
   //     locals at throwing instructions inside try blocks.
   ComputeTryBlockInformation();
@@ -204,17 +208,35 @@
   return instruction;
 }
 
+static bool UpdateDominatorOfSuccessor(HBasicBlock* block, HBasicBlock* successor) {
+  DCHECK(ContainsElement(block->GetSuccessors(), successor));
+
+  HBasicBlock* old_dominator = successor->GetDominator();
+  HBasicBlock* new_dominator =
+      (old_dominator == nullptr) ? block
+                                 : CommonDominator::ForPair(old_dominator, block);
+
+  if (old_dominator == new_dominator) {
+    return false;
+  } else {
+    successor->SetDominator(new_dominator);
+    return true;
+  }
+}
+
 void HGraph::ComputeDominanceInformation() {
   DCHECK(reverse_post_order_.empty());
   reverse_post_order_.reserve(blocks_.size());
   reverse_post_order_.push_back(entry_block_);
 
   // Number of visits of a given node, indexed by block id.
-  ArenaVector<size_t> visits(blocks_.size(), 0u, arena_->Adapter());
+  ArenaVector<size_t> visits(blocks_.size(), 0u, arena_->Adapter(kArenaAllocGraphBuilder));
   // Number of successors visited from a given node, indexed by block id.
-  ArenaVector<size_t> successors_visited(blocks_.size(), 0u, arena_->Adapter());
+  ArenaVector<size_t> successors_visited(blocks_.size(),
+                                         0u,
+                                         arena_->Adapter(kArenaAllocGraphBuilder));
   // Nodes for which we need to visit successors.
-  ArenaVector<HBasicBlock*> worklist(arena_->Adapter());
+  ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder));
   constexpr size_t kDefaultWorklistSize = 8;
   worklist.reserve(kDefaultWorklistSize);
   worklist.push_back(entry_block_);
@@ -226,15 +248,7 @@
       worklist.pop_back();
     } else {
       HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
-
-      if (successor->GetDominator() == nullptr) {
-        successor->SetDominator(current);
-      } else {
-        // The CommonDominator can work for multiple blocks as long as the
-        // domination information doesn't change. However, since we're changing
-        // that information here, we can use the finder only for pairs of blocks.
-        successor->SetDominator(CommonDominator::ForPair(successor->GetDominator(), current));
-      }
+      UpdateDominatorOfSuccessor(current, successor);
 
       // Once all the forward edges have been visited, we know the immediate
       // dominator of the block. We can then start visiting its successors.
@@ -246,6 +260,44 @@
     }
   }
 
+  // Check if the graph has back edges not dominated by their respective headers.
+  // If so, we need to update the dominators of those headers and recursively of
+  // their successors. We do that with a fix-point iteration over all blocks.
+  // The algorithm is guaranteed to terminate because it loops only if the sum
+  // of all dominator chains has decreased in the current iteration.
+  bool must_run_fix_point = false;
+  for (HBasicBlock* block : blocks_) {
+    if (block != nullptr &&
+        block->IsLoopHeader() &&
+        block->GetLoopInformation()->HasBackEdgeNotDominatedByHeader()) {
+      must_run_fix_point = true;
+      break;
+    }
+  }
+  if (must_run_fix_point) {
+    bool update_occurred = true;
+    while (update_occurred) {
+      update_occurred = false;
+      for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+        HBasicBlock* block = it.Current();
+        for (HBasicBlock* successor : block->GetSuccessors()) {
+          update_occurred |= UpdateDominatorOfSuccessor(block, successor);
+        }
+      }
+    }
+  }
+
+  // Make sure that there are no remaining blocks whose dominator information
+  // needs to be updated.
+  if (kIsDebugBuild) {
+    for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+      HBasicBlock* block = it.Current();
+      for (HBasicBlock* successor : block->GetSuccessors()) {
+        DCHECK(!UpdateDominatorOfSuccessor(block, successor));
+      }
+    }
+  }
+
   // Populate `dominated_blocks_` information after computing all dominators.
   // The potential presence of irreducible loops requires doing it afterwards.
   for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
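
A minimal standalone sketch (not ART code) of the fix-point logic added
above: for reducible graphs one reverse-post-order pass settles all
dominators, but a back edge not dominated by its header can leave stale
entries, so the pass is repeated until nothing changes. CommonDominatorOf
uses the classic intersection walk and stands in for CommonDominator::ForPair;
it assumes the entry block is its own dominator with the smallest RPO index.

#include <vector>

struct Block {
  int rpo_index = 0;            // position in reverse post order
  Block* dominator = nullptr;   // immediate dominator; entry points to itself
  std::vector<Block*> successors;
};

static Block* CommonDominatorOf(Block* a, Block* b) {
  while (a != b) {
    while (a->rpo_index > b->rpo_index) a = a->dominator;
    while (b->rpo_index > a->rpo_index) b = b->dominator;
  }
  return a;
}

static bool UpdateDominatorOfSuccessor(Block* block, Block* successor) {
  Block* old_dom = successor->dominator;
  Block* new_dom =
      (old_dom == nullptr) ? block : CommonDominatorOf(old_dom, block);
  if (new_dom == old_dom) {
    return false;
  }
  successor->dominator = new_dom;
  return true;
}

static void FixPointDominators(const std::vector<Block*>& reverse_post_order) {
  bool changed = true;
  while (changed) {  // terminates: dominators only move up the tree
    changed = false;
    for (Block* block : reverse_post_order) {
      for (Block* successor : block->successors) {
        changed |= UpdateDominatorOfSuccessor(block, successor);
      }
    }
  }
}
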
@@ -316,85 +368,10 @@
     }
   }
 
-  // Place the suspend check at the beginning of the header, so that live registers
-  // will be known when allocating registers. Note that code generation can still
-  // generate the suspend check at the back edge, but needs to be careful with
-  // loop phi spill slots (which are not written to at back edge).
   HInstruction* first_instruction = header->GetFirstInstruction();
-  if (!first_instruction->IsSuspendCheck()) {
-    HSuspendCheck* check = new (arena_) HSuspendCheck(header->GetDexPc());
-    header->InsertInstructionBefore(check, first_instruction);
-    first_instruction = check;
-  }
-  info->SetSuspendCheck(first_instruction->AsSuspendCheck());
-}
-
-static bool CheckIfPredecessorAtIsExceptional(const HBasicBlock& block, size_t pred_idx) {
-  HBasicBlock* predecessor = block.GetPredecessors()[pred_idx];
-  if (!predecessor->EndsWithTryBoundary()) {
-    // Only edges from HTryBoundary can be exceptional.
-    return false;
-  }
-  HTryBoundary* try_boundary = predecessor->GetLastInstruction()->AsTryBoundary();
-  if (try_boundary->GetNormalFlowSuccessor() == &block) {
-    // This block is the normal-flow successor of `try_boundary`, but it could
-    // also be one of its exception handlers if catch blocks have not been
-    // simplified yet. Predecessors are unordered, so we will consider the first
-    // occurrence to be the normal edge and a possible second occurrence to be
-    // the exceptional edge.
-    return !block.IsFirstIndexOfPredecessor(predecessor, pred_idx);
-  } else {
-    // This is not the normal-flow successor of `try_boundary`, hence it must be
-    // one of its exception handlers.
-    DCHECK(try_boundary->HasExceptionHandler(block));
-    return true;
-  }
-}
-
-void HGraph::SimplifyCatchBlocks() {
-  // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators
-  // can be invalidated. We remember the initial size to avoid iterating over the new blocks.
-  for (size_t block_id = 0u, end = blocks_.size(); block_id != end; ++block_id) {
-    HBasicBlock* catch_block = blocks_[block_id];
-    if (catch_block == nullptr || !catch_block->IsCatchBlock()) {
-      continue;
-    }
-
-    bool exceptional_predecessors_only = true;
-    for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
-      if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
-        exceptional_predecessors_only = false;
-        break;
-      }
-    }
-
-    if (!exceptional_predecessors_only) {
-      // Catch block has normal-flow predecessors and needs to be simplified.
-      // Splitting the block before its first instruction moves all its
-      // instructions into `normal_block` and links the two blocks with a Goto.
-      // Afterwards, incoming normal-flow edges are re-linked to `normal_block`,
-      // leaving `catch_block` with the exceptional edges only.
-      //
-      // Note that catch blocks with normal-flow predecessors cannot begin with
-      // a move-exception instruction, as guaranteed by the verifier. However,
-      // trivially dead predecessors are ignored by the verifier and such code
-      // has not been removed at this stage. We therefore ignore the assumption
-      // and rely on GraphChecker to enforce it after initial DCE is run (b/25492628).
-      HBasicBlock* normal_block = catch_block->SplitCatchBlockAfterMoveException();
-      if (normal_block == nullptr) {
-        // Catch block is either empty or only contains a move-exception. It must
-        // therefore be dead and will be removed during initial DCE. Do nothing.
-        DCHECK(!catch_block->EndsWithControlFlowInstruction());
-      } else {
-        // Catch block was split. Re-link normal-flow edges to the new block.
-        for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
-          if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
-            catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
-            --j;
-          }
-        }
-      }
-    }
+  if (first_instruction != nullptr && first_instruction->IsSuspendCheck()) {
+    // Called from DeadBlockElimination. Update SuspendCheck pointer.
+    info->SetSuspendCheck(first_instruction->AsSuspendCheck());
   }
 }
 
@@ -443,10 +420,9 @@
         HBasicBlock* successor = normal_successors[j];
         DCHECK(!successor->IsCatchBlock());
         if (successor == exit_block_) {
-          // Throw->TryBoundary->Exit. Special case which we do not want to split
-          // because Goto->Exit is not allowed.
+          // (Throw/Return/ReturnVoid)->TryBoundary->Exit. Special case which we
+          // do not want to split because Goto->Exit is not allowed.
           DCHECK(block->IsSingleTryBoundary());
-          DCHECK(block->GetSinglePredecessor()->GetLastInstruction()->IsThrow());
         } else if (successor->GetPredecessors().size() > 1) {
           SplitCriticalEdge(block, successor);
           // SplitCriticalEdge could have invalidated the `normal_successors`
@@ -459,8 +435,10 @@
     }
     if (block->IsLoopHeader()) {
       SimplifyLoop(block);
-    } else if (!block->IsEntryBlock() && block->GetFirstInstruction()->IsSuspendCheck()) {
-      // We are being called by the dead code elimination pass, and what used to be
+    } else if (!block->IsEntryBlock() &&
+               block->GetFirstInstruction() != nullptr &&
+               block->GetFirstInstruction()->IsSuspendCheck()) {
+      // We are being called by the dead code elimination pass, and what used to be
       // a loop got dismantled. Just remove the suspend check.
       block->RemoveInstruction(block->GetFirstInstruction());
     }
@@ -468,8 +446,10 @@
 }
 
 GraphAnalysisResult HGraph::AnalyzeLoops() const {
-  // Order does not matter.
-  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+  // We iterate post order to ensure we visit inner loops before outer loops.
+  // `PopulateRecursive` needs this guarantee to know whether a natural loop
+  // contains an irreducible loop.
+  for (HPostOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (block->IsLoopHeader()) {
       if (block->IsCatchBlock()) {
@@ -498,12 +478,25 @@
 }
 
 void HGraph::InsertConstant(HConstant* constant) {
-  // New constants are inserted before the final control-flow instruction
-  // of the graph, or at its end if called from the graph builder.
-  if (entry_block_->EndsWithControlFlowInstruction()) {
-    entry_block_->InsertInstructionBefore(constant, entry_block_->GetLastInstruction());
-  } else {
+  // New constants are inserted before the SuspendCheck at the bottom of the
+  // entry block. Note that this method can be called from the graph builder and
+  // the entry block therefore may not end with SuspendCheck->Goto yet.
+  HInstruction* insert_before = nullptr;
+
+  HInstruction* gota = entry_block_->GetLastInstruction();
+  if (gota != nullptr && gota->IsGoto()) {
+    HInstruction* suspend_check = gota->GetPrevious();
+    if (suspend_check != nullptr && suspend_check->IsSuspendCheck()) {
+      insert_before = suspend_check;
+    } else {
+      insert_before = gota;
+    }
+  }
+
+  if (insert_before == nullptr) {
     entry_block_->AddInstruction(constant);
+  } else {
+    entry_block_->InsertInstructionBefore(constant, insert_before);
   }
 }
 
@@ -589,16 +582,29 @@
 
   blocks_.SetBit(block->GetBlockId());
   block->SetInLoop(this);
+  if (block->IsLoopHeader()) {
+    // We're visiting loops in post-order, so inner loops must have been
+    // populated already.
+    DCHECK(block->GetLoopInformation()->IsPopulated());
+    if (block->GetLoopInformation()->IsIrreducible()) {
+      contains_irreducible_loop_ = true;
+    }
+  }
   for (HBasicBlock* predecessor : block->GetPredecessors()) {
     PopulateRecursive(predecessor);
   }
 }
 
-void HLoopInformation::PopulateIrreducibleRecursive(HBasicBlock* block) {
-  if (blocks_.IsBitSet(block->GetBlockId())) {
+void HLoopInformation::PopulateIrreducibleRecursive(HBasicBlock* block, ArenaBitVector* finalized) {
+  size_t block_id = block->GetBlockId();
+
+  // If `block` is in `finalized`, we know its membership in the loop has been
+  // decided and it does not need to be revisited.
+  if (finalized->IsBitSet(block_id)) {
     return;
   }
 
+  bool is_finalized = false;
   if (block->IsLoopHeader()) {
     // If we hit a loop header in an irreducible loop, we first check if the
     // pre header of that loop belongs to the currently analyzed loop. If it does,
@@ -606,26 +612,36 @@
     // Note that we cannot use GetPreHeader, as the loop may have not been populated
     // yet.
     HBasicBlock* pre_header = block->GetPredecessors()[0];
-    PopulateIrreducibleRecursive(pre_header);
+    PopulateIrreducibleRecursive(pre_header, finalized);
     if (blocks_.IsBitSet(pre_header->GetBlockId())) {
-      blocks_.SetBit(block->GetBlockId());
       block->SetInLoop(this);
+      blocks_.SetBit(block_id);
+      finalized->SetBit(block_id);
+      is_finalized = true;
+
       HLoopInformation* info = block->GetLoopInformation();
       for (HBasicBlock* back_edge : info->GetBackEdges()) {
-        PopulateIrreducibleRecursive(back_edge);
+        PopulateIrreducibleRecursive(back_edge, finalized);
       }
     }
   } else {
     // Visit all predecessors. If one predecessor is part of the loop, this
     // block is also part of this loop.
     for (HBasicBlock* predecessor : block->GetPredecessors()) {
-      PopulateIrreducibleRecursive(predecessor);
-      if (blocks_.IsBitSet(predecessor->GetBlockId())) {
-        blocks_.SetBit(block->GetBlockId());
+      PopulateIrreducibleRecursive(predecessor, finalized);
+      if (!is_finalized && blocks_.IsBitSet(predecessor->GetBlockId())) {
         block->SetInLoop(this);
+        blocks_.SetBit(block_id);
+        finalized->SetBit(block_id);
+        is_finalized = true;
       }
     }
   }
+
+  // All predecessors have been recursively visited. Mark finalized if not marked yet.
+  if (!is_finalized) {
+    finalized->SetBit(block_id);
+  }
 }
 
 void HLoopInformation::Populate() {
@@ -635,22 +651,51 @@
   // to end the recursion.
   // This is a recursive implementation of the algorithm described in
   // "Advanced Compiler Design & Implementation" (Muchnick) p192.
+  HGraph* graph = header_->GetGraph();
   blocks_.SetBit(header_->GetBlockId());
   header_->SetInLoop(this);
-  for (HBasicBlock* back_edge : GetBackEdges()) {
-    DCHECK(back_edge->GetDominator() != nullptr);
-    if (!header_->Dominates(back_edge)) {
-      irreducible_ = true;
-      header_->GetGraph()->SetHasIrreducibleLoops(true);
-      PopulateIrreducibleRecursive(back_edge);
-    } else {
-      if (header_->GetGraph()->IsCompilingOsr()) {
-        irreducible_ = true;
-        header_->GetGraph()->SetHasIrreducibleLoops(true);
-      }
+
+  bool is_irreducible_loop = HasBackEdgeNotDominatedByHeader();
+
+  if (is_irreducible_loop) {
+    ArenaBitVector visited(graph->GetArena(),
+                           graph->GetBlocks().size(),
+                           /* expandable */ false,
+                           kArenaAllocGraphBuilder);
+    // Stop marking blocks at the loop header.
+    visited.SetBit(header_->GetBlockId());
+
+    for (HBasicBlock* back_edge : GetBackEdges()) {
+      PopulateIrreducibleRecursive(back_edge, &visited);
+    }
+  } else {
+    for (HBasicBlock* back_edge : GetBackEdges()) {
       PopulateRecursive(back_edge);
     }
   }
+
+  if (!is_irreducible_loop && graph->IsCompilingOsr()) {
+    // When compiling in OSR mode, all loops in the compiled method may be entered
+    // from the interpreter. We treat this OSR entry point just like an extra entry
+    // to an irreducible loop, so we need to mark the method's loops as irreducible.
+    // This does not apply to inlined loops which do not act as OSR entry points.
+    if (suspend_check_ == nullptr) {
+      // We are just building the graph in OSR mode; this loop is not inlined. We never
+      // build an inner graph in OSR mode, as an OSR transition is only possible from
+      // the outer method.
+      is_irreducible_loop = true;
+    } else {
+      // Look at the suspend check's environment to determine if the loop was inlined.
+      DCHECK(suspend_check_->HasEnvironment());
+      if (!suspend_check_->GetEnvironment()->IsFromInlinedInvoke()) {
+        is_irreducible_loop = true;
+      }
+    }
+  }
+  if (is_irreducible_loop) {
+    irreducible_ = true;
+    contains_irreducible_loop_ = true;
+    graph->SetHasIrreducibleLoops(true);
+  }
 }
 
 HBasicBlock* HLoopInformation::GetPreHeader() const {
@@ -679,6 +724,16 @@
   return last_position;
 }
 
+bool HLoopInformation::HasBackEdgeNotDominatedByHeader() const {
+  for (HBasicBlock* back_edge : GetBackEdges()) {
+    DCHECK(back_edge->GetDominator() != nullptr);
+    if (!header_->Dominates(back_edge)) {
+      return true;
+    }
+  }
+  return false;
+}
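
The new helper reduces to a walk up the dominator tree for each back edge. A sketch, assuming each block stores its immediate dominator (ART's HBasicBlock::Dominates walks the same chain); the Block type here is illustrative.

#include <cassert>
#include <vector>

struct Block { const Block* idom = nullptr; };  // nullptr at the entry block

bool Dominates(const Block* header, const Block* block) {
  for (const Block* b = block; b != nullptr; b = b->idom) {
    if (b == header) return true;  // header is an ancestor in the dom tree
  }
  return false;
}

bool HasBackEdgeNotDominatedByHeader(const Block* header,
                                     const std::vector<const Block*>& back_edges) {
  for (const Block* back_edge : back_edges) {
    if (!Dominates(header, back_edge)) return true;  // irreducible loop
  }
  return false;
}

int main() {
  Block entry, header{&entry}, body{&header}, side{&entry};
  assert(!HasBackEdgeNotDominatedByHeader(&header, {&body}));  // reducible
  assert(HasBackEdgeNotDominatedByHeader(&header, {&side}));   // irreducible
}
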
+
 bool HBasicBlock::Dominates(HBasicBlock* other) const {
   // Walk up the dominator tree from `other`, to find out if `this`
   // is an ancestor.
@@ -710,8 +765,8 @@
     DCHECK_EQ(replacement->GetType(), Primitive::kPrimVoid);
     DCHECK_EQ(initial->GetBlock(), this);
     DCHECK_EQ(initial->GetType(), Primitive::kPrimVoid);
-    DCHECK(initial->GetUses().IsEmpty());
-    DCHECK(initial->GetEnvUses().IsEmpty());
+    DCHECK(initial->GetUses().empty());
+    DCHECK(initial->GetEnvUses().empty());
     replacement->SetBlock(this);
     replacement->SetId(GetGraph()->GetNextInstructionId());
     instructions_.InsertInstructionBefore(replacement, initial);
@@ -803,8 +858,8 @@
   instruction->SetBlock(nullptr);
   instruction_list->RemoveInstruction(instruction);
   if (ensure_safety) {
-    DCHECK(instruction->GetUses().IsEmpty());
-    DCHECK(instruction->GetEnvUses().IsEmpty());
+    DCHECK(instruction->GetUses().empty());
+    DCHECK(instruction->GetEnvUses().empty());
     RemoveAsUser(instruction);
   }
 }
@@ -868,8 +923,11 @@
 }
 
 void HEnvironment::RemoveAsUserOfInput(size_t index) const {
-  const HUserRecord<HEnvironment*>& user_record = vregs_[index];
-  user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
+  const HUserRecord<HEnvironment*>& env_use = vregs_[index];
+  HInstruction* user = env_use.GetInstruction();
+  auto before_env_use_node = env_use.GetBeforeUseNode();
+  user->env_uses_.erase_after(before_env_use_node);
+  user->FixUpUserRecordsAfterEnvUseRemoval(before_env_use_node);
 }
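
The rewrite above is O(1) because the user record stores an iterator positioned *before* its node, which is exactly what a singly linked list needs for erase_after(). A small sketch with std::forward_list, whose interface the intrusive list mirrors here:

#include <cassert>
#include <forward_list>

int main() {
  std::forward_list<int> env_uses = {10, 20, 30};
  auto before_20 = env_uses.begin();   // recorded when the use was added
  env_uses.erase_after(before_20);     // removes 20 without any traversal
  assert((env_uses == std::forward_list<int>{10, 30}));
}
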
 
 HInstruction::InstructionKind HInstruction::GetKind() const {
@@ -1007,32 +1065,43 @@
   }
 }
 
+void HInstruction::RemoveEnvironment() {
+  RemoveEnvironmentUses(this);
+  environment_ = nullptr;
+}
+
 void HInstruction::ReplaceWith(HInstruction* other) {
   DCHECK(other != nullptr);
-  for (HUseIterator<HInstruction*> it(GetUses()); !it.Done(); it.Advance()) {
-    HUseListNode<HInstruction*>* current = it.Current();
-    HInstruction* user = current->GetUser();
-    size_t input_index = current->GetIndex();
-    user->SetRawInputAt(input_index, other);
-    other->AddUseAt(user, input_index);
-  }
+  // Note: fixup_end remains valid across splice_after().
+  auto fixup_end = other->uses_.empty() ? other->uses_.begin() : ++other->uses_.begin();
+  other->uses_.splice_after(other->uses_.before_begin(), uses_);
+  other->FixUpUserRecordsAfterUseInsertion(fixup_end);
 
-  for (HUseIterator<HEnvironment*> it(GetEnvUses()); !it.Done(); it.Advance()) {
-    HUseListNode<HEnvironment*>* current = it.Current();
-    HEnvironment* user = current->GetUser();
-    size_t input_index = current->GetIndex();
-    user->SetRawEnvAt(input_index, other);
-    other->AddEnvUseAt(user, input_index);
-  }
+  // Note: env_fixup_end remains valid across splice_after().
+  auto env_fixup_end =
+      other->env_uses_.empty() ? other->env_uses_.begin() : ++other->env_uses_.begin();
+  other->env_uses_.splice_after(other->env_uses_.before_begin(), env_uses_);
+  other->FixUpUserRecordsAfterEnvUseInsertion(env_fixup_end);
 
-  uses_.Clear();
-  env_uses_.Clear();
+  DCHECK(uses_.empty());
+  DCHECK(env_uses_.empty());
 }
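
The splice_after() call transplants the whole use list in constant time. A sketch with std::forward_list showing why `fixup_end`, captured before the splice, still bounds the range that needs fixing afterwards: the spliced-in nodes plus the first pre-existing node (whose "before" iterator changed) all lie in [begin(), fixup_end).

#include <cassert>
#include <forward_list>

int main() {
  std::forward_list<int> other_uses = {1, 2};
  std::forward_list<int> my_uses = {7, 8, 9};

  auto fixup_end = other_uses.empty() ? other_uses.begin() : ++other_uses.begin();
  other_uses.splice_after(other_uses.before_begin(), my_uses);  // O(1)

  assert(my_uses.empty());
  assert((other_uses == std::forward_list<int>{7, 8, 9, 1, 2}));
  assert(*fixup_end == 2);  // iterators into the old list remain valid
}
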
 
 void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
-  RemoveAsUserOfInput(index);
-  SetRawInputAt(index, replacement);
-  replacement->AddUseAt(this, index);
+  HUserRecord<HInstruction*> input_use = InputRecordAt(index);
+  if (input_use.GetInstruction() == replacement) {
+    // Nothing to do.
+    return;
+  }
+  HUseList<HInstruction*>::iterator before_use_node = input_use.GetBeforeUseNode();
+  // Note: fixup_end remains valid across splice_after().
+  auto fixup_end =
+      replacement->uses_.empty() ? replacement->uses_.begin() : ++replacement->uses_.begin();
+  replacement->uses_.splice_after(replacement->uses_.before_begin(),
+                                  input_use.GetInstruction()->uses_,
+                                  before_use_node);
+  replacement->FixUpUserRecordsAfterUseInsertion(fixup_end);
+  input_use.GetInstruction()->FixUpUserRecordsAfterUseRemoval(before_use_node);
 }
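
ReplaceInput() moves exactly one node between lists with the three-argument splice_after(). A sketch of that overload; as before, nothing is allocated or copied.

#include <cassert>
#include <forward_list>

int main() {
  std::forward_list<int> old_uses = {1, 2, 3};
  std::forward_list<int> new_uses = {9};
  auto before_node = old_uses.begin();  // the node before `2`
  new_uses.splice_after(new_uses.before_begin(), old_uses, before_node);
  assert((old_uses == std::forward_list<int>{1, 3}));
  assert((new_uses == std::forward_list<int>{2, 9}));
}
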
 
 size_t HInstruction::EnvironmentSize() const {
@@ -1304,17 +1373,18 @@
   DCHECK_EQ(InputCount(), 0u);
 
   // Find the target block.
-  HUseIterator<HInstruction*> uses_it(GetUses());
-  HBasicBlock* target_block = uses_it.Current()->GetUser()->GetBlock();
-  uses_it.Advance();
-  while (!uses_it.Done() && uses_it.Current()->GetUser()->GetBlock() == target_block) {
-    uses_it.Advance();
+  auto uses_it = GetUses().begin();
+  auto uses_end = GetUses().end();
+  HBasicBlock* target_block = uses_it->GetUser()->GetBlock();
+  ++uses_it;
+  while (uses_it != uses_end && uses_it->GetUser()->GetBlock() == target_block) {
+    ++uses_it;
   }
-  if (!uses_it.Done()) {
+  if (uses_it != uses_end) {
     // This instruction has uses in two or more blocks. Find the common dominator.
     CommonDominator finder(target_block);
-    for (; !uses_it.Done(); uses_it.Advance()) {
-      finder.Update(uses_it.Current()->GetUser()->GetBlock());
+    for (; uses_it != uses_end; ++uses_it) {
+      finder.Update(uses_it->GetUser()->GetBlock());
     }
     target_block = finder.Get();
     DCHECK(target_block != nullptr);
@@ -1327,10 +1397,10 @@
 
   // Find insertion position.
   HInstruction* insert_pos = nullptr;
-  for (HUseIterator<HInstruction*> uses_it2(GetUses()); !uses_it2.Done(); uses_it2.Advance()) {
-    if (uses_it2.Current()->GetUser()->GetBlock() == target_block &&
-        (insert_pos == nullptr || uses_it2.Current()->GetUser()->StrictlyDominates(insert_pos))) {
-      insert_pos = uses_it2.Current()->GetUser();
+  for (const HUseListNode<HInstruction*>& use : GetUses()) {
+    if (use.GetUser()->GetBlock() == target_block &&
+        (insert_pos == nullptr || use.GetUser()->StrictlyDominates(insert_pos))) {
+      insert_pos = use.GetUser();
     }
   }
   if (insert_pos == nullptr) {
@@ -1395,34 +1465,6 @@
   return new_block;
 }
 
-HBasicBlock* HBasicBlock::SplitCatchBlockAfterMoveException() {
-  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
-  DCHECK(IsCatchBlock()) << "This method is intended for catch blocks only.";
-
-  HInstruction* first_insn = GetFirstInstruction();
-  HInstruction* split_before = nullptr;
-
-  if (first_insn != nullptr && first_insn->IsLoadException()) {
-    // Catch block starts with a LoadException. Split the block after
-    // the StoreLocal and ClearException which must come after the load.
-    DCHECK(first_insn->GetNext()->IsStoreLocal());
-    DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
-    split_before = first_insn->GetNext()->GetNext()->GetNext();
-  } else {
-    // Catch block does not load the exception. Split at the beginning
-    // to create an empty catch block.
-    split_before = first_insn;
-  }
-
-  if (split_before == nullptr) {
-    // Catch block has no instructions after the split point (must be dead).
-    // Do not split it but rather signal error by returning nullptr.
-    return nullptr;
-  } else {
-    return SplitBefore(split_before);
-  }
-}
-
 HBasicBlock* HBasicBlock::SplitBeforeForInlining(HInstruction* cursor) {
   DCHECK_EQ(cursor->GetBlock(), this);
 
@@ -1638,10 +1680,10 @@
 static void RemoveUsesOfDeadInstruction(HInstruction* insn) {
   DCHECK(!insn->HasEnvironmentUses());
   while (insn->HasNonEnvironmentUses()) {
-    HUseListNode<HInstruction*>* use = insn->GetUses().GetFirst();
-    size_t use_index = use->GetIndex();
-    HBasicBlock* user_block =  use->GetUser()->GetBlock();
-    DCHECK(use->GetUser()->IsPhi() && user_block->IsCatchBlock());
+    const HUseListNode<HInstruction*>& use = insn->GetUses().front();
+    size_t use_index = use.GetIndex();
+    HBasicBlock* user_block = use.GetUser()->GetBlock();
+    DCHECK(use.GetUser()->IsPhi() && user_block->IsCatchBlock());
     for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
       phi_it.Current()->AsPhi()->RemoveInputAt(use_index);
     }
@@ -1654,21 +1696,77 @@
   // iteration.
   DCHECK(dominated_blocks_.empty());
 
-  // (1) Remove the block from all loops it is included in.
-  for (HLoopInformationOutwardIterator it(*this); !it.Done(); it.Advance()) {
-    HLoopInformation* loop_info = it.Current();
-    loop_info->Remove(this);
-    if (loop_info->IsBackEdge(*this)) {
-      // If this was the last back edge of the loop, we deliberately leave the
-      // loop in an inconsistent state and will fail GraphChecker unless the
-      // entire loop is removed during the pass.
-      loop_info->RemoveBackEdge(this);
-    }
+  // The following steps gradually remove the block from all its dependents in
+  // post order (b/27683071).
+
+  // (1) Store a basic block that we'll use in step (5) to find loops to be updated.
+  //     We need to do this before step (4) which destroys the predecessor list.
+  HBasicBlock* loop_update_start = this;
+  if (IsLoopHeader()) {
+    HLoopInformation* loop_info = GetLoopInformation();
+    // All other blocks in this loop should have been removed because the header
+    // was their dominator.
+    // Note that we do not remove `this` from `loop_info` as it is unreachable.
+    DCHECK(!loop_info->IsIrreducible());
+    DCHECK_EQ(loop_info->GetBlocks().NumSetBits(), 1u);
+    DCHECK_EQ(static_cast<uint32_t>(loop_info->GetBlocks().GetHighestBitSet()), GetBlockId());
+    loop_update_start = loop_info->GetPreHeader();
   }
 
-  // (2) Disconnect the block from its predecessors and update their
+  // (2) Disconnect the block from its successors and update their phis.
+  for (HBasicBlock* successor : successors_) {
+    // Delete this block from the list of predecessors.
+    size_t this_index = successor->GetPredecessorIndexOf(this);
+    successor->predecessors_.erase(successor->predecessors_.begin() + this_index);
+
+    // Check that `successor` has other predecessors, otherwise `this` is the
+    // dominator of `successor` which violates the order DCHECKed at the top.
+    DCHECK(!successor->predecessors_.empty());
+
+    // Remove this block's entries in the successor's phis. Skip exceptional
+    // successors because catch phi inputs do not correspond to predecessor
+    // blocks but to throwing instructions. The inputs of the catch phis will be
+    // updated in step (3).
+    if (!successor->IsCatchBlock()) {
+      if (successor->predecessors_.size() == 1u) {
+        // The successor has just one predecessor left. Replace phis with the only
+        // remaining input.
+        for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+          HPhi* phi = phi_it.Current()->AsPhi();
+          phi->ReplaceWith(phi->InputAt(1 - this_index));
+          successor->RemovePhi(phi);
+        }
+      } else {
+        for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+          phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
+        }
+      }
+    }
+  }
+  successors_.clear();
+
+  // (3) Remove instructions and phis. Instructions should have no remaining uses
+  //     except in catch phis. If an instruction is used by a catch phi at `index`,
+  //     remove the `index`-th input of all phis in the catch block since they are
+  //     guaranteed dead. Note that we may miss dead inputs this way but the
+  //     graph will always remain consistent.
+  for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) {
+    HInstruction* insn = it.Current();
+    RemoveUsesOfDeadInstruction(insn);
+    RemoveInstruction(insn);
+  }
+  for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* insn = it.Current()->AsPhi();
+    RemoveUsesOfDeadInstruction(insn);
+    RemovePhi(insn);
+  }
+
+  // (4) Disconnect the block from its predecessors and update their
   //     control-flow instructions.
   for (HBasicBlock* predecessor : predecessors_) {
+    // We should not see any back edges as they would have been removed by step (3).
+    DCHECK(!IsInLoop() || !GetLoopInformation()->IsBackEdge(*predecessor));
+
     HInstruction* last_instruction = predecessor->GetLastInstruction();
     if (last_instruction->IsTryBoundary() && !IsCatchBlock()) {
       // This block is the only normal-flow successor of the TryBoundary which
@@ -1712,58 +1810,25 @@
   }
   predecessors_.clear();
 
-  // (3) Disconnect the block from its successors and update their phis.
-  for (HBasicBlock* successor : successors_) {
-    // Delete this block from the list of predecessors.
-    size_t this_index = successor->GetPredecessorIndexOf(this);
-    successor->predecessors_.erase(successor->predecessors_.begin() + this_index);
-
-    // Check that `successor` has other predecessors, otherwise `this` is the
-    // dominator of `successor` which violates the order DCHECKed at the top.
-    DCHECK(!successor->predecessors_.empty());
-
-    // Remove this block's entries in the successor's phis. Skip exceptional
-    // successors because catch phi inputs do not correspond to predecessor
-    // blocks but throwing instructions. Their inputs will be updated in step (4).
-    if (!successor->IsCatchBlock()) {
-      if (successor->predecessors_.size() == 1u) {
-        // The successor has just one predecessor left. Replace phis with the only
-        // remaining input.
-        for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
-          HPhi* phi = phi_it.Current()->AsPhi();
-          phi->ReplaceWith(phi->InputAt(1 - this_index));
-          successor->RemovePhi(phi);
-        }
-      } else {
-        for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
-          phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
-        }
-      }
+  // (5) Remove the block from all loops it is included in. Skip the inner-most
+  //     loop if this is the loop header (see definition of `loop_update_start`)
+  //     because the loop header's predecessor list has been destroyed in step (4).
+  for (HLoopInformationOutwardIterator it(*loop_update_start); !it.Done(); it.Advance()) {
+    HLoopInformation* loop_info = it.Current();
+    loop_info->Remove(this);
+    if (loop_info->IsBackEdge(*this)) {
+      // If this was the last back edge of the loop, we deliberately leave the
+      // loop in an inconsistent state and will fail GraphChecker unless the
+      // entire loop is removed during the pass.
+      loop_info->RemoveBackEdge(this);
     }
   }
-  successors_.clear();
 
-  // (4) Remove instructions and phis. Instructions should have no remaining uses
-  //     except in catch phis. If an instruction is used by a catch phi at `index`,
-  //     remove `index`-th input of all phis in the catch block since they are
-  //     guaranteed dead. Note that we may miss dead inputs this way but the
-  //     graph will always remain consistent.
-  for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) {
-    HInstruction* insn = it.Current();
-    RemoveUsesOfDeadInstruction(insn);
-    RemoveInstruction(insn);
-  }
-  for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) {
-    HPhi* insn = it.Current()->AsPhi();
-    RemoveUsesOfDeadInstruction(insn);
-    RemovePhi(insn);
-  }
-
-  // Disconnect from the dominator.
+  // (6) Disconnect from the dominator.
   dominator_->RemoveDominatedBlock(this);
   SetDominator(nullptr);
 
-  // Delete from the graph, update reverse post order.
+  // (7) Delete from the graph, update reverse post order.
   graph_->DeleteDeadEmptyBlock(this);
   SetGraph(nullptr);
 }
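
A reduced sketch of the reordered teardown: successors are detached first (step 2), the block's own contents would be removed next while predecessor edges still exist (step 3), and predecessors are detached last (step 4). Phi inputs are modelled as plain vectors indexed like the predecessor list; the types are illustrative, and loop and dominator updates are omitted.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

struct Block {
  std::vector<Block*> preds, succs;
  std::vector<std::vector<int>> phis;  // one input per predecessor
};

void DisconnectAndDelete(Block* b) {
  for (Block* succ : b->succs) {
    auto it = std::find(succ->preds.begin(), succ->preds.end(), b);
    size_t index = static_cast<size_t>(it - succ->preds.begin());
    succ->preds.erase(it);
    for (auto& phi : succ->phis) phi.erase(phi.begin() + index);
  }
  b->succs.clear();
  // ... step (3): remove b's instructions here, predecessors still linked ...
  for (Block* pred : b->preds) {
    pred->succs.erase(std::find(pred->succs.begin(), pred->succs.end(), b));
  }
  b->preds.clear();
}

int main() {
  Block a, b, c;
  a.succs = {&b}; b.preds = {&a};
  b.succs = {&c}; c.preds = {&b, &c}; c.succs = {&c};  // c loops on itself
  c.phis = {{1, 2}};
  DisconnectAndDelete(&b);
  assert(a.succs.empty());
  assert(c.preds.size() == 1 && c.phis[0] == std::vector<int>{2});
}
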
@@ -1878,6 +1943,7 @@
 
   RemoveElement(reverse_post_order_, block);
   blocks_[block->GetBlockId()] = nullptr;
+  block->SetGraph(nullptr);
 }
 
 void HGraph::UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block,
@@ -1930,6 +1996,7 @@
            instr_it.Advance()) {
         HInstruction* current = instr_it.Current();
         if (current->NeedsEnvironment()) {
+          DCHECK(current->HasEnvironment());
           current->GetEnvironment()->SetAndCopyParentChain(
               outer_graph->GetArena(), invoke->GetEnvironment());
         }
@@ -2217,6 +2284,8 @@
   if (kIsDebugBuild) {
     ScopedObjectAccess soa(Thread::Current());
     DCHECK(IsValidHandle(type_handle));
+    DCHECK(!type_handle->IsErroneous());
+    DCHECK(!type_handle->IsArrayClass() || !type_handle->GetComponentType()->IsErroneous());
     if (!is_exact) {
       DCHECK(!type_handle->CannotBeAssignedFromOtherTypes())
           << "Callers of ReferenceTypeInfo::Create should ensure is_exact is properly computed";
@@ -2364,13 +2433,66 @@
   }
 }
 
-void HInstruction::RemoveEnvironmentUsers() {
-  for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) {
-    HUseListNode<HEnvironment*>* user_node = use_it.Current();
-    HEnvironment* user = user_node->GetUser();
-    user->SetRawEnvAt(user_node->GetIndex(), nullptr);
+bool HLoadString::InstructionDataEquals(HInstruction* other) const {
+  HLoadString* other_load_string = other->AsLoadString();
+  if (string_index_ != other_load_string->string_index_ ||
+      GetPackedFields() != other_load_string->GetPackedFields()) {
+    return false;
   }
-  env_uses_.Clear();
+  LoadKind load_kind = GetLoadKind();
+  if (HasAddress(load_kind)) {
+    return GetAddress() == other_load_string->GetAddress();
+  } else if (HasStringReference(load_kind)) {
+    return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile());
+  } else {
+    DCHECK(HasDexCacheReference(load_kind)) << load_kind;
+    // If the string indexes and dex files are the same, dex cache element offsets
+    // must also be the same, so we don't need to compare them.
+    return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile());
+  }
+}
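
The equality test compares the shared fields first and only then the payload that is meaningful for the load kind in question. A stripped-down sketch of that shape (the enum and struct below are stand-ins for HLoadString's packed fields, not its real layout):

#include <cassert>
#include <cstdint>

enum class LoadKind { kAddress, kReference };

struct LoadString {
  uint32_t string_index;
  LoadKind kind;
  uint64_t address;      // meaningful for kAddress
  const void* dex_file;  // meaningful for kReference
};

bool DataEquals(const LoadString& a, const LoadString& b) {
  if (a.string_index != b.string_index || a.kind != b.kind) return false;
  return a.kind == LoadKind::kAddress ? a.address == b.address
                                      : a.dex_file == b.dex_file;
}

int main() {
  LoadString x{1, LoadKind::kAddress, 0x1000, nullptr};
  LoadString y{1, LoadKind::kAddress, 0x1000, nullptr};
  assert(DataEquals(x, y));
}
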
+
+void HLoadString::SetLoadKindInternal(LoadKind load_kind) {
+  // Once sharpened, the load kind should not be changed again.
+  DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod);
+  SetPackedField<LoadKindField>(load_kind);
+
+  if (load_kind != LoadKind::kDexCacheViaMethod) {
+    RemoveAsUserOfInput(0u);
+    SetRawInputAt(0u, nullptr);
+  }
+  if (!NeedsEnvironment()) {
+    RemoveEnvironment();
+    SetSideEffects(SideEffects::None());
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
+  switch (rhs) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      return os << "BootImageLinkTimeAddress";
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      return os << "BootImageLinkTimePcRelative";
+    case HLoadString::LoadKind::kBootImageAddress:
+      return os << "BootImageAddress";
+    case HLoadString::LoadKind::kDexCacheAddress:
+      return os << "DexCacheAddress";
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      return os << "DexCachePcRelative";
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      return os << "DexCacheViaMethod";
+    default:
+      LOG(FATAL) << "Unknown HLoadString::LoadKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
+void HInstruction::RemoveEnvironmentUsers() {
+  for (const HUseListNode<HEnvironment*>& use : GetEnvUses()) {
+    HEnvironment* user = use.GetUser();
+    user->SetRawEnvAt(use.GetIndex(), nullptr);
+  }
+  env_uses_.clear();
 }
 
 // Returns an instruction with the opposite Boolean value from 'cond'.
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index e9a42cb..12ea059 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -26,7 +26,6 @@
 #include "base/arena_object.h"
 #include "base/stl_util.h"
 #include "dex/compiler_enums.h"
-#include "dex_instruction-inl.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "handle.h"
 #include "handle_scope.h"
@@ -37,6 +36,7 @@
 #include "offsets.h"
 #include "primitive.h"
 #include "utils/array_ref.h"
+#include "utils/intrusive_forward_list.h"
 
 namespace art {
 
@@ -101,6 +101,7 @@
 };
 
 enum GraphAnalysisResult {
+  kAnalysisSkipped,
   kAnalysisInvalidBytecode,
   kAnalysisFailThrowCatchLoop,
   kAnalysisFailAmbiguousArrayOp,
@@ -169,7 +170,7 @@
     return handle.GetReference() != nullptr;
   }
 
-  bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsValid() const {
     return IsValidHandle(type_handle_);
   }
 
@@ -427,6 +428,10 @@
     number_of_in_vregs_ = value;
   }
 
+  uint16_t GetNumberOfInVRegs() const {
+    return number_of_in_vregs_;
+  }
+
   uint16_t GetNumberOfLocalVRegs() const {
     DCHECK(!in_ssa_form_);
     return number_of_vregs_ - number_of_in_vregs_;
@@ -645,6 +650,7 @@
       : header_(header),
         suspend_check_(nullptr),
         irreducible_(false),
+        contains_irreducible_loop_(false),
         back_edges_(graph->GetArena()->Adapter(kArenaAllocLoopInfoBackEdges)),
         // Make bit vector growable, as the number of blocks may change.
         blocks_(graph->GetArena(), graph->GetBlocks().size(), true, kArenaAllocLoopInfoBackEdges) {
@@ -652,6 +658,7 @@
   }
 
   bool IsIrreducible() const { return irreducible_; }
+  bool ContainsIrreducibleLoop() const { return contains_irreducible_loop_; }
 
   void Dump(std::ostream& os);
 
@@ -720,14 +727,21 @@
     blocks_.ClearAllBits();
   }
 
+  bool HasBackEdgeNotDominatedByHeader() const;
+
+  bool IsPopulated() const {
+    return blocks_.GetHighestBitSet() != -1;
+  }
+
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
-  void PopulateIrreducibleRecursive(HBasicBlock* block);
+  void PopulateIrreducibleRecursive(HBasicBlock* block, ArenaBitVector* finalized);
 
   HBasicBlock* header_;
   HSuspendCheck* suspend_check_;
   bool irreducible_;
+  bool contains_irreducible_loop_;
   ArenaVector<HBasicBlock*> back_edges_;
   ArenaBitVector blocks_;
 
@@ -999,15 +1013,6 @@
   // Similar to `SplitBeforeForInlining` but does it after `cursor`.
   HBasicBlock* SplitAfterForInlining(HInstruction* cursor);
 
-  // Split catch block into two blocks after the original move-exception bytecode
-  // instruction, or at the beginning if not present. Returns the newly created,
-  // latter block, or nullptr if such block could not be created (must be dead
-  // in that case). Note that this method just updates raw block information,
-  // like predecessors, successors, dominators, and instruction list. It does not
-  // update the graph, reverse post order, loop information, nor make sure the
-  // blocks are consistent (for example ending with a control flow instruction).
-  HBasicBlock* SplitCatchBlockAfterMoveException();
-
   // Merge `other` at the end of `this`. Successors and dominated blocks of
   // `other` are changed to be successors and dominated blocks of `this`. Note
   // that this method does not update the graph, reverse post order, loop
@@ -1220,9 +1225,7 @@
   M(LessThanOrEqual, Condition)                                         \
   M(LoadClass, Instruction)                                             \
   M(LoadException, Instruction)                                         \
-  M(LoadLocal, Instruction)                                             \
   M(LoadString, Instruction)                                            \
-  M(Local, Instruction)                                                 \
   M(LongConstant, Constant)                                             \
   M(MemoryBarrier, Instruction)                                         \
   M(MonitorOperation, Instruction)                                      \
@@ -1253,7 +1256,6 @@
   M(UnresolvedStaticFieldGet, Instruction)                              \
   M(UnresolvedStaticFieldSet, Instruction)                              \
   M(Select, Instruction)                                                \
-  M(StoreLocal, Instruction)                                            \
   M(Sub, BinaryOperation)                                               \
   M(SuspendCheck, Instruction)                                          \
   M(Throw, Instruction)                                                 \
@@ -1342,127 +1344,31 @@
   const H##type* As##type() const { return this; }                      \
   H##type* As##type() { return this; }
 
-template <typename T> class HUseList;
-
 template <typename T>
 class HUseListNode : public ArenaObject<kArenaAllocUseListNode> {
  public:
-  HUseListNode* GetPrevious() const { return prev_; }
-  HUseListNode* GetNext() const { return next_; }
   T GetUser() const { return user_; }
   size_t GetIndex() const { return index_; }
   void SetIndex(size_t index) { index_ = index; }
 
+  // Hook for the IntrusiveForwardList<>.
+  // TODO: Hide this better.
+  IntrusiveForwardListHook hook;
+
  private:
   HUseListNode(T user, size_t index)
-      : user_(user), index_(index), prev_(nullptr), next_(nullptr) {}
+      : user_(user), index_(index) {}
 
   T const user_;
   size_t index_;
-  HUseListNode<T>* prev_;
-  HUseListNode<T>* next_;
 
-  friend class HUseList<T>;
+  friend class HInstruction;
 
   DISALLOW_COPY_AND_ASSIGN(HUseListNode);
 };
 
 template <typename T>
-class HUseList : public ValueObject {
- public:
-  HUseList() : first_(nullptr) {}
-
-  void Clear() {
-    first_ = nullptr;
-  }
-
-  // Adds a new entry at the beginning of the use list and returns
-  // the newly created node.
-  HUseListNode<T>* AddUse(T user, size_t index, ArenaAllocator* arena) {
-    HUseListNode<T>* new_node = new (arena) HUseListNode<T>(user, index);
-    if (IsEmpty()) {
-      first_ = new_node;
-    } else {
-      first_->prev_ = new_node;
-      new_node->next_ = first_;
-      first_ = new_node;
-    }
-    return new_node;
-  }
-
-  HUseListNode<T>* GetFirst() const {
-    return first_;
-  }
-
-  void Remove(HUseListNode<T>* node) {
-    DCHECK(node != nullptr);
-    DCHECK(Contains(node));
-
-    if (node->prev_ != nullptr) {
-      node->prev_->next_ = node->next_;
-    }
-    if (node->next_ != nullptr) {
-      node->next_->prev_ = node->prev_;
-    }
-    if (node == first_) {
-      first_ = node->next_;
-    }
-  }
-
-  bool Contains(const HUseListNode<T>* node) const {
-    if (node == nullptr) {
-      return false;
-    }
-    for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) {
-      if (current == node) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  bool IsEmpty() const {
-    return first_ == nullptr;
-  }
-
-  bool HasOnlyOneUse() const {
-    return first_ != nullptr && first_->next_ == nullptr;
-  }
-
-  size_t SizeSlow() const {
-    size_t count = 0;
-    for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) {
-      ++count;
-    }
-    return count;
-  }
-
- private:
-  HUseListNode<T>* first_;
-};
-
-template<typename T>
-class HUseIterator : public ValueObject {
- public:
-  explicit HUseIterator(const HUseList<T>& uses) : current_(uses.GetFirst()) {}
-
-  bool Done() const { return current_ == nullptr; }
-
-  void Advance() {
-    DCHECK(!Done());
-    current_ = current_->GetNext();
-  }
-
-  HUseListNode<T>* Current() const {
-    DCHECK(!Done());
-    return current_;
-  }
-
- private:
-  HUseListNode<T>* current_;
-
-  friend class HValue;
-};
+using HUseList = IntrusiveForwardList<HUseListNode<T>>;
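
With the alias above, a use list is intrusive: the link lives inside the node (the `hook` member), so adding a use allocates nothing beyond the node itself and never copies it. A minimal sketch of the idea; the real IntrusiveForwardList in utils/intrusive_forward_list.h is iterator-based and more general.

#include <cassert>

struct Hook { Hook* next = nullptr; };

struct UseNode {
  int index = 0;
  Hook hook;  // embedded link, like IntrusiveForwardListHook
};

struct UseList {
  Hook head;  // head.next points at the first node's hook
  void push_front(UseNode& n) { n.hook.next = head.next; head.next = &n.hook; }
  bool empty() const { return head.next == nullptr; }
};

int main() {
  UseNode a{1}, b{2};
  UseList list;
  list.push_front(a);
  list.push_front(b);
  assert(!list.empty() && list.head.next == &b.hook);
}
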
 
 // This class is used by HEnvironment and HInstruction classes to record the
 // instructions they use and pointers to the corresponding HUseListNodes kept
@@ -1470,25 +1376,26 @@
 template <typename T>
 class HUserRecord : public ValueObject {
  public:
-  HUserRecord() : instruction_(nullptr), use_node_(nullptr) {}
-  explicit HUserRecord(HInstruction* instruction) : instruction_(instruction), use_node_(nullptr) {}
+  HUserRecord() : instruction_(nullptr), before_use_node_() {}
+  explicit HUserRecord(HInstruction* instruction) : instruction_(instruction), before_use_node_() {}
 
-  HUserRecord(const HUserRecord<T>& old_record, HUseListNode<T>* use_node)
-    : instruction_(old_record.instruction_), use_node_(use_node) {
+  HUserRecord(const HUserRecord<T>& old_record, typename HUseList<T>::iterator before_use_node)
+      : HUserRecord(old_record.instruction_, before_use_node) {}
+  HUserRecord(HInstruction* instruction, typename HUseList<T>::iterator before_use_node)
+      : instruction_(instruction), before_use_node_(before_use_node) {
     DCHECK(instruction_ != nullptr);
-    DCHECK(use_node_ != nullptr);
-    DCHECK(old_record.use_node_ == nullptr);
   }
 
   HInstruction* GetInstruction() const { return instruction_; }
-  HUseListNode<T>* GetUseNode() const { return use_node_; }
+  typename HUseList<T>::iterator GetBeforeUseNode() const { return before_use_node_; }
+  typename HUseList<T>::iterator GetUseNode() const { return ++GetBeforeUseNode(); }
 
  private:
   // Instruction used by the user.
   HInstruction* instruction_;
 
-  // Corresponding entry in the use list kept by 'instruction_'.
-  HUseListNode<T>* use_node_;
+  // Iterator before the corresponding entry in the use list kept by 'instruction_'.
+  typename HUseList<T>::iterator before_use_node_;
 };
 
 /**
@@ -1559,21 +1466,21 @@
   static SideEffects FieldWriteOfType(Primitive::Type type, bool is_volatile) {
     return is_volatile
         ? AllWritesAndReads()
-        : SideEffects(TypeFlagWithAlias(type, kFieldWriteOffset));
+        : SideEffects(TypeFlag(type, kFieldWriteOffset));
   }
 
   static SideEffects ArrayWriteOfType(Primitive::Type type) {
-    return SideEffects(TypeFlagWithAlias(type, kArrayWriteOffset));
+    return SideEffects(TypeFlag(type, kArrayWriteOffset));
   }
 
   static SideEffects FieldReadOfType(Primitive::Type type, bool is_volatile) {
     return is_volatile
         ? AllWritesAndReads()
-        : SideEffects(TypeFlagWithAlias(type, kFieldReadOffset));
+        : SideEffects(TypeFlag(type, kFieldReadOffset));
   }
 
   static SideEffects ArrayReadOfType(Primitive::Type type) {
-    return SideEffects(TypeFlagWithAlias(type, kArrayReadOffset));
+    return SideEffects(TypeFlag(type, kArrayReadOffset));
   }
 
   static SideEffects CanTriggerGC() {
@@ -1700,23 +1607,6 @@
   static constexpr uint64_t kAllReads =
       ((1ULL << (kLastBitForReads + 1 - kFieldReadOffset)) - 1) << kFieldReadOffset;
 
-  // Work around the fact that HIR aliases I/F and J/D.
-  // TODO: remove this interceptor once HIR types are clean
-  static uint64_t TypeFlagWithAlias(Primitive::Type type, int offset) {
-    switch (type) {
-      case Primitive::kPrimInt:
-      case Primitive::kPrimFloat:
-        return TypeFlag(Primitive::kPrimInt, offset) |
-               TypeFlag(Primitive::kPrimFloat, offset);
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        return TypeFlag(Primitive::kPrimLong, offset) |
-               TypeFlag(Primitive::kPrimDouble, offset);
-      default:
-        return TypeFlag(type, offset);
-    }
-  }
-
   // Translates type to bit flag.
   static uint64_t TypeFlag(Primitive::Type type, int offset) {
     CHECK_NE(type, Primitive::kPrimVoid);
@@ -1830,14 +1720,6 @@
   }
 
  private:
-  // Record instructions' use entries of this environment for constant-time removal.
-  // It should only be called by HInstruction when a new environment use is added.
-  void RecordEnvUse(HUseListNode<HEnvironment*>* env_use) {
-    DCHECK(env_use->GetUser() == this);
-    size_t index = env_use->GetIndex();
-    vregs_[index] = HUserRecord<HEnvironment*>(vregs_[index], env_use);
-  }
-
   ArenaVector<HUserRecord<HEnvironment*>> vregs_;
   ArenaVector<Location> locations_;
   HEnvironment* parent_;
@@ -1941,36 +1823,44 @@
   ReferenceTypeInfo GetReferenceTypeInfo() const {
     DCHECK_EQ(GetType(), Primitive::kPrimNot);
     return ReferenceTypeInfo::CreateUnchecked(reference_type_handle_,
-                                              GetPackedFlag<kFlagReferenceTypeIsExact>());;
+                                              GetPackedFlag<kFlagReferenceTypeIsExact>());
   }
 
   void AddUseAt(HInstruction* user, size_t index) {
     DCHECK(user != nullptr);
-    HUseListNode<HInstruction*>* use =
-        uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena());
-    user->SetRawInputRecordAt(index, HUserRecord<HInstruction*>(user->InputRecordAt(index), use));
+    // Note: fixup_end remains valid across push_front().
+    auto fixup_end = uses_.empty() ? uses_.begin() : ++uses_.begin();
+    HUseListNode<HInstruction*>* new_node =
+        new (GetBlock()->GetGraph()->GetArena()) HUseListNode<HInstruction*>(user, index);
+    uses_.push_front(*new_node);
+    FixUpUserRecordsAfterUseInsertion(fixup_end);
   }
 
   void AddEnvUseAt(HEnvironment* user, size_t index) {
     DCHECK(user != nullptr);
-    HUseListNode<HEnvironment*>* env_use =
-        env_uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena());
-    user->RecordEnvUse(env_use);
+    // Note: env_fixup_end remains valid across push_front().
+    auto env_fixup_end = env_uses_.empty() ? env_uses_.begin() : ++env_uses_.begin();
+    HUseListNode<HEnvironment*>* new_node =
+        new (GetBlock()->GetGraph()->GetArena()) HUseListNode<HEnvironment*>(user, index);
+    env_uses_.push_front(*new_node);
+    FixUpUserRecordsAfterEnvUseInsertion(env_fixup_end);
   }
 
   void RemoveAsUserOfInput(size_t input) {
     HUserRecord<HInstruction*> input_use = InputRecordAt(input);
-    input_use.GetInstruction()->uses_.Remove(input_use.GetUseNode());
+    HUseList<HInstruction*>::iterator before_use_node = input_use.GetBeforeUseNode();
+    input_use.GetInstruction()->uses_.erase_after(before_use_node);
+    input_use.GetInstruction()->FixUpUserRecordsAfterUseRemoval(before_use_node);
   }
 
   const HUseList<HInstruction*>& GetUses() const { return uses_; }
   const HUseList<HEnvironment*>& GetEnvUses() const { return env_uses_; }
 
-  bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); }
-  bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); }
-  bool HasNonEnvironmentUses() const { return !uses_.IsEmpty(); }
+  bool HasUses() const { return !uses_.empty() || !env_uses_.empty(); }
+  bool HasEnvironmentUses() const { return !env_uses_.empty(); }
+  bool HasNonEnvironmentUses() const { return !uses_.empty(); }
   bool HasOnlyOneNonEnvironmentUse() const {
-    return !HasEnvironmentUses() && GetUses().HasOnlyOneUse();
+    return !HasEnvironmentUses() && GetUses().HasExactlyOneElement();
   }
 
   // Does this instruction strictly dominate `other_instruction`?
@@ -1995,6 +1885,8 @@
     environment_ = environment;
   }
 
+  void RemoveEnvironment();
+
   // Set the environment of this instruction, copying it from `environment`. While
   // copying, the uses lists are being updated.
   void CopyEnvironmentFrom(HEnvironment* environment) {
@@ -2170,7 +2062,45 @@
   }
 
  private:
-  void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); }
+  void FixUpUserRecordsAfterUseInsertion(HUseList<HInstruction*>::iterator fixup_end) {
+    auto before_use_node = uses_.before_begin();
+    for (auto use_node = uses_.begin(); use_node != fixup_end; ++use_node) {
+      HInstruction* user = use_node->GetUser();
+      size_t input_index = use_node->GetIndex();
+      user->SetRawInputRecordAt(input_index, HUserRecord<HInstruction*>(this, before_use_node));
+      before_use_node = use_node;
+    }
+  }
+
+  void FixUpUserRecordsAfterUseRemoval(HUseList<HInstruction*>::iterator before_use_node) {
+    auto next = ++HUseList<HInstruction*>::iterator(before_use_node);
+    if (next != uses_.end()) {
+      HInstruction* next_user = next->GetUser();
+      size_t next_index = next->GetIndex();
+      DCHECK(next_user->InputRecordAt(next_index).GetInstruction() == this);
+      next_user->SetRawInputRecordAt(next_index, HUserRecord<HInstruction*>(this, before_use_node));
+    }
+  }
+
+  void FixUpUserRecordsAfterEnvUseInsertion(HUseList<HEnvironment*>::iterator env_fixup_end) {
+    auto before_env_use_node = env_uses_.before_begin();
+    for (auto env_use_node = env_uses_.begin(); env_use_node != env_fixup_end; ++env_use_node) {
+      HEnvironment* user = env_use_node->GetUser();
+      size_t input_index = env_use_node->GetIndex();
+      user->vregs_[input_index] = HUserRecord<HEnvironment*>(this, before_env_use_node);
+      before_env_use_node = env_use_node;
+    }
+  }
+
+  void FixUpUserRecordsAfterEnvUseRemoval(HUseList<HEnvironment*>::iterator before_env_use_node) {
+    auto next = ++HUseList<HEnvironment*>::iterator(before_env_use_node);
+    if (next != env_uses_.end()) {
+      HEnvironment* next_user = next->GetUser();
+      size_t next_index = next->GetIndex();
+      DCHECK(next_user->vregs_[next_index].GetInstruction() == this);
+      next_user->vregs_[next_index] = HUserRecord<HEnvironment*>(this, before_env_use_node);
+    }
+  }
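
The fix-up pass exists because every user record stores the iterator *before* its node; a push_front shifts that "before" position for the new node and for the old front. A sketch of the repair loop on std::forward_list, with a map standing in for the user records:

#include <cassert>
#include <forward_list>
#include <iterator>
#include <map>

int main() {
  std::forward_list<int> uses = {42};
  std::map<int, std::forward_list<int>::iterator> before_of = {
      {42, uses.before_begin()}};  // records point *before* their node

  auto fixup_end = uses.empty() ? uses.begin() : ++uses.begin();
  uses.push_front(7);  // a new use node arrives at the front

  // Re-point every record in [begin, fixup_end): the new node and the old front.
  auto before = uses.before_begin();
  for (auto it = uses.begin(); it != fixup_end; ++it) {
    before_of[*it] = before;
    before = it;
  }
  assert(std::next(before_of[42]) == std::next(uses.begin()));  // repaired
}
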
 
   HInstruction* previous_;
   HInstruction* next_;
@@ -2390,6 +2320,107 @@
   DISALLOW_COPY_AND_ASSIGN(HReturn);
 };
 
+class HPhi : public HInstruction {
+ public:
+  HPhi(ArenaAllocator* arena,
+       uint32_t reg_number,
+       size_t number_of_inputs,
+       Primitive::Type type,
+       uint32_t dex_pc = kNoDexPc)
+      : HInstruction(SideEffects::None(), dex_pc),
+        inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
+        reg_number_(reg_number) {
+    SetPackedField<TypeField>(ToPhiType(type));
+    DCHECK_NE(GetType(), Primitive::kPrimVoid);
+    // Phis are constructed live and marked dead if conflicting or unused.
+    // Individual steps of SsaBuilder should assume that if a phi has been
+    // marked dead, it can be ignored and will be removed by SsaPhiElimination.
+    SetPackedFlag<kFlagIsLive>(true);
+    SetPackedFlag<kFlagCanBeNull>(true);
+  }
+
+  // Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
+  static Primitive::Type ToPhiType(Primitive::Type type) {
+    return Primitive::PrimitiveKind(type);
+  }
+
+  bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
+
+  size_t InputCount() const OVERRIDE { return inputs_.size(); }
+
+  void AddInput(HInstruction* input);
+  void RemoveInputAt(size_t index);
+
+  Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); }
+  void SetType(Primitive::Type new_type) {
+    // Make sure that only valid type changes occur. The following are allowed:
+    //  (1) int  -> float/ref (primitive type propagation),
+    //  (2) long -> double (primitive type propagation).
+    DCHECK(GetType() == new_type ||
+           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) ||
+           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimNot) ||
+           (GetType() == Primitive::kPrimLong && new_type == Primitive::kPrimDouble));
+    SetPackedField<TypeField>(new_type);
+  }
+
+  bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
+  void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
+
+  uint32_t GetRegNumber() const { return reg_number_; }
+
+  void SetDead() { SetPackedFlag<kFlagIsLive>(false); }
+  void SetLive() { SetPackedFlag<kFlagIsLive>(true); }
+  bool IsDead() const { return !IsLive(); }
+  bool IsLive() const { return GetPackedFlag<kFlagIsLive>(); }
+
+  bool IsVRegEquivalentOf(HInstruction* other) const {
+    return other != nullptr
+        && other->IsPhi()
+        && other->AsPhi()->GetBlock() == GetBlock()
+        && other->AsPhi()->GetRegNumber() == GetRegNumber();
+  }
+
+  // Returns the next equivalent phi (starting from the current one) or null if there is none.
+  // An equivalent phi is a phi having the same dex register and type.
+  // It assumes that phis with the same dex register are adjacent.
+  HPhi* GetNextEquivalentPhiWithSameType() {
+    HInstruction* next = GetNext();
+    while (next != nullptr && next->AsPhi()->GetRegNumber() == reg_number_) {
+      if (next->GetType() == GetType()) {
+        return next->AsPhi();
+      }
+      next = next->GetNext();
+    }
+    return nullptr;
+  }
+
+  DECLARE_INSTRUCTION(Phi);
+
+ protected:
+  const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
+    return inputs_[index];
+  }
+
+  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
+    inputs_[index] = input;
+  }
+
+ private:
+  static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldTypeSize =
+      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
+  static constexpr size_t kFlagIsLive = kFieldType + kFieldTypeSize;
+  static constexpr size_t kFlagCanBeNull = kFlagIsLive + 1;
+  static constexpr size_t kNumberOfPhiPackedBits = kFlagCanBeNull + 1;
+  static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
+
+  ArenaVector<HUserRecord<HInstruction*> > inputs_;
+  const uint32_t reg_number_;
+
+  DISALLOW_COPY_AND_ASSIGN(HPhi);
+};
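
HPhi keeps its type, liveness and nullability packed into one word through BitField and the packed-flag helpers. A simplified sketch of the packing technique (not ART's actual BitField template):

#include <cassert>
#include <cstddef>
#include <cstdint>

template <typename T, size_t kPosition, size_t kSize>
struct BitField {
  static constexpr uint32_t kMask = ((1u << kSize) - 1u) << kPosition;
  static uint32_t Encode(T value, uint32_t bits) {
    return (bits & ~kMask) | ((static_cast<uint32_t>(value) << kPosition) & kMask);
  }
  static T Decode(uint32_t bits) {
    return static_cast<T>((bits & kMask) >> kPosition);
  }
};

enum class Type { kInt, kLong, kFloat, kDouble, kRef };  // illustrative

int main() {
  using TypeField = BitField<Type, /* position */ 0, /* size */ 3>;
  using LiveFlag  = BitField<bool, /* position */ 3, /* size */ 1>;
  uint32_t packed = 0;
  packed = TypeField::Encode(Type::kRef, packed);
  packed = LiveFlag::Encode(true, packed);
  assert(TypeField::Decode(packed) == Type::kRef);
  assert(LiveFlag::Decode(packed));
}
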
+
 // The exit instruction is the only instruction of the exit block.
 // Instructions aborting the method (HThrow and HReturn) must branch to the
 // exit block.
@@ -3550,57 +3581,6 @@
   DISALLOW_COPY_AND_ASSIGN(HCompare);
 };
 
-// A local in the graph. Corresponds to a Dex register.
-class HLocal : public HTemplateInstruction<0> {
- public:
-  explicit HLocal(uint16_t reg_number)
-      : HTemplateInstruction(SideEffects::None(), kNoDexPc), reg_number_(reg_number) {}
-
-  DECLARE_INSTRUCTION(Local);
-
-  uint16_t GetRegNumber() const { return reg_number_; }
-
- private:
-  // The Dex register number.
-  const uint16_t reg_number_;
-
-  DISALLOW_COPY_AND_ASSIGN(HLocal);
-};
-
-// Load a given local. The local is an input of this instruction.
-class HLoadLocal : public HExpression<1> {
- public:
-  HLoadLocal(HLocal* local, Primitive::Type type, uint32_t dex_pc = kNoDexPc)
-      : HExpression(type, SideEffects::None(), dex_pc) {
-    SetRawInputAt(0, local);
-  }
-
-  HLocal* GetLocal() const { return reinterpret_cast<HLocal*>(InputAt(0)); }
-
-  DECLARE_INSTRUCTION(LoadLocal);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(HLoadLocal);
-};
-
-// Store a value in a given local. This instruction has two inputs: the value
-// and the local.
-class HStoreLocal : public HTemplateInstruction<2> {
- public:
-  HStoreLocal(HLocal* local, HInstruction* value, uint32_t dex_pc = kNoDexPc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc) {
-    SetRawInputAt(0, local);
-    SetRawInputAt(1, value);
-  }
-
-  HLocal* GetLocal() const { return reinterpret_cast<HLocal*>(InputAt(0)); }
-
-  DECLARE_INSTRUCTION(StoreLocal);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(HStoreLocal);
-};
-
 class HNewInstance : public HExpression<2> {
  public:
   HNewInstance(HInstruction* cls,
@@ -3608,14 +3588,14 @@
                uint32_t dex_pc,
                uint16_t type_index,
                const DexFile& dex_file,
-               bool can_throw,
+               bool needs_access_check,
                bool finalizable,
                QuickEntrypointEnum entrypoint)
       : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc),
         type_index_(type_index),
         dex_file_(dex_file),
         entrypoint_(entrypoint) {
-    SetPackedFlag<kFlagCanThrow>(can_throw);
+    SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
     SetPackedFlag<kFlagFinalizable>(finalizable);
     SetRawInputAt(0, cls);
     SetRawInputAt(1, current_method);
@@ -3627,10 +3607,11 @@
   // Calls runtime so needs an environment.
   bool NeedsEnvironment() const OVERRIDE { return true; }
 
-  // It may throw when called on type that's not instantiable/accessible.
-  // It can throw OOME.
-  // TODO: distinguish between the two cases so we can for example allow allocation elimination.
-  bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>() || true; }
+  // Can throw an error when out of memory or if the type is not instantiable/accessible.
+  bool CanThrow() const OVERRIDE { return true; }
+
+  // Needs to call into runtime to make sure it's instantiable/accessible.
+  bool NeedsAccessCheck() const { return GetPackedFlag<kFlagNeedsAccessCheck>(); }
 
   bool IsFinalizable() const { return GetPackedFlag<kFlagFinalizable>(); }
 
@@ -3647,8 +3628,8 @@
   DECLARE_INSTRUCTION(NewInstance);
 
  private:
-  static constexpr size_t kFlagCanThrow = kNumberOfExpressionPackedBits;
-  static constexpr size_t kFlagFinalizable = kFlagCanThrow + 1;
+  static constexpr size_t kFlagNeedsAccessCheck = kNumberOfExpressionPackedBits;
+  static constexpr size_t kFlagFinalizable = kFlagNeedsAccessCheck + 1;
   static constexpr size_t kNumberOfNewInstancePackedBits = kFlagFinalizable + 1;
   static_assert(kNumberOfNewInstancePackedBits <= kMaxNumberOfPackedBits,
                 "Too many packed fields.");
@@ -3921,8 +3902,7 @@
                 // potentially one other if the clinit check is explicit, and potentially
                 // one other if the method is a string factory.
                 (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) +
-                    (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) +
-                    (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u),
+                    (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u),
                 return_type,
                 dex_pc,
                 method_index,
@@ -4050,15 +4030,6 @@
     DCHECK(!IsStaticWithExplicitClinitCheck());
   }
 
-  HInstruction* GetAndRemoveThisArgumentOfStringInit() {
-    DCHECK(IsStringInit());
-    size_t index = InputCount() - 1;
-    HInstruction* input = InputAt(index);
-    RemoveAsUserOfInput(index);
-    inputs_.pop_back();
-    return input;
-  }
-
   // Is this a call to a static method whose declaring class has an
   // explicit initialization check in the graph?
   bool IsStaticWithExplicitClinitCheck() const {
@@ -4901,7 +4872,8 @@
                     SideEffectsForArchRuntimeCalls(input->GetType(), result_type),
                     dex_pc) {
     SetRawInputAt(0, input);
-    DCHECK_NE(input->GetType(), result_type);
+    // Invariant: We should never generate a conversion to a Boolean value.
+    DCHECK_NE(Primitive::kPrimBoolean, result_type);
   }
 
   HInstruction* GetInput() const { return InputAt(0); }
@@ -4935,115 +4907,6 @@
 
 static constexpr uint32_t kNoRegNumber = -1;
 
-class HPhi : public HInstruction {
- public:
-  HPhi(ArenaAllocator* arena,
-       uint32_t reg_number,
-       size_t number_of_inputs,
-       Primitive::Type type,
-       uint32_t dex_pc = kNoDexPc)
-      : HInstruction(SideEffects::None(), dex_pc),
-        inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
-        reg_number_(reg_number) {
-    SetPackedField<TypeField>(ToPhiType(type));
-    DCHECK_NE(GetType(), Primitive::kPrimVoid);
-    // Phis are constructed live and marked dead if conflicting or unused.
-    // Individual steps of SsaBuilder should assume that if a phi has been
-    // marked dead, it can be ignored and will be removed by SsaPhiElimination.
-    SetPackedFlag<kFlagIsLive>(true);
-    SetPackedFlag<kFlagCanBeNull>(true);
-  }
-
-  // Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
-  static Primitive::Type ToPhiType(Primitive::Type type) {
-    switch (type) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimChar:
-        return Primitive::kPrimInt;
-      default:
-        return type;
-    }
-  }
-
-  bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
-
-  size_t InputCount() const OVERRIDE { return inputs_.size(); }
-
-  void AddInput(HInstruction* input);
-  void RemoveInputAt(size_t index);
-
-  Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); }
-  void SetType(Primitive::Type new_type) {
-    // Make sure that only valid type changes occur. The following are allowed:
-    //  (1) int  -> float/ref (primitive type propagation),
-    //  (2) long -> double (primitive type propagation).
-    DCHECK(GetType() == new_type ||
-           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) ||
-           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimNot) ||
-           (GetType() == Primitive::kPrimLong && new_type == Primitive::kPrimDouble));
-    SetPackedField<TypeField>(new_type);
-  }
-
-  bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
-  void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
-
-  uint32_t GetRegNumber() const { return reg_number_; }
-
-  void SetDead() { SetPackedFlag<kFlagIsLive>(false); }
-  void SetLive() { SetPackedFlag<kFlagIsLive>(true); }
-  bool IsDead() const { return !IsLive(); }
-  bool IsLive() const { return GetPackedFlag<kFlagIsLive>(); }
-
-  bool IsVRegEquivalentOf(HInstruction* other) const {
-    return other != nullptr
-        && other->IsPhi()
-        && other->AsPhi()->GetBlock() == GetBlock()
-        && other->AsPhi()->GetRegNumber() == GetRegNumber();
-  }
-
-  // Returns the next equivalent phi (starting from the current one) or null if there is none.
-  // An equivalent phi is a phi having the same dex register and type.
-  // It assumes that phis with the same dex register are adjacent.
-  HPhi* GetNextEquivalentPhiWithSameType() {
-    HInstruction* next = GetNext();
-    while (next != nullptr && next->AsPhi()->GetRegNumber() == reg_number_) {
-      if (next->GetType() == GetType()) {
-        return next->AsPhi();
-      }
-      next = next->GetNext();
-    }
-    return nullptr;
-  }
-
-  DECLARE_INSTRUCTION(Phi);
-
- protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    return inputs_[index];
-  }
-
-  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    inputs_[index] = input;
-  }
-
- private:
-  static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
-  static constexpr size_t kFieldTypeSize =
-      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
-  static constexpr size_t kFlagIsLive = kFieldType + kFieldTypeSize;
-  static constexpr size_t kFlagCanBeNull = kFlagIsLive + 1;
-  static constexpr size_t kNumberOfPhiPackedBits = kFlagCanBeNull + 1;
-  static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
-  using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
-
-  ArenaVector<HUserRecord<HInstruction*> > inputs_;
-  const uint32_t reg_number_;
-
-  DISALLOW_COPY_AND_ASSIGN(HPhi);
-};
-
 class HNullCheck : public HExpression<1> {
  public:
   // `HNullCheck` can trigger GC, as it may call the `NullPointerException`
@@ -5211,14 +5074,8 @@
 
 class HArrayGet : public HExpression<2> {
  public:
-  HArrayGet(HInstruction* array,
-            HInstruction* index,
-            Primitive::Type type,
-            uint32_t dex_pc,
-            SideEffects additional_side_effects = SideEffects::None())
-      : HExpression(type,
-                    SideEffects::ArrayReadOfType(type).Union(additional_side_effects),
-                    dex_pc) {
+  HArrayGet(HInstruction* array, HInstruction* index, Primitive::Type type, uint32_t dex_pc)
+      : HExpression(type, SideEffects::ArrayReadOfType(type), dex_pc) {
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
   }
@@ -5267,13 +5124,8 @@
             HInstruction* index,
             HInstruction* value,
             Primitive::Type expected_component_type,
-            uint32_t dex_pc,
-            SideEffects additional_side_effects = SideEffects::None())
-      : HTemplateInstruction(
-            SideEffects::ArrayWriteOfType(expected_component_type).Union(
-                SideEffectsForArchRuntimeCalls(value->GetType())).Union(
-                    additional_side_effects),
-            dex_pc) {
+            uint32_t dex_pc)
+      : HTemplateInstruction(SideEffects::None(), dex_pc) {
     SetPackedField<ExpectedComponentTypeField>(expected_component_type);
     SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == Primitive::kPrimNot);
     SetPackedFlag<kFlagValueCanBeNull>(true);
@@ -5281,6 +5133,8 @@
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
     SetRawInputAt(2, value);
+    // Make a best guess now, may be refined during SSA building.
+    ComputeSideEffects();
   }
 
   bool NeedsEnvironment() const OVERRIDE {
@@ -5333,6 +5187,12 @@
     return GetPackedField<ExpectedComponentTypeField>();
   }
 
+  void ComputeSideEffects() {
+    Primitive::Type type = GetComponentType();
+    SetSideEffects(SideEffects::ArrayWriteOfType(type).Union(
+        SideEffectsForArchRuntimeCalls(type)));
+  }
+
   static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type value_type) {
     return (value_type == Primitive::kPrimNot) ? SideEffects::CanTriggerGC() : SideEffects::None();
   }
@@ -5375,9 +5235,22 @@
     return obj == InputAt(0);
   }
 
+  void MarkAsStringLength() { SetPackedFlag<kFlagIsStringLength>(); }
+  bool IsStringLength() const { return GetPackedFlag<kFlagIsStringLength>(); }
+
   DECLARE_INSTRUCTION(ArrayLength);
 
  private:
+  // We treat a String as an array, creating the HArrayLength from the String.length()
+  // or String.isEmpty() intrinsic in the instruction simplifier. We can always
+  // determine whether a particular HArrayLength is actually a String.length() by
+  // looking at the type of the input, but that requires holding the mutator lock,
+  // so we prefer to use a flag so that code generators don't need to do the locking.
+  static constexpr size_t kFlagIsStringLength = kNumberOfExpressionPackedBits;
+  static constexpr size_t kNumberOfArrayLengthPackedBits = kFlagIsStringLength + 1;
+  static_assert(kNumberOfArrayLengthPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+
   DISALLOW_COPY_AND_ASSIGN(HArrayLength);
 };
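
The same packing idiom recurs in HLoadString below. A standalone sketch, assuming ART's BitField (base/bit_field.h) and MinimumBitsToStore (base/bit_utils.h) helpers; the Color enum and kFlagVisible name are invented for illustration:

    // One flag bit followed by a multi-bit enum field, with a static_assert
    // guarding the total bit budget, mirroring the HArrayLength layout above.
    enum class Color { kRed, kGreen, kBlue, kLast = kBlue };
    static constexpr size_t kFlagVisible = 0;
    static constexpr size_t kFieldColor = kFlagVisible + 1;
    static constexpr size_t kFieldColorSize =
        MinimumBitsToStore(static_cast<size_t>(Color::kLast));  // 2 bits, since kBlue == 2.
    static constexpr size_t kNumberOfPackedBits = kFieldColor + kFieldColorSize;
    static_assert(kNumberOfPackedBits <= 32u, "Too many packed fields.");
    using ColorField = BitField<Color, kFieldColor, kFieldColorSize>;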
 
@@ -5387,7 +5260,7 @@
   // constructor.
   HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc)
       : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
-    DCHECK(index->GetType() == Primitive::kPrimInt);
+    DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(index->GetType()));
     SetRawInputAt(0, index);
     SetRawInputAt(1, length);
   }
@@ -5411,7 +5284,7 @@
 
 class HSuspendCheck : public HTemplateInstruction<0> {
  public:
-  explicit HSuspendCheck(uint32_t dex_pc)
+  explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) {}
 
   bool NeedsEnvironment() const OVERRIDE {
@@ -5557,32 +5430,117 @@
 
 class HLoadString : public HExpression<1> {
  public:
+  // Determines how to load the String.
+  enum class LoadKind {
+    // Use boot image String* address that will be known at link time.
+    // Used for boot image strings referenced by boot image code in non-PIC mode.
+    kBootImageLinkTimeAddress,
+
+    // Use PC-relative boot image String* address that will be known at link time.
+    // Used for boot image strings referenced by boot image code in PIC mode.
+    kBootImageLinkTimePcRelative,
+
+    // Use a known boot image String* address, embedded in the code by the codegen.
+    // Used for boot image strings referenced by apps in AOT- and JIT-compiled code.
+    // Note: codegen needs to emit a linker patch if indicated by compiler options'
+    // GetIncludePatchInformation().
+    kBootImageAddress,
+
+    // Load from the resolved strings array at an absolute address.
+    // Used for strings outside the boot image referenced by JIT-compiled code.
+    kDexCacheAddress,
+
+    // Load from resolved strings array in the dex cache using a PC-relative load.
+    // Used for strings outside boot image when we know that we can access
+    // the dex cache arrays using a PC-relative load.
+    kDexCachePcRelative,
+
+    // Load from resolved strings array accessed through the class loaded from
+    // the compiled method's own ArtMethod*. This is the default access type when
+    // all other types are unavailable.
+    kDexCacheViaMethod,
+
+    kLast = kDexCacheViaMethod
+  };
+
   HLoadString(HCurrentMethod* current_method,
               uint32_t string_index,
-              uint32_t dex_pc,
-              bool is_in_dex_cache)
+              const DexFile& dex_file,
+              uint32_t dex_pc)
       : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
         string_index_(string_index) {
-    SetPackedFlag<kFlagIsInDexCache>(is_in_dex_cache);
+    SetPackedFlag<kFlagIsInDexCache>(false);
+    SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod);
+    load_data_.ref.dex_file = &dex_file;
     SetRawInputAt(0, current_method);
   }
 
+  void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) {
+    DCHECK(HasAddress(load_kind));
+    load_data_.address = address;
+    SetLoadKindInternal(load_kind);
+  }
+
+  void SetLoadKindWithStringReference(LoadKind load_kind,
+                                      const DexFile& dex_file,
+                                      uint32_t string_index) {
+    DCHECK(HasStringReference(load_kind));
+    load_data_.ref.dex_file = &dex_file;
+    string_index_ = string_index;
+    SetLoadKindInternal(load_kind);
+  }
+
+  void SetLoadKindWithDexCacheReference(LoadKind load_kind,
+                                        const DexFile& dex_file,
+                                        uint32_t element_index) {
+    DCHECK(HasDexCacheReference(load_kind));
+    load_data_.ref.dex_file = &dex_file;
+    load_data_.ref.dex_cache_element_index = element_index;
+    SetLoadKindInternal(load_kind);
+  }
+
+  LoadKind GetLoadKind() const {
+    return GetPackedField<LoadKindField>();
+  }
+
+  const DexFile& GetDexFile() const;
+
+  uint32_t GetStringIndex() const {
+    DCHECK(HasStringReference(GetLoadKind()) || /* For slow paths. */ !IsInDexCache());
+    return string_index_;
+  }
+
+  uint32_t GetDexCacheElementOffset() const;
+
+  uint64_t GetAddress() const {
+    DCHECK(HasAddress(GetLoadKind()));
+    return load_data_.address;
+  }
+
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return other->AsLoadString()->string_index_ == string_index_;
-  }
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE;
 
   size_t ComputeHashCode() const OVERRIDE { return string_index_; }
 
-  uint32_t GetStringIndex() const { return string_index_; }
+  // Will call the runtime if we need to load the string through
+  // the dex cache and the string is not guaranteed to be there yet.
+  bool NeedsEnvironment() const OVERRIDE {
+    LoadKind load_kind = GetLoadKind();
+    if (load_kind == LoadKind::kBootImageLinkTimeAddress ||
+        load_kind == LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == LoadKind::kBootImageAddress) {
+      return false;
+    }
+    return !IsInDexCache();
+  }
 
-  // Will call the runtime if the string is not already in the dex cache.
-  bool NeedsEnvironment() const OVERRIDE { return !IsInDexCache(); }
+  bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
+    return GetLoadKind() == LoadKind::kDexCacheViaMethod;
+  }
 
-  bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return true; }
   bool CanBeNull() const OVERRIDE { return false; }
-  bool CanThrow() const OVERRIDE { return !IsInDexCache(); }
+  bool CanThrow() const OVERRIDE { return NeedsEnvironment(); }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
     return SideEffects::CanTriggerGC();
@@ -5590,17 +5548,84 @@
 
   bool IsInDexCache() const { return GetPackedFlag<kFlagIsInDexCache>(); }
 
+  void MarkInDexCache() {
+    SetPackedFlag<kFlagIsInDexCache>(true);
+    DCHECK(!NeedsEnvironment());
+    RemoveEnvironment();
+    SetSideEffects(SideEffects::None());
+  }
+
+  size_t InputCount() const OVERRIDE {
+    return (InputAt(0) != nullptr) ? 1u : 0u;
+  }
+
+  void AddSpecialInput(HInstruction* special_input);
+
   DECLARE_INSTRUCTION(LoadString);
 
  private:
   static constexpr size_t kFlagIsInDexCache = kNumberOfExpressionPackedBits;
-  static constexpr size_t kNumberOfLoadStringPackedBits = kFlagIsInDexCache + 1;
+  static constexpr size_t kFieldLoadKind = kFlagIsInDexCache + 1;
+  static constexpr size_t kFieldLoadKindSize =
+      MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
+  static constexpr size_t kNumberOfLoadStringPackedBits = kFieldLoadKind + kFieldLoadKindSize;
   static_assert(kNumberOfLoadStringPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
 
-  const uint32_t string_index_;
+  static bool HasStringReference(LoadKind load_kind) {
+    return load_kind == LoadKind::kBootImageLinkTimeAddress ||
+        load_kind == LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == LoadKind::kDexCacheViaMethod;
+  }
+
+  static bool HasAddress(LoadKind load_kind) {
+    return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress;
+  }
+
+  static bool HasDexCacheReference(LoadKind load_kind) {
+    return load_kind == LoadKind::kDexCachePcRelative;
+  }
+
+  void SetLoadKindInternal(LoadKind load_kind);
+
+  // The string index also serves as the hash code and is needed for slow paths,
+  // so it must not be overwritten with other load data.
+  uint32_t string_index_;
+
+  union {
+    struct {
+      const DexFile* dex_file;            // For string reference and dex cache reference.
+      uint32_t dex_cache_element_index;   // Only for dex cache reference.
+    } ref;
+    uint64_t address;  // Up to 64-bit, needed for kDexCacheAddress on 64-bit targets.
+  } load_data_;
 
   DISALLOW_COPY_AND_ASSIGN(HLoadString);
 };
+std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs);
+
+// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
+inline const DexFile& HLoadString::GetDexFile() const {
+  DCHECK(HasStringReference(GetLoadKind()) || HasDexCacheReference(GetLoadKind()))
+      << GetLoadKind();
+  return *load_data_.ref.dex_file;
+}
+
+// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
+inline uint32_t HLoadString::GetDexCacheElementOffset() const {
+  DCHECK(HasDexCacheReference(GetLoadKind())) << GetLoadKind();
+  return load_data_.ref.dex_cache_element_index;
+}
+
+// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
+inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
+  // The special input is used for PC-relative loads on some architectures.
+  DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
+         GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind();
+  DCHECK(InputAt(0) == nullptr);
+  SetRawInputAt(0u, special_input);
+  special_input->AddUseAt(this, 0);
+}
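
A hedged consumer sketch, assuming only the accessors declared above; it shows the union discipline in practice, where a reader branches on GetLoadKind() and touches only the matching payload (each accessor DCHECKs the load kind):

    // Sketch: describe an HLoadString without violating the load_data_
    // union invariant.
    void DescribeLoad(HLoadString* load, std::ostream& os) {
      switch (load->GetLoadKind()) {
        case HLoadString::LoadKind::kBootImageAddress:
        case HLoadString::LoadKind::kDexCacheAddress:
          os << "address 0x" << std::hex << load->GetAddress();
          break;
        case HLoadString::LoadKind::kDexCachePcRelative:
          os << "dex cache element " << load->GetDexCacheElementOffset();
          break;
        default:  // The three kinds carrying a string reference.
          os << "string index " << load->GetStringIndex();
          break;
      }
    }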
 
 /**
  * Performs an initialization check on its Class object input.
@@ -5767,7 +5792,7 @@
       : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc),
         field_index_(field_index) {
     SetPackedField<FieldTypeField>(field_type);
-    DCHECK_EQ(field_type, value->GetType());
+    DCHECK_EQ(Primitive::PrimitiveKind(field_type), Primitive::PrimitiveKind(value->GetType()));
     SetRawInputAt(0, obj);
     SetRawInputAt(1, value);
   }
@@ -5827,7 +5852,7 @@
       : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc),
         field_index_(field_index) {
     SetPackedField<FieldTypeField>(field_type);
-    DCHECK_EQ(field_type, value->GetType());
+    DCHECK_EQ(Primitive::PrimitiveKind(field_type), Primitive::PrimitiveKind(value->GetType()));
     SetRawInputAt(0, value);
   }
 
@@ -6552,74 +6577,6 @@
   FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
 #undef INSTRUCTION_TYPE_CHECK
 
-class SwitchTable : public ValueObject {
- public:
-  SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
-      : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
-    int32_t table_offset = instruction.VRegB_31t();
-    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
-    if (sparse) {
-      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
-    } else {
-      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
-    }
-    num_entries_ = table[1];
-    values_ = reinterpret_cast<const int32_t*>(&table[2]);
-  }
-
-  uint16_t GetNumEntries() const {
-    return num_entries_;
-  }
-
-  void CheckIndex(size_t index) const {
-    if (sparse_) {
-      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
-      DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
-    } else {
-      // In a packed table, we have the starting key and num_entries_ values.
-      DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
-    }
-  }
-
-  int32_t GetEntryAt(size_t index) const {
-    CheckIndex(index);
-    return values_[index];
-  }
-
-  uint32_t GetDexPcForIndex(size_t index) const {
-    CheckIndex(index);
-    return dex_pc_ +
-        (reinterpret_cast<const int16_t*>(values_ + index) -
-         reinterpret_cast<const int16_t*>(&instruction_));
-  }
-
-  // Index of the first value in the table.
-  size_t GetFirstValueIndex() const {
-    if (sparse_) {
-      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
-      return num_entries_;
-    } else {
-      // In a packed table, we have the starting key and num_entries_ values.
-      return 1;
-    }
-  }
-
- private:
-  const Instruction& instruction_;
-  const uint32_t dex_pc_;
-
-  // Whether this is a sparse-switch table (or a packed-switch one).
-  const bool sparse_;
-
-  // This can't be const as it needs to be computed off of the given instruction, and complicated
-  // expressions in the initializer list seemed very ugly.
-  uint16_t num_entries_;
-
-  const int32_t* values_;
-
-  DISALLOW_COPY_AND_ASSIGN(SwitchTable);
-};
-
 // Create space in `blocks` for adding `number_of_new_blocks` entries
 // starting at location `at`. Blocks after `at` are moved accordingly.
 inline void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks,
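
For illustration, a usage sketch of this helper under the stated contract; new_block1/new_block2 are hypothetical, and the sketch assumes the freed slots open just after `at`, consistent with "Blocks after `at` are moved accordingly":

    // After the call, indices at+1 and at+2 are free to take the new blocks;
    // everything that previously followed `at` now sits two positions later.
    MakeRoomFor(&blocks, /* number_of_new_blocks */ 2u, /* at */ at);
    blocks[at + 1] = new_block1;
    blocks[at + 2] = new_block2;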
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 764f5fe..d4e2a58 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -91,7 +91,7 @@
   entry->InsertInstructionBefore(to_insert, parameter2);
 
   ASSERT_TRUE(parameter1->HasUses());
-  ASSERT_TRUE(parameter1->GetUses().HasOnlyOneUse());
+  ASSERT_TRUE(parameter1->GetUses().HasExactlyOneElement());
 }
 
 /**
@@ -115,7 +115,7 @@
   entry->AddInstruction(to_add);
 
   ASSERT_TRUE(parameter->HasUses());
-  ASSERT_TRUE(parameter->GetUses().HasOnlyOneUse());
+  ASSERT_TRUE(parameter->GetUses().HasExactlyOneElement());
 }
 
 TEST(Node, ParentEnvironment) {
@@ -134,7 +134,7 @@
   entry->AddInstruction(new (&allocator) HExit());
 
   ASSERT_TRUE(parameter1->HasUses());
-  ASSERT_TRUE(parameter1->GetUses().HasOnlyOneUse());
+  ASSERT_TRUE(parameter1->GetUses().HasExactlyOneElement());
 
   HEnvironment* environment = new (&allocator) HEnvironment(
       &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0, kStatic, with_environment);
@@ -145,7 +145,7 @@
   with_environment->SetRawEnvironment(environment);
 
   ASSERT_TRUE(parameter1->HasEnvironmentUses());
-  ASSERT_TRUE(parameter1->GetEnvUses().HasOnlyOneUse());
+  ASSERT_TRUE(parameter1->GetEnvUses().HasExactlyOneElement());
 
   HEnvironment* parent1 = new (&allocator) HEnvironment(
       &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0, kStatic, nullptr);
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index 2b0d522..a6d234d 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -32,7 +32,7 @@
 namespace art {
 
 // Run the tests only on host.
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 
 class OptimizingCFITest : public CFITest {
  public:
@@ -54,7 +54,7 @@
     isa_features_.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
     graph_ = CreateGraph(&allocator_);
     // Generate simple frame with some spills.
-    code_gen_.reset(CodeGenerator::Create(graph_, isa, *isa_features_, opts_));
+    code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_);
     code_gen_->GetAssembler()->cfi().SetEnabled(true);
     const int frame_size = 64;
     int core_reg = 0;
@@ -241,6 +241,6 @@
   Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi);
 }
 
-#endif  // __ANDROID__
+#endif  // ART_TARGET_ANDROID
 
 }  // namespace art
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 125c00d..37197af 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -259,8 +259,7 @@
   explicit OptimizingCompiler(CompilerDriver* driver);
   ~OptimizingCompiler();
 
-  bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, CompilationUnit* cu) const
-      OVERRIDE;
+  bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const OVERRIDE;
 
   CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                           uint32_t access_flags,
@@ -283,8 +282,6 @@
         InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
   }
 
-  void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
-
   void Init() OVERRIDE;
 
   void UnInit() const OVERRIDE;
@@ -365,12 +362,8 @@
   }
 }
 
-void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu ATTRIBUTE_UNUSED) const {
-}
-
 bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED,
-                                          const DexFile& dex_file ATTRIBUTE_UNUSED,
-                                          CompilationUnit* cu ATTRIBUTE_UNUSED) const {
+                                          const DexFile& dex_file ATTRIBUTE_UNUSED) const {
   return true;
 }
 
@@ -604,9 +597,7 @@
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
       ArrayRef<const SrcMapElem>(),
-      ArrayRef<const uint8_t>(),  // mapping_table.
       ArrayRef<const uint8_t>(stack_map),
-      ArrayRef<const uint8_t>(),  // native_gc_map.
       ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
       ArrayRef<const LinkerPatch>(linker_patches));
 
@@ -662,9 +653,15 @@
   }
 
   DexCompilationUnit dex_compilation_unit(
-    nullptr, class_loader, Runtime::Current()->GetClassLinker(), dex_file, code_item,
-    class_def_idx, method_idx, access_flags,
-    nullptr, dex_cache);
+      class_loader,
+      Runtime::Current()->GetClassLinker(),
+      dex_file,
+      code_item,
+      class_def_idx,
+      method_idx,
+      access_flags,
+      /* verified_method */ nullptr,
+      dex_cache);
 
   bool requires_barrier = dex_compilation_unit.IsConstructor()
       && compiler_driver->RequiresConstructorBarrier(Thread::Current(),
@@ -728,14 +725,20 @@
                             &dex_compilation_unit,
                             &dex_compilation_unit,
                             &dex_file,
+                            *code_item,
                             compiler_driver,
                             compilation_stats_.get(),
                             interpreter_metadata,
-                            dex_cache);
-      GraphAnalysisResult result = builder.BuildGraph(*code_item, &handles);
+                            dex_cache,
+                            &handles);
+      GraphAnalysisResult result = builder.BuildGraph();
       if (result != kAnalysisSuccess) {
         switch (result) {
+          case kAnalysisSkipped:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledSkipped);
+            break;
           case kAnalysisInvalidBytecode:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledInvalidBytecode);
             break;
           case kAnalysisFailThrowCatchLoop:
             MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
@@ -766,15 +769,6 @@
   return codegen.release();
 }
 
-static bool CanHandleVerificationFailure(const VerifiedMethod* verified_method) {
-  // For access errors the compiler will use the unresolved helpers (e.g. HInvokeUnresolved).
-  uint32_t unresolved_mask = verifier::VerifyError::VERIFY_ERROR_NO_CLASS
-      | verifier::VerifyError::VERIFY_ERROR_ACCESS_CLASS
-      | verifier::VerifyError::VERIFY_ERROR_ACCESS_FIELD
-      | verifier::VerifyError::VERIFY_ERROR_ACCESS_METHOD;
-  return (verified_method->GetEncounteredVerificationFailures() & (~unresolved_mask)) == 0;
-}
-
 CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                             uint32_t access_flags,
                                             InvokeType invoke_type,
@@ -789,7 +783,8 @@
   const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
   DCHECK(!verified_method->HasRuntimeThrow());
   if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
-      || CanHandleVerificationFailure(verified_method)) {
+      || verifier::MethodVerifier::CanCompilerHandleVerificationFailure(
+            verified_method->GetEncounteredVerificationFailures())) {
     ArenaAllocator arena(Runtime::Current()->GetArenaPool());
     CodeVectorAllocator code_allocator(&arena);
     std::unique_ptr<CodeGenerator> codegen(
@@ -862,6 +857,7 @@
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       method->GetDeclaringClass()->GetClassLoader()));
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
+  DCHECK(method->IsCompilable());
 
   jobject jclass_loader = class_loader.ToJObject();
   const DexFile* dex_file = method->GetDexFile();
@@ -911,9 +907,7 @@
   const void* code = code_cache->CommitCode(
       self,
       method,
-      nullptr,
       stack_map_data,
-      nullptr,
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
@@ -947,13 +941,11 @@
     info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
     info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
     info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
-    ArrayRef<const uint8_t> elf_file = debug::WriteDebugElfFileForMethods(
+    std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForMethods(
         GetCompilerDriver()->GetInstructionSet(),
         GetCompilerDriver()->GetInstructionSetFeatures(),
         ArrayRef<const debug::MethodDebugInfo>(&info, 1));
-    CreateJITCodeEntryForAddress(code_address,
-                                 std::unique_ptr<const uint8_t[]>(elf_file.data()),
-                                 elf_file.size());
+    CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
   }
 
   Runtime::Current()->GetJit()->AddMemoryUsage(method, arena.BytesUsed());
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 3717926..9cc6ea4 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,7 +38,8 @@
   kRemovedCheckedCast,
   kRemovedDeadInstruction,
   kRemovedNullCheck,
-  kNotCompiledBranchOutsideMethodCode,
+  kNotCompiledSkipped,
+  kNotCompiledInvalidBytecode,
   kNotCompiledThrowCatchLoop,
   kNotCompiledAmbiguousArrayOp,
   kNotCompiledHugeMethod,
@@ -115,7 +116,8 @@
       case kRemovedCheckedCast: name = "RemovedCheckedCast"; break;
       case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
       case kRemovedNullCheck: name = "RemovedNullCheck"; break;
-      case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
+      case kNotCompiledSkipped: name = "NotCompiledSkipped"; break;
+      case kNotCompiledInvalidBytecode: name = "NotCompiledInvalidBytecode"; break;
       case kNotCompiledThrowCatchLoop : name = "NotCompiledThrowCatchLoop"; break;
       case kNotCompiledAmbiguousArrayOp : name = "NotCompiledAmbiguousArrayOp"; break;
       case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 0ca7305..dd5cb1c 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -91,8 +91,8 @@
   {
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScopeCollection handles(soa.Self());
-    HGraphBuilder builder(graph, return_type);
-    bool graph_built = (builder.BuildGraph(*item, &handles) == kAnalysisSuccess);
+    HGraphBuilder builder(graph, *item, &handles, return_type);
+    bool graph_built = (builder.BuildGraph() == kAnalysisSuccess);
     return graph_built ? graph : nullptr;
   }
 }
@@ -109,7 +109,8 @@
   std::string result = original;
   for (const auto& p : diff) {
     std::string::size_type pos = result.find(p.first);
-    EXPECT_NE(pos, std::string::npos);
+    DCHECK_NE(pos, std::string::npos)
+        << "Could not find: \"" << p.first << "\" in \"" << result << "\"";
     result.replace(pos, p.first.size(), p.second);
   }
   return result;
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index d281a9f..dafbd3d 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -80,6 +80,15 @@
     HandleInvoke(invoke);
   }
 
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    HLoadString::LoadKind load_kind = load_string->GetLoadKind();
+    if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
+      InitializePCRelativeBasePointer();
+      load_string->AddSpecialInput(base_);
+    }
+  }
+
   void BinaryFP(HBinaryOperation* bin) {
     HConstant* rhs = bin->InputAt(1)->AsConstant();
     if (rhs != nullptr && Primitive::IsFloatingPointType(rhs->GetType())) {
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index fc72727..dcc89e8 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -63,8 +63,8 @@
 void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
   // Try to find a static invoke or a new-instance from which this check originated.
   HInstruction* implicit_clinit = nullptr;
-  for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* user = it.Current()->GetUser();
+  for (const HUseListNode<HInstruction*>& use : check->GetUses()) {
+    HInstruction* user = use.GetUser();
     if ((user->IsInvokeStaticOrDirect() || user->IsNewInstance()) &&
         CanMoveClinitCheck(check, user)) {
       implicit_clinit = user;
@@ -85,11 +85,12 @@
   // If we found a static invoke or new-instance for merging, remove the check
   // from dominated static invokes.
   if (implicit_clinit != nullptr) {
-    for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); ) {
-      HInstruction* user = it.Current()->GetUser();
+    const HUseList<HInstruction*>& uses = check->GetUses();
+    for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+      HInstruction* user = it->GetUser();
       // All other uses must be dominated.
       DCHECK(implicit_clinit->StrictlyDominates(user) || (implicit_clinit == user));
-      it.Advance();  // Advance before we remove the node, reference to the next node is preserved.
+      ++it;  // Advance before we remove the node, reference to the next node is preserved.
       if (user->IsInvokeStaticOrDirect()) {
         user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck(
             HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
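
The idiom in this loop generalizes: advance the iterator past the current node before mutating the list. A self-contained sketch on std::list, whose erase() likewise invalidates only the erased node:

    #include <list>

    void RemoveEvens(std::list<int>* items) {
      for (auto it = items->begin(), end = items->end(); it != end; /* ++it below */) {
        auto current = it;
        ++it;  // Advance before erasing; `it` stays valid afterwards.
        if (*current % 2 == 0) {
          items->erase(current);
        }
      }
    }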
@@ -159,7 +160,7 @@
 
 void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
   if (condition->HasOnlyOneNonEnvironmentUse()) {
-    HInstruction* user = condition->GetUses().GetFirst()->GetUser();
+    HInstruction* user = condition->GetUses().front().GetUser();
     if (CanEmitConditionAt(condition, user)) {
       condition->MarkEmittedAtUseSite();
     }
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index 429e6e3..ee32518 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -55,13 +55,13 @@
     if (instruction->HasUses()) {
       PrintString(" [");
       bool first = true;
-      for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
+      for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
         if (first) {
           first = false;
         } else {
           PrintString(", ");
         }
-        PrintInt(it.Current()->GetUser()->GetId());
+        PrintInt(use.GetUser()->GetId());
       }
       PrintString("]");
     }
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index d5b95d2..951cdfb 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -44,27 +44,27 @@
 
   const char* expected =
       "BasicBlock 0, succ: 1\n"
-      "  2: SuspendCheck\n"
-      "  3: Goto 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  0: ReturnVoid\n"
+      "  2: ReturnVoid\n"
       "BasicBlock 2, pred: 1\n"
-      "  1: Exit\n";
+      "  3: Exit\n";
 
   TestCode(data, expected);
 }
 
 TEST_F(PrettyPrinterTest, CFG1) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  3: SuspendCheck\n"
-    "  4: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  0: Goto 2\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  1: ReturnVoid\n"
-    "BasicBlock 3, pred: 2\n"
-    "  2: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  2: Goto 2\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  3: ReturnVoid\n"
+      "BasicBlock 3, pred: 2\n"
+      "  4: Exit\n";
 
   const uint16_t data[] =
     ZERO_REGISTER_CODE_ITEM(
@@ -76,17 +76,17 @@
 
 TEST_F(PrettyPrinterTest, CFG2) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  4: SuspendCheck\n"
-    "  5: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  0: Goto 2\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  1: Goto 3\n"
-    "BasicBlock 3, pred: 2, succ: 4\n"
-    "  2: ReturnVoid\n"
-    "BasicBlock 4, pred: 3\n"
-    "  3: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  2: Goto 2\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  3: Goto 3\n"
+      "BasicBlock 3, pred: 2, succ: 4\n"
+      "  4: ReturnVoid\n"
+      "BasicBlock 4, pred: 3\n"
+      "  5: Exit\n";
 
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
@@ -98,17 +98,17 @@
 
 TEST_F(PrettyPrinterTest, CFG3) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  4: SuspendCheck\n"
-    "  5: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3\n"
-    "  0: Goto 3\n"
-    "BasicBlock 2, pred: 3, succ: 4\n"
-    "  1: ReturnVoid\n"
-    "BasicBlock 3, pred: 1, succ: 2\n"
-    "  2: Goto 2\n"
-    "BasicBlock 4, pred: 2\n"
-    "  3: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 3\n"
+      "  2: Goto 3\n"
+      "BasicBlock 2, pred: 3, succ: 4\n"
+      "  4: ReturnVoid\n"
+      "BasicBlock 3, pred: 1, succ: 2\n"
+      "  3: Goto 2\n"
+      "BasicBlock 4, pred: 2\n"
+      "  5: Exit\n";
 
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x200,
@@ -134,14 +134,14 @@
 
 TEST_F(PrettyPrinterTest, CFG4) {
   const char* expected =
-    "BasicBlock 0, succ: 3\n"
-    "  2: SuspendCheck\n"
-    "  3: Goto 3\n"
-    "BasicBlock 1, pred: 3, 1, succ: 1\n"
-    "  5: SuspendCheck\n"
-    "  0: Goto 1\n"
-    "BasicBlock 3, pred: 0, succ: 1\n"
-    "  4: Goto 1\n";
+      "BasicBlock 0, succ: 3\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 3\n"
+      "BasicBlock 1, pred: 3, 1, succ: 1\n"
+      "  3: SuspendCheck\n"
+      "  4: Goto 1\n"
+      "BasicBlock 3, pred: 0, succ: 1\n"
+      "  0: Goto 1\n";
 
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::NOP,
@@ -157,13 +157,13 @@
 
 TEST_F(PrettyPrinterTest, CFG5) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  3: SuspendCheck\n"
-    "  4: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3\n"
-    "  0: ReturnVoid\n"
-    "BasicBlock 3, pred: 1\n"
-    "  2: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 3\n"
+      "  2: ReturnVoid\n"
+      "BasicBlock 3, pred: 1\n"
+      "  3: Exit\n";
 
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::RETURN_VOID,
@@ -175,21 +175,21 @@
 
 TEST_F(PrettyPrinterTest, CFG6) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  1: IntConstant [5, 5]\n"
-    "  10: SuspendCheck\n"
-    "  11: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5, 2\n"
-    "  5: Equal(1, 1) [6]\n"
-    "  6: If(5)\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  7: Goto 3\n"
-    "BasicBlock 3, pred: 5, 2, succ: 4\n"
-    "  8: ReturnVoid\n"
-    "BasicBlock 4, pred: 3\n"
-    "  9: Exit\n"
-    "BasicBlock 5, pred: 1, succ: 3\n"
-    "  12: Goto 3\n";
+      "BasicBlock 0, succ: 1\n"
+      "  3: IntConstant [4, 4]\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 2\n"
+      "  4: Equal(3, 3) [5]\n"
+      "  5: If(4)\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  6: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 4\n"
+      "  7: ReturnVoid\n"
+      "BasicBlock 4, pred: 3\n"
+      "  8: Exit\n"
+      "BasicBlock 5, pred: 1, succ: 3\n"
+      "  0: Goto 3\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -202,22 +202,22 @@
 
 TEST_F(PrettyPrinterTest, CFG7) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  1: IntConstant [5, 5]\n"
-    "  10: SuspendCheck\n"
-    "  11: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5, 6\n"
-    "  5: Equal(1, 1) [6]\n"
-    "  6: If(5)\n"
-    "BasicBlock 2, pred: 6, 3, succ: 3\n"
-    "  7: Goto 3\n"
-    "BasicBlock 3, pred: 5, 2, succ: 2\n"
-    "  14: SuspendCheck\n"
-    "  8: Goto 2\n"
-    "BasicBlock 5, pred: 1, succ: 3\n"
-    "  12: Goto 3\n"
-    "BasicBlock 6, pred: 1, succ: 2\n"
-    "  13: Goto 2\n";
+      "BasicBlock 0, succ: 1\n"
+      "  4: IntConstant [5, 5]\n"
+      "  2: SuspendCheck\n"
+      "  3: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 6\n"
+      "  5: Equal(4, 4) [6]\n"
+      "  6: If(5)\n"
+      "BasicBlock 2, pred: 6, 3, succ: 3\n"
+      "  11: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 2\n"
+      "  8: SuspendCheck\n"
+      "  9: Goto 2\n"
+      "BasicBlock 5, pred: 1, succ: 3\n"
+      "  0: Goto 3\n"
+      "BasicBlock 6, pred: 1, succ: 2\n"
+      "  1: Goto 2\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -230,14 +230,14 @@
 
 TEST_F(PrettyPrinterTest, IntConstant) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  1: IntConstant\n"
-    "  5: SuspendCheck\n"
-    "  6: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  3: ReturnVoid\n"
-    "BasicBlock 2, pred: 1\n"
-    "  4: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  3: ReturnVoid\n"
+      "BasicBlock 2, pred: 1\n"
+      "  4: Exit\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 95f10e0..f2394f6 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -23,6 +23,17 @@
 
 namespace art {
 
+static inline mirror::DexCache* FindDexCacheWithHint(Thread* self,
+                                                     const DexFile& dex_file,
+                                                     Handle<mirror::DexCache> hint_dex_cache)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (LIKELY(hint_dex_cache->GetDexFile() == &dex_file)) {
+    return hint_dex_cache.Get();
+  } else {
+    return Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file);
+  }
+}
+
 static inline ReferenceTypeInfo::TypeHandle GetRootHandle(StackHandleScopeCollection* handles,
                                                           ClassLinker::ClassRoot class_root,
                                                           ReferenceTypeInfo::TypeHandle* cache) {
@@ -35,6 +46,13 @@
   return *cache;
 }
 
+// Returns true if klass is admissible to the propagation: non-null and non-erroneous.
+// For an array type, we also check if the component type is admissible.
+static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+  return klass != nullptr && !klass->IsErroneous() &&
+      (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType()));
+}
+
 ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() {
   return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_);
 }
@@ -54,10 +72,12 @@
 class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
  public:
   RTPVisitor(HGraph* graph,
+             Handle<mirror::DexCache> hint_dex_cache,
              HandleCache* handle_cache,
              ArenaVector<HInstruction*>* worklist,
              bool is_first_run)
     : HGraphDelegateVisitor(graph),
+      hint_dex_cache_(hint_dex_cache),
       handle_cache_(handle_cache),
       worklist_(worklist),
       is_first_run_(is_first_run) {}
@@ -70,7 +90,8 @@
   void VisitNewArray(HNewArray* instr) OVERRIDE;
   void VisitParameterValue(HParameterValue* instr) OVERRIDE;
   void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info);
-  void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact);
+  void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE;
   void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE;
   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) OVERRIDE;
@@ -86,16 +107,19 @@
                                bool is_exact);
 
  private:
+  Handle<mirror::DexCache> hint_dex_cache_;
   HandleCache* handle_cache_;
   ArenaVector<HInstruction*>* worklist_;
   const bool is_first_run_;
 };
 
 ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
+                                                   Handle<mirror::DexCache> hint_dex_cache,
                                                    StackHandleScopeCollection* handles,
                                                    bool is_first_run,
                                                    const char* name)
     : HOptimization(graph, name),
+      hint_dex_cache_(hint_dex_cache),
       handle_cache_(handles),
       worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)),
       is_first_run_(is_first_run) {
@@ -130,7 +154,7 @@
 }
 
 void ReferenceTypePropagation::Visit(HInstruction* instruction) {
-  RTPVisitor visitor(graph_, &handle_cache_, &worklist_, is_first_run_);
+  RTPVisitor visitor(graph_, hint_dex_cache_, &handle_cache_, &worklist_, is_first_run_);
   instruction->Accept(&visitor);
 }
 
@@ -149,7 +173,7 @@
 }
 
 void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
-  RTPVisitor visitor(graph_, &handle_cache_, &worklist_, is_first_run_);
+  RTPVisitor visitor(graph_, hint_dex_cache_, &handle_cache_, &worklist_, is_first_run_);
   // Handle Phis first as there might be instructions in the same block who depend on them.
   for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
     VisitPhi(it.Current()->AsPhi());
@@ -187,8 +211,8 @@
   if (existing_bound_type->GetUpperBound().IsSupertypeOf(upper_bound)) {
     if (kIsDebugBuild) {
       // Check that the existing HBoundType dominates all the uses.
-      for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
-        HInstruction* user = it.Current()->GetUser();
+      for (const HUseListNode<HInstruction*>& use : obj->GetUses()) {
+        HInstruction* user = use.GetUser();
         if (dominator_instr != nullptr) {
           DCHECK(!dominator_instr->StrictlyDominates(user)
               || user == existing_bound_type
@@ -242,8 +266,12 @@
       ? ifInstruction->IfTrueSuccessor()
       : ifInstruction->IfFalseSuccessor();
 
-  for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* user = it.Current()->GetUser();
+  const HUseList<HInstruction*>& uses = obj->GetUses();
+  for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+    HInstruction* user = it->GetUser();
+    size_t index = it->GetIndex();
+    // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+    ++it;
     if (notNullBlock->Dominates(user->GetBlock())) {
       if (bound_type == nullptr) {
         ScopedObjectAccess soa(Thread::Current());
@@ -264,7 +292,7 @@
           break;
         }
       }
-      user->ReplaceInput(bound_type, it.Current()->GetIndex());
+      user->ReplaceInput(bound_type, index);
     }
   }
 }
@@ -358,7 +386,6 @@
   HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
   ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
   {
-    ScopedObjectAccess soa(Thread::Current());
     if (!class_rti.IsValid()) {
      // We have loaded an unresolved class. Don't bother bounding the type.
       return;
@@ -379,8 +406,12 @@
     return;
   }
   DCHECK(!obj->IsLoadClass()) << "We should not replace HLoadClass instructions";
-  for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* user = it.Current()->GetUser();
+  const HUseList<HInstruction*>& uses = obj->GetUses();
+  for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+    HInstruction* user = it->GetUser();
+    size_t index = it->GetIndex();
+    // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+    ++it;
     if (instanceOfTrueBlock->Dominates(user->GetBlock())) {
       if (bound_type == nullptr) {
         ScopedObjectAccess soa(Thread::Current());
@@ -396,7 +427,7 @@
           break;
         }
       }
-      user->ReplaceInput(bound_type, it.Current()->GetIndex());
+      user->ReplaceInput(bound_type, index);
     }
   }
 }
@@ -409,10 +440,10 @@
     if (kIsDebugBuild) {
       HInvoke* invoke = instr->AsInvoke();
       ClassLinker* cl = Runtime::Current()->GetClassLinker();
-      ScopedObjectAccess soa(Thread::Current());
-      StackHandleScope<2> hs(soa.Self());
+      Thread* self = Thread::Current();
+      StackHandleScope<2> hs(self);
       Handle<mirror::DexCache> dex_cache(
-          hs.NewHandle(cl->FindDexCache(soa.Self(), invoke->GetDexFile(), false)));
+          hs.NewHandle(FindDexCacheWithHint(self, invoke->GetDexFile(), hint_dex_cache_)));
       // Use a null loader. We should probably use the compiling method's class loader,
       // but then we would need to pass it to RTPVisitor just for this debug check. Since
       // the method is from the String class, the null loader is good enough.
@@ -429,8 +460,7 @@
     }
     instr->SetReferenceTypeInfo(
         ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true));
-  } else if (klass != nullptr) {
-    ScopedObjectAccess soa(Thread::Current());
+  } else if (IsAdmissible(klass)) {
     ReferenceTypeInfo::TypeHandle handle = handle_cache_->NewHandle(klass);
     is_exact = is_exact || handle->CannotBeAssignedFromOtherTypes();
     instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact));
@@ -446,8 +476,7 @@
   DCHECK_EQ(instr->GetType(), Primitive::kPrimNot);
 
   ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-      soa.Self(), dex_file, false);
+  mirror::DexCache* dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_);
   // Get type from dex cache assuming it was populated by the verifier.
   SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact);
 }
@@ -460,24 +489,24 @@
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
 
-static mirror::Class* GetClassFromDexCache(Thread* self, const DexFile& dex_file, uint16_t type_idx)
+static mirror::Class* GetClassFromDexCache(Thread* self,
+                                           const DexFile& dex_file,
+                                           uint16_t type_idx,
+                                           Handle<mirror::DexCache> hint_dex_cache)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  mirror::DexCache* dex_cache =
-      Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, /* allow_failure */ true);
-  if (dex_cache == nullptr) {
-    // Dex cache could not be found. This should only happen during gtests.
-    return nullptr;
-  }
+  mirror::DexCache* dex_cache = FindDexCacheWithHint(self, dex_file, hint_dex_cache);
   // Get type from dex cache assuming it was populated by the verifier.
   return dex_cache->GetResolvedType(type_idx);
 }
 
 void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue* instr) {
-  ScopedObjectAccess soa(Thread::Current());
   // We check if the existing type is valid: the inliner may have set it.
   if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
-    mirror::Class* resolved_class =
-        GetClassFromDexCache(soa.Self(), instr->GetDexFile(), instr->GetTypeIndex());
+    ScopedObjectAccess soa(Thread::Current());
+    mirror::Class* resolved_class = GetClassFromDexCache(soa.Self(),
+                                                         instr->GetDexFile(),
+                                                         instr->GetTypeIndex(),
+                                                         hint_dex_cache_);
     SetClassAsTypeInfo(instr, resolved_class, /* is_exact */ false);
   }
 }
@@ -532,9 +561,11 @@
 void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) {
   ScopedObjectAccess soa(Thread::Current());
   // Get type from dex cache assuming it was populated by the verifier.
-  mirror::Class* resolved_class =
-      GetClassFromDexCache(soa.Self(), instr->GetDexFile(), instr->GetTypeIndex());
-  if (resolved_class != nullptr) {
+  mirror::Class* resolved_class = GetClassFromDexCache(soa.Self(),
+                                                       instr->GetDexFile(),
+                                                       instr->GetTypeIndex(),
+                                                       hint_dex_cache_);
+  if (IsAdmissible(resolved_class)) {
     instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(
         handle_cache_->NewHandle(resolved_class), /* is_exact */ true));
   }
@@ -567,7 +598,6 @@
 }
 
 void ReferenceTypePropagation::RTPVisitor::VisitNullCheck(HNullCheck* instr) {
-  ScopedObjectAccess soa(Thread::Current());
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
   if (parent_rti.IsValid()) {
     instr->SetReferenceTypeInfo(parent_rti);
@@ -575,10 +605,9 @@
 }
 
 void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) {
-  ScopedObjectAccess soa(Thread::Current());
-
   ReferenceTypeInfo class_rti = instr->GetUpperBound();
   if (class_rti.IsValid()) {
+    ScopedObjectAccess soa(Thread::Current());
     // Narrow the type as much as possible.
     HInstruction* obj = instr->InputAt(0);
     ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo();
@@ -609,8 +638,6 @@
 }
 
 void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
-  ScopedObjectAccess soa(Thread::Current());
-
   HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
   ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
   HBoundType* bound_type = check_cast->GetNext()->AsBoundType();
@@ -639,13 +666,6 @@
   }
 
   if (phi->GetBlock()->IsLoopHeader()) {
-    if (!is_first_run_ && graph_->IsCompilingOsr()) {
-      // Don't update the type of a loop phi when compiling OSR: we may have done
-      // speculative optimizations dominating that phi, that do not hold at the
-      // point the interpreter jumps to that loop header.
-      return;
-    }
-    ScopedObjectAccess soa(Thread::Current());
     // Set the initial type for the phi. Use the non back edge input for reaching
     // a fixed point faster.
     HInstruction* first_input = phi->InputAt(0);
@@ -718,7 +738,7 @@
   }
 
   Handle<mirror::Class> handle = parent_rti.GetTypeHandle();
-  if (handle->IsObjectArrayClass()) {
+  if (handle->IsObjectArrayClass() && IsAdmissible(handle->GetComponentType())) {
     ReferenceTypeInfo::TypeHandle component_handle =
         handle_cache->NewHandle(handle->GetComponentType());
     bool is_exact = component_handle->CannotBeAssignedFromOtherTypes();
@@ -760,7 +780,8 @@
 
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  mirror::DexCache* dex_cache = cl->FindDexCache(soa.Self(), instr->GetDexFile());
+  mirror::DexCache* dex_cache =
+      FindDexCacheWithHint(soa.Self(), instr->GetDexFile(), hint_dex_cache_);
   size_t pointer_size = cl->GetImagePointerSize();
   ArtMethod* method = dex_cache->GetResolvedMethod(instr->GetDexMethodIndex(), pointer_size);
   mirror::Class* klass = (method == nullptr) ? nullptr : method->GetReturnType(false, pointer_size);
@@ -887,8 +908,8 @@
 }
 
 void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
-  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* user = it.Current()->GetUser();
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    HInstruction* user = use.GetUser();
     if ((user->IsPhi() && user->AsPhi()->IsLive())
        || user->IsBoundType()
        || user->IsNullCheck()
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 028a6fc..2106be6 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -32,6 +32,7 @@
 class ReferenceTypePropagation : public HOptimization {
  public:
   ReferenceTypePropagation(HGraph* graph,
+                           Handle<mirror::DexCache> hint_dex_cache,
                            StackHandleScopeCollection* handles,
                            bool is_first_run,
                            const char* name = kReferenceTypePropagationPassName);
@@ -90,6 +91,10 @@
 
   void ValidateTypes();
 
+  // Note: hint_dex_cache_ is usually, but not necessarily, the dex cache associated with
+  // graph_->GetDexFile(). Since we may look up also in other dex files, it's used only
+  // as a hint, to reduce the number of calls to the costly ClassLinker::FindDexCache().
+  Handle<mirror::DexCache> hint_dex_cache_;
   HandleCache handle_cache_;
 
   ArenaVector<HInstruction*> worklist_;
@@ -99,6 +104,8 @@
 
   static constexpr size_t kDefaultWorklistSize = 8;
 
+  friend class ReferenceTypePropagationTest;
+
   DISALLOW_COPY_AND_ASSIGN(ReferenceTypePropagation);
 };
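
A hedged call-site sketch, where `graph`, `dex_cache` and `handles` stand in for whatever the caller has at hand; the new unit test below passes an empty Handle<mirror::DexCache>() as the hint, which is also legal:

    // The hint is best-effort: FindDexCacheWithHint() falls back to
    // ClassLinker::FindDexCache() whenever the hint's dex file mismatches.
    ReferenceTypePropagation(graph,
                             dex_cache,  // Usually the compiled method's dex cache.
                             handles,    // StackHandleScopeCollection*.
                             /* is_first_run */ true).Run();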
 
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
new file mode 100644
index 0000000..7649b50
--- /dev/null
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/arena_allocator.h"
+#include "builder.h"
+#include "nodes.h"
+#include "object_lock.h"
+#include "optimizing_unit_test.h"
+#include "reference_type_propagation.h"
+
+namespace art {
+
+/**
+ * Fixture class for unit testing the ReferenceTypePropagation phase. Used to verify the
+ * functionality of methods and situations that are hard to set up with checker tests.
+ */
+class ReferenceTypePropagationTest : public CommonCompilerTest {
+ public:
+  ReferenceTypePropagationTest() : pool_(), allocator_(&pool_) {
+    graph_ = CreateGraph(&allocator_);
+  }
+
+  ~ReferenceTypePropagationTest() { }
+
+  void SetupPropagation(StackHandleScopeCollection* handles) {
+    graph_->InitializeInexactObjectRTI(handles);
+    propagation_ = new (&allocator_) ReferenceTypePropagation(graph_,
+                                                              Handle<mirror::DexCache>(),
+                                                              handles,
+                                                              true,
+                                                              "test_prop");
+  }
+
+  // Relay method to merge types in reference type propagation.
+  ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a,
+                               const ReferenceTypeInfo& b) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return propagation_->MergeTypes(a, b);
+  }
+
+  // Helper method to construct an invalid type.
+  ReferenceTypeInfo InvalidType() {
+    return ReferenceTypeInfo::CreateInvalid();
+  }
+
+  // Helper method to construct the Object type.
+  ReferenceTypeInfo ObjectType(bool is_exact = true) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return ReferenceTypeInfo::Create(propagation_->handle_cache_.GetObjectClassHandle(), is_exact);
+  }
+
+  // Helper method to construct the String type.
+  ReferenceTypeInfo StringType(bool is_exact = true) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return ReferenceTypeInfo::Create(propagation_->handle_cache_.GetStringClassHandle(), is_exact);
+  }
+
+  // General building fields.
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  HGraph* graph_;
+
+  ReferenceTypePropagation* propagation_;
+};
+
+//
+// The actual ReferenceTypePropagation unit tests.
+//
+
+TEST_F(ReferenceTypePropagationTest, ProperSetup) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  SetupPropagation(&handles);
+
+  EXPECT_TRUE(propagation_ != nullptr);
+  EXPECT_TRUE(graph_->GetInexactObjectRti().IsEqual(ObjectType(false)));
+}
+
+TEST_F(ReferenceTypePropagationTest, MergeInvalidTypes) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  SetupPropagation(&handles);
+
+  // Two invalid types.
+  ReferenceTypeInfo t1(MergeTypes(InvalidType(), InvalidType()));
+  EXPECT_FALSE(t1.IsValid());
+  EXPECT_FALSE(t1.IsExact());
+  EXPECT_TRUE(t1.IsEqual(InvalidType()));
+
+  // Valid type on right.
+  ReferenceTypeInfo t2(MergeTypes(InvalidType(), ObjectType()));
+  EXPECT_TRUE(t2.IsValid());
+  EXPECT_TRUE(t2.IsExact());
+  EXPECT_TRUE(t2.IsEqual(ObjectType()));
+  ReferenceTypeInfo t3(MergeTypes(InvalidType(), StringType()));
+  EXPECT_TRUE(t3.IsValid());
+  EXPECT_TRUE(t3.IsExact());
+  EXPECT_TRUE(t3.IsEqual(StringType()));
+
+  // Valid type on left.
+  ReferenceTypeInfo t4(MergeTypes(ObjectType(), InvalidType()));
+  EXPECT_TRUE(t4.IsValid());
+  EXPECT_TRUE(t4.IsExact());
+  EXPECT_TRUE(t4.IsEqual(ObjectType()));
+  ReferenceTypeInfo t5(MergeTypes(StringType(), InvalidType()));
+  EXPECT_TRUE(t5.IsValid());
+  EXPECT_TRUE(t5.IsExact());
+  EXPECT_TRUE(t5.IsEqual(StringType()));
+}
+
+TEST_F(ReferenceTypePropagationTest, MergeValidTypes) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  SetupPropagation(&handles);
+
+  // Same types.
+  ReferenceTypeInfo t1(MergeTypes(ObjectType(), ObjectType()));
+  EXPECT_TRUE(t1.IsValid());
+  EXPECT_TRUE(t1.IsExact());
+  EXPECT_TRUE(t1.IsEqual(ObjectType()));
+  ReferenceTypeInfo t2(MergeTypes(StringType(), StringType()));
+  EXPECT_TRUE(t2.IsValid());
+  EXPECT_TRUE(t2.IsExact());
+  EXPECT_TRUE(t2.IsEqual(StringType()));
+
+  // Left is super class of right.
+  ReferenceTypeInfo t3(MergeTypes(ObjectType(), StringType()));
+  EXPECT_TRUE(t3.IsValid());
+  EXPECT_FALSE(t3.IsExact());
+  EXPECT_TRUE(t3.IsEqual(ObjectType(false)));
+
+  // Right is super class of left.
+  ReferenceTypeInfo t4(MergeTypes(StringType(), ObjectType()));
+  EXPECT_TRUE(t4.IsValid());
+  EXPECT_FALSE(t4.IsExact());
+  EXPECT_TRUE(t4.IsEqual(ObjectType(false)));
+
+  // Same types, but one or both are inexact.
+  ReferenceTypeInfo t5(MergeTypes(ObjectType(false), ObjectType()));
+  EXPECT_TRUE(t5.IsValid());
+  EXPECT_FALSE(t5.IsExact());
+  EXPECT_TRUE(t5.IsEqual(ObjectType(false)));
+  ReferenceTypeInfo t6(MergeTypes(ObjectType(), ObjectType(false)));
+  EXPECT_TRUE(t6.IsValid());
+  EXPECT_FALSE(t6.IsExact());
+  EXPECT_TRUE(t6.IsEqual(ObjectType(false)));
+  ReferenceTypeInfo t7(MergeTypes(ObjectType(false), ObjectType(false)));
+  EXPECT_TRUE(t7.IsValid());
+  EXPECT_FALSE(t7.IsExact());
+  EXPECT_TRUE(t7.IsEqual(ObjectType(false)));
+}
+
+}  // namespace art
+
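The tests above pin down the merge rules for ReferenceTypeInfo: an invalid operand yields the other operand unchanged, merging a type with itself stays exact only if both sides are exact, and merging distinct types widens to the common superclass as an inexact type. Below is a minimal standalone sketch of those rules, using a hypothetical two-level Object/String hierarchy instead of ART's real class lattice:

#include <cassert>
#include <string>

// Toy stand-in for ReferenceTypeInfo: a class name plus an exactness bit.
// An empty class name models an invalid type.
struct TypeInfo {
  std::string klass;
  bool is_exact;
  bool IsValid() const { return !klass.empty(); }
  bool IsEqual(const TypeInfo& other) const {
    return klass == other.klass && is_exact == other.is_exact;
  }
};

// Merge rules mirroring the expectations tested above; only a flat hierarchy
// rooted at Object is modeled here.
TypeInfo Merge(const TypeInfo& a, const TypeInfo& b) {
  if (!a.IsValid()) return b;  // Invalid merges to the other operand.
  if (!b.IsValid()) return a;
  if (a.klass == b.klass) {    // Same class: exact only if both are exact.
    return TypeInfo{a.klass, a.is_exact && b.is_exact};
  }
  return TypeInfo{"Object", false};  // Distinct classes: widen, inexact.
}

int main() {
  TypeInfo object{"Object", true};
  TypeInfo string{"String", true};
  TypeInfo invalid{"", false};
  assert(Merge(invalid, string).IsEqual(string));
  assert(Merge(object, object).IsEqual(object));
  assert(Merge(object, string).IsEqual(TypeInfo{"Object", false}));
  assert(Merge(TypeInfo{"Object", false}, object).IsEqual(TypeInfo{"Object", false}));
  return 0;
}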
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index b1f9cbc..4405b80 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1773,7 +1773,9 @@
     // therefore will not have a location for that instruction for `to`.
     // Because the instruction is a constant or the ArtMethod, we don't need to
     // do anything: it will be materialized in the irreducible loop.
-    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
+        << defined_by->DebugName() << ":" << defined_by->GetId()
+        << " " << from->GetBlockId() << " -> " << to->GetBlockId();
     return;
   }
 
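This hunk only enriches a failing DCHECK with the offending instruction and the edge being processed. ART's DCHECK accepts extra diagnostics streamed after the condition; the macro below is a hypothetical stand-in (not ART's implementation) that demonstrates the streaming pattern in isolation, with made-up values:

#include <cstdlib>
#include <iostream>

// Minimal streaming-check macro: if the condition fails, everything streamed
// after the macro is printed, then the process aborts.
#define MY_DCHECK(cond) \
  for (bool my_dcheck_ok = static_cast<bool>(cond); !my_dcheck_ok; std::abort()) \
    std::cerr << "Check failed: " #cond " "

int main() {
  int defined_by_id = 7, from_block = 3, to_block = 5;
  MY_DCHECK(from_block < to_block)
      << "instruction:" << defined_by_id << " "
      << from_block << " -> " << to_block;  // Passes here, so nothing prints.
  return 0;
}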
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 5e1d1d9..08bd35f 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -16,11 +16,20 @@
 
 #include "sharpening.h"
 
+#include "base/casts.h"
+#include "class_linker.h"
 #include "code_generator.h"
+#include "driver/dex_compilation_unit.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "driver/compiler_driver.h"
+#include "gc/heap.h"
+#include "gc/space/image_space.h"
+#include "handle_scope-inl.h"
+#include "mirror/dex_cache.h"
+#include "mirror/string.h"
 #include "nodes.h"
 #include "runtime.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
@@ -31,12 +40,13 @@
       HInstruction* instruction = it.Current();
       if (instruction->IsInvokeStaticOrDirect()) {
         ProcessInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect());
+      } else if (instruction->IsLoadString()) {
+        ProcessLoadString(instruction->AsLoadString());
       }
       // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder
       //       here. Rewrite it to avoid the CompilerDriver's reliance on verifier data
       //       because we know the type better when inlining.
-      // TODO: HLoadClass, HLoadString - select PC relative dex cache array access if
-      //       available.
+      // TODO: HLoadClass - select better load kind if available.
     }
   }
 }
@@ -89,7 +99,7 @@
     if (direct_method != 0u) {  // Should we use a direct pointer to the method?
       // Note: For JIT, kDirectAddressWithFixup doesn't make sense at all and while
       // kDirectAddress would be fine for image methods, we don't support it at the moment.
-      DCHECK(!Runtime::Current()->UseJit());
+      DCHECK(!Runtime::Current()->UseJitCompilation());
       if (direct_method != static_cast<uintptr_t>(-1)) {  // Is the method pointer known now?
         method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
         method_load_data = direct_method;
@@ -99,7 +109,7 @@
     } else {  // Use dex cache.
       DCHECK_EQ(target_method.dex_file, &graph_->GetDexFile());
       if (use_pc_relative_instructions) {  // Can we use PC-relative access to the dex cache arrays?
-        DCHECK(!Runtime::Current()->UseJit());
+        DCHECK(!Runtime::Current()->UseJitCompilation());
         method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
         DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()),
                                     &graph_->GetDexFile());
@@ -111,7 +121,7 @@
     if (direct_code != 0u) {  // Should we use a direct pointer to the code?
       // Note: For JIT, kCallPCRelative and kCallDirectWithFixup don't make sense at all and
       // while kCallDirect would be fine for image methods, we don't support it at the moment.
-      DCHECK(!Runtime::Current()->UseJit());
+      DCHECK(!Runtime::Current()->UseJitCompilation());
       if (direct_code != static_cast<uintptr_t>(-1)) {  // Is the code pointer known now?
         code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirect;
         direct_code_ptr = direct_code;
@@ -143,4 +153,101 @@
   invoke->SetDispatchInfo(dispatch_info);
 }
 
+void HSharpening::ProcessLoadString(HLoadString* load_string) {
+  DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK(!load_string->IsInDexCache());
+
+  const DexFile& dex_file = load_string->GetDexFile();
+  uint32_t string_index = load_string->GetStringIndex();
+
+  bool is_in_dex_cache = false;
+  HLoadString::LoadKind desired_load_kind;
+  uint64_t address = 0u;  // String or dex cache element address.
+  {
+    Runtime* runtime = Runtime::Current();
+    ClassLinker* class_linker = runtime->GetClassLinker();
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile())
+        ? compilation_unit_.GetDexCache()
+        : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
+
+    if (compiler_driver_->IsBootImage()) {
+      // Compiling boot image. Resolve the string and allocate it if needed.
+      DCHECK(!runtime->UseJitCompilation());
+      mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
+      CHECK(string != nullptr);
+      if (!compiler_driver_->GetSupportBootImageFixup()) {
+        // MIPS/MIPS64 or compiler_driver_test. Do not sharpen.
+        desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+      } else {
+        DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
+        is_in_dex_cache = true;
+        desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
+            ? HLoadString::LoadKind::kBootImageLinkTimePcRelative
+            : HLoadString::LoadKind::kBootImageLinkTimeAddress;
+      }
+    } else if (runtime->UseJitCompilation()) {
+      // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
+      // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+      mirror::String* string = dex_cache->GetResolvedString(string_index);
+      is_in_dex_cache = (string != nullptr);
+      if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
+        desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+        address = reinterpret_cast64<uint64_t>(string);
+      } else {
+        // Note: If the string is not in the dex cache, the instruction needs an environment
+        // and will not be inlined across dex files. Within a dex file, the slow-path helper
+        // loads the correct string and inlined frames are used correctly for the OOM stack trace.
+        // TODO: Write a test for this.
+        desired_load_kind = HLoadString::LoadKind::kDexCacheAddress;
+        void* dex_cache_element_address = &dex_cache->GetStrings()[string_index];
+        address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
+      }
+    } else {
+      // AOT app compilation. Try to lookup the string without allocating if not found.
+      mirror::String* string = class_linker->LookupString(dex_file, string_index, dex_cache);
+      if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
+        if (codegen_->GetCompilerOptions().GetCompilePic()) {
+          // Use PC-relative load from the dex cache if the dex file belongs
+          // to the oat file that we're currently compiling.
+          desired_load_kind = ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)
+              ? HLoadString::LoadKind::kDexCachePcRelative
+              : HLoadString::LoadKind::kDexCacheViaMethod;
+        } else {
+          desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+          address = reinterpret_cast64<uint64_t>(string);
+        }
+      } else {
+        // Not JIT and the string is not in boot image.
+        desired_load_kind = HLoadString::LoadKind::kDexCachePcRelative;
+      }
+    }
+  }
+  if (is_in_dex_cache) {
+    load_string->MarkInDexCache();
+  }
+
+  HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind);
+  switch (load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      load_string->SetLoadKindWithStringReference(load_kind, dex_file, string_index);
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK_NE(address, 0u);
+      load_string->SetLoadKindWithAddress(load_kind, address);
+      break;
+    case HLoadString::LoadKind::kDexCachePcRelative: {
+      size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+      DexCacheArraysLayout layout(pointer_size, &dex_file);
+      size_t element_index = layout.StringOffset(string_index);
+      load_string->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
+      break;
+    }
+  }
+}
+
 }  // namespace art
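ProcessLoadString above selects a load kind from three compilation modes (boot image, JIT, AOT app). The following standalone sketch condenses that decision tree, with the runtime and driver state reduced to booleans; all names here are illustrative, not ART's API:

#include <cstdio>

enum class LoadKind {
  kBootImageLinkTimeAddress,
  kBootImageLinkTimePcRelative,
  kBootImageAddress,
  kDexCacheAddress,
  kDexCachePcRelative,
  kDexCacheViaMethod,
};

// Inputs that ProcessLoadString derives from the runtime, class linker and
// compiler driver, flattened into flags for illustration.
struct Context {
  bool compiling_boot_image;
  bool boot_image_fixup_supported;  // False on e.g. MIPS in this change.
  bool jit_compilation;
  bool string_in_boot_image;        // Resolved string lives in the boot image.
  bool string_resolved;             // JIT: string already in the dex cache.
  bool compile_pic;
  bool dex_file_in_oat_file;        // Dex file belongs to the oat file being built.
};

LoadKind SelectStringLoadKind(const Context& c) {
  if (c.compiling_boot_image) {
    if (!c.boot_image_fixup_supported) return LoadKind::kDexCacheViaMethod;
    return c.compile_pic ? LoadKind::kBootImageLinkTimePcRelative
                         : LoadKind::kBootImageLinkTimeAddress;
  }
  if (c.jit_compilation) {
    // JIT can embed the string address (or its dex cache slot) directly.
    return (c.string_resolved && c.string_in_boot_image)
               ? LoadKind::kBootImageAddress
               : LoadKind::kDexCacheAddress;
  }
  // AOT app compilation.
  if (c.string_in_boot_image && !c.compile_pic) return LoadKind::kBootImageAddress;
  if (c.string_in_boot_image && c.compile_pic && !c.dex_file_in_oat_file) {
    return LoadKind::kDexCacheViaMethod;
  }
  return LoadKind::kDexCachePcRelative;
}

int main() {
  Context jit{false, true, true, true, true, false, false};
  std::printf("JIT, boot-image string -> kind %d\n",
              static_cast<int>(SelectStringLoadKind(jit)));
  return 0;
}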
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index adae700..24152f6 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -47,6 +47,7 @@
 
  private:
   void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
+  void ProcessLoadString(HLoadString* load_string);
 
   CodeGenerator* codegen_;
   const DexCompilationUnit& compilation_unit_;
diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc
index 9bbc354..b01bc1c 100644
--- a/compiler/optimizing/side_effects_test.cc
+++ b/compiler/optimizing/side_effects_test.cc
@@ -148,19 +148,19 @@
   EXPECT_FALSE(any_write.MayDependOn(volatile_read));
 }
 
-TEST(SideEffectsTest, SameWidthTypes) {
+TEST(SideEffectsTest, SameWidthTypesNoAlias) {
   // Type I/F.
-  testWriteAndReadDependence(
+  testNoWriteAndReadDependence(
       SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false),
       SideEffects::FieldReadOfType(Primitive::kPrimFloat, /* is_volatile */ false));
-  testWriteAndReadDependence(
+  testNoWriteAndReadDependence(
       SideEffects::ArrayWriteOfType(Primitive::kPrimInt),
       SideEffects::ArrayReadOfType(Primitive::kPrimFloat));
   // Type L/D.
-  testWriteAndReadDependence(
+  testNoWriteAndReadDependence(
       SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false),
       SideEffects::FieldReadOfType(Primitive::kPrimDouble, /* is_volatile */ false));
-  testWriteAndReadDependence(
+  testNoWriteAndReadDependence(
       SideEffects::ArrayWriteOfType(Primitive::kPrimLong),
       SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
 }
@@ -216,14 +216,32 @@
       "||||||L|",
       SideEffects::FieldWriteOfType(Primitive::kPrimNot, false).ToString().c_str());
   EXPECT_STREQ(
+      "||DFJISCBZL|DFJISCBZL||DFJISCBZL|DFJISCBZL|",
+      SideEffects::FieldWriteOfType(Primitive::kPrimNot, true).ToString().c_str());
+  EXPECT_STREQ(
       "|||||Z||",
       SideEffects::ArrayWriteOfType(Primitive::kPrimBoolean).ToString().c_str());
   EXPECT_STREQ(
+      "|||||C||",
+      SideEffects::ArrayWriteOfType(Primitive::kPrimChar).ToString().c_str());
+  EXPECT_STREQ(
+      "|||||S||",
+      SideEffects::ArrayWriteOfType(Primitive::kPrimShort).ToString().c_str());
+  EXPECT_STREQ(
       "|||B||||",
       SideEffects::FieldReadOfType(Primitive::kPrimByte, false).ToString().c_str());
   EXPECT_STREQ(
-      "||DJ|||||",  // note: DJ alias
+      "||D|||||",
       SideEffects::ArrayReadOfType(Primitive::kPrimDouble).ToString().c_str());
+  EXPECT_STREQ(
+      "||J|||||",
+      SideEffects::ArrayReadOfType(Primitive::kPrimLong).ToString().c_str());
+  EXPECT_STREQ(
+      "||F|||||",
+      SideEffects::ArrayReadOfType(Primitive::kPrimFloat).ToString().c_str());
+  EXPECT_STREQ(
+      "||I|||||",
+      SideEffects::ArrayReadOfType(Primitive::kPrimInt).ToString().c_str());
   SideEffects s = SideEffects::None();
   s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, /* is_volatile */ false));
   s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false));
@@ -231,9 +249,7 @@
   s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, /* is_volatile */ false));
   s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimFloat));
   s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
-  EXPECT_STREQ(
-      "||DFJI|FI||S|DJC|",   // note: DJ/FI alias.
-      s.ToString().c_str());
+  EXPECT_STREQ("||DF|I||S|JC|", s.ToString().c_str());
 }
 
 }  // namespace art
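The expected strings in this test render the SideEffects bit vector as eight '|'-separated slots, each listing the primitive types (in the fixed order DFJISCBZL) whose bit is set. The slot layout in the sketch below is reverse-engineered from the expected strings alone, so treat it as a model of the encoding rather than the real implementation; with the same-width aliasing removed by this change, each type keeps its own bit:

#include <cstdint>
#include <cstdio>
#include <string>

// One letter per primitive type, in the order the expected strings use:
// Double, Float, long (J), Int, Short, Char, Byte, boolean (Z), reference (L).
static const char kTypeChars[] = "DFJISCBZL";
static const int kNumTypes = 9;

// Cut-down model: only four of the eight slots are populated (array reads,
// field reads, array writes, field writes); the rest stay empty here.
struct ToySideEffects {
  uint16_t sections[4];

  static ToySideEffects None() { return ToySideEffects{{0, 0, 0, 0}}; }
  static ToySideEffects Single(int section, int type) {
    ToySideEffects e = None();
    e.sections[section] = static_cast<uint16_t>(1u << type);
    return e;
  }
  static ToySideEffects ArrayRead(int t)  { return Single(0, t); }
  static ToySideEffects FieldRead(int t)  { return Single(1, t); }
  static ToySideEffects ArrayWrite(int t) { return Single(2, t); }
  static ToySideEffects FieldWrite(int t) { return Single(3, t); }

  ToySideEffects Union(const ToySideEffects& o) const {
    ToySideEffects e = None();
    for (int s = 0; s < 4; ++s) e.sections[s] = sections[s] | o.sections[s];
    return e;
  }

  std::string ToString() const {
    // Slot -> section mapping inferred from the expected strings; -1 = unused.
    static const int kSlotToSection[8] = {-1, -1, 0, 1, -1, 2, 3, -1};
    std::string out;
    for (int slot = 0; slot < 8; ++slot) {
      if (slot != 0) out += '|';
      if (kSlotToSection[slot] < 0) continue;
      uint16_t bits = sections[kSlotToSection[slot]];
      for (int t = 0; t < kNumTypes; ++t) {
        if ((bits & (1u << t)) != 0) out += kTypeChars[t];
      }
    }
    return out;
  }
};

int main() {
  // Type indices into kTypeChars: D=0, F=1, J=2, I=3, S=4, C=5, B=6, Z=7, L=8.
  ToySideEffects s = ToySideEffects::FieldWrite(5)   // char
      .Union(ToySideEffects::FieldWrite(2))          // long
      .Union(ToySideEffects::ArrayWrite(4))          // short
      .Union(ToySideEffects::FieldRead(3))           // int
      .Union(ToySideEffects::ArrayRead(1))           // float
      .Union(ToySideEffects::ArrayRead(0));          // double
  std::printf("%s\n", s.ToString().c_str());         // Prints ||DF|I||S|JC|
  return 0;
}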
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 09ca8b7..f96ca32 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -16,36 +16,16 @@
 
 #include "ssa_builder.h"
 
+#include "bytecode_utils.h"
 #include "nodes.h"
 #include "reference_type_propagation.h"
 #include "ssa_phi_elimination.h"
 
 namespace art {
 
-void SsaBuilder::SetLoopHeaderPhiInputs() {
-  for (size_t i = loop_headers_.size(); i > 0; --i) {
-    HBasicBlock* block = loop_headers_[i - 1];
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HPhi* phi = it.Current()->AsPhi();
-      size_t vreg = phi->GetRegNumber();
-      for (HBasicBlock* predecessor : block->GetPredecessors()) {
-        HInstruction* value = ValueOfLocal(predecessor, vreg);
-        if (value == nullptr) {
-          // Vreg is undefined at this predecessor. Mark it dead and leave with
-          // fewer inputs than predecessors. SsaChecker will fail if not removed.
-          phi->SetDead();
-          break;
-        } else {
-          phi->AddInput(value);
-        }
-      }
-    }
-  }
-}
-
 void SsaBuilder::FixNullConstantType() {
   // The order doesn't matter here.
-  for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
+  for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
     for (HInstructionIterator it(itb.Current()->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* equality_instr = it.Current();
       if (!equality_instr->IsEqual() && !equality_instr->IsNotEqual()) {
@@ -70,14 +50,14 @@
       // can only be the 0 constant.
       DCHECK(int_operand->IsIntConstant()) << int_operand->DebugName();
       DCHECK_EQ(0, int_operand->AsIntConstant()->GetValue());
-      equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), int_operand == right ? 1 : 0);
+      equality_instr->ReplaceInput(graph_->GetNullConstant(), int_operand == right ? 1 : 0);
     }
   }
 }
 
 void SsaBuilder::EquivalentPhisCleanup() {
   // The order doesn't matter here.
-  for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
+  for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
     for (HInstructionIterator it(itb.Current()->GetPhis()); !it.Done(); it.Advance()) {
       HPhi* phi = it.Current()->AsPhi();
       HPhi* next = phi->GetNextEquivalentPhiWithSameType();
@@ -99,7 +79,7 @@
 }
 
 void SsaBuilder::FixEnvironmentPhis() {
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) {
       HPhi* phi = it_phis.Current()->AsPhi();
@@ -128,8 +108,8 @@
   // marked dead/conflicting too, so we add them to the worklist. Otherwise we
   // add users whose type does not match and needs to be updated.
   bool add_all_live_phis = instruction->IsPhi() && instruction->AsPhi()->IsDead();
-  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
-    HInstruction* user = it.Current()->GetUser();
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    HInstruction* user = use.GetUser();
     if (user->IsPhi() && user->AsPhi()->IsLive()) {
       if (add_all_live_phis || user->GetType() != instruction->GetType()) {
         worklist->push_back(user->AsPhi());
@@ -253,9 +233,9 @@
 }
 
 void SsaBuilder::RunPrimitiveTypePropagation() {
-  ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+  ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter(kArenaAllocGraphBuilder));
 
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (block->IsLoopHeader()) {
       for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
@@ -299,8 +279,14 @@
 static HArrayGet* FindFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
   Primitive::Type type = aget->GetType();
   DCHECK(Primitive::IsIntOrLongType(type));
-  HArrayGet* next = aget->GetNext()->AsArrayGet();
-  return (next != nullptr && next->IsEquivalentOf(aget)) ? next : nullptr;
+  HInstruction* next = aget->GetNext();
+  if (next != nullptr && next->IsArrayGet()) {
+    HArrayGet* next_aget = next->AsArrayGet();
+    if (next_aget->IsEquivalentOf(aget)) {
+      return next_aget;
+    }
+  }
+  return nullptr;
 }
 
 static HArrayGet* CreateFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
@@ -333,7 +319,7 @@
   // uses (because they are untyped) and environment uses (if --debuggable).
   // After resolving all ambiguous ArrayGets, we will re-run primitive type
   // propagation on the Phis which need to be updated.
-  ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+  ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter(kArenaAllocGraphBuilder));
 
   {
     ScopedObjectAccess soa(Thread::Current());
@@ -405,6 +391,9 @@
             worklist.push_back(equivalent->AsPhi());
           }
         }
+        // Refine the side effects of this floating point aset. Note that we do this even if
+        // no replacement occurs, since the right-hand-side may have been corrected already.
+        aset->ComputeSideEffects();
       } else {
         // Array elements are integral and the value assigned to it initially
         // was integral too. Nothing to do.
@@ -423,27 +412,24 @@
 }
 
 static bool HasAliasInEnvironments(HInstruction* instruction) {
-  for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses());
-       !use_it.Done();
-       use_it.Advance()) {
-    HEnvironment* use = use_it.Current()->GetUser();
-    HUseListNode<HEnvironment*>* next = use_it.Current()->GetNext();
-    if (next != nullptr && next->GetUser() == use) {
+  HEnvironment* last_user = nullptr;
+  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+    DCHECK(use.GetUser() != nullptr);
+    // Note: The first comparison (against the initial nullptr) always fails.
+    if (use.GetUser() == last_user) {
       return true;
     }
+    last_user = use.GetUser();
   }
 
   if (kIsDebugBuild) {
     // Do a quadratic search to ensure same environment uses are next
     // to each other.
-    for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses());
-         !use_it.Done();
-         use_it.Advance()) {
-      HUseListNode<HEnvironment*>* current = use_it.Current();
-      HUseListNode<HEnvironment*>* next = current->GetNext();
-      while (next != nullptr) {
+    const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
+    for (auto current = env_uses.begin(), end = env_uses.end(); current != end; ++current) {
+      auto next = current;
+      for (++next; next != end; ++next) {
         DCHECK(next->GetUser() != current->GetUser());
-        next = next->GetNext();
       }
     }
   }
@@ -451,18 +437,21 @@
 }
 
 void SsaBuilder::RemoveRedundantUninitializedStrings() {
-  if (GetGraph()->IsDebuggable()) {
+  if (graph_->IsDebuggable()) {
     // Do not perform the optimization for consistency with the interpreter
     // which always allocates an object for new-instance of String.
     return;
   }
 
   for (HNewInstance* new_instance : uninitialized_strings_) {
+    DCHECK(new_instance->IsInBlock());
+    DCHECK(new_instance->IsStringAlloc());
+
     // Replace NewInstance of String with NullConstant if not used prior to
     // calling StringFactory. In case of deoptimization, the interpreter is
     // expected to skip null check on the `this` argument of the StringFactory call.
     if (!new_instance->HasNonEnvironmentUses() && !HasAliasInEnvironments(new_instance)) {
-      new_instance->ReplaceWith(GetGraph()->GetNullConstant());
+      new_instance->ReplaceWith(graph_->GetNullConstant());
       new_instance->GetBlock()->RemoveInstruction(new_instance);
 
       // Remove LoadClass if not needed any more.
@@ -493,57 +482,47 @@
 }
 
 GraphAnalysisResult SsaBuilder::BuildSsa() {
-  DCHECK(!GetGraph()->IsInSsaForm());
+  DCHECK(!graph_->IsInSsaForm());
 
-  // 1) Visit in reverse post order. We need to have all predecessors of a block
-  // visited (with the exception of loops) in order to create the right environment
-  // for that block. For loops, we create phis whose inputs will be set in 2).
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-
-  // 2) Set inputs of loop header phis.
-  SetLoopHeaderPhiInputs();
-
-  // 3) Propagate types of phis. At this point, phis are typed void in the general
+  // 1) Propagate types of phis. At this point, phis are typed void in the general
   // case, or float/double/reference if we created an equivalent phi. So we need
   // to propagate the types across phis to give them a correct type. If a type
   // conflict is detected in this stage, the phi is marked dead.
   RunPrimitiveTypePropagation();
 
-  // 4) Now that the correct primitive types have been assigned, we can get rid
+  // 2) Now that the correct primitive types have been assigned, we can get rid
   // of redundant phis. Note that we cannot do this phase before type propagation,
   // otherwise we could get rid of phi equivalents, whose presence is a requirement
   // for the type propagation phase. Note that this is to satisfy statement (a)
   // of the SsaBuilder (see ssa_builder.h).
-  SsaRedundantPhiElimination(GetGraph()).Run();
+  SsaRedundantPhiElimination(graph_).Run();
 
-  // 5) Fix the type for null constants which are part of an equality comparison.
+  // 3) Fix the type for null constants which are part of an equality comparison.
   // We need to do this after redundant phi elimination, to ensure the only cases
   // that we can see are reference comparison against 0. The redundant phi
   // elimination ensures we do not see a phi taking two 0 constants in a HEqual
   // or HNotEqual.
   FixNullConstantType();
 
-  // 6) Compute type of reference type instructions. The pass assumes that
+  // 4) Compute type of reference type instructions. The pass assumes that
   // NullConstant has been fixed up.
-  ReferenceTypePropagation(GetGraph(), handles_, /* is_first_run */ true).Run();
+  ReferenceTypePropagation(graph_, dex_cache_, handles_, /* is_first_run */ true).Run();
 
-  // 7) Step 1) duplicated ArrayGet instructions with ambiguous type (int/float
-  // or long/double) and marked ArraySets with ambiguous input type. Now that RTP
-  // computed the type of the array input, the ambiguity can be resolved and the
-  // correct equivalents kept.
+  // 5) HInstructionBuilder duplicated ArrayGet instructions with ambiguous type
+  // (int/float or long/double) and marked ArraySets with ambiguous input type.
+  // Now that RTP computed the type of the array input, the ambiguity can be
+  // resolved and the correct equivalents kept.
   if (!FixAmbiguousArrayOps()) {
     return kAnalysisFailAmbiguousArrayOp;
   }
 
-  // 8) Mark dead phis. This will mark phis which are not used by instructions
+  // 6) Mark dead phis. This will mark phis which are not used by instructions
   // or other live phis. If compiling as debuggable code, phis will also be kept
   // live if they have an environment use.
-  SsaDeadPhiElimination dead_phi_elimimation(GetGraph());
+  SsaDeadPhiElimination dead_phi_elimimation(graph_);
   dead_phi_elimimation.MarkDeadPhis();
 
-  // 9) Make sure environments use the right phi equivalent: a phi marked dead
+  // 7) Make sure environments use the right phi equivalent: a phi marked dead
   // can have a phi equivalent that is not dead. In that case we have to replace
   // it with the live equivalent because deoptimization and try/catch rely on
   // environments containing values of all live vregs at that point. Note that
@@ -552,166 +531,26 @@
   // environments to just reference one.
   FixEnvironmentPhis();
 
-  // 10) Now that the right phis are used for the environments, we can eliminate
+  // 8) Now that the right phis are used for the environments, we can eliminate
   // phis we do not need. Regardless of the debuggable status, this phase is
   // necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well
   // as for the code generation, which does not deal with phis of conflicting
   // input types.
   dead_phi_elimimation.EliminateDeadPhis();
 
-  // 11) Step 1) replaced uses of NewInstances of String with the results of
-  // their corresponding StringFactory calls. Unless the String objects are used
-  // before they are initialized, they can be replaced with NullConstant.
-  // Note that this optimization is valid only if unsimplified code does not use
-  // the uninitialized value because we assume execution can be deoptimized at
-  // any safepoint. We must therefore perform it before any other optimizations.
+  // 9) HInstructionBuilder replaced uses of NewInstances of String with the
+  // results of their corresponding StringFactory calls. Unless the String
+  // objects are used before they are initialized, they can be replaced with
+  // NullConstant. Note that this optimization is valid only if unsimplified
+  // code does not use the uninitialized value because we assume execution can
+  // be deoptimized at any safepoint. We must therefore perform it before any
+  // other optimizations.
   RemoveRedundantUninitializedStrings();
 
-  // 12) Clear locals.
-  for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
-       !it.Done();
-       it.Advance()) {
-    HInstruction* current = it.Current();
-    if (current->IsLocal()) {
-      current->GetBlock()->RemoveInstruction(current);
-    }
-  }
-
-  GetGraph()->SetInSsaForm();
+  graph_->SetInSsaForm();
   return kAnalysisSuccess;
 }
 
-ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
-  ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
-  const size_t vregs = GetGraph()->GetNumberOfVRegs();
-  if (locals->empty() && vregs != 0u) {
-    locals->resize(vregs, nullptr);
-
-    if (block->IsCatchBlock()) {
-      ArenaAllocator* arena = GetGraph()->GetArena();
-      // We record incoming inputs of catch phis at throwing instructions and
-      // must therefore eagerly create the phis. Phis for undefined vregs will
-      // be deleted when the first throwing instruction with the vreg undefined
-      // is encountered. Unused phis will be removed by dead phi analysis.
-      for (size_t i = 0; i < vregs; ++i) {
-        // No point in creating the catch phi if it is already undefined at
-        // the first throwing instruction.
-        HInstruction* current_local_value = (*current_locals_)[i];
-        if (current_local_value != nullptr) {
-          HPhi* phi = new (arena) HPhi(
-              arena,
-              i,
-              0,
-              current_local_value->GetType());
-          block->AddPhi(phi);
-          (*locals)[i] = phi;
-        }
-      }
-    }
-  }
-  return locals;
-}
-
-HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) {
-  ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
-  return (*locals)[local];
-}
-
-void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
-  current_locals_ = GetLocalsFor(block);
-
-  if (block->IsCatchBlock()) {
-    // Catch phis were already created and inputs collected from throwing sites.
-    if (kIsDebugBuild) {
-      // Make sure there was at least one throwing instruction which initialized
-      // locals (guaranteed by HGraphBuilder) and that all try blocks have been
-      // visited already (from HTryBoundary scoping and reverse post order).
-      bool throwing_instruction_found = false;
-      bool catch_block_visited = false;
-      for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
-        HBasicBlock* current = it.Current();
-        if (current == block) {
-          catch_block_visited = true;
-        } else if (current->IsTryBlock() &&
-                   current->GetTryCatchInformation()->GetTryEntry().HasExceptionHandler(*block)) {
-          DCHECK(!catch_block_visited) << "Catch block visited before its try block.";
-          throwing_instruction_found |= current->HasThrowingInstructions();
-        }
-      }
-      DCHECK(throwing_instruction_found) << "No instructions throwing into a live catch block.";
-    }
-  } else if (block->IsLoopHeader()) {
-    // If the block is a loop header, we know we only have visited the pre header
-    // because we are visiting in reverse post order. We create phis for all initialized
-    // locals from the pre header. Their inputs will be populated at the end of
-    // the analysis.
-    for (size_t local = 0; local < current_locals_->size(); ++local) {
-      HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local);
-      if (incoming != nullptr) {
-        HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(),
-            local,
-            0,
-            incoming->GetType());
-        block->AddPhi(phi);
-        (*current_locals_)[local] = phi;
-      }
-    }
-    // Save the loop header so that the last phase of the analysis knows which
-    // blocks need to be updated.
-    loop_headers_.push_back(block);
-  } else if (block->GetPredecessors().size() > 0) {
-    // All predecessors have already been visited because we are visiting in reverse post order.
-    // We merge the values of all locals, creating phis if those values differ.
-    for (size_t local = 0; local < current_locals_->size(); ++local) {
-      bool one_predecessor_has_no_value = false;
-      bool is_different = false;
-      HInstruction* value = ValueOfLocal(block->GetPredecessors()[0], local);
-
-      for (HBasicBlock* predecessor : block->GetPredecessors()) {
-        HInstruction* current = ValueOfLocal(predecessor, local);
-        if (current == nullptr) {
-          one_predecessor_has_no_value = true;
-          break;
-        } else if (current != value) {
-          is_different = true;
-        }
-      }
-
-      if (one_predecessor_has_no_value) {
-        // If one predecessor has no value for this local, we trust the verifier has
-        // successfully checked that there is a store dominating any read after this block.
-        continue;
-      }
-
-      if (is_different) {
-        HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local);
-        HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(),
-            local,
-            block->GetPredecessors().size(),
-            first_input->GetType());
-        for (size_t i = 0; i < block->GetPredecessors().size(); i++) {
-          HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local);
-          phi->SetRawInputAt(i, pred_value);
-        }
-        block->AddPhi(phi);
-        value = phi;
-      }
-      (*current_locals_)[local] = value;
-    }
-  }
-
-  // Visit all instructions. The instructions of interest are:
-  // - HLoadLocal: replace them with the current value of the local.
-  // - HStoreLocal: update current value of the local and remove the instruction.
-  // - Instructions that require an environment: populate their environment
-  //   with the current values of the locals.
-  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-    it.Current()->Accept(this);
-  }
-}
-
 /**
  * Constants in the Dex format are not typed. So the builder types them as
  * integers, but when doing the SSA form, we might realize the constant
@@ -722,11 +561,10 @@
   // We place the floating point constant next to this constant.
   HFloatConstant* result = constant->GetNext()->AsFloatConstant();
   if (result == nullptr) {
-    HGraph* graph = constant->GetBlock()->GetGraph();
-    ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue()));
+    float value = bit_cast<float, int32_t>(constant->GetValue());
+    result = new (graph_->GetArena()) HFloatConstant(value);
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
-    graph->CacheFloatConstant(result);
+    graph_->CacheFloatConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
@@ -745,11 +583,10 @@
   // We place the floating point constant next to this constant.
   HDoubleConstant* result = constant->GetNext()->AsDoubleConstant();
   if (result == nullptr) {
-    HGraph* graph = constant->GetBlock()->GetGraph();
-    ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue()));
+    double value = bit_cast<double, int64_t>(constant->GetValue());
+    result = new (graph_->GetArena()) HDoubleConstant(value);
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
-    graph->CacheDoubleConstant(result);
+    graph_->CacheDoubleConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
@@ -780,7 +617,7 @@
   if (next == nullptr
       || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())
       || (next->GetType() != type)) {
-    ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
+    ArenaAllocator* allocator = graph_->GetArena();
     HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
     for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
       // Copy the inputs. Note that the graph may not be correctly typed
@@ -840,7 +677,7 @@
 
 HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) {
   if (value->IsIntConstant() && value->AsIntConstant()->GetValue() == 0) {
-    return value->GetBlock()->GetGraph()->GetNullConstant();
+    return graph_->GetNullConstant();
   } else if (value->IsPhi()) {
     return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot);
   } else {
@@ -848,144 +685,4 @@
   }
 }
 
-void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
-  Primitive::Type load_type = load->GetType();
-  HInstruction* value = (*current_locals_)[load->GetLocal()->GetRegNumber()];
-  // If the operation requests a specific type, we make sure its input is of that type.
-  if (load_type != value->GetType()) {
-    if (load_type == Primitive::kPrimFloat || load_type == Primitive::kPrimDouble) {
-      value = GetFloatOrDoubleEquivalent(value, load_type);
-    } else if (load_type == Primitive::kPrimNot) {
-      value = GetReferenceTypeEquivalent(value);
-    }
-  }
-
-  load->ReplaceWith(value);
-  load->GetBlock()->RemoveInstruction(load);
-}
-
-void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  uint32_t reg_number = store->GetLocal()->GetRegNumber();
-  HInstruction* stored_value = store->InputAt(1);
-  Primitive::Type stored_type = stored_value->GetType();
-  DCHECK_NE(stored_type, Primitive::kPrimVoid);
-
-  // Storing into vreg `reg_number` may implicitly invalidate the surrounding
-  // registers. Consider the following cases:
-  // (1) Storing a wide value must overwrite previous values in both `reg_number`
-  //     and `reg_number+1`. We store `nullptr` in `reg_number+1`.
-  // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number`
-  //     must invalidate it. We store `nullptr` in `reg_number-1`.
-  // Consequently, storing a wide value into the high vreg of another wide value
-  // will invalidate both `reg_number-1` and `reg_number+1`.
-
-  if (reg_number != 0) {
-    HInstruction* local_low = (*current_locals_)[reg_number - 1];
-    if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) {
-      // The vreg we are storing into was previously the high vreg of a pair.
-      // We need to invalidate its low vreg.
-      DCHECK((*current_locals_)[reg_number] == nullptr);
-      (*current_locals_)[reg_number - 1] = nullptr;
-    }
-  }
-
-  (*current_locals_)[reg_number] = stored_value;
-  if (Primitive::Is64BitType(stored_type)) {
-    // We are storing a pair. Invalidate the instruction in the high vreg.
-    (*current_locals_)[reg_number + 1] = nullptr;
-  }
-
-  store->GetBlock()->RemoveInstruction(store);
-}
-
-void SsaBuilder::VisitInstruction(HInstruction* instruction) {
-  if (instruction->NeedsEnvironment()) {
-    HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
-        GetGraph()->GetArena(),
-        current_locals_->size(),
-        GetGraph()->GetDexFile(),
-        GetGraph()->GetMethodIdx(),
-        instruction->GetDexPc(),
-        GetGraph()->GetInvokeType(),
-        instruction);
-    environment->CopyFrom(*current_locals_);
-    instruction->SetRawEnvironment(environment);
-  }
-
-  // If in a try block, propagate values of locals into catch blocks.
-  if (instruction->CanThrowIntoCatchBlock()) {
-    const HTryBoundary& try_entry =
-        instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
-    for (HBasicBlock* catch_block : try_entry.GetExceptionHandlers()) {
-      ArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block);
-      DCHECK_EQ(handler_locals->size(), current_locals_->size());
-      for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
-        HInstruction* handler_value = (*handler_locals)[vreg];
-        if (handler_value == nullptr) {
-          // Vreg was undefined at a previously encountered throwing instruction
-          // and the catch phi was deleted. Do not record the local value.
-          continue;
-        }
-        DCHECK(handler_value->IsPhi());
-
-        HInstruction* local_value = (*current_locals_)[vreg];
-        if (local_value == nullptr) {
-          // This is the first instruction throwing into `catch_block` where
-          // `vreg` is undefined. Delete the catch phi.
-          catch_block->RemovePhi(handler_value->AsPhi());
-          (*handler_locals)[vreg] = nullptr;
-        } else {
-          // Vreg has been defined at all instructions throwing into `catch_block`
-          // encountered so far. Record the local value in the catch phi.
-          handler_value->AsPhi()->AddInput(local_value);
-        }
-      }
-    }
-  }
-}
-
-void SsaBuilder::VisitArrayGet(HArrayGet* aget) {
-  Primitive::Type type = aget->GetType();
-  DCHECK(!Primitive::IsFloatingPointType(type));
-  if (Primitive::IsIntOrLongType(type)) {
-    ambiguous_agets_.push_back(aget);
-  }
-  VisitInstruction(aget);
-}
-
-void SsaBuilder::VisitArraySet(HArraySet* aset) {
-  Primitive::Type type = aset->GetValue()->GetType();
-  if (Primitive::IsIntOrLongType(type)) {
-    ambiguous_asets_.push_back(aset);
-  }
-  VisitInstruction(aset);
-}
-
-void SsaBuilder::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  VisitInstruction(invoke);
-
-  if (invoke->IsStringInit()) {
-    // This is a StringFactory call which acts as a String constructor. Its
-    // result replaces the empty String pre-allocated by NewInstance.
-    HInstruction* arg_this = invoke->GetAndRemoveThisArgumentOfStringInit();
-
-    // Replacing the NewInstance might render it redundant. Keep a list of these
-    // to be visited once it is clear whether it has remaining uses.
-    if (arg_this->IsNewInstance()) {
-      uninitialized_strings_.push_back(arg_this->AsNewInstance());
-    } else {
-      DCHECK(arg_this->IsPhi());
-      // NewInstance is not the direct input of the StringFactory call. It might
-      // be redundant but optimizing this case is not worth the effort.
-    }
-
-    // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
-    for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
-      if ((*current_locals_)[vreg] == arg_this) {
-        (*current_locals_)[vreg] = invoke;
-      }
-    }
-  }
-}
-
 }  // namespace art
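The rewritten HasAliasInEnvironments above replaces the node-chasing loop with a single pass that compares each environment user against the previous one, relying on the invariant (verified by the quadratic debug-build scan) that uses by the same environment are adjacent in the use list. A minimal sketch of duplicate detection under that adjacency invariant, with integers standing in for environment users:

#include <cassert>
#include <vector>

// Returns true if `users` contains the same user twice, assuming equal users
// are always adjacent -- the invariant the debug-build scan checks.
bool HasAdjacentDuplicate(const std::vector<int>& users) {
  const int* last_user = nullptr;
  for (const int& user : users) {
    // Note: the first comparison (against the initial nullptr) always fails.
    if (last_user != nullptr && *last_user == user) return true;
    last_user = &user;
  }
  return false;
}

int main() {
  assert(!HasAdjacentDuplicate({1, 2, 3}));
  assert(HasAdjacentDuplicate({1, 2, 2, 3}));  // Same user twice in a row.
  return 0;
}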
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 2dae9c2..d7360ad 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -23,8 +23,6 @@
 
 namespace art {
 
-static constexpr int kDefaultNumberOfLoops = 2;
-
 /**
  * Transforms a graph into SSA form. The liveness guarantees of
  * this transformation are listed below. A DEX register
@@ -47,37 +45,51 @@
  *     is not set, values of Dex registers only used by environments
  *     are killed.
  */
-class SsaBuilder : public HGraphVisitor {
+class SsaBuilder : public ValueObject {
  public:
-  SsaBuilder(HGraph* graph, StackHandleScopeCollection* handles)
-      : HGraphVisitor(graph),
+  SsaBuilder(HGraph* graph,
+             Handle<mirror::DexCache> dex_cache,
+             StackHandleScopeCollection* handles)
+      : graph_(graph),
+        dex_cache_(dex_cache),
         handles_(handles),
         agets_fixed_(false),
-        current_locals_(nullptr),
-        loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        locals_for_(graph->GetBlocks().size(),
-                    ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-                    graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
-    loop_headers_.reserve(kDefaultNumberOfLoops);
+        ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
+        ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
+        uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)) {
+    graph_->InitializeInexactObjectRTI(handles);
   }
 
   GraphAnalysisResult BuildSsa();
 
-  // Returns locals vector for `block`. If it is a catch block, the vector will be
-  // prepopulated with catch phis for vregs which are defined in `current_locals_`.
-  ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
-  HInstruction* ValueOfLocal(HBasicBlock* block, size_t local);
+  HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type);
+  HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
 
-  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
-  void VisitLoadLocal(HLoadLocal* load) OVERRIDE;
-  void VisitStoreLocal(HStoreLocal* store) OVERRIDE;
-  void VisitInstruction(HInstruction* instruction) OVERRIDE;
-  void VisitArrayGet(HArrayGet* aget) OVERRIDE;
-  void VisitArraySet(HArraySet* aset) OVERRIDE;
-  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+  void MaybeAddAmbiguousArrayGet(HArrayGet* aget) {
+    Primitive::Type type = aget->GetType();
+    DCHECK(!Primitive::IsFloatingPointType(type));
+    if (Primitive::IsIntOrLongType(type)) {
+      ambiguous_agets_.push_back(aget);
+    }
+  }
+
+  void MaybeAddAmbiguousArraySet(HArraySet* aset) {
+    Primitive::Type type = aset->GetValue()->GetType();
+    if (Primitive::IsIntOrLongType(type)) {
+      ambiguous_asets_.push_back(aset);
+    }
+  }
+
+  void AddUninitializedString(HNewInstance* string) {
+    // In some rare cases (b/27847265), the same NewInstance may be seen
+    // multiple times. We should only consider it once for removal, so we
+    // ensure it is not added more than once.
+    // Note that we cannot check whether this really is a NewInstance of String
+    // before RTP. We DCHECK that in RemoveRedundantUninitializedStrings.
+    if (!ContainsElement(uninitialized_strings_, string)) {
+      uninitialized_strings_.push_back(string);
+    }
+  }
 
  private:
   void SetLoopHeaderPhiInputs();
@@ -95,9 +107,6 @@
   bool UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist);
   void ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist);
 
-  HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type);
-  HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
-
   HFloatConstant* GetFloatEquivalent(HIntConstant* constant);
   HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant);
   HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
@@ -105,25 +114,17 @@
 
   void RemoveRedundantUninitializedStrings();
 
+  HGraph* graph_;
+  Handle<mirror::DexCache> dex_cache_;
   StackHandleScopeCollection* const handles_;
 
   // True if types of ambiguous ArrayGets have been resolved.
   bool agets_fixed_;
 
-  // Locals for the current block being visited.
-  ArenaVector<HInstruction*>* current_locals_;
-
-  // Keep track of loop headers found. The last phase of the analysis iterates
-  // over these blocks to set the inputs of their phis.
-  ArenaVector<HBasicBlock*> loop_headers_;
-
   ArenaVector<HArrayGet*> ambiguous_agets_;
   ArenaVector<HArraySet*> ambiguous_asets_;
   ArenaVector<HNewInstance*> uninitialized_strings_;
 
-  // HEnvironment for each block.
-  ArenaVector<ArenaVector<HInstruction*>> locals_for_;
-
   DISALLOW_COPY_AND_ASSIGN(SsaBuilder);
 };
 
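With SsaBuilder no longer a graph visitor, HInstructionBuilder is expected to report candidates through the new MaybeAdd*/AddUninitializedString hooks and BuildSsa resolves them afterwards. A tiny sketch of that collect-then-resolve contract, including the dedup-on-insert that AddUninitializedString performs; names here are illustrative:

#include <algorithm>
#include <cassert>
#include <vector>

// Models the SsaBuilder side: candidates are recorded while the graph is
// built; duplicates are dropped on insert because the same instruction may
// be reported more than once (cf. b/27847265).
class Collector {
 public:
  void AddCandidate(int id) {
    if (std::find(candidates_.begin(), candidates_.end(), id) == candidates_.end()) {
      candidates_.push_back(id);
    }
  }
  const std::vector<int>& Candidates() const { return candidates_; }

 private:
  std::vector<int> candidates_;
};

int main() {
  Collector c;
  c.AddCandidate(7);
  c.AddCandidate(7);  // Reported twice; recorded once.
  assert(c.Candidates().size() == 1u);
  return 0;
}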
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 83e9dac..36e0d99 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -283,11 +283,9 @@
       if (current->IsEmittedAtUseSite()) {
         if (kIsDebugBuild) {
           DCHECK(!current->GetLocations()->Out().IsValid());
-          for (HUseIterator<HInstruction*> use_it(current->GetUses());
-               !use_it.Done();
-               use_it.Advance()) {
-            HInstruction* user = use_it.Current()->GetUser();
-            size_t index = use_it.Current()->GetIndex();
+          for (const HUseListNode<HInstruction*>& use : current->GetUses()) {
+            HInstruction* user = use.GetUser();
+            size_t index = use.GetIndex();
             DCHECK(!user->GetLocations()->InAt(index).IsValid());
           }
           DCHECK(!current->HasEnvironmentUses());
@@ -311,17 +309,8 @@
     }
 
     if (block->IsLoopHeader()) {
-      if (kIsDebugBuild && block->GetLoopInformation()->IsIrreducible()) {
-        // To satisfy our liveness algorithm, we need to ensure loop headers of
-        // irreducible loops do not have any live-in instructions, except constants
-        // and the current method, which can be trivially re-materialized.
-        for (uint32_t idx : live_in->Indexes()) {
-          HInstruction* instruction = GetInstructionFromSsaIndex(idx);
-          DCHECK(instruction->GetBlock()->IsEntryBlock()) << instruction->DebugName();
-          DCHECK(!instruction->IsParameterValue()) << instruction->DebugName();
-          DCHECK(instruction->IsCurrentMethod() || instruction->IsConstant())
-              << instruction->DebugName();
-        }
+      if (kIsDebugBuild) {
+        CheckNoLiveInIrreducibleLoop(*block);
       }
       size_t last_position = block->GetLoopInformation()->GetLifetimeEnd();
       // For all live_in instructions at the loop header, we need to create a range
@@ -346,6 +335,9 @@
       // change in this loop), and the live_out set.  If the live_out
       // set does not change, there is no need to update the live_in set.
       if (UpdateLiveOut(block) && UpdateLiveIn(block)) {
+        if (kIsDebugBuild) {
+          CheckNoLiveInIrreducibleLoop(block);
+        }
         changed = true;
       }
     }
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 97f2aee..1fcba8b 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -969,8 +969,49 @@
     return false;
   }
 
+  bool IsLinearOrderWellFormed(const HGraph& graph) {
+    for (HBasicBlock* header : graph.GetBlocks()) {
+      if (header == nullptr || !header->IsLoopHeader()) {
+        continue;
+      }
+
+      HLoopInformation* loop = header->GetLoopInformation();
+      size_t num_blocks = loop->GetBlocks().NumSetBits();
+      size_t found_blocks = 0u;
+
+      for (HLinearOrderIterator it(graph); !it.Done(); it.Advance()) {
+        HBasicBlock* current = it.Current();
+        if (loop->Contains(*current)) {
+          found_blocks++;
+          if (found_blocks == 1u && current != header) {
+            // First block is not the header.
+            return false;
+          } else if (found_blocks == num_blocks && !loop->IsBackEdge(*current)) {
+            // Last block is not a back edge.
+            return false;
+          }
+        } else if (found_blocks != 0u && found_blocks != num_blocks) {
+          // Blocks are not adjacent.
+          return false;
+        }
+      }
+      DCHECK_EQ(found_blocks, num_blocks);
+    }
+
+    return true;
+  }
+
   void AddBackEdgeUses(const HBasicBlock& block_at_use) {
     DCHECK(block_at_use.IsInLoop());
+    if (block_at_use.GetGraph()->HasIrreducibleLoops()) {
+      // Linear order may not be well formed when irreducible loops are present,
+      // i.e. loop blocks may not be adjacent and a back edge may not be last,
+      // which violates assumptions made in this method.
+      return;
+    }
+
+    DCHECK(IsLinearOrderWellFormed(*block_at_use.GetGraph()));
+
     // Add synthesized uses at the back edge of loops to help the register allocator.
     // Note that this method is called in decreasing liveness order, to facilitate adding
     // uses at the head of the `first_use_` linked list. Because below
@@ -999,8 +1040,8 @@
         break;
       }
 
-      DCHECK(last_in_new_list == nullptr
-             || back_edge_use_position > last_in_new_list->GetPosition());
+      DCHECK(last_in_new_list == nullptr ||
+             back_edge_use_position > last_in_new_list->GetPosition());
 
       UsePosition* new_use = new (allocator_) UsePosition(
           /* user */ nullptr,
@@ -1219,6 +1260,23 @@
     return instruction->GetType() == Primitive::kPrimNot;
   }
 
+  void CheckNoLiveInIrreducibleLoop(const HBasicBlock& block) const {
+    if (!block.IsLoopHeader() || !block.GetLoopInformation()->IsIrreducible()) {
+      return;
+    }
+    BitVector* live_in = GetLiveInSet(block);
+    // To satisfy our liveness algorithm, we need to ensure loop headers of
+    // irreducible loops do not have any live-in instructions, except constants
+    // and the current method, which can be trivially re-materialized.
+    for (uint32_t idx : live_in->Indexes()) {
+      HInstruction* instruction = GetInstructionFromSsaIndex(idx);
+      DCHECK(instruction->GetBlock()->IsEntryBlock()) << instruction->DebugName();
+      DCHECK(!instruction->IsParameterValue());
+      DCHECK(instruction->IsCurrentMethod() || instruction->IsConstant())
+          << instruction->DebugName();
+    }
+  }
+
   HGraph* const graph_;
   CodeGenerator* const codegen_;
   ArenaVector<BlockInfo*> block_infos_;
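IsLinearOrderWellFormed above encodes the assumption that AddBackEdgeUses depends on: every loop's blocks appear contiguously in the linear order, starting at the header, and irreducible loops can break this. The standalone model below checks the contiguity and header-first parts; the back-edge-last condition of the real check is noted but omitted for brevity:

#include <cassert>
#include <unordered_set>
#include <vector>

// True if all of `loop_blocks` appear contiguously in `linear_order` and the
// first of them is `header`. (The real check additionally requires the last
// loop block to be a back edge.)
bool LoopBlocksContiguous(const std::vector<int>& linear_order,
                          const std::unordered_set<int>& loop_blocks,
                          int header) {
  size_t found = 0;
  for (int block : linear_order) {
    if (loop_blocks.count(block) != 0) {
      ++found;
      if (found == 1 && block != header) return false;  // Header must come first.
    } else if (found != 0 && found != loop_blocks.size()) {
      return false;  // A non-loop block interrupts the loop's blocks.
    }
  }
  return found == loop_blocks.size();
}

int main() {
  // Loop {2, 3, 4} with header 2.
  assert(LoopBlocksContiguous({1, 2, 3, 4, 5}, {2, 3, 4}, 2));
  assert(!LoopBlocksContiguous({1, 2, 5, 3, 4}, {2, 3, 4}, 2));  // Interrupted.
  return 0;
}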
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 6816b6a..c67612e 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -17,6 +17,7 @@
 #include "ssa_phi_elimination.h"
 
 #include "base/arena_containers.h"
+#include "base/arena_bit_vector.h"
 #include "base/bit_vector-inl.h"
 
 namespace art {
@@ -30,7 +31,7 @@
   // Phis are constructed live and should not be revived if previously marked
   // dead. This algorithm temporarily breaks that invariant but we DCHECK that
   // only phis which were initially live are revived.
-  ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter());
+  ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination));
 
   // Add to the worklist phis referenced by non-phi instructions.
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
@@ -43,8 +44,8 @@
 
       bool keep_alive = (graph_->IsDebuggable() && phi->HasEnvironmentUses());
       if (!keep_alive) {
-        for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
-          if (!use_it.Current()->GetUser()->IsPhi()) {
+        for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
+          if (!use.GetUser()->IsPhi()) {
             keep_alive = true;
             break;
           }
@@ -94,9 +95,8 @@
       if (phi->IsDead()) {
         // Make sure the phi is only used by other dead phis.
         if (kIsDebugBuild) {
-          for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done();
-               use_it.Advance()) {
-            HInstruction* user = use_it.Current()->GetUser();
+          for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
+            HInstruction* user = use.GetUser();
             DCHECK(user->IsLoopHeaderPhi());
             DCHECK(user->AsPhi()->IsDead());
           }
@@ -106,11 +106,9 @@
           phi->RemoveAsUserOfInput(i);
         }
         // Remove the phi from environments that use it.
-        for (HUseIterator<HEnvironment*> use_it(phi->GetEnvUses()); !use_it.Done();
-             use_it.Advance()) {
-          HUseListNode<HEnvironment*>* user_node = use_it.Current();
-          HEnvironment* user = user_node->GetUser();
-          user->SetRawEnvAt(user_node->GetIndex(), nullptr);
+        for (const HUseListNode<HEnvironment*>& use : phi->GetEnvUses()) {
+          HEnvironment* user = use.GetUser();
+          user->SetRawEnvAt(use.GetIndex(), nullptr);
         }
         // Delete it from the instruction list.
         block->RemovePhi(phi, /*ensure_safety=*/ false);
@@ -130,8 +128,11 @@
     }
   }
 
-  ArenaSet<uint32_t> visited_phis_in_cycle(graph_->GetArena()->Adapter());
-  ArenaVector<HPhi*> cycle_worklist(graph_->GetArena()->Adapter());
+  ArenaBitVector visited_phis_in_cycle(graph_->GetArena(),
+                                       graph_->GetCurrentInstructionId(),
+                                       /* expandable */ false,
+                                       kArenaAllocSsaPhiElimination);
+  ArenaVector<HPhi*> cycle_worklist(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination));
 
   while (!worklist_.empty()) {
     HPhi* phi = worklist_.back();
@@ -142,17 +143,18 @@
       continue;
     }
 
-    if (phi->InputCount() == 0) {
-      DCHECK(phi->IsDead());
+    // If the phi is dead, we know we won't revive it and it will be removed,
+    // so don't process it.
+    if (phi->IsDead()) {
       continue;
     }
 
     HInstruction* candidate = nullptr;
-    visited_phis_in_cycle.clear();
+    visited_phis_in_cycle.ClearAllBits();
     cycle_worklist.clear();
 
     cycle_worklist.push_back(phi);
-    visited_phis_in_cycle.insert(phi->GetId());
+    visited_phis_in_cycle.SetBit(phi->GetId());
     bool catch_phi_in_cycle = phi->IsCatchPhi();
     bool irreducible_loop_phi_in_cycle = phi->IsIrreducibleLoopHeaderPhi();
 
@@ -184,9 +186,9 @@
           if (input == current) {
             continue;
           } else if (input->IsPhi()) {
-            if (!ContainsElement(visited_phis_in_cycle, input->GetId())) {
+            if (!visited_phis_in_cycle.IsBitSet(input->GetId())) {
               cycle_worklist.push_back(input->AsPhi());
-              visited_phis_in_cycle.insert(input->GetId());
+              visited_phis_in_cycle.SetBit(input->GetId());
               catch_phi_in_cycle |= input->AsPhi()->IsCatchPhi();
               irreducible_loop_phi_in_cycle |= input->IsIrreducibleLoopHeaderPhi();
             } else {
@@ -233,10 +235,9 @@
 
       // Because we're updating the users of this phi, we may have new candidates
       // for elimination. Add phis that use this phi to the worklist.
-      for (HUseIterator<HInstruction*> it(current->GetUses()); !it.Done(); it.Advance()) {
-        HUseListNode<HInstruction*>* use = it.Current();
-        HInstruction* user = use->GetUser();
-        if (user->IsPhi() && !ContainsElement(visited_phis_in_cycle, user->GetId())) {
+      for (const HUseListNode<HInstruction*>& use : current->GetUses()) {
+        HInstruction* user = use.GetUser();
+        if (user->IsPhi() && !visited_phis_in_cycle.IsBitSet(user->GetId())) {
           worklist_.push_back(user->AsPhi());
         }
       }
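
The two hunks above make independent improvements to SSA phi elimination: use-list walks move from the manual HUseIterator protocol to range-based for over HUseListNode, and the visited set becomes an ArenaBitVector sized by GetCurrentInstructionId() instead of an ArenaSet<uint32_t> of phi ids. Since instruction ids are dense and bounded, a bit vector gives O(1) membership tests with one fixed allocation and no rehashing on the hot path. A minimal standalone sketch of that trade, with std::vector<bool> standing in for the arena-backed ArenaBitVector (the class and names below are illustrative, not ART API):

    #include <cstdint>
    #include <vector>

    // Sketch only: ids are assumed dense and bounded by a known maximum,
    // which is what lets a fixed-size bit vector replace a hash set.
    class VisitedPhis {
     public:
      explicit VisitedPhis(uint32_t max_instruction_id)
          : bits_(max_instruction_id, false) {}

      void Mark(uint32_t id) { bits_[id] = true; }            // SetBit
      bool Contains(uint32_t id) const { return bits_[id]; }  // IsBitSet
      void Clear() { bits_.assign(bits_.size(), false); }     // ClearAllBits

     private:
      std::vector<bool> bits_;  // Stand-in for ArenaBitVector.
    };
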
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index a688092..4297634 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -163,8 +163,8 @@
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [4, 4]\n"
-    "  1: IntConstant 4 [8]\n"
-    "  2: IntConstant 5 [8]\n"
+    "  1: IntConstant 5 [8]\n"
+    "  2: IntConstant 4 [8]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 3, 2\n"
     "  4: Equal(0, 0) [5]\n"
@@ -174,7 +174,7 @@
     "BasicBlock 3, pred: 1, succ: 4\n"
     "  7: Goto\n"
     "BasicBlock 4, pred: 2, 3, succ: 5\n"
-    "  8: Phi(1, 2) [9]\n"
+    "  8: Phi(2, 1) [9]\n"
     "  9: Return(8)\n"
     "BasicBlock 5, pred: 4\n"
     "  10: Exit\n";
@@ -258,19 +258,19 @@
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [5]\n"
-    "  1: IntConstant 4 [5]\n"
-    "  2: IntConstant 5 [9]\n"
+    "  1: IntConstant 5 [9]\n"
+    "  2: IntConstant 4 [5]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 2\n"
     "  4: Goto\n"
     "BasicBlock 2, pred: 1, 3, succ: 4, 3\n"
-    "  5: Phi(0, 1) [6, 6]\n"
+    "  5: Phi(0, 2) [6, 6]\n"
     "  6: Equal(5, 5) [7]\n"
     "  7: If(6)\n"
     "BasicBlock 3, pred: 2, succ: 2\n"
     "  8: Goto\n"
     "BasicBlock 4, pred: 2, succ: 5\n"
-    "  9: Return(2)\n"
+    "  9: Return(1)\n"
     "BasicBlock 5, pred: 4\n"
     "  10: Exit\n";
 
@@ -326,8 +326,8 @@
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [4, 4]\n"
-    "  1: IntConstant 4 [13]\n"
-    "  2: IntConstant 5 [13]\n"
+    "  1: IntConstant 5 [13]\n"
+    "  2: IntConstant 4 [13]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 3, 2\n"
     "  4: Equal(0, 0) [5]\n"
@@ -346,7 +346,7 @@
     "BasicBlock 7, pred: 6\n"
     "  12: Exit\n"
     "BasicBlock 8, pred: 2, 3, succ: 4\n"
-    "  13: Phi(1, 2) [8, 8, 11]\n"
+    "  13: Phi(2, 1) [11, 8, 8]\n"
     "  14: Goto\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
@@ -496,7 +496,7 @@
   // does not update the local.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
-    "  0: IntConstant 0 [4, 8, 6, 6, 2, 2, 8, 4]\n"
+    "  0: IntConstant 0 [4, 4, 8, 8, 6, 6, 2, 2]\n"
     "  1: Goto\n"
     "BasicBlock 1, pred: 0, succ: 3, 2\n"
     "  2: Equal(0, 0) [3]\n"
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 3f41e35..11a254e 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -137,34 +137,43 @@
 
 size_t StackMapStream::PrepareForFillIn() {
   int stack_mask_number_of_bits = stack_mask_max_ + 1;  // Need room for max element too.
-  stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte;
-  inline_info_size_ = ComputeInlineInfoSize();
   dex_register_maps_size_ = ComputeDexRegisterMapsSize();
+  ComputeInlineInfoEncoding();  // Needs dex_register_maps_size_.
+  inline_info_size_ = inline_infos_.size() * inline_info_encoding_.GetEntrySize();
   uint32_t max_native_pc_offset = ComputeMaxNativePcOffset();
-  stack_map_encoding_ = StackMapEncoding::CreateFromSizes(stack_mask_size_,
-                                                          inline_info_size_,
-                                                          dex_register_maps_size_,
-                                                          dex_pc_max_,
-                                                          max_native_pc_offset,
-                                                          register_mask_max_);
-  stack_maps_size_ = stack_maps_.size() * stack_map_encoding_.ComputeStackMapSize();
+  size_t stack_map_size = stack_map_encoding_.SetFromSizes(max_native_pc_offset,
+                                                           dex_pc_max_,
+                                                           dex_register_maps_size_,
+                                                           inline_info_size_,
+                                                           register_mask_max_,
+                                                           stack_mask_number_of_bits);
+  stack_maps_size_ = stack_maps_.size() * stack_map_size;
   dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
 
-  // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned.
-  needed_size_ = CodeInfo::kFixedSize
-      + stack_maps_size_
-      + dex_register_location_catalog_size_
-      + dex_register_maps_size_
-      + inline_info_size_;
+  size_t non_header_size =
+      stack_maps_size_ +
+      dex_register_location_catalog_size_ +
+      dex_register_maps_size_ +
+      inline_info_size_;
 
-  stack_maps_start_ = CodeInfo::kFixedSize;
+  // Prepare the CodeInfo variable-sized encoding.
+  CodeInfoEncoding code_info_encoding;
+  code_info_encoding.non_header_size = non_header_size;
+  code_info_encoding.number_of_stack_maps = stack_maps_.size();
+  code_info_encoding.stack_map_size_in_bytes = stack_map_size;
+  code_info_encoding.stack_map_encoding = stack_map_encoding_;
+  code_info_encoding.inline_info_encoding = inline_info_encoding_;
+  code_info_encoding.number_of_location_catalog_entries = location_catalog_entries_.size();
+  code_info_encoding.Compress(&code_info_encoding_);
+
   // TODO: Move the catalog to the end. It is currently too expensive at runtime
   // to compute its size (note that we do not encode that size in the CodeInfo).
-  dex_register_location_catalog_start_ = stack_maps_start_ + stack_maps_size_;
+  dex_register_location_catalog_start_ = code_info_encoding_.size() + stack_maps_size_;
   dex_register_maps_start_ =
       dex_register_location_catalog_start_ + dex_register_location_catalog_size_;
   inline_infos_start_ = dex_register_maps_start_ + dex_register_maps_size_;
 
+  needed_size_ = code_info_encoding_.size() + non_header_size;
   return needed_size_;
 }
 
@@ -217,19 +226,39 @@
   return size;
 }
 
-size_t StackMapStream::ComputeInlineInfoSize() const {
-  return inline_infos_.size() * InlineInfo::SingleEntrySize()
-    // For encoding the depth.
-    + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
+void StackMapStream::ComputeInlineInfoEncoding() {
+  uint32_t method_index_max = 0;
+  uint32_t dex_pc_max = 0;
+  uint32_t invoke_type_max = 0;
+
+  uint32_t inline_info_index = 0;
+  for (const StackMapEntry& entry : stack_maps_) {
+    for (size_t j = 0; j < entry.inlining_depth; ++j) {
+      InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
+      method_index_max = std::max(method_index_max, inline_entry.method_index);
+      dex_pc_max = std::max(dex_pc_max, inline_entry.dex_pc);
+      invoke_type_max = std::max(invoke_type_max, static_cast<uint32_t>(inline_entry.invoke_type));
+    }
+  }
+  DCHECK_EQ(inline_info_index, inline_infos_.size());
+
+  inline_info_encoding_.SetFromSizes(method_index_max,
+                                     dex_pc_max,
+                                     invoke_type_max,
+                                     dex_register_maps_size_);
 }
 
 void StackMapStream::FillIn(MemoryRegion region) {
   DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
   DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn";
 
-  CodeInfo code_info(region);
   DCHECK_EQ(region.size(), needed_size_);
-  code_info.SetOverallSize(region.size());
+
+  // Note that the memory region does not have to be zeroed when we JIT code
+  // because we do not use the arena allocator there.
+
+  // Write the CodeInfo header.
+  region.CopyFrom(0, MemoryRegion(code_info_encoding_.data(), code_info_encoding_.size()));
 
   MemoryRegion dex_register_locations_region = region.Subregion(
       dex_register_maps_start_, dex_register_maps_size_);
@@ -237,12 +266,11 @@
   MemoryRegion inline_infos_region = region.Subregion(
       inline_infos_start_, inline_info_size_);
 
-  code_info.SetEncoding(stack_map_encoding_);
-  code_info.SetNumberOfStackMaps(stack_maps_.size());
-  DCHECK_EQ(code_info.GetStackMapsSize(code_info.ExtractEncoding()), stack_maps_size_);
+  CodeInfo code_info(region);
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  DCHECK_EQ(code_info.GetStackMapsSize(encoding), stack_maps_size_);
 
   // Set the Dex register location catalog.
-  code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.size());
   MemoryRegion dex_register_location_catalog_region = region.Subregion(
       dex_register_location_catalog_start_, dex_register_location_catalog_size_);
   DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region);
@@ -260,17 +288,22 @@
   uintptr_t next_dex_register_map_offset = 0;
   uintptr_t next_inline_info_offset = 0;
   for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) {
-    StackMap stack_map = code_info.GetStackMapAt(i, stack_map_encoding_);
+    StackMap stack_map = code_info.GetStackMapAt(i, encoding);
     StackMapEntry entry = stack_maps_[i];
 
     stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc);
     stack_map.SetNativePcOffset(stack_map_encoding_, entry.native_pc_offset);
     stack_map.SetRegisterMask(stack_map_encoding_, entry.register_mask);
+    size_t number_of_stack_mask_bits = stack_map.GetNumberOfStackMaskBits(stack_map_encoding_);
     if (entry.sp_mask != nullptr) {
-      stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask);
+      for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) {
+        stack_map.SetStackMaskBit(stack_map_encoding_, bit, entry.sp_mask->IsBitSet(bit));
+      }
     } else {
       // The MemoryRegion does not have to be zeroed, so make sure we clear the bits.
-      stack_map.SetStackMask(stack_map_encoding_, empty_bitmask);
+      for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) {
+        stack_map.SetStackMaskBit(stack_map_encoding_, bit, false);
+      }
     }
 
     if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
@@ -282,7 +315,7 @@
         // If we have a hit reuse the offset.
         stack_map.SetDexRegisterMapOffset(
             stack_map_encoding_,
-            code_info.GetStackMapAt(entry.same_dex_register_map_as_, stack_map_encoding_)
+            code_info.GetStackMapAt(entry.same_dex_register_map_as_, encoding)
                 .GetDexRegisterMapOffset(stack_map_encoding_));
       } else {
         // New dex registers maps should be added to the stack map.
@@ -306,7 +339,7 @@
     if (entry.inlining_depth != 0) {
       MemoryRegion inline_region = inline_infos_region.Subregion(
           next_inline_info_offset,
-          InlineInfo::kFixedSize + entry.inlining_depth * InlineInfo::SingleEntrySize());
+          entry.inlining_depth * inline_info_encoding_.GetEntrySize());
       next_inline_info_offset += inline_region.size();
       InlineInfo inline_info(inline_region);
 
@@ -314,16 +347,18 @@
       stack_map.SetInlineDescriptorOffset(
           stack_map_encoding_, inline_region.start() - dex_register_locations_region.start());
 
-      inline_info.SetDepth(entry.inlining_depth);
+      inline_info.SetDepth(inline_info_encoding_, entry.inlining_depth);
       DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
       for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
         InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index];
-        inline_info.SetMethodIndexAtDepth(depth, inline_entry.method_index);
-        inline_info.SetDexPcAtDepth(depth, inline_entry.dex_pc);
-        inline_info.SetInvokeTypeAtDepth(depth, inline_entry.invoke_type);
+        inline_info.SetMethodIndexAtDepth(inline_info_encoding_, depth, inline_entry.method_index);
+        inline_info.SetDexPcAtDepth(inline_info_encoding_, depth, inline_entry.dex_pc);
+        inline_info.SetInvokeTypeAtDepth(inline_info_encoding_, depth, inline_entry.invoke_type);
         if (inline_entry.num_dex_registers == 0) {
           // No dex map available.
-          inline_info.SetDexRegisterMapOffsetAtDepth(depth, StackMap::kNoDexRegisterMap);
+          inline_info.SetDexRegisterMapOffsetAtDepth(inline_info_encoding_,
+                                                     depth,
+                                                     StackMap::kNoDexRegisterMap);
           DCHECK(inline_entry.live_dex_registers_mask == nullptr);
         } else {
           MemoryRegion register_region = dex_register_locations_region.Subregion(
@@ -333,7 +368,8 @@
           next_dex_register_map_offset += register_region.size();
           DexRegisterMap dex_register_map(register_region);
           inline_info.SetDexRegisterMapOffsetAtDepth(
-            depth, register_region.start() - dex_register_locations_region.start());
+              inline_info_encoding_,
+              depth, register_region.start() - dex_register_locations_region.start());
 
           FillInDexRegisterMap(dex_register_map,
                                inline_entry.num_dex_registers,
@@ -437,7 +473,7 @@
                                          size_t num_dex_registers,
                                          BitVector* live_dex_registers_mask,
                                          size_t dex_register_locations_index) const {
-  StackMapEncoding encoding = code_info.ExtractEncoding();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
   for (size_t reg = 0; reg < num_dex_registers; reg++) {
     // Find the location we tried to encode.
     DexRegisterLocation expected = DexRegisterLocation::None();
@@ -464,25 +500,26 @@
 // Check that all StackMapStream inputs are correctly encoded by trying to read them back.
 void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
   CodeInfo code_info(region);
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  DCHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size());
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  DCHECK_EQ(code_info.GetNumberOfStackMaps(encoding), stack_maps_.size());
   for (size_t s = 0; s < stack_maps_.size(); ++s) {
     const StackMap stack_map = code_info.GetStackMapAt(s, encoding);
+    const StackMapEncoding& stack_map_encoding = encoding.stack_map_encoding;
     StackMapEntry entry = stack_maps_[s];
 
     // Check main stack map fields.
-    DCHECK_EQ(stack_map.GetNativePcOffset(encoding), entry.native_pc_offset);
-    DCHECK_EQ(stack_map.GetDexPc(encoding), entry.dex_pc);
-    DCHECK_EQ(stack_map.GetRegisterMask(encoding), entry.register_mask);
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
+    DCHECK_EQ(stack_map.GetNativePcOffset(stack_map_encoding), entry.native_pc_offset);
+    DCHECK_EQ(stack_map.GetDexPc(stack_map_encoding), entry.dex_pc);
+    DCHECK_EQ(stack_map.GetRegisterMask(stack_map_encoding), entry.register_mask);
+    size_t num_stack_mask_bits = stack_map.GetNumberOfStackMaskBits(stack_map_encoding);
     if (entry.sp_mask != nullptr) {
-      DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
-      for (size_t b = 0; b < stack_mask.size_in_bits(); b++) {
-        DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
+      DCHECK_GE(num_stack_mask_bits, entry.sp_mask->GetNumberOfBits());
+      for (size_t b = 0; b < num_stack_mask_bits; b++) {
+        DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), entry.sp_mask->IsBitSet(b));
       }
     } else {
-      for (size_t b = 0; b < stack_mask.size_in_bits(); b++) {
-        DCHECK_EQ(stack_mask.LoadBit(b), 0u);
+      for (size_t b = 0; b < num_stack_mask_bits; b++) {
+        DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), 0u);
       }
     }
 
@@ -494,17 +531,20 @@
                         entry.dex_register_locations_start_index);
 
     // Check inline info.
-    DCHECK_EQ(stack_map.HasInlineInfo(encoding), (entry.inlining_depth != 0));
+    DCHECK_EQ(stack_map.HasInlineInfo(stack_map_encoding), (entry.inlining_depth != 0));
     if (entry.inlining_depth != 0) {
       InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-      DCHECK_EQ(inline_info.GetDepth(), entry.inlining_depth);
+      DCHECK_EQ(inline_info.GetDepth(encoding.inline_info_encoding), entry.inlining_depth);
       for (size_t d = 0; d < entry.inlining_depth; ++d) {
         size_t inline_info_index = entry.inline_infos_start_index + d;
         DCHECK_LT(inline_info_index, inline_infos_.size());
         InlineInfoEntry inline_entry = inline_infos_[inline_info_index];
-        DCHECK_EQ(inline_info.GetDexPcAtDepth(d), inline_entry.dex_pc);
-        DCHECK_EQ(inline_info.GetMethodIndexAtDepth(d), inline_entry.method_index);
-        DCHECK_EQ(inline_info.GetInvokeTypeAtDepth(d), inline_entry.invoke_type);
+        DCHECK_EQ(inline_info.GetDexPcAtDepth(encoding.inline_info_encoding, d),
+                  inline_entry.dex_pc);
+        DCHECK_EQ(inline_info.GetMethodIndexAtDepth(encoding.inline_info_encoding, d),
+                  inline_entry.method_index);
+        DCHECK_EQ(inline_info.GetInvokeTypeAtDepth(encoding.inline_info_encoding, d),
+                  inline_entry.invoke_type);
 
         CheckDexRegisterMap(code_info,
                             code_info.GetDexRegisterMapAtDepth(
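
PrepareForFillIn now derives every field width from the maxima observed while streaming (max native pc offset, max dex pc, and so on) and serializes those choices into a variable-sized CodeInfo header, rather than fixing byte counts up front. The core idea is computing a field's bit width from the largest value it must hold. A standalone sketch of that derivation and of packing two fields into a per-entry byte size (BitsToStore is a stand-in for whatever helper SetFromSizes uses internally; the real encoding types live in runtime/stack_map.h):

    #include <cstddef>
    #include <cstdint>

    // Number of bits needed to store any value in [0, max_value].
    // A field whose maximum is 0 needs no bits and can be elided entirely.
    static size_t BitsToStore(uint32_t max_value) {
      size_t bits = 0;
      while (max_value != 0) {
        ++bits;
        max_value >>= 1;
      }
      return bits;
    }

    struct FieldEncoding {
      size_t bit_offset;
      size_t bit_size;
    };

    // Packs two consecutive fields and returns the per-entry size in bytes,
    // mirroring how a per-stack-map size could be computed from maxima.
    static size_t PackTwoFields(uint32_t native_pc_max, uint32_t dex_pc_max,
                                FieldEncoding* native_pc, FieldEncoding* dex_pc) {
      size_t bit = 0;
      native_pc->bit_offset = bit;
      native_pc->bit_size = BitsToStore(native_pc_max);
      bit += native_pc->bit_size;
      dex_pc->bit_offset = bit;
      dex_pc->bit_size = BitsToStore(dex_pc_max);
      bit += dex_pc->bit_size;
      return (bit + 7) / 8;  // Round up to whole bytes.
    }
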
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 016a911..41f72f5 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -74,13 +74,12 @@
                                            allocator->Adapter(kArenaAllocStackMapStream)),
         current_entry_(),
         current_inline_info_(),
-        stack_mask_size_(0),
+        code_info_encoding_(allocator->Adapter(kArenaAllocStackMapStream)),
         inline_info_size_(0),
         dex_register_maps_size_(0),
         stack_maps_size_(0),
         dex_register_location_catalog_size_(0),
         dex_register_location_catalog_start_(0),
-        stack_maps_start_(0),
         dex_register_maps_start_(0),
         inline_infos_start_(0),
         needed_size_(0),
@@ -90,6 +89,7 @@
     location_catalog_entries_.reserve(4);
     dex_register_locations_.reserve(10 * 4);
     inline_infos_.reserve(2);
+    code_info_encoding_.reserve(16);
   }
 
   // See runtime/stack_map.h to know what these fields contain.
@@ -156,7 +156,7 @@
   size_t ComputeDexRegisterMapSize(uint32_t num_dex_registers,
                                    const BitVector* live_dex_registers_mask) const;
   size_t ComputeDexRegisterMapsSize() const;
-  size_t ComputeInlineInfoSize() const;
+  void ComputeInlineInfoEncoding();
 
   // Returns the index of an entry with the same dex register map as the current_entry,
   // or kNoSameDexMapFound if no such entry exists.
@@ -200,13 +200,13 @@
   StackMapEntry current_entry_;
   InlineInfoEntry current_inline_info_;
   StackMapEncoding stack_map_encoding_;
-  size_t stack_mask_size_;
+  InlineInfoEncoding inline_info_encoding_;
+  ArenaVector<uint8_t> code_info_encoding_;
   size_t inline_info_size_;
   size_t dex_register_maps_size_;
   size_t stack_maps_size_;
   size_t dex_register_location_catalog_size_;
   size_t dex_register_location_catalog_start_;
-  size_t stack_maps_start_;
   size_t dex_register_maps_start_;
   size_t inline_infos_start_;
   size_t needed_size_;
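
The header now keeps the compressed CodeInfo header in an ArenaVector<uint8_t> (code_info_encoding_, reserved at 16 bytes) instead of tracking stack_mask_size_ and stack_maps_start_ separately. CodeInfoEncoding::Compress itself is not shown in this diff; ART uses ULEB128 heavily elsewhere, so as an assumption, a sketch of appending variable-length unsigned values to such a byte buffer:

    #include <cstdint>
    #include <vector>

    // ULEB128 append: 7 payload bits per byte, high bit set on all but the
    // last byte. Small values (counts, sizes) stay near one byte each, which
    // is why a 16-byte reserve usually avoids any reallocation.
    static void AppendUleb128(std::vector<uint8_t>* out, uint32_t value) {
      while (value >= 0x80) {
        out->push_back(static_cast<uint8_t>((value & 0x7F) | 0x80));
        value >>= 7;
      }
      out->push_back(static_cast<uint8_t>(value));
    }

    // Usage sketch (the field set here is hypothetical):
    //   std::vector<uint8_t> header;
    //   AppendUleb128(&header, non_header_size);
    //   AppendUleb128(&header, number_of_stack_maps);
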
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 604787f..967fd96 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -23,9 +23,18 @@
 
 namespace art {
 
-static bool SameBits(MemoryRegion region, const BitVector& bit_vector) {
-  for (size_t i = 0; i < region.size_in_bits(); ++i) {
-    if (region.LoadBit(i) != bit_vector.IsBitSet(i)) {
+// Check that the stack mask of the given stack map is identical
+// to the given bit vector. Returns true if they are the same.
+static bool CheckStackMask(
+    const StackMap& stack_map,
+    StackMapEncoding& encoding,
+    const BitVector& bit_vector) {
+  int number_of_bits = stack_map.GetNumberOfStackMaskBits(encoding);
+  if (bit_vector.GetHighestBitSet() >= number_of_bits) {
+    return false;
+  }
+  for (int i = 0; i < number_of_bits; ++i) {
+    if (stack_map.GetStackMaskBit(encoding, i) != bit_vector.IsBitSet(i)) {
       return false;
     }
   }
@@ -52,12 +61,11 @@
   stream.FillIn(region);
 
   CodeInfo code_info(region);
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
-  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding));
 
-  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
-  ASSERT_EQ(2u, number_of_location_catalog_entries);
+  uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+  ASSERT_EQ(2u, number_of_catalog_entries);
   DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
   // The Dex register location catalog contains:
   // - one 1-byte short Dex register location, and
@@ -68,14 +76,13 @@
   StackMap stack_map = code_info.GetStackMapAt(0, encoding);
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
-  ASSERT_EQ(0u, stack_map.GetDexPc(encoding));
-  ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding));
-  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding));
+  ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
+  ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-  MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-  ASSERT_TRUE(SameBits(stack_mask, sp_mask));
+  ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask));
 
-  ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding));
+  ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
   DexRegisterMap dex_register_map =
       code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
   ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
@@ -100,9 +107,9 @@
   ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding));
 
   size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-      0, number_of_dex_registers, number_of_location_catalog_entries);
+      0, number_of_dex_registers, number_of_catalog_entries);
   size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-      1, number_of_dex_registers, number_of_location_catalog_entries);
+      1, number_of_dex_registers, number_of_catalog_entries);
   ASSERT_EQ(0u, index0);
   ASSERT_EQ(1u, index1);
   DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -114,7 +121,7 @@
   ASSERT_EQ(0, location0.GetValue());
   ASSERT_EQ(-2, location1.GetValue());
 
-  ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+  ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
 }
 
 TEST(StackMapTest, Test2) {
@@ -166,12 +173,11 @@
   stream.FillIn(region);
 
   CodeInfo code_info(region);
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  ASSERT_EQ(2u, encoding.NumberOfBytesForStackMask());
-  ASSERT_EQ(4u, code_info.GetNumberOfStackMaps());
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  ASSERT_EQ(4u, code_info.GetNumberOfStackMaps(encoding));
 
-  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
-  ASSERT_EQ(7u, number_of_location_catalog_entries);
+  uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+  ASSERT_EQ(7u, number_of_catalog_entries);
   DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
   // The Dex register location catalog contains:
   // - six 1-byte short Dex register locations, and
@@ -184,14 +190,13 @@
     StackMap stack_map = code_info.GetStackMapAt(0, encoding);
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
-    ASSERT_EQ(0u, stack_map.GetDexPc(encoding));
-    ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding));
-    ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding));
+    ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
+    ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+    ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-    ASSERT_TRUE(SameBits(stack_mask, sp_mask1));
+    ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask1));
 
-    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding));
+    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
     ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
@@ -216,9 +221,9 @@
     ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding));
 
     size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-        0, number_of_dex_registers, number_of_location_catalog_entries);
+        0, number_of_dex_registers, number_of_catalog_entries);
     size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-        1, number_of_dex_registers, number_of_location_catalog_entries);
+        1, number_of_dex_registers, number_of_catalog_entries);
     ASSERT_EQ(0u, index0);
     ASSERT_EQ(1u, index1);
     DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -230,15 +235,15 @@
     ASSERT_EQ(0, location0.GetValue());
     ASSERT_EQ(-2, location1.GetValue());
 
-    ASSERT_TRUE(stack_map.HasInlineInfo(encoding));
+    ASSERT_TRUE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
     InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-    ASSERT_EQ(2u, inline_info.GetDepth());
-    ASSERT_EQ(82u, inline_info.GetMethodIndexAtDepth(0));
-    ASSERT_EQ(42u, inline_info.GetMethodIndexAtDepth(1));
-    ASSERT_EQ(3u, inline_info.GetDexPcAtDepth(0));
-    ASSERT_EQ(2u, inline_info.GetDexPcAtDepth(1));
-    ASSERT_EQ(kDirect, inline_info.GetInvokeTypeAtDepth(0));
-    ASSERT_EQ(kStatic, inline_info.GetInvokeTypeAtDepth(1));
+    ASSERT_EQ(2u, inline_info.GetDepth(encoding.inline_info_encoding));
+    ASSERT_EQ(82u, inline_info.GetMethodIndexAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(42u, inline_info.GetMethodIndexAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(3u, inline_info.GetDexPcAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(2u, inline_info.GetDexPcAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(kDirect, inline_info.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(kStatic, inline_info.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 1));
   }
 
   // Second stack map.
@@ -246,14 +251,13 @@
     StackMap stack_map = code_info.GetStackMapAt(1, encoding);
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u, encoding)));
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u, encoding)));
-    ASSERT_EQ(1u, stack_map.GetDexPc(encoding));
-    ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding));
-    ASSERT_EQ(0xFFu, stack_map.GetRegisterMask(encoding));
+    ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map_encoding));
+    ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+    ASSERT_EQ(0xFFu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-    ASSERT_TRUE(SameBits(stack_mask, sp_mask2));
+    ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask2));
 
-    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding));
+    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
     ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
@@ -279,9 +283,9 @@
                   1, number_of_dex_registers, code_info, encoding));
 
     size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-        0, number_of_dex_registers, number_of_location_catalog_entries);
+        0, number_of_dex_registers, number_of_catalog_entries);
     size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-        1, number_of_dex_registers, number_of_location_catalog_entries);
+        1, number_of_dex_registers, number_of_catalog_entries);
     ASSERT_EQ(2u, index0);
     ASSERT_EQ(3u, index1);
     DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -293,7 +297,7 @@
     ASSERT_EQ(18, location0.GetValue());
     ASSERT_EQ(3, location1.GetValue());
 
-    ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+    ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
   }
 
   // Third stack map.
@@ -301,14 +305,13 @@
     StackMap stack_map = code_info.GetStackMapAt(2, encoding);
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(2u, encoding)));
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(192u, encoding)));
-    ASSERT_EQ(2u, stack_map.GetDexPc(encoding));
-    ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding));
-    ASSERT_EQ(0xABu, stack_map.GetRegisterMask(encoding));
+    ASSERT_EQ(2u, stack_map.GetDexPc(encoding.stack_map_encoding));
+    ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+    ASSERT_EQ(0xABu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-    ASSERT_TRUE(SameBits(stack_mask, sp_mask3));
+    ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask3));
 
-    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding));
+    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
     ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
@@ -334,9 +337,9 @@
                   1, number_of_dex_registers, code_info, encoding));
 
     size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-        0, number_of_dex_registers, number_of_location_catalog_entries);
+        0, number_of_dex_registers, number_of_catalog_entries);
     size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-        1, number_of_dex_registers, number_of_location_catalog_entries);
+        1, number_of_dex_registers, number_of_catalog_entries);
     ASSERT_EQ(4u, index0);
     ASSERT_EQ(5u, index1);
     DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -348,7 +351,7 @@
     ASSERT_EQ(6, location0.GetValue());
     ASSERT_EQ(8, location1.GetValue());
 
-    ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+    ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
   }
 
   // Fourth stack map.
@@ -356,14 +359,13 @@
     StackMap stack_map = code_info.GetStackMapAt(3, encoding);
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(3u, encoding)));
     ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(256u, encoding)));
-    ASSERT_EQ(3u, stack_map.GetDexPc(encoding));
-    ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding));
-    ASSERT_EQ(0xCDu, stack_map.GetRegisterMask(encoding));
+    ASSERT_EQ(3u, stack_map.GetDexPc(encoding.stack_map_encoding));
+    ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+    ASSERT_EQ(0xCDu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-    ASSERT_TRUE(SameBits(stack_mask, sp_mask4));
+    ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask4));
 
-    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding));
+    ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
     ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
@@ -389,9 +391,9 @@
                   1, number_of_dex_registers, code_info, encoding));
 
     size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-        0, number_of_dex_registers, number_of_location_catalog_entries);
+        0, number_of_dex_registers, number_of_catalog_entries);
     size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-        1, number_of_dex_registers, number_of_location_catalog_entries);
+        1, number_of_dex_registers, number_of_catalog_entries);
     ASSERT_EQ(3u, index0);  // Shared with second stack map.
     ASSERT_EQ(6u, index1);
     DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -403,7 +405,7 @@
     ASSERT_EQ(3, location0.GetValue());
     ASSERT_EQ(1, location1.GetValue());
 
-    ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+    ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
   }
 }
 
@@ -425,12 +427,11 @@
   stream.FillIn(region);
 
   CodeInfo code_info(region);
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
-  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding));
 
-  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
-  ASSERT_EQ(1u, number_of_location_catalog_entries);
+  uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+  ASSERT_EQ(1u, number_of_catalog_entries);
   DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
   // The Dex register location catalog contains:
   // - one 5-byte large Dex register location.
@@ -440,11 +441,11 @@
   StackMap stack_map = code_info.GetStackMapAt(0, encoding);
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
-  ASSERT_EQ(0u, stack_map.GetDexPc(encoding));
-  ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding));
-  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding));
+  ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
+  ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-  ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding));
+  ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
   DexRegisterMap dex_register_map =
       code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
   ASSERT_FALSE(dex_register_map.IsDexRegisterLive(0));
@@ -467,9 +468,9 @@
   ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding));
 
   size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-      0, number_of_dex_registers, number_of_location_catalog_entries);
+      0, number_of_dex_registers, number_of_catalog_entries);
   size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-      1, number_of_dex_registers, number_of_location_catalog_entries);
+      1, number_of_dex_registers, number_of_catalog_entries);
   ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0);
   ASSERT_EQ(0u, index1);
   DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -481,7 +482,7 @@
   ASSERT_EQ(0, location0.GetValue());
   ASSERT_EQ(-2, location1.GetValue());
 
-  ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+  ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
 }
 
 // Generate a stack map whose dex register offset is
@@ -518,13 +519,13 @@
   stream.FillIn(region);
 
   CodeInfo code_info(region);
-  StackMapEncoding encoding = code_info.ExtractEncoding();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
   // The location catalog contains two entries (DexRegisterLocation(kConstant, 0)
   // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index
   // has a size of 1 bit.
-  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
-  ASSERT_EQ(2u, number_of_location_catalog_entries);
-  ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_location_catalog_entries));
+  uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+  ASSERT_EQ(2u, number_of_catalog_entries);
+  ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_catalog_entries));
 
   // The first Dex register map contains:
   // - a live register bit mask for 1024 registers (that is, 128 bytes of
@@ -537,16 +538,17 @@
   DexRegisterMap dex_register_map0 =
       code_info.GetDexRegisterMapOf(stack_map0, encoding, number_of_dex_registers);
   ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers,
-                                                               number_of_location_catalog_entries));
+                                                               number_of_catalog_entries));
   ASSERT_EQ(255u, dex_register_map0.Size());
 
   StackMap stack_map1 = code_info.GetStackMapAt(1, encoding);
-  ASSERT_TRUE(stack_map1.HasDexRegisterMap(encoding));
+  ASSERT_TRUE(stack_map1.HasDexRegisterMap(encoding.stack_map_encoding));
   // ...the offset of the second Dex register map (relative to the
   // beginning of the Dex register maps region) is 255 (i.e.,
   // kNoDexRegisterMapSmallEncoding).
-  ASSERT_NE(stack_map1.GetDexRegisterMapOffset(encoding), StackMap::kNoDexRegisterMap);
-  ASSERT_EQ(stack_map1.GetDexRegisterMapOffset(encoding), 0xFFu);
+  ASSERT_NE(stack_map1.GetDexRegisterMapOffset(encoding.stack_map_encoding),
+            StackMap::kNoDexRegisterMap);
+  ASSERT_EQ(stack_map1.GetDexRegisterMapOffset(encoding.stack_map_encoding), 0xFFu);
 }
 
 TEST(StackMapTest, TestShareDexRegisterMap) {
@@ -578,7 +580,7 @@
   stream.FillIn(region);
 
   CodeInfo ci(region);
-  StackMapEncoding encoding = ci.ExtractEncoding();
+  CodeInfoEncoding encoding = ci.ExtractEncoding();
 
   // Verify first stack map.
   StackMap sm0 = ci.GetStackMapAt(0, encoding);
@@ -599,9 +601,12 @@
   ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci, encoding));
 
   // Verify dex register map offsets.
-  ASSERT_EQ(sm0.GetDexRegisterMapOffset(encoding), sm1.GetDexRegisterMapOffset(encoding));
-  ASSERT_NE(sm0.GetDexRegisterMapOffset(encoding), sm2.GetDexRegisterMapOffset(encoding));
-  ASSERT_NE(sm1.GetDexRegisterMapOffset(encoding), sm2.GetDexRegisterMapOffset(encoding));
+  ASSERT_EQ(sm0.GetDexRegisterMapOffset(encoding.stack_map_encoding),
+            sm1.GetDexRegisterMapOffset(encoding.stack_map_encoding));
+  ASSERT_NE(sm0.GetDexRegisterMapOffset(encoding.stack_map_encoding),
+            sm2.GetDexRegisterMapOffset(encoding.stack_map_encoding));
+  ASSERT_NE(sm1.GetDexRegisterMapOffset(encoding.stack_map_encoding),
+            sm2.GetDexRegisterMapOffset(encoding.stack_map_encoding));
 }
 
 TEST(StackMapTest, TestNoDexRegisterMap) {
@@ -624,34 +629,33 @@
   stream.FillIn(region);
 
   CodeInfo code_info(region);
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
-  ASSERT_EQ(2u, code_info.GetNumberOfStackMaps());
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding));
 
-  uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
-  ASSERT_EQ(0u, number_of_location_catalog_entries);
+  uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+  ASSERT_EQ(0u, number_of_catalog_entries);
   DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
   ASSERT_EQ(0u, location_catalog.Size());
 
   StackMap stack_map = code_info.GetStackMapAt(0, encoding);
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
-  ASSERT_EQ(0u, stack_map.GetDexPc(encoding));
-  ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding));
-  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding));
+  ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
+  ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-  ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding));
-  ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+  ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
+  ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
 
   stack_map = code_info.GetStackMapAt(1, encoding);
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding)));
   ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(67, encoding)));
-  ASSERT_EQ(1u, stack_map.GetDexPc(encoding));
-  ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding));
-  ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding));
+  ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map_encoding));
+  ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding.stack_map_encoding));
+  ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-  ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding));
-  ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+  ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
+  ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
 }
 
 TEST(StackMapTest, InlineTest) {
@@ -726,7 +730,7 @@
   stream.FillIn(region);
 
   CodeInfo ci(region);
-  StackMapEncoding encoding = ci.ExtractEncoding();
+  CodeInfoEncoding encoding = ci.ExtractEncoding();
 
   {
     // Verify first stack map.
@@ -737,13 +741,13 @@
     ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci, encoding));
 
     InlineInfo if0 = ci.GetInlineInfoOf(sm0, encoding);
-    ASSERT_EQ(2u, if0.GetDepth());
-    ASSERT_EQ(2u, if0.GetDexPcAtDepth(0));
-    ASSERT_EQ(42u, if0.GetMethodIndexAtDepth(0));
-    ASSERT_EQ(kStatic, if0.GetInvokeTypeAtDepth(0));
-    ASSERT_EQ(3u, if0.GetDexPcAtDepth(1));
-    ASSERT_EQ(82u, if0.GetMethodIndexAtDepth(1));
-    ASSERT_EQ(kStatic, if0.GetInvokeTypeAtDepth(1));
+    ASSERT_EQ(2u, if0.GetDepth(encoding.inline_info_encoding));
+    ASSERT_EQ(2u, if0.GetDexPcAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(42u, if0.GetMethodIndexAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(kStatic, if0.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(3u, if0.GetDexPcAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(82u, if0.GetMethodIndexAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(kStatic, if0.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 1));
 
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if0, encoding, 1);
     ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0, 1, ci, encoding));
@@ -763,16 +767,16 @@
     ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci, encoding));
 
     InlineInfo if1 = ci.GetInlineInfoOf(sm1, encoding);
-    ASSERT_EQ(3u, if1.GetDepth());
-    ASSERT_EQ(2u, if1.GetDexPcAtDepth(0));
-    ASSERT_EQ(42u, if1.GetMethodIndexAtDepth(0));
-    ASSERT_EQ(kDirect, if1.GetInvokeTypeAtDepth(0));
-    ASSERT_EQ(3u, if1.GetDexPcAtDepth(1));
-    ASSERT_EQ(82u, if1.GetMethodIndexAtDepth(1));
-    ASSERT_EQ(kStatic, if1.GetInvokeTypeAtDepth(1));
-    ASSERT_EQ(5u, if1.GetDexPcAtDepth(2));
-    ASSERT_EQ(52u, if1.GetMethodIndexAtDepth(2));
-    ASSERT_EQ(kVirtual, if1.GetInvokeTypeAtDepth(2));
+    ASSERT_EQ(3u, if1.GetDepth(encoding.inline_info_encoding));
+    ASSERT_EQ(2u, if1.GetDexPcAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(42u, if1.GetMethodIndexAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(kDirect, if1.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(3u, if1.GetDexPcAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(82u, if1.GetMethodIndexAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(kStatic, if1.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(5u, if1.GetDexPcAtDepth(encoding.inline_info_encoding, 2));
+    ASSERT_EQ(52u, if1.GetMethodIndexAtDepth(encoding.inline_info_encoding, 2));
+    ASSERT_EQ(kVirtual, if1.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 2));
 
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if1, encoding, 1);
     ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0, 1, ci, encoding));
@@ -782,7 +786,7 @@
     ASSERT_EQ(10, dex_registers2.GetConstant(1, 3, ci, encoding));
     ASSERT_EQ(5, dex_registers2.GetMachineRegister(2, 3, ci, encoding));
 
-    ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(2));
+    ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(encoding.inline_info_encoding, 2));
   }
 
   {
@@ -792,7 +796,7 @@
     DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2, encoding, 2);
     ASSERT_FALSE(dex_registers0.IsDexRegisterLive(0));
     ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci, encoding));
-    ASSERT_FALSE(sm2.HasInlineInfo(encoding));
+    ASSERT_FALSE(sm2.HasInlineInfo(encoding.stack_map_encoding));
   }
 
   {
@@ -804,18 +808,18 @@
     ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci, encoding));
 
     InlineInfo if2 = ci.GetInlineInfoOf(sm3, encoding);
-    ASSERT_EQ(3u, if2.GetDepth());
-    ASSERT_EQ(2u, if2.GetDexPcAtDepth(0));
-    ASSERT_EQ(42u, if2.GetMethodIndexAtDepth(0));
-    ASSERT_EQ(kVirtual, if2.GetInvokeTypeAtDepth(0));
-    ASSERT_EQ(5u, if2.GetDexPcAtDepth(1));
-    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(1));
-    ASSERT_EQ(kInterface, if2.GetInvokeTypeAtDepth(1));
-    ASSERT_EQ(10u, if2.GetDexPcAtDepth(2));
-    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(2));
-    ASSERT_EQ(kStatic, if2.GetInvokeTypeAtDepth(2));
+    ASSERT_EQ(3u, if2.GetDepth(encoding.inline_info_encoding));
+    ASSERT_EQ(2u, if2.GetDexPcAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(42u, if2.GetMethodIndexAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(kVirtual, if2.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(5u, if2.GetDexPcAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(kInterface, if2.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(10u, if2.GetDexPcAtDepth(encoding.inline_info_encoding, 2));
+    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(encoding.inline_info_encoding, 2));
+    ASSERT_EQ(kStatic, if2.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 2));
 
-    ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(0));
+    ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(encoding.inline_info_encoding, 0));
 
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, if2, encoding, 1);
     ASSERT_EQ(2, dex_registers1.GetMachineRegister(0, 1, ci, encoding));
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 48465e6..1ee1c4d 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -16,6 +16,7 @@
 
 #include "trampoline_compiler.h"
 
+#include "base/arena_allocator.h"
 #include "jni_env_ext.h"
 
 #ifdef ART_ENABLE_CODEGEN_arm
@@ -48,9 +49,9 @@
 
 #ifdef ART_ENABLE_CODEGEN_arm
 namespace arm {
-static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
-                                                    ThreadOffset<4> offset) {
-  Thumb2Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset<4> offset) {
+  Thumb2Assembler assembler(arena);
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (R0) in interpreter ABI.
@@ -68,19 +69,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace arm
 #endif  // ART_ENABLE_CODEGEN_arm
 
 #ifdef ART_ENABLE_CODEGEN_arm64
 namespace arm64 {
-static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
-                                                    ThreadOffset<8> offset) {
-  Arm64Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset<8> offset) {
+  Arm64Assembler assembler(arena);
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (X0) in interpreter ABI.
@@ -107,19 +108,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace arm64
 #endif  // ART_ENABLE_CODEGEN_arm64
 
 #ifdef ART_ENABLE_CODEGEN_mips
 namespace mips {
-static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
-                                                    ThreadOffset<4> offset) {
-  MipsAssembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset<4> offset) {
+  MipsAssembler assembler(arena);
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (A0) in interpreter ABI.
@@ -139,19 +140,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace mips
 #endif  // ART_ENABLE_CODEGEN_mips
 
 #ifdef ART_ENABLE_CODEGEN_mips64
 namespace mips64 {
-static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
-                                                    ThreadOffset<8> offset) {
-  Mips64Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset<8> offset) {
+  Mips64Assembler assembler(arena);
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (A0) in interpreter ABI.
@@ -171,18 +172,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace mips64
 #endif  // ART_ENABLE_CODEGEN_mips64
 
 #ifdef ART_ENABLE_CODEGEN_x86
 namespace x86 {
-static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) {
-  X86Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* arena,
+                                                                    ThreadOffset<4> offset) {
+  X86Assembler assembler(arena);
 
   // All x86 trampolines call via the Thread* held in fs.
   __ fs()->jmp(Address::Absolute(offset));
@@ -191,18 +193,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace x86
 #endif  // ART_ENABLE_CODEGEN_x86
 
 #ifdef ART_ENABLE_CODEGEN_x86_64
 namespace x86_64 {
-static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<8> offset) {
-  x86_64::X86_64Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* arena,
+                                                                    ThreadOffset<8> offset) {
+  x86_64::X86_64Assembler assembler(arena);
 
   // All x86 trampolines call via the Thread* held in gs.
   __ gs()->jmp(x86_64::Address::Absolute(offset, true));
@@ -211,28 +214,31 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace x86_64
 #endif  // ART_ENABLE_CODEGEN_x86_64
 
-const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa, EntryPointCallingConvention abi,
-                                               ThreadOffset<8> offset) {
+std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa,
+                                                               EntryPointCallingConvention abi,
+                                                               ThreadOffset<8> offset) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
   switch (isa) {
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
-      return arm64::CreateTrampoline(abi, offset);
+      return arm64::CreateTrampoline(&arena, abi, offset);
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
-      return mips64::CreateTrampoline(abi, offset);
+      return mips64::CreateTrampoline(&arena, abi, offset);
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
-      return x86_64::CreateTrampoline(offset);
+      return x86_64::CreateTrampoline(&arena, offset);
 #endif
     default:
       UNUSED(abi);
@@ -242,22 +248,25 @@
   }
 }
 
-const std::vector<uint8_t>* CreateTrampoline32(InstructionSet isa, EntryPointCallingConvention abi,
-                                               ThreadOffset<4> offset) {
+std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa,
+                                                               EntryPointCallingConvention abi,
+                                                               ThreadOffset<4> offset) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
   switch (isa) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
-      return arm::CreateTrampoline(abi, offset);
+      return arm::CreateTrampoline(&arena, abi, offset);
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return mips::CreateTrampoline(abi, offset);
+      return mips::CreateTrampoline(&arena, abi, offset);
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
       UNUSED(abi);
-      return x86::CreateTrampoline(offset);
+      return x86::CreateTrampoline(&arena, offset);
 #endif
     default:
       LOG(FATAL) << "Unexpected InstructionSet: " << isa;
diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h
index 66d5ac3..8f823f1 100644
--- a/compiler/trampolines/trampoline_compiler.h
+++ b/compiler/trampolines/trampoline_compiler.h
@@ -25,12 +25,12 @@
 namespace art {
 
 // Create code that will invoke the function held in thread local storage.
-const std::vector<uint8_t>* CreateTrampoline32(InstructionSet isa,
-                                               EntryPointCallingConvention abi,
-                                               ThreadOffset<4> entry_point_offset);
-const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa,
-                                               EntryPointCallingConvention abi,
-                                               ThreadOffset<8> entry_point_offset);
+std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa,
+                                                               EntryPointCallingConvention abi,
+                                                               ThreadOffset<4> entry_point_offset);
+std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa,
+                                                               EntryPointCallingConvention abi,
+                                                               ThreadOffset<8> entry_point_offset);
 
 }  // namespace art
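
A hedged caller sketch of the revised interface (InstallStub and its surroundings are invented for illustration; only the CreateTrampoline32 signature is taken from the header above):

    // Illustrative caller, not a real call site: the buffer is owned through
    // the unique_ptr, so there is no matching delete to write.
    void InstallStub(EntryPointCallingConvention abi, ThreadOffset<4> entry_point_offset) {
      std::unique_ptr<const std::vector<uint8_t>> stub =
          CreateTrampoline32(kThumb2, abi, entry_point_offset);
      const uint8_t* code = stub->data();
      size_t code_size = stub->size();
      // ... copy `code`/`code_size` wherever needed; the buffer dies with `stub`.
    }
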
 
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index dead8fd..e5f91dc 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -845,7 +845,7 @@
 
 void ArmAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
   ArmManagedRegister scratch = mscratch.AsArm();
-  ArmExceptionSlowPath* slow = new ArmExceptionSlowPath(scratch, stack_adjust);
+  ArmExceptionSlowPath* slow = new (GetArena()) ArmExceptionSlowPath(scratch, stack_adjust);
   buffer_.EnqueueSlowPath(slow);
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
                  TR, Thread::ExceptionOffset<4>().Int32Value());
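
The slow path is now arena-allocated: SlowPath derives from DeletableArenaObject<kArenaAllocAssembler> (see the assembler.h hunk further down), which supplies an operator new taking an ArenaAllocator*. A schematic of the pattern, with invented names:

    // Sketch: anything deriving from a DeletableArenaObject<> base can be
    // placement-allocated into an arena and is reclaimed wholesale when the
    // arena is destroyed.
    class MyArenaThing : public DeletableArenaObject<kArenaAllocAssembler> {
     public:
      explicit MyArenaThing(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
     private:
      size_t stack_adjust_;
    };

    // Inside an Assembler member function (GetArena() is the protected
    // accessor added in the assembler.h hunk below):
    //   MyArenaThing* thing = new (GetArena()) MyArenaThing(stack_adjust);
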
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index a894565..ffbe786 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -20,6 +20,8 @@
 #include <type_traits>
 #include <vector>
 
+#include "base/arena_allocator.h"
+#include "base/arena_containers.h"
 #include "base/bit_utils.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
@@ -1078,6 +1080,9 @@
   }
 
  protected:
+  explicit ArmAssembler(ArenaAllocator* arena)
+      : Assembler(arena), tracked_labels_(arena->Adapter(kArenaAllocAssembler)) {}
+
   // Returns whether or not the given register is used for passing parameters.
   static int RegisterCompare(const Register* reg1, const Register* reg2) {
     return *reg1 - *reg2;
@@ -1086,7 +1091,7 @@
   void FinalizeTrackedLabels();
 
   // Tracked labels. Use a vector, as we need to sort before adjusting.
-  std::vector<Label*> tracked_labels_;
+  ArenaVector<Label*> tracked_labels_;
 };
 
 // Slowpath entered when Thread::Current()->_exception is non-null
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index e3e05ca..bc6020e 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -30,8 +30,7 @@
 
 class Arm32Assembler FINAL : public ArmAssembler {
  public:
-  Arm32Assembler() {
-  }
+  explicit Arm32Assembler(ArenaAllocator* arena) : ArmAssembler(arena) {}
   virtual ~Arm32Assembler() {}
 
   bool IsThumb() const OVERRIDE {
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 15298b3..546dd65 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <type_traits>
+
 #include "assembler_thumb2.h"
 
 #include "base/bit_utils.h"
@@ -25,6 +27,30 @@
 namespace art {
 namespace arm {
 
+template <typename Function>
+void Thumb2Assembler::Fixup::ForExpandableDependencies(Thumb2Assembler* assembler, Function fn) {
+  static_assert(
+      std::is_same<typename std::result_of<Function(FixupId, FixupId)>::type, void>::value,
+      "Incorrect signature for argument `fn`: expected (FixupId, FixupId) -> void");
+  Fixup* fixups = assembler->fixups_.data();
+  for (FixupId fixup_id = 0u, end_id = assembler->fixups_.size(); fixup_id != end_id; ++fixup_id) {
+    uint32_t target = fixups[fixup_id].target_;
+    if (target > fixups[fixup_id].location_) {
+      for (FixupId id = fixup_id + 1u; id != end_id && fixups[id].location_ < target; ++id) {
+        if (fixups[id].CanExpand()) {
+          fn(id, fixup_id);
+        }
+      }
+    } else {
+      for (FixupId id = fixup_id; id != 0u && fixups[id - 1u].location_ >= target; --id) {
+        if (fixups[id - 1u].CanExpand()) {
+          fn(id - 1u, fixup_id);
+        }
+      }
+    }
+  }
+}
+
 void Thumb2Assembler::Fixup::PrepareDependents(Thumb2Assembler* assembler) {
   // For each Fixup, it's easy to find the Fixups that it depends on as they are either
   // the following or the preceding Fixups until we find the target. However, for fixup
@@ -34,24 +60,16 @@
   // index and count. (Instead of having a per-fixup vector.)
 
   // Count the number of dependents of each Fixup.
-  const FixupId end_id = assembler->fixups_.size();
   Fixup* fixups = assembler->fixups_.data();
-  for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) {
-    uint32_t target = fixups[fixup_id].target_;
-    if (target > fixups[fixup_id].location_) {
-      for (FixupId id = fixup_id + 1u; id != end_id && fixups[id].location_ < target; ++id) {
-        fixups[id].dependents_count_ += 1u;
-      }
-    } else {
-      for (FixupId id = fixup_id; id != 0u && fixups[id - 1u].location_ >= target; --id) {
-        fixups[id - 1u].dependents_count_ += 1u;
-      }
-    }
-  }
+  ForExpandableDependencies(
+      assembler,
+      [fixups](FixupId dependency, FixupId dependent ATTRIBUTE_UNUSED) {
+        fixups[dependency].dependents_count_ += 1u;
+      });
   // Assign index ranges in fixup_dependents_ to individual fixups. Record the end of the
   // range in dependents_start_; we shall later decrement it as we fill in fixup_dependents_.
   uint32_t number_of_dependents = 0u;
-  for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) {
+  for (FixupId fixup_id = 0u, end_id = assembler->fixups_.size(); fixup_id != end_id; ++fixup_id) {
     number_of_dependents += fixups[fixup_id].dependents_count_;
     fixups[fixup_id].dependents_start_ = number_of_dependents;
   }
@@ -59,22 +77,14 @@
     return;
   }
   // Create and fill in the fixup_dependents_.
-  assembler->fixup_dependents_.reset(new FixupId[number_of_dependents]);
-  FixupId* dependents = assembler->fixup_dependents_.get();
-  for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) {
-    uint32_t target = fixups[fixup_id].target_;
-    if (target > fixups[fixup_id].location_) {
-      for (FixupId id = fixup_id + 1u; id != end_id && fixups[id].location_ < target; ++id) {
-        fixups[id].dependents_start_ -= 1u;
-        dependents[fixups[id].dependents_start_] = fixup_id;
-      }
-    } else {
-      for (FixupId id = fixup_id; id != 0u && fixups[id - 1u].location_ >= target; --id) {
-        fixups[id - 1u].dependents_start_ -= 1u;
-        dependents[fixups[id - 1u].dependents_start_] = fixup_id;
-      }
-    }
-  }
+  assembler->fixup_dependents_.resize(number_of_dependents);
+  FixupId* dependents = assembler->fixup_dependents_.data();
+  ForExpandableDependencies(
+      assembler,
+      [fixups, dependents](FixupId dependency, FixupId dependent) {
+        fixups[dependency].dependents_start_ -= 1u;
+        dependents[fixups[dependency].dependents_start_] = dependent;
+      });
 }
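
PrepareDependents() now lays out all dependent lists in one flat ArenaVector using a classic two-pass scheme: count entries per fixup, turn the counts into running end offsets, then fill the flat array while decrementing each offset back to its range start. The same idea in a self-contained sketch (BuildFlatDependents and its types are invented for illustration):

    #include <cstddef>
    #include <cstdint>
    #include <utility>
    #include <vector>

    // Two-pass flat adjacency layout: count entries per node, record each
    // node's range *end* as a running total, then fill the flat array while
    // walking each recorded offset back down to the true range start.
    std::vector<uint32_t> BuildFlatDependents(
        const std::vector<std::pair<uint32_t, uint32_t>>& edges,  // (node, dependent)
        size_t num_nodes,
        std::vector<uint32_t>* starts) {  // Out: per-node range start.
      std::vector<uint32_t> counts(num_nodes, 0u);
      for (const auto& edge : edges) {
        counts[edge.first] += 1u;
      }
      starts->assign(num_nodes, 0u);
      uint32_t total = 0u;
      for (size_t i = 0; i != num_nodes; ++i) {
        total += counts[i];
        (*starts)[i] = total;  // Temporarily the end of node i's range.
      }
      std::vector<uint32_t> flat(total);
      for (const auto& edge : edges) {
        (*starts)[edge.first] -= 1u;  // Decrement towards the range start.
        flat[(*starts)[edge.first]] = edge.second;
      }
      return flat;
    }
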
 
 void Thumb2Assembler::BindLabel(Label* label, uint32_t bound_pc) {
@@ -115,6 +125,7 @@
                                           std::deque<FixupId>* fixups_to_recalculate) {
   uint32_t adjustment = fixup->AdjustSizeIfNeeded(*current_code_size);
   if (adjustment != 0u) {
+    DCHECK(fixup->CanExpand());
     *current_code_size += adjustment;
     for (FixupId dependent_id : fixup->Dependents(*this)) {
       Fixup* dependent = GetFixup(dependent_id);
@@ -256,7 +267,10 @@
     for (JumpTable& table : jump_tables_) {
       // Bulk ensure capacity, as this may be large.
       size_t orig_size = buffer_.Size();
-      buffer_.ExtendCapacity(orig_size + table.GetSize());
+      size_t required_capacity = orig_size + table.GetSize();
+      if (required_capacity > buffer_.Capacity()) {
+        buffer_.ExtendCapacity(required_capacity);
+      }
 #ifndef NDEBUG
       buffer_.has_ensured_capacity_ = true;
 #endif
@@ -2543,9 +2557,19 @@
       }
     } else {
       branch_type = Fixup::kUnconditional;             // B.
+      // The T2 encoding offset is `SignExtend(imm11:'0', 32)` and there is a PC adjustment of 4.
+      static constexpr size_t kMaxT2BackwardDistance = (1u << 11) - 4u;
+      if (!use32bit && label->IsBound() && pc - label->Position() > kMaxT2BackwardDistance) {
+        use32bit = true;
+      }
     }
   } else {
     branch_type = Fixup::kConditional;                 // B<cond>.
+    // The T1 encoding offset is `SignExtend(imm8:'0', 32)` and there is a PC adjustment of 4.
+    static constexpr size_t kMaxT1BackwardDistance = (1u << 8) - 4u;
+    if (!use32bit && label->IsBound() && pc - label->Position() > kMaxT1BackwardDistance) {
+      use32bit = true;
+    }
   }
 
   Fixup::Size size = use32bit ? Fixup::kBranch32Bit : Fixup::kBranch16Bit;
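
The new constants fall straight out of the encodings: the T2 unconditional B stores imm11:'0', i.e. offsets in [-2048, +2046], and the T1 conditional B<cond> stores imm8:'0', i.e. offsets in [-256, +254]; since the Thumb PC reads 4 bytes past the branch, the farthest backward target a bound label may sit at is 2048 - 4 = 2044 bytes (T2) and 256 - 4 = 252 bytes (T1). As compile-time checks (illustrative only):

    // Back-of-the-envelope checks of the limits used above.
    static_assert((1u << 11) - 4u == 2044u, "T2 backward reach: 2048 minus PC adjustment");
    static_assert((1u << 8) - 4u == 252u, "T1 backward reach: 256 minus PC adjustment");
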
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 6b61aca..ce310a4 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -21,6 +21,7 @@
 #include <utility>
 #include <vector>
 
+#include "base/arena_containers.h"
 #include "base/logging.h"
 #include "constants_arm.h"
 #include "utils/arm/managed_register_arm.h"
@@ -33,14 +34,16 @@
 
 class Thumb2Assembler FINAL : public ArmAssembler {
  public:
-  explicit Thumb2Assembler(bool can_relocate_branches = true)
-      : can_relocate_branches_(can_relocate_branches),
+  explicit Thumb2Assembler(ArenaAllocator* arena, bool can_relocate_branches = true)
+      : ArmAssembler(arena),
+        can_relocate_branches_(can_relocate_branches),
         force_32bit_(false),
         it_cond_index_(kNoItCondition),
         next_condition_(AL),
-        fixups_(),
-        fixup_dependents_(),
-        literals_(),
+        fixups_(arena->Adapter(kArenaAllocAssembler)),
+        fixup_dependents_(arena->Adapter(kArenaAllocAssembler)),
+        literals_(arena->Adapter(kArenaAllocAssembler)),
+        jump_tables_(arena->Adapter(kArenaAllocAssembler)),
         last_position_adjustment_(0u),
         last_old_position_(0u),
         last_fixup_id_(0u) {
@@ -535,6 +538,20 @@
       return GetType() >= kLoadLiteralNarrow;
     }
 
+    // Returns whether the Fixup can expand from the original size.
+    bool CanExpand() const {
+      switch (GetOriginalSize()) {
+        case kBranch32Bit:
+        case kCbxz48Bit:
+        case kLiteralFar:
+        case kLiteralAddrFar:
+        case kLongOrFPLiteralFar:
+          return false;
+        default:
+          return true;
+      }
+    }
+
     Size GetOriginalSize() const {
       return original_size_;
     }
@@ -558,9 +575,9 @@
     // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_.
     static void PrepareDependents(Thumb2Assembler* assembler);
 
-    ArrayRef<FixupId> Dependents(const Thumb2Assembler& assembler) const {
-      return ArrayRef<FixupId>(assembler.fixup_dependents_.get() + dependents_start_,
-                               dependents_count_);
+    ArrayRef<const FixupId> Dependents(const Thumb2Assembler& assembler) const {
+      return ArrayRef<const FixupId>(assembler.fixup_dependents_).SubArray(dependents_start_,
+                                                                           dependents_count_);
     }
 
     // Resolve a branch when the target is known.
@@ -608,6 +625,7 @@
           dependents_count_(0u),
           dependents_start_(0u) {
     }
+
     static size_t SizeInBytes(Size size);
 
     // The size of padding added before the literal pool.
@@ -620,6 +638,9 @@
 
     int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const;
 
+    template <typename Function>
+    static void ForExpandableDependencies(Thumb2Assembler* assembler, Function fn);
+
     static constexpr uint32_t kUnresolved = 0xffffffff;     // Value for target_ for unresolved.
 
     const Register rn_;   // Rn for cbnz/cbz, Rt for literal loads.
@@ -839,15 +860,15 @@
   static int16_t AdrEncoding16(Register rd, int32_t offset);
   static int32_t AdrEncoding32(Register rd, int32_t offset);
 
-  std::vector<Fixup> fixups_;
-  std::unique_ptr<FixupId[]> fixup_dependents_;
+  ArenaVector<Fixup> fixups_;
+  ArenaVector<FixupId> fixup_dependents_;
 
   // Use std::deque<> for literal labels to allow insertions at the end
   // without invalidating pointers and references to existing elements.
-  std::deque<Literal> literals_;
+  ArenaDeque<Literal> literals_;
 
   // Jump table list.
-  std::deque<JumpTable> jump_tables_;
+  ArenaDeque<JumpTable> jump_tables_;
 
   // Data for AdjustedPosition(), see the description there.
   uint32_t last_position_adjustment_;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 650b089..b5cafcb 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -372,6 +372,31 @@
   DriverStr(expected, "StoreWordPairToNonThumbOffset");
 }
 
+TEST_F(AssemblerThumb2Test, DistantBackBranch) {
+  Label start, end;
+  __ Bind(&start);
+  constexpr size_t kLdrR0R0Count1 = 256;
+  for (size_t i = 0; i != kLdrR0R0Count1; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ b(&end, arm::EQ);
+  __ b(&start, arm::LT);
+  constexpr size_t kLdrR0R0Count2 = 256;
+  for (size_t i = 0; i != kLdrR0R0Count2; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ Bind(&end);
+
+  std::string expected =
+      "0:\n" +
+      RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") +
+      "beq 1f\n"
+      "blt 0b\n" +
+      RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+      "1:\n";
+  DriverStr(expected, "DistantBackBranch");
+}
+
 TEST_F(AssemblerThumb2Test, TwoCbzMaxOffset) {
   Label label0, label1, label2;
   __ cbz(arm::R0, &label1);
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 0e17512..eb5112b 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -32,10 +32,8 @@
 #endif
 
 void Arm64Assembler::FinalizeCode() {
-  if (!exception_blocks_.empty()) {
-    for (size_t i = 0; i < exception_blocks_.size(); i++) {
-      EmitExceptionPoll(exception_blocks_.at(i));
-    }
+  for (const std::unique_ptr<Arm64Exception>& exception : exception_blocks_) {
+    EmitExceptionPoll(exception.get());
   }
   ___ FinalizeCode();
 }
@@ -613,10 +611,9 @@
 void Arm64Assembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) {
   CHECK_ALIGNED(stack_adjust, kStackAlignment);
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  Arm64Exception *current_exception = new Arm64Exception(scratch, stack_adjust);
-  exception_blocks_.push_back(current_exception);
+  exception_blocks_.emplace_back(new Arm64Exception(scratch, stack_adjust));
   LoadFromOffset(scratch.AsXRegister(), TR, Thread::ExceptionOffset<8>().Int32Value());
-  ___ Cbnz(reg_x(scratch.AsXRegister()), current_exception->Entry());
+  ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry());
 }
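
Ownership of the exception blocks moves into the container itself; the raw pointer that used to be kept across the push_back is replaced by back(). A minimal illustration of the pattern (std::vector and int stand in for the ArenaVector and Arm64Exception used here):

    #include <memory>
    #include <vector>

    // Sketch: the container owns each element; the element just inserted is
    // reached through back() instead of a separately held raw pointer.
    void Demo() {
      std::vector<std::unique_ptr<int>> blocks;
      blocks.emplace_back(new int(42));
      int* latest = blocks.back().get();  // Valid for as long as `blocks` lives.
      *latest += 1;
    }
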
 
 void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) {
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 7b25b8f..c4e5de7 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -21,6 +21,7 @@
 #include <memory>
 #include <vector>
 
+#include "base/arena_containers.h"
 #include "base/logging.h"
 #include "constants_arm64.h"
 #include "utils/arm64/managed_register_arm64.h"
@@ -61,13 +62,34 @@
   kStoreDWord
 };
 
-class Arm64Exception;
+class Arm64Exception {
+ private:
+  Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust)
+      : scratch_(scratch), stack_adjust_(stack_adjust) {
+  }
+
+  vixl::Label* Entry() { return &exception_entry_; }
+
+  // Register used for passing Thread::Current()->exception_ .
+  const Arm64ManagedRegister scratch_;
+
+  // Stack adjust for ExceptionPool.
+  const size_t stack_adjust_;
+
+  vixl::Label exception_entry_;
+
+  friend class Arm64Assembler;
+  DISALLOW_COPY_AND_ASSIGN(Arm64Exception);
+};
 
 class Arm64Assembler FINAL : public Assembler {
  public:
   // We indicate the size of the initial code generation buffer to the VIXL
  // assembler. From there it will automatically manage the buffer.
-  Arm64Assembler() : vixl_masm_(new vixl::MacroAssembler(kArm64BaseBufferSize)) {}
+  explicit Arm64Assembler(ArenaAllocator* arena)
+      : Assembler(arena),
+        exception_blocks_(arena->Adapter(kArenaAllocAssembler)),
+        vixl_masm_(new vixl::MacroAssembler(kArm64BaseBufferSize)) {}
 
   virtual ~Arm64Assembler() {
     delete vixl_masm_;
@@ -249,7 +271,7 @@
   void AddConstant(XRegister rd, XRegister rn, int32_t value, vixl::Condition cond = vixl::al);
 
   // List of exception blocks to generate at the end of the code cache.
-  std::vector<Arm64Exception*> exception_blocks_;
+  ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_;
 
  public:
   // Vixl assembler.
@@ -259,26 +281,6 @@
   friend class Arm64ManagedRegister_VixlRegisters_Test;
 };
 
-class Arm64Exception {
- private:
-  Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust)
-      : scratch_(scratch), stack_adjust_(stack_adjust) {
-    }
-
-  vixl::Label* Entry() { return &exception_entry_; }
-
-  // Register used for passing Thread::Current()->exception_ .
-  const Arm64ManagedRegister scratch_;
-
-  // Stack adjust for ExceptionPool.
-  const size_t stack_adjust_;
-
-  vixl::Label exception_entry_;
-
-  friend class Arm64Assembler;
-  DISALLOW_COPY_AND_ASSIGN(Arm64Exception);
-};
-
 }  // namespace arm64
 }  // namespace art
 
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index f784d2c..e6c3a18 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -44,14 +44,10 @@
 
 namespace art {
 
-static uint8_t* NewContents(size_t capacity) {
-  return new uint8_t[capacity];
-}
-
-
-AssemblerBuffer::AssemblerBuffer() {
+AssemblerBuffer::AssemblerBuffer(ArenaAllocator* arena)
+    : arena_(arena) {
   static const size_t kInitialBufferCapacity = 4 * KB;
-  contents_ = NewContents(kInitialBufferCapacity);
+  contents_ = arena_->AllocArray<uint8_t>(kInitialBufferCapacity, kArenaAllocAssembler);
   cursor_ = contents_;
   limit_ = ComputeLimit(contents_, kInitialBufferCapacity);
   fixup_ = nullptr;
@@ -68,7 +64,9 @@
 
 
 AssemblerBuffer::~AssemblerBuffer() {
-  delete[] contents_;
+  if (arena_->IsRunningOnMemoryTool()) {
+    arena_->MakeInaccessible(contents_, Capacity());
+  }
 }
 
 
@@ -96,23 +94,17 @@
 void AssemblerBuffer::ExtendCapacity(size_t min_capacity) {
   size_t old_size = Size();
   size_t old_capacity = Capacity();
+  DCHECK_GT(min_capacity, old_capacity);
   size_t new_capacity = std::min(old_capacity * 2, old_capacity + 1 * MB);
   new_capacity = std::max(new_capacity, min_capacity);
 
   // Allocate the new data area and copy contents of the old one to it.
-  uint8_t* new_contents = NewContents(new_capacity);
-  memmove(reinterpret_cast<void*>(new_contents),
-          reinterpret_cast<void*>(contents_),
-          old_size);
-
-  // Compute the relocation delta and switch to the new contents area.
-  ptrdiff_t delta = new_contents - contents_;
-  delete[] contents_;
-  contents_ = new_contents;
+  contents_ = reinterpret_cast<uint8_t*>(
+      arena_->Realloc(contents_, old_capacity, new_capacity, kArenaAllocAssembler));
 
   // Update the cursor and recompute the limit.
-  cursor_ += delta;
-  limit_ = ComputeLimit(new_contents, new_capacity);
+  cursor_ = contents_ + old_size;
+  limit_ = ComputeLimit(contents_, new_capacity);
 
   // Verify internal state.
   CHECK_EQ(Capacity(), new_capacity);
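
For reference, the growth policy above in isolation (a standalone restatement, not ART code): double the capacity, cap the increment at 1 MiB, and never return less than the requested minimum.

    #include <algorithm>
    #include <cstddef>

    // Amortized growth with a bounded step: doubling for small buffers,
    // +1 MiB at most for large ones, clamped up to the caller's minimum.
    size_t NextCapacity(size_t old_capacity, size_t min_capacity) {
      constexpr size_t kMiB = 1024u * 1024u;
      size_t new_capacity = std::min(old_capacity * 2, old_capacity + kMiB);
      return std::max(new_capacity, min_capacity);
    }
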
@@ -129,36 +121,40 @@
   }
 }
 
-Assembler* Assembler::Create(InstructionSet instruction_set,
-                             const InstructionSetFeatures* instruction_set_features) {
+std::unique_ptr<Assembler> Assembler::Create(
+    ArenaAllocator* arena,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features) {
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
-      return new arm::Arm32Assembler();
+      return std::unique_ptr<Assembler>(new (arena) arm::Arm32Assembler(arena));
     case kThumb2:
-      return new arm::Thumb2Assembler();
+      return std::unique_ptr<Assembler>(new (arena) arm::Thumb2Assembler(arena));
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
-      return new arm64::Arm64Assembler();
+      return std::unique_ptr<Assembler>(new (arena) arm64::Arm64Assembler(arena));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return new mips::MipsAssembler(instruction_set_features != nullptr
-                                         ? instruction_set_features->AsMipsInstructionSetFeatures()
-                                         : nullptr);
+      return std::unique_ptr<Assembler>(new (arena) mips::MipsAssembler(
+          arena,
+          instruction_set_features != nullptr
+              ? instruction_set_features->AsMipsInstructionSetFeatures()
+              : nullptr));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
-      return new mips64::Mips64Assembler();
+      return std::unique_ptr<Assembler>(new (arena) mips64::Mips64Assembler(arena));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
-      return new x86::X86Assembler();
+      return std::unique_ptr<Assembler>(new (arena) x86::X86Assembler(arena));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
-      return new x86_64::X86_64Assembler();
+      return std::unique_ptr<Assembler>(new (arena) x86_64::X86_64Assembler(arena));
 #endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
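
Callers now set up the arena themselves, exactly as CreateTrampoline64() does above; a condensed sketch (ART includes omitted, BuildSomething is an invented name):

    // Sketch of the new call sequence: both the Assembler object and its
    // underlying buffer are carved out of `arena` and vanish with it.
    void BuildSomething() {
      ArenaPool pool;
      ArenaAllocator arena(&pool);
      std::unique_ptr<Assembler> assembler = Assembler::Create(&arena, kThumb2);
      // ... emit instructions ...
      assembler->FinalizeCode();
    }
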
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 414ea7e..5267dc3 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -22,6 +22,8 @@
 #include "arch/instruction_set.h"
 #include "arch/instruction_set_features.h"
 #include "arm/constants_arm.h"
+#include "base/arena_allocator.h"
+#include "base/arena_object.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "debug/dwarf/debug_frame_opcode_writer.h"
@@ -60,7 +62,7 @@
 };
 
 // Parent of all queued slow paths, emitted during finalization
-class SlowPath {
+class SlowPath : public DeletableArenaObject<kArenaAllocAssembler> {
  public:
   SlowPath() : next_(nullptr) {}
   virtual ~SlowPath() {}
@@ -85,9 +87,13 @@
 
 class AssemblerBuffer {
  public:
-  AssemblerBuffer();
+  explicit AssemblerBuffer(ArenaAllocator* arena);
   ~AssemblerBuffer();
 
+  ArenaAllocator* GetArena() {
+    return arena_;
+  }
+
   // Basic support for emitting, loading, and storing.
   template<typename T> void Emit(T value) {
     CHECK(HasEnsuredCapacity());
@@ -172,8 +178,8 @@
   class EnsureCapacity {
    public:
     explicit EnsureCapacity(AssemblerBuffer* buffer) {
-      if (buffer->cursor() >= buffer->limit()) {
-        buffer->ExtendCapacity();
+      if (buffer->cursor() > buffer->limit()) {
+        buffer->ExtendCapacity(buffer->Size() + kMinimumGap);
       }
       // In debug mode, we save the assembler buffer along with the gap
       // size before we start emitting to the buffer. This allows us to
@@ -213,7 +219,9 @@
   class EnsureCapacity {
    public:
     explicit EnsureCapacity(AssemblerBuffer* buffer) {
-      if (buffer->cursor() >= buffer->limit()) buffer->ExtendCapacity();
+      if (buffer->cursor() > buffer->limit()) {
+        buffer->ExtendCapacity(buffer->Size() + kMinimumGap);
+      }
     }
   };
 
@@ -227,7 +235,14 @@
   // Returns the position in the instruction stream.
   int GetPosition() { return  cursor_ - contents_; }
 
-  void ExtendCapacity(size_t min_capacity = 0u);
+  size_t Capacity() const {
+    CHECK_GE(limit_, contents_);
+    return (limit_ - contents_) + kMinimumGap;
+  }
+
+  // Unconditionally increase the capacity.
+  // The provided `min_capacity` must be higher than current `Capacity()`.
+  void ExtendCapacity(size_t min_capacity);
 
  private:
   // The limit is set to kMinimumGap bytes before the end of the data area.
@@ -235,6 +250,7 @@
   // for a single, fast space check per instruction.
   static const int kMinimumGap = 32;
 
+  ArenaAllocator* arena_;
   uint8_t* contents_;
   uint8_t* cursor_;
   uint8_t* limit_;
@@ -248,10 +264,6 @@
 
   uint8_t* cursor() const { return cursor_; }
   uint8_t* limit() const { return limit_; }
-  size_t Capacity() const {
-    CHECK_GE(limit_, contents_);
-    return (limit_ - contents_) + kMinimumGap;
-  }
 
   // Process the fixup chain starting at the given fixup. The offset is
   // non-zero for fixups in the body if the preamble is non-empty.
@@ -299,8 +311,10 @@
   // Override the last delayed PC. The new PC can be out of order.
   void OverrideDelayedPC(size_t pc) {
     DCHECK(delay_emitting_advance_pc_);
-    DCHECK(!delayed_advance_pcs_.empty());
-    delayed_advance_pcs_.back().pc = pc;
+    if (enabled_) {
+      DCHECK(!delayed_advance_pcs_.empty());
+      delayed_advance_pcs_.back().pc = pc;
+    }
   }
 
   // Return the number of delayed advance PC entries.
@@ -338,10 +352,12 @@
   std::vector<DelayedAdvancePC> delayed_advance_pcs_;
 };
 
-class Assembler {
+class Assembler : public DeletableArenaObject<kArenaAllocAssembler> {
  public:
-  static Assembler* Create(InstructionSet instruction_set,
-                           const InstructionSetFeatures* instruction_set_features = nullptr);
+  static std::unique_ptr<Assembler> Create(
+      ArenaAllocator* arena,
+      InstructionSet instruction_set,
+      const InstructionSetFeatures* instruction_set_features = nullptr);
 
   // Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
   virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); }
@@ -504,7 +520,11 @@
   DebugFrameOpCodeWriterForAssembler& cfi() { return cfi_; }
 
  protected:
-  Assembler() : buffer_(), cfi_(this) {}
+  explicit Assembler(ArenaAllocator* arena) : buffer_(arena), cfi_(this) {}
+
+  ArenaAllocator* GetArena() {
+    return buffer_.GetArena();
+  }
 
   AssemblerBuffer buffer_;
 
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 2579ddb..084e901 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -460,7 +460,8 @@
   explicit AssemblerTest() {}
 
   void SetUp() OVERRIDE {
-    assembler_.reset(new Ass());
+    arena_.reset(new ArenaAllocator(&pool_));
+    assembler_.reset(new (arena_.get()) Ass(arena_.get()));
     test_helper_.reset(
         new AssemblerTestInfrastructure(GetArchitectureString(),
                                         GetAssemblerCmdName(),
@@ -476,6 +477,8 @@
 
   void TearDown() OVERRIDE {
     test_helper_.reset();  // Clean up the helper.
+    assembler_.reset();
+    arena_.reset();
   }
 
   // Override this to set up any architecture-specific things, e.g., register vectors.
@@ -919,6 +922,8 @@
 
   static constexpr size_t kWarnManyCombinationsThreshold = 500;
 
+  ArenaPool pool_;
+  std::unique_ptr<ArenaAllocator> arena_;
   std::unique_ptr<Ass> assembler_;
   std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
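
The explicit resets in TearDown() encode a lifetime constraint rather than mere tidiness: the assembler is placement-allocated inside the arena, so it must be destroyed while the arena is still alive, and the arena before the pool member. Spelled out (a fragment of the fixture above, with added comments):

    // Destruction must run inner-to-outer:
    assembler_.reset();  // ~Ass() runs while arena_ still owns its memory.
    arena_.reset();      // Returns the arena's memory to pool_.
    // pool_ is a direct member and is destroyed last, automatically.
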
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 2df9b17..9c9271d 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -32,7 +32,7 @@
 // Include results file (generated manually)
 #include "assembler_thumb_test_expected.cc.inc"
 
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 // This controls whether the results are printed to the
 // screen or compared against the expected output.
 // To generate new expected output, set this to true and
@@ -72,7 +72,7 @@
 }
 
 std::string GetToolsDir() {
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
   // This will only work on the host.  There is no as, objcopy or objdump on the device.
   static std::string toolsdir;
 
@@ -89,7 +89,7 @@
 }
 
 void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* const* results) {
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
   static std::string toolsdir = GetToolsDir();
 
   ScratchFile file;
@@ -169,7 +169,7 @@
 
   snprintf(buf, sizeof(buf), "%s.oo", filename);
   unlink(buf);
-#endif
+#endif  // ART_TARGET_ANDROID
 }
 
 #define __ assembler->
@@ -195,11 +195,18 @@
 
 #undef __
 
+class Thumb2AssemblerTest : public ::testing::Test {
+ public:
+  Thumb2AssemblerTest() : pool(), arena(&pool), assembler(&arena) { }
+
+  ArenaPool pool;
+  ArenaAllocator arena;
+  arm::Thumb2Assembler assembler;
+};
+
 #define __ assembler.
 
-TEST(Thumb2AssemblerTest, SimpleMov) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SimpleMov) {
   __ movs(R0, ShifterOperand(R1));
   __ mov(R0, ShifterOperand(R1));
   __ mov(R8, ShifterOperand(R9));
@@ -210,8 +217,7 @@
   EmitAndCheck(&assembler, "SimpleMov");
 }
 
-TEST(Thumb2AssemblerTest, SimpleMov32) {
-  arm::Thumb2Assembler assembler;
+TEST_F(Thumb2AssemblerTest, SimpleMov32) {
   __ Force32Bit();
 
   __ mov(R0, ShifterOperand(R1));
@@ -220,9 +226,7 @@
   EmitAndCheck(&assembler, "SimpleMov32");
 }
 
-TEST(Thumb2AssemblerTest, SimpleMovAdd) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SimpleMovAdd) {
   __ mov(R0, ShifterOperand(R1));
   __ adds(R0, R1, ShifterOperand(R2));
   __ add(R0, R1, ShifterOperand(0));
@@ -230,9 +234,7 @@
   EmitAndCheck(&assembler, "SimpleMovAdd");
 }
 
-TEST(Thumb2AssemblerTest, DataProcessingRegister) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingRegister) {
   // 32 bit variants using low registers.
   __ mvn(R0, ShifterOperand(R1), AL, kCcKeep);
   __ add(R0, R1, ShifterOperand(R2), AL, kCcKeep);
@@ -364,9 +366,7 @@
   EmitAndCheck(&assembler, "DataProcessingRegister");
 }
 
-TEST(Thumb2AssemblerTest, DataProcessingImmediate) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingImmediate) {
   __ mov(R0, ShifterOperand(0x55));
   __ mvn(R0, ShifterOperand(0x55));
   __ add(R0, R1, ShifterOperand(0x55));
@@ -397,9 +397,7 @@
   EmitAndCheck(&assembler, "DataProcessingImmediate");
 }
 
-TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediate) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingModifiedImmediate) {
   __ mov(R0, ShifterOperand(0x550055));
   __ mvn(R0, ShifterOperand(0x550055));
   __ add(R0, R1, ShifterOperand(0x550055));
@@ -422,9 +420,7 @@
 }
 
 
-TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediates) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingModifiedImmediates) {
   __ mov(R0, ShifterOperand(0x550055));
   __ mov(R0, ShifterOperand(0x55005500));
   __ mov(R0, ShifterOperand(0x55555555));
@@ -436,9 +432,7 @@
   EmitAndCheck(&assembler, "DataProcessingModifiedImmediates");
 }
 
-TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingShiftedRegister) {
   // 16-bit variants.
   __ movs(R3, ShifterOperand(R4, LSL, 4));
   __ movs(R3, ShifterOperand(R4, LSR, 5));
@@ -467,10 +461,9 @@
   EmitAndCheck(&assembler, "DataProcessingShiftedRegister");
 }
 
-TEST(Thumb2AssemblerTest, ShiftImmediate) {
+TEST_F(Thumb2AssemblerTest, ShiftImmediate) {
   // Note: This test produces the same results as DataProcessingShiftedRegister
   // but it does so using shift functions instead of mov().
-  arm::Thumb2Assembler assembler;
 
   // 16-bit variants.
   __ Lsl(R3, R4, 4);
@@ -500,9 +493,7 @@
   EmitAndCheck(&assembler, "ShiftImmediate");
 }
 
-TEST(Thumb2AssemblerTest, BasicLoad) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, BasicLoad) {
   __ ldr(R3, Address(R4, 24));
   __ ldrb(R3, Address(R4, 24));
   __ ldrh(R3, Address(R4, 24));
@@ -522,9 +513,7 @@
 }
 
 
-TEST(Thumb2AssemblerTest, BasicStore) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, BasicStore) {
   __ str(R3, Address(R4, 24));
   __ strb(R3, Address(R4, 24));
   __ strh(R3, Address(R4, 24));
@@ -539,9 +528,7 @@
   EmitAndCheck(&assembler, "BasicStore");
 }
 
-TEST(Thumb2AssemblerTest, ComplexLoad) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, ComplexLoad) {
   __ ldr(R3, Address(R4, 24, Address::Mode::Offset));
   __ ldr(R3, Address(R4, 24, Address::Mode::PreIndex));
   __ ldr(R3, Address(R4, 24, Address::Mode::PostIndex));
@@ -581,9 +568,7 @@
 }
 
 
-TEST(Thumb2AssemblerTest, ComplexStore) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, ComplexStore) {
   __ str(R3, Address(R4, 24, Address::Mode::Offset));
   __ str(R3, Address(R4, 24, Address::Mode::PreIndex));
   __ str(R3, Address(R4, 24, Address::Mode::PostIndex));
@@ -608,9 +593,7 @@
   EmitAndCheck(&assembler, "ComplexStore");
 }
 
-TEST(Thumb2AssemblerTest, NegativeLoadStore) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, NegativeLoadStore) {
   __ ldr(R3, Address(R4, -24, Address::Mode::Offset));
   __ ldr(R3, Address(R4, -24, Address::Mode::PreIndex));
   __ ldr(R3, Address(R4, -24, Address::Mode::PostIndex));
@@ -670,18 +653,14 @@
   EmitAndCheck(&assembler, "NegativeLoadStore");
 }
 
-TEST(Thumb2AssemblerTest, SimpleLoadStoreDual) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SimpleLoadStoreDual) {
   __ strd(R2, Address(R0, 24, Address::Mode::Offset));
   __ ldrd(R2, Address(R0, 24, Address::Mode::Offset));
 
   EmitAndCheck(&assembler, "SimpleLoadStoreDual");
 }
 
-TEST(Thumb2AssemblerTest, ComplexLoadStoreDual) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, ComplexLoadStoreDual) {
   __ strd(R2, Address(R0, 24, Address::Mode::Offset));
   __ strd(R2, Address(R0, 24, Address::Mode::PreIndex));
   __ strd(R2, Address(R0, 24, Address::Mode::PostIndex));
@@ -699,9 +678,7 @@
   EmitAndCheck(&assembler, "ComplexLoadStoreDual");
 }
 
-TEST(Thumb2AssemblerTest, NegativeLoadStoreDual) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, NegativeLoadStoreDual) {
   __ strd(R2, Address(R0, -24, Address::Mode::Offset));
   __ strd(R2, Address(R0, -24, Address::Mode::PreIndex));
   __ strd(R2, Address(R0, -24, Address::Mode::PostIndex));
@@ -719,9 +696,7 @@
   EmitAndCheck(&assembler, "NegativeLoadStoreDual");
 }
 
-TEST(Thumb2AssemblerTest, SimpleBranch) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SimpleBranch) {
   Label l1;
   __ mov(R0, ShifterOperand(2));
   __ Bind(&l1);
@@ -757,8 +732,7 @@
   EmitAndCheck(&assembler, "SimpleBranch");
 }
 
-TEST(Thumb2AssemblerTest, LongBranch) {
-  arm::Thumb2Assembler assembler;
+TEST_F(Thumb2AssemblerTest, LongBranch) {
   __ Force32Bit();
   // 32 bit branches.
   Label l1;
@@ -797,9 +771,7 @@
   EmitAndCheck(&assembler, "LongBranch");
 }
 
-TEST(Thumb2AssemblerTest, LoadMultiple) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadMultiple) {
   // 16 bit.
   __ ldm(DB_W, R4, (1 << R0 | 1 << R3));
 
@@ -813,9 +785,7 @@
   EmitAndCheck(&assembler, "LoadMultiple");
 }
 
-TEST(Thumb2AssemblerTest, StoreMultiple) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, StoreMultiple) {
   // 16 bit.
   __ stm(IA_W, R4, (1 << R0 | 1 << R3));
 
@@ -830,9 +800,7 @@
   EmitAndCheck(&assembler, "StoreMultiple");
 }
 
-TEST(Thumb2AssemblerTest, MovWMovT) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, MovWMovT) {
   // Always 32 bit.
   __ movw(R4, 0);
   __ movw(R4, 0x34);
@@ -848,9 +816,7 @@
   EmitAndCheck(&assembler, "MovWMovT");
 }
 
-TEST(Thumb2AssemblerTest, SpecialAddSub) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SpecialAddSub) {
   __ add(R2, SP, ShifterOperand(0x50));   // 16 bit.
   __ add(SP, SP, ShifterOperand(0x50));   // 16 bit.
   __ add(R8, SP, ShifterOperand(0x50));   // 32 bit.
@@ -869,9 +835,7 @@
   EmitAndCheck(&assembler, "SpecialAddSub");
 }
 
-TEST(Thumb2AssemblerTest, LoadFromOffset) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadFromOffset) {
   __ LoadFromOffset(kLoadWord, R2, R4, 12);
   __ LoadFromOffset(kLoadWord, R2, R4, 0xfff);
   __ LoadFromOffset(kLoadWord, R2, R4, 0x1000);
@@ -901,9 +865,7 @@
   EmitAndCheck(&assembler, "LoadFromOffset");
 }
 
-TEST(Thumb2AssemblerTest, StoreToOffset) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, StoreToOffset) {
   __ StoreToOffset(kStoreWord, R2, R4, 12);
   __ StoreToOffset(kStoreWord, R2, R4, 0xfff);
   __ StoreToOffset(kStoreWord, R2, R4, 0x1000);
@@ -931,9 +893,7 @@
   EmitAndCheck(&assembler, "StoreToOffset");
 }
 
-TEST(Thumb2AssemblerTest, IfThen) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, IfThen) {
   __ it(EQ);
   __ mov(R1, ShifterOperand(1), EQ);
 
@@ -964,9 +924,7 @@
   EmitAndCheck(&assembler, "IfThen");
 }
 
-TEST(Thumb2AssemblerTest, CbzCbnz) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CbzCbnz) {
   Label l1;
   __ cbz(R2, &l1);
   __ mov(R1, ShifterOperand(3));
@@ -984,9 +942,7 @@
   EmitAndCheck(&assembler, "CbzCbnz");
 }
 
-TEST(Thumb2AssemblerTest, Multiply) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Multiply) {
   __ mul(R0, R1, R0);
   __ mul(R0, R1, R2);
   __ mul(R8, R9, R8);
@@ -1004,9 +960,7 @@
   EmitAndCheck(&assembler, "Multiply");
 }
 
-TEST(Thumb2AssemblerTest, Divide) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Divide) {
   __ sdiv(R0, R1, R2);
   __ sdiv(R8, R9, R10);
 
@@ -1016,9 +970,7 @@
   EmitAndCheck(&assembler, "Divide");
 }
 
-TEST(Thumb2AssemblerTest, VMov) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, VMov) {
   __ vmovs(S1, 1.0);
   __ vmovd(D1, 1.0);
 
@@ -1029,9 +981,7 @@
 }
 
 
-TEST(Thumb2AssemblerTest, BasicFloatingPoint) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, BasicFloatingPoint) {
   __ vadds(S0, S1, S2);
   __ vsubs(S0, S1, S2);
   __ vmuls(S0, S1, S2);
@@ -1055,9 +1005,7 @@
   EmitAndCheck(&assembler, "BasicFloatingPoint");
 }
 
-TEST(Thumb2AssemblerTest, FloatingPointConversions) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, FloatingPointConversions) {
   __ vcvtsd(S2, D2);
   __ vcvtds(D2, S2);
 
@@ -1076,9 +1024,7 @@
   EmitAndCheck(&assembler, "FloatingPointConversions");
 }
 
-TEST(Thumb2AssemblerTest, FloatingPointComparisons) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, FloatingPointComparisons) {
   __ vcmps(S0, S1);
   __ vcmpd(D0, D1);
 
@@ -1088,35 +1034,27 @@
   EmitAndCheck(&assembler, "FloatingPointComparisons");
 }
 
-TEST(Thumb2AssemblerTest, Calls) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Calls) {
   __ blx(LR);
   __ bx(LR);
 
   EmitAndCheck(&assembler, "Calls");
 }
 
-TEST(Thumb2AssemblerTest, Breakpoint) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Breakpoint) {
   __ bkpt(0);
 
   EmitAndCheck(&assembler, "Breakpoint");
 }
 
-TEST(Thumb2AssemblerTest, StrR1) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, StrR1) {
   __ str(R1, Address(SP, 68));
   __ str(R1, Address(SP, 1068));
 
   EmitAndCheck(&assembler, "StrR1");
 }
 
-TEST(Thumb2AssemblerTest, VPushPop) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, VPushPop) {
   __ vpushs(S2, 4);
   __ vpushd(D2, 4);
 
@@ -1126,9 +1064,7 @@
   EmitAndCheck(&assembler, "VPushPop");
 }
 
-TEST(Thumb2AssemblerTest, Max16BitBranch) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Max16BitBranch) {
   Label l1;
   __ b(&l1);
   for (int i = 0 ; i < (1 << 11) ; i += 2) {
@@ -1140,9 +1076,7 @@
   EmitAndCheck(&assembler, "Max16BitBranch");
 }
 
-TEST(Thumb2AssemblerTest, Branch32) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Branch32) {
   Label l1;
   __ b(&l1);
   for (int i = 0 ; i < (1 << 11) + 2 ; i += 2) {
@@ -1154,9 +1088,7 @@
   EmitAndCheck(&assembler, "Branch32");
 }
 
-TEST(Thumb2AssemblerTest, CompareAndBranchMax) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CompareAndBranchMax) {
   Label l1;
   __ cbz(R4, &l1);
   for (int i = 0 ; i < (1 << 7) ; i += 2) {
@@ -1168,9 +1100,7 @@
   EmitAndCheck(&assembler, "CompareAndBranchMax");
 }
 
-TEST(Thumb2AssemblerTest, CompareAndBranchRelocation16) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CompareAndBranchRelocation16) {
   Label l1;
   __ cbz(R4, &l1);
   for (int i = 0 ; i < (1 << 7) + 2 ; i += 2) {
@@ -1182,9 +1112,7 @@
   EmitAndCheck(&assembler, "CompareAndBranchRelocation16");
 }
 
-TEST(Thumb2AssemblerTest, CompareAndBranchRelocation32) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CompareAndBranchRelocation32) {
   Label l1;
   __ cbz(R4, &l1);
   for (int i = 0 ; i < (1 << 11) + 2 ; i += 2) {
@@ -1196,9 +1124,7 @@
   EmitAndCheck(&assembler, "CompareAndBranchRelocation32");
 }
 
-TEST(Thumb2AssemblerTest, MixedBranch32) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, MixedBranch32) {
   Label l1;
   Label l2;
   __ b(&l1);      // Forwards.
@@ -1215,9 +1141,7 @@
   EmitAndCheck(&assembler, "MixedBranch32");
 }
 
-TEST(Thumb2AssemblerTest, Shifts) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Shifts) {
   // 16 bit selected for CcDontCare.
   __ Lsl(R0, R1, 5);
   __ Lsr(R0, R1, 5);
@@ -1292,9 +1216,7 @@
   EmitAndCheck(&assembler, "Shifts");
 }
 
-TEST(Thumb2AssemblerTest, LoadStoreRegOffset) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadStoreRegOffset) {
   // 16 bit.
   __ ldr(R0, Address(R1, R2));
   __ str(R0, Address(R1, R2));
@@ -1319,9 +1241,7 @@
   EmitAndCheck(&assembler, "LoadStoreRegOffset");
 }
 
-TEST(Thumb2AssemblerTest, LoadStoreLiteral) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadStoreLiteral) {
   __ ldr(R0, Address(4));
   __ str(R0, Address(4));
 
@@ -1337,9 +1257,7 @@
   EmitAndCheck(&assembler, "LoadStoreLiteral");
 }
 
-TEST(Thumb2AssemblerTest, LoadStoreLimits) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadStoreLimits) {
   __ ldr(R0, Address(R4, 124));     // 16 bit.
   __ ldr(R0, Address(R4, 128));     // 32 bit.
 
@@ -1367,9 +1285,7 @@
   EmitAndCheck(&assembler, "LoadStoreLimits");
 }
 
-TEST(Thumb2AssemblerTest, CompareAndBranch) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CompareAndBranch) {
   Label label;
   __ CompareAndBranchIfZero(arm::R0, &label);
   __ CompareAndBranchIfZero(arm::R11, &label);
@@ -1380,9 +1296,7 @@
   EmitAndCheck(&assembler, "CompareAndBranch");
 }
 
-TEST(Thumb2AssemblerTest, AddConstant) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, AddConstant) {
   // Low registers, Rd != Rn.
   __ AddConstant(R0, R1, 0);                          // MOV.
   __ AddConstant(R0, R1, 1);                          // 16-bit ADDS, encoding T1.
@@ -1626,9 +1540,7 @@
   EmitAndCheck(&assembler, "AddConstant");
 }
 
-TEST(Thumb2AssemblerTest, CmpConstant) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CmpConstant) {
   __ CmpConstant(R0, 0);                              // 16-bit CMP.
   __ CmpConstant(R1, 1);                              // 16-bit CMP.
   __ CmpConstant(R0, 7);                              // 16-bit CMP.
diff --git a/compiler/utils/intrusive_forward_list.h b/compiler/utils/intrusive_forward_list.h
new file mode 100644
index 0000000..ec2c087
--- /dev/null
+++ b/compiler/utils/intrusive_forward_list.h
@@ -0,0 +1,452 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_INTRUSIVE_FORWARD_LIST_H_
+#define ART_COMPILER_UTILS_INTRUSIVE_FORWARD_LIST_H_
+
+#include <stdint.h>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <type_traits>
+
+#include "base/logging.h"
+#include "base/macros.h"
+
+namespace art {
+
+struct IntrusiveForwardListHook {
+  IntrusiveForwardListHook() : next_hook(nullptr) { }
+  explicit IntrusiveForwardListHook(const IntrusiveForwardListHook* hook) : next_hook(hook) { }
+
+  // Allow copyable values but do not copy the hook; it is not part of the value.
+  IntrusiveForwardListHook(const IntrusiveForwardListHook& other ATTRIBUTE_UNUSED)
+      : next_hook(nullptr) { }
+  IntrusiveForwardListHook& operator=(const IntrusiveForwardListHook& src ATTRIBUTE_UNUSED) {
+    return *this;
+  }
+
+  mutable const IntrusiveForwardListHook* next_hook;
+};
+
+template <typename T, IntrusiveForwardListHook T::* NextPtr = &T::hook>
+class IntrusiveForwardListMemberHook;
+
+template <typename T, typename HookTraits = IntrusiveForwardListMemberHook<T>>
+class IntrusiveForwardList;
+
+template <typename T, typename HookTraits>
+class IntrusiveForwardListIterator : public std::iterator<std::forward_iterator_tag, T> {
+ public:
+  // Construct/copy/destroy (except the private constructor used by IntrusiveForwardList<>).
+  IntrusiveForwardListIterator() : hook_(nullptr) { }
+  IntrusiveForwardListIterator(const IntrusiveForwardListIterator& src) = default;
+  IntrusiveForwardListIterator& operator=(const IntrusiveForwardListIterator& src) = default;
+
+  // Conversion from iterator to const_iterator.
+  template <typename OtherT,
+            typename = typename std::enable_if<std::is_same<T, const OtherT>::value>::type>
+  IntrusiveForwardListIterator(const IntrusiveForwardListIterator<OtherT, HookTraits>& src)
+      : hook_(src.hook_) { }
+
+  // Iteration.
+  IntrusiveForwardListIterator& operator++() {
+    DCHECK(hook_ != nullptr);
+    hook_ = hook_->next_hook;
+    return *this;
+  }
+  IntrusiveForwardListIterator operator++(int) {
+    IntrusiveForwardListIterator tmp(*this);
+    ++*this;
+    return tmp;
+  }
+
+  // Dereference.
+  T& operator*() const {
+    DCHECK(hook_ != nullptr);
+    return *HookTraits::GetValue(hook_);
+  }
+  T* operator->() const {
+    return &**this;
+  }
+
+ private:
+  explicit IntrusiveForwardListIterator(const IntrusiveForwardListHook* hook) : hook_(hook) { }
+
+  const IntrusiveForwardListHook* hook_;
+
+  template <typename OtherT, typename OtherTraits>
+  friend class IntrusiveForwardListIterator;
+
+  template <typename OtherT, typename OtherTraits>
+  friend class IntrusiveForwardList;
+
+  template <typename OtherT1, typename OtherT2, typename OtherTraits>
+  friend typename std::enable_if<std::is_same<const OtherT1, const OtherT2>::value, bool>::type
+  operator==(const IntrusiveForwardListIterator<OtherT1, OtherTraits>& lhs,
+             const IntrusiveForwardListIterator<OtherT2, OtherTraits>& rhs);
+};
+
+template <typename T, typename OtherT, typename HookTraits>
+typename std::enable_if<std::is_same<const T, const OtherT>::value, bool>::type operator==(
+    const IntrusiveForwardListIterator<T, HookTraits>& lhs,
+    const IntrusiveForwardListIterator<OtherT, HookTraits>& rhs) {
+  return lhs.hook_ == rhs.hook_;
+}
+
+template <typename T, typename OtherT, typename HookTraits>
+typename std::enable_if<std::is_same<const T, const OtherT>::value, bool>::type operator!=(
+    const IntrusiveForwardListIterator<T, HookTraits>& lhs,
+    const IntrusiveForwardListIterator<OtherT, HookTraits>& rhs) {
+  return !(lhs == rhs);
+}
+
+// Intrusive version of std::forward_list<>. See also slist<> in Boost.Intrusive.
+//
+// This class template provides the same interface as std::forward_list<> as long
+// as the functions are meaningful for an intrusive container; this excludes emplace
+// functions and functions taking an std::initializer_list<> as the container does
+// not construct elements.
+template <typename T, typename HookTraits>
+class IntrusiveForwardList {
+ public:
+  typedef HookTraits hook_traits;
+  typedef       T  value_type;
+  typedef       T& reference;
+  typedef const T& const_reference;
+  typedef       T* pointer;
+  typedef const T* const_pointer;
+  typedef IntrusiveForwardListIterator<      T, hook_traits> iterator;
+  typedef IntrusiveForwardListIterator<const T, hook_traits> const_iterator;
+
+  // Construct/copy/destroy.
+  IntrusiveForwardList() = default;
+  template <typename InputIterator>
+  IntrusiveForwardList(InputIterator first, InputIterator last) : IntrusiveForwardList() {
+    insert_after(before_begin(), first, last);
+  }
+  IntrusiveForwardList(IntrusiveForwardList&& src) : first_(src.first_.next_hook) {
+    src.first_.next_hook = nullptr;
+  }
+  IntrusiveForwardList& operator=(const IntrusiveForwardList& src) = delete;
+  IntrusiveForwardList& operator=(IntrusiveForwardList&& src) {
+    IntrusiveForwardList tmp(std::move(src));
+    tmp.swap(*this);
+    return *this;
+  }
+  ~IntrusiveForwardList() = default;
+
+  // Iterators.
+  iterator before_begin() { return iterator(&first_); }
+  const_iterator before_begin() const { return const_iterator(&first_); }
+  iterator begin() { return iterator(first_.next_hook); }
+  const_iterator begin() const { return const_iterator(first_.next_hook); }
+  iterator end() { return iterator(nullptr); }
+  const_iterator end() const { return const_iterator(nullptr); }
+  const_iterator cbefore_begin() const { return const_iterator(&first_); }
+  const_iterator cbegin() const { return const_iterator(first_.next_hook); }
+  const_iterator cend() const { return const_iterator(nullptr); }
+
+  // Capacity.
+  bool empty() const { return begin() == end(); }
+  size_t max_size() { return static_cast<size_t>(-1); }
+
+  // Element access.
+  reference front() { return *begin(); }
+  const_reference front() const { return *begin(); }
+
+  // Modifiers.
+  template <typename InputIterator>
+  void assign(InputIterator first, InputIterator last) {
+    IntrusiveForwardList tmp(first, last);
+    tmp.swap(*this);
+  }
+  void push_front(value_type& value) {
+    insert_after(before_begin(), value);
+  }
+  void pop_front() {
+    DCHECK(!empty());
+    erase_after(before_begin());
+  }
+  iterator insert_after(const_iterator position, value_type& value) {
+    const IntrusiveForwardListHook* new_hook = hook_traits::GetHook(&value);
+    new_hook->next_hook = position.hook_->next_hook;
+    position.hook_->next_hook = new_hook;
+    return iterator(new_hook);
+  }
+  template <typename InputIterator>
+  iterator insert_after(const_iterator position, InputIterator first, InputIterator last) {
+    while (first != last) {
+      position = insert_after(position, *first++);
+    }
+    return iterator(position.hook_);
+  }
+  iterator erase_after(const_iterator position) {
+    const_iterator last = position;
+    std::advance(last, 2);
+    return erase_after(position, last);
+  }
+  iterator erase_after(const_iterator position, const_iterator last) {
+    DCHECK(position != last);
+    position.hook_->next_hook = last.hook_;
+    return iterator(last.hook_);
+  }
+  void swap(IntrusiveForwardList& other) {
+    std::swap(first_.next_hook, other.first_.next_hook);
+  }
+  void clear() {
+    first_.next_hook = nullptr;
+  }
+
+  // Operations.
+  void splice_after(const_iterator position, IntrusiveForwardList& src) {
+    DCHECK(position != end());
+    splice_after(position, src, src.before_begin(), src.end());
+  }
+  void splice_after(const_iterator position, IntrusiveForwardList&& src) {
+    splice_after(position, src);  // Use l-value overload.
+  }
+  // Splice the element after `i`.
+  void splice_after(const_iterator position, IntrusiveForwardList& src, const_iterator i) {
+    // The standard specifies that this version does nothing if `position == i`
+    // or `position == ++i`. We must handle the latter here because the overload
+    // `splice_after(position, src, first, last)` does not allow `position` inside
+    // the range `(first, last)`.
+    if (++const_iterator(i) == position) {
+      return;
+    }
+    const_iterator last = i;
+    std::advance(last, 2);
+    splice_after(position, src, i, last);
+  }
+  // Splice the element after `i`.
+  void splice_after(const_iterator position, IntrusiveForwardList&& src, const_iterator i) {
+    splice_after(position, src, i);  // Use l-value overload.
+  }
+  // Splice elements between `first` and `last`, i.e. open range `(first, last)`.
+  void splice_after(const_iterator position,
+                    IntrusiveForwardList& src,
+                    const_iterator first,
+                    const_iterator last) {
+    DCHECK(position != end());
+    DCHECK(first != last);
+    if (++const_iterator(first) == last) {
+      // Nothing to do.
+      return;
+    }
+    // If position is just before end() and last is src.end(), we can finish this quickly.
+    if (++const_iterator(position) == end() && last == src.end()) {
+      position.hook_->next_hook = first.hook_->next_hook;
+      first.hook_->next_hook = nullptr;
+      return;
+    }
+    // Otherwise we need to find the position before last to fix up the hook.
+    const_iterator before_last = first;
+    while (++const_iterator(before_last) != last) {
+      ++before_last;
+    }
+    // Detach (first, last).
+    const IntrusiveForwardListHook* first_taken = first.hook_->next_hook;
+    first.hook_->next_hook = last.hook_;
+    // Attach the sequence to the new position.
+    before_last.hook_->next_hook = position.hook_->next_hook;
+    position.hook_->next_hook = first_taken;
+  }
+  // Splice elements between `first` and `last`, i.e. open range `(first, last)`.
+  void splice_after(const_iterator position,
+                    IntrusiveForwardList&& src,
+                    const_iterator first,
+                    const_iterator last) {
+    splice_after(position, src, first, last);  // Use l-value overload.
+  }
+  void remove(const value_type& value) {
+    remove_if([value](const value_type& v) { return value == v; });
+  }
+  template <typename Predicate>
+  void remove_if(Predicate pred) {
+    iterator prev = before_begin();
+    for (iterator current = begin(); current != end(); ++current) {
+      if (pred(*current)) {
+        erase_after(prev);
+        current = prev;
+      } else {
+        prev = current;
+      }
+    }
+  }
+  void unique() {
+    unique(std::equal_to<value_type>());
+  }
+  template <typename BinaryPredicate>
+  void unique(BinaryPredicate pred) {
+    if (!empty()) {
+      iterator prev = begin();
+      iterator current = prev;
+      ++current;
+      for (; current != end(); ++current) {
+        if (pred(*prev, *current)) {
+          erase_after(prev);
+          current = prev;
+        } else {
+          prev = current;
+        }
+      }
+    }
+  }
+  void merge(IntrusiveForwardList& other) {
+    merge(other, std::less<value_type>());
+  }
+  void merge(IntrusiveForwardList&& other) {
+    merge(other);  // Use l-value overload.
+  }
+  template <typename Compare>
+  void merge(IntrusiveForwardList& other, Compare cmp) {
+    iterator prev = before_begin();
+    iterator current = begin();
+    iterator other_prev = other.before_begin();
+    iterator other_current = other.begin();
+    while (current != end() && other_current != other.end()) {
+      if (cmp(*other_current, *current)) {
+        ++other_current;
+        splice_after(prev, other, other_prev);
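+        // The element taken from `other` now follows `prev`; step over it so
+        // the invariant ++prev == current (checked below) keeps holding.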
+        ++prev;
+      } else {
+        prev = current;
+        ++current;
+      }
+      DCHECK(++const_iterator(prev) == current);
+      DCHECK(++const_iterator(other_prev) == other_current);
+    }
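+    // One of the lists is exhausted; append any remaining `other` elements
+    // after `prev` (no-op if `other` is already empty).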
+    splice_after(prev, other);
+  }
+  template <typename Compare>
+  void merge(IntrusiveForwardList&& other, Compare cmp) {
+    merge(other, cmp);  // Use l-value overload.
+  }
+  void sort() {
+    sort(std::less<value_type>());
+  }
+  template <typename Compare>
+  void sort(Compare cmp) {
+    size_t n = std::distance(begin(), end());
+    if (n >= 2u) {
+      const_iterator middle = before_begin();
+      std::advance(middle, n / 2u);
+      IntrusiveForwardList second_half;
+      second_half.splice_after(second_half.before_begin(), *this, middle, end());
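+      // `*this` now holds the first half; sort both halves recursively, then merge.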
+      sort(cmp);
+      second_half.sort(cmp);
+      merge(second_half, cmp);
+    }
+  }
+  void reverse() {
+    IntrusiveForwardList reversed;
+    while (!empty()) {
+      value_type& value = front();
+      erase_after(before_begin());
+      reversed.insert_after(reversed.before_begin(), value);
+    }
+    reversed.swap(*this);
+  }
+
+  // Extensions.
+  bool HasExactlyOneElement() const {
+    return !empty() && ++begin() == end();
+  }
+  size_t SizeSlow() const {
+    return std::distance(begin(), end());
+  }
+  bool ContainsNode(const_reference node) const {
+    for (auto&& n : *this) {
+      if (std::addressof(n) == std::addressof(node)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+ private:
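+  // Constness of an iterator protects the values, not the links between nodes,
+  // so it is safe to cast away constness of a hook when relinking.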
+  static IntrusiveForwardListHook* ModifiableHook(const IntrusiveForwardListHook* hook) {
+    return const_cast<IntrusiveForwardListHook*>(hook);
+  }
+
+  IntrusiveForwardListHook first_;
+};
+
+template <typename T, typename HookTraits>
+void swap(IntrusiveForwardList<T, HookTraits>& lhs, IntrusiveForwardList<T, HookTraits>& rhs) {
+  lhs.swap(rhs);
+}
+
+template <typename T, typename HookTraits>
+bool operator==(const IntrusiveForwardList<T, HookTraits>& lhs,
+                const IntrusiveForwardList<T, HookTraits>& rhs) {
+  auto lit = lhs.begin();
+  auto rit = rhs.begin();
+  for (; lit != lhs.end() && rit != rhs.end(); ++lit, ++rit) {
+    if (*lit != *rit) {
+      return false;
+    }
+  }
+  return lit == lhs.end() && rit == rhs.end();
+}
+
+template <typename T, typename HookTraits>
+bool operator!=(const IntrusiveForwardList<T, HookTraits>& lhs,
+                const IntrusiveForwardList<T, HookTraits>& rhs) {
+  return !(lhs == rhs);
+}
+
+template <typename T, typename HookTraits>
+bool operator<(const IntrusiveForwardList<T, HookTraits>& lhs,
+               const IntrusiveForwardList<T, HookTraits>& rhs) {
+  return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+}
+
+template <typename T, typename HookTraits>
+bool operator>(const IntrusiveForwardList<T, HookTraits>& lhs,
+               const IntrusiveForwardList<T, HookTraits>& rhs) {
+  return rhs < lhs;
+}
+
+template <typename T, typename HookTraits>
+bool operator<=(const IntrusiveForwardList<T, HookTraits>& lhs,
+                const IntrusiveForwardList<T, HookTraits>& rhs) {
+  return !(rhs < lhs);
+}
+
+template <typename T, typename HookTraits>
+bool operator>=(const IntrusiveForwardList<T, HookTraits>& lhs,
+                const IntrusiveForwardList<T, HookTraits>& rhs) {
+  return !(lhs < rhs);
+}
+
+template <typename T, IntrusiveForwardListHook T::* NextPtr>
+class IntrusiveForwardListMemberHook {
+ public:
+  static const IntrusiveForwardListHook* GetHook(const T* value) {
+    return &(value->*NextPtr);
+  }
+
+  static T* GetValue(const IntrusiveForwardListHook* hook) {
+    return reinterpret_cast<T*>(
+        reinterpret_cast<uintptr_t>(hook) - OFFSETOF_MEMBERPTR(T, NextPtr));
+  }
+};
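+
+// Illustrative usage (a sketch, not part of this change): a value type embeds
+// the hook and the list is parameterized with the member hook accessor, e.g.
+//
+//   struct Node {
+//     IntrusiveForwardListHook hook;
+//     int payload;
+//   };
+//   using NodeList =
+//       IntrusiveForwardList<Node, IntrusiveForwardListMemberHook<Node, &Node::hook>>;
+//
+// The list never owns its elements; the caller keeps them alive while linked.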
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_INTRUSIVE_FORWARD_LIST_H_
diff --git a/compiler/utils/intrusive_forward_list_test.cc b/compiler/utils/intrusive_forward_list_test.cc
new file mode 100644
index 0000000..517142e
--- /dev/null
+++ b/compiler/utils/intrusive_forward_list_test.cc
@@ -0,0 +1,505 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <forward_list>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "intrusive_forward_list.h"
+
+namespace art {
+
+struct IFLTestValue {
+  // Deliberately not explicit.
+  IFLTestValue(int v) : hook(), value(v) { }  // NOLINT(runtime/explicit)
+
+  IntrusiveForwardListHook hook;
+  int value;
+};
+
+bool operator==(const IFLTestValue& lhs, const IFLTestValue& rhs) {
+  return lhs.value == rhs.value;
+}
+
+bool operator<(const IFLTestValue& lhs, const IFLTestValue& rhs) {
+  return lhs.value < rhs.value;
+}
+
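+// Check an IntrusiveForwardList against a std::forward_list<int> reference;
+// element comparison relies on the implicit IFLTestValue(int) conversion.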
+#define ASSERT_LISTS_EQUAL(expected, value)                                   \
+  do {                                                                        \
+    ASSERT_EQ(expected.empty(), value.empty());                               \
+    ASSERT_EQ(std::distance(expected.begin(), expected.end()),                \
+              std::distance(value.begin(), value.end()));                     \
+    ASSERT_TRUE(std::equal(expected.begin(), expected.end(), value.begin())); \
+  } while (false)
+
+TEST(IntrusiveForwardList, IteratorToConstIterator) {
+  IntrusiveForwardList<IFLTestValue> ifl;
+  IntrusiveForwardList<IFLTestValue>::iterator begin = ifl.begin();
+  IntrusiveForwardList<IFLTestValue>::const_iterator cbegin = ifl.cbegin();
+  IntrusiveForwardList<IFLTestValue>::const_iterator converted_begin = begin;
+  ASSERT_TRUE(converted_begin == cbegin);
+}
+
+TEST(IntrusiveForwardList, IteratorOperators) {
+  IntrusiveForwardList<IFLTestValue> ifl;
+  ASSERT_TRUE(ifl.begin() == ifl.cbegin());
+  ASSERT_FALSE(ifl.begin() != ifl.cbegin());
+  ASSERT_TRUE(ifl.end() == ifl.cend());
+  ASSERT_FALSE(ifl.end() != ifl.cend());
+
+  ASSERT_TRUE(ifl.begin() == ifl.end());  // Empty.
+  ASSERT_FALSE(ifl.begin() != ifl.end());  // Empty.
+
+  IFLTestValue value(1);
+  ifl.insert_after(ifl.cbefore_begin(), value);
+
+  ASSERT_FALSE(ifl.begin() == ifl.end());  // Not empty.
+  ASSERT_TRUE(ifl.begin() != ifl.end());  // Not empty.
+}
+
+TEST(IntrusiveForwardList, ConstructRange) {
+  std::forward_list<int> ref({ 1, 2, 7 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+}
+
+TEST(IntrusiveForwardList, Assign) {
+  std::forward_list<int> ref1({ 2, 8, 5 });
+  std::vector<IFLTestValue> storage1(ref1.begin(), ref1.end());
+  IntrusiveForwardList<IFLTestValue> ifl;
+  ifl.assign(storage1.begin(), storage1.end());
+  ASSERT_LISTS_EQUAL(ref1, ifl);
+  std::forward_list<int> ref2({ 7, 1, 3 });
+  std::vector<IFLTestValue> storage2(ref2.begin(), ref2.end());
+  ifl.assign(storage2.begin(), storage2.end());
+  ASSERT_LISTS_EQUAL(ref2, ifl);
+}
+
+TEST(IntrusiveForwardList, PushPop) {
+  IFLTestValue value3(3);
+  IFLTestValue value7(7);
+  std::forward_list<int> ref;
+  IntrusiveForwardList<IFLTestValue> ifl;
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ref.push_front(3);
+  ifl.push_front(value3);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(3, ifl.front());
+  ref.push_front(7);
+  ifl.push_front(value7);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(7, ifl.front());
+  ref.pop_front();
+  ifl.pop_front();
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(3, ifl.front());
+  ref.pop_front();
+  ifl.pop_front();
+  ASSERT_LISTS_EQUAL(ref, ifl);
+}
+
+TEST(IntrusiveForwardList, InsertAfter1) {
+  IFLTestValue value4(4);
+  IFLTestValue value8(8);
+  IFLTestValue value5(5);
+  IFLTestValue value3(3);
+  std::forward_list<int> ref;
+  IntrusiveForwardList<IFLTestValue> ifl;
+
+  auto ref_it = ref.insert_after(ref.before_begin(), 4);
+  auto ifl_it = ifl.insert_after(ifl.before_begin(), value4);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(*ref_it, *ifl_it);
+  CHECK(ref_it == ref.begin());
+  ASSERT_TRUE(ifl_it == ifl.begin());
+
+  ref_it = ref.insert_after(ref.begin(), 8);
+  ifl_it = ifl.insert_after(ifl.begin(), value8);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(*ref_it, *ifl_it);
+  CHECK(ref_it != ref.end());
+  ASSERT_TRUE(ifl_it != ifl.end());
+  CHECK(++ref_it == ref.end());
+  ASSERT_TRUE(++ifl_it == ifl.end());
+
+  ref_it = ref.insert_after(ref.begin(), 5);
+  ifl_it = ifl.insert_after(ifl.begin(), value5);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(*ref_it, *ifl_it);
+
+  ref_it = ref.insert_after(ref_it, 3);
+  ifl_it = ifl.insert_after(ifl_it, value3);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(*ref_it, *ifl_it);
+}
+
+TEST(IntrusiveForwardList, InsertAfter2) {
+  std::forward_list<int> ref;
+  IntrusiveForwardList<IFLTestValue> ifl;
+
+  auto ref_it = ref.insert_after(ref.before_begin(), { 2, 8, 5 });
+  std::vector<IFLTestValue> storage1({ { 2 }, { 8 }, { 5 } });
+  auto ifl_it = ifl.insert_after(ifl.before_begin(), storage1.begin(), storage1.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(*ref_it, *ifl_it);
+
+  std::vector<IFLTestValue> storage2({ { 7 }, { 2 } });
+  ref_it = ref.insert_after(ref.begin(), { 7, 2 });
+  ifl_it = ifl.insert_after(ifl.begin(), storage2.begin(), storage2.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(*ref_it, *ifl_it);
+
+  std::vector<IFLTestValue> storage3({ { 1 }, { 3 }, { 4 }, { 9 } });
+  ref_it = ref.begin();
+  ifl_it = ifl.begin();
+  std::advance(ref_it, std::distance(ref.begin(), ref.end()) - 1);
+  std::advance(ifl_it, std::distance(ifl.begin(), ifl.end()) - 1);
+  ref_it = ref.insert_after(ref_it, { 1, 3, 4, 9 });
+  ifl_it = ifl.insert_after(ifl_it, storage3.begin(), storage3.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+}
+
+TEST(IntrusiveForwardList, EraseAfter1) {
+  std::forward_list<int> ref({ 1, 2, 7, 4, 5 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 5);
+
+  auto ref_it = ref.begin();
+  auto ifl_it = ifl.begin();
+  std::advance(ref_it, 2);
+  std::advance(ifl_it, 2);
+  ref_it = ref.erase_after(ref_it);
+  ifl_it = ifl.erase_after(ifl_it);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 4);
+  CHECK(ref_it != ref.end());
+  ASSERT_TRUE(ifl_it != ifl.end());
+  CHECK(++ref_it == ref.end());
+  ASSERT_TRUE(++ifl_it == ifl.end());
+
+  ref_it = ref.begin();
+  ifl_it = ifl.begin();
+  std::advance(ref_it, 2);
+  std::advance(ifl_it, 2);
+  ref_it = ref.erase_after(ref_it);
+  ifl_it = ifl.erase_after(ifl_it);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 3);
+  CHECK(ref_it == ref.end());
+  ASSERT_TRUE(ifl_it == ifl.end());
+
+  ref_it = ref.erase_after(ref.begin());
+  ifl_it = ifl.erase_after(ifl.begin());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 2);
+  CHECK(ref_it != ref.end());
+  ASSERT_TRUE(ifl_it != ifl.end());
+  CHECK(++ref_it == ref.end());
+  ASSERT_TRUE(++ifl_it == ifl.end());
+
+  ref_it = ref.erase_after(ref.before_begin());
+  ifl_it = ifl.erase_after(ifl.before_begin());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 1);
+  CHECK(ref_it == ref.begin());
+  ASSERT_TRUE(ifl_it == ifl.begin());
+
+  ref_it = ref.erase_after(ref.before_begin());
+  ifl_it = ifl.erase_after(ifl.before_begin());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 0);
+  CHECK(ref_it == ref.begin());
+  ASSERT_TRUE(ifl_it == ifl.begin());
+}
+
+TEST(IntrusiveForwardList, EraseAfter2) {
+  std::forward_list<int> ref({ 1, 2, 7, 4, 5, 3, 2, 8, 9 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 9);
+
+  auto ref_it = ref.begin();
+  auto ifl_it = ifl.begin();
+  std::advance(ref_it, 3);
+  std::advance(ifl_it, 3);
+  ref_it = ref.erase_after(ref.begin(), ref_it);
+  ifl_it = ifl.erase_after(ifl.begin(), ifl_it);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ASSERT_EQ(std::distance(ref.begin(), ref_it), std::distance(ifl.begin(), ifl_it));
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 7);
+
+  ref_it = ref.erase_after(ref_it, ref.end());
+  ifl_it = ifl.erase_after(ifl_it, ifl.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK(ref_it == ref.end());
+  ASSERT_TRUE(ifl_it == ifl.end());
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 2);
+
+  ref_it = ref.erase_after(ref.before_begin(), ref.end());
+  ifl_it = ifl.erase_after(ifl.before_begin(), ifl.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK(ref_it == ref.end());
+  ASSERT_TRUE(ifl_it == ifl.end());
+  CHECK_EQ(std::distance(ref.begin(), ref.end()), 0);
+}
+
+TEST(IntrusiveForwardList, SwapClear) {
+  std::forward_list<int> ref1({ 1, 2, 7 });
+  std::vector<IFLTestValue> storage1(ref1.begin(), ref1.end());
+  IntrusiveForwardList<IFLTestValue> ifl1(storage1.begin(), storage1.end());
+  std::forward_list<int> ref2({ 3, 8, 6 });
+  std::vector<IFLTestValue> storage2(ref2.begin(), ref2.end());
+  IntrusiveForwardList<IFLTestValue> ifl2(storage2.begin(), storage2.end());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  ref1.swap(ref2);
+  ifl1.swap(ifl2);
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  ref1.clear();
+  ifl1.clear();
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  swap(ref1, ref2);
+  swap(ifl1, ifl2);
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  ref1.clear();
+  ifl1.clear();
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+}
+
+TEST(IntrusiveForwardList, SpliceAfter) {
+  std::forward_list<int> ref1({ 3, 1, 2, 7, 4, 5, 4, 8, 7 });
+  std::forward_list<int> ref2;
+  std::vector<IFLTestValue> storage(ref1.begin(), ref1.end());
+  IntrusiveForwardList<IFLTestValue> ifl1(storage.begin(), storage.end());
+  IntrusiveForwardList<IFLTestValue> ifl2;
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  // Move everything to ref2/ifl2.
+  ref2.splice_after(ref2.before_begin(), ref1);
+  ifl2.splice_after(ifl2.before_begin(), ifl1);
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  // Move first element (3) to ref1/ifl1.
+  ref1.splice_after(ref1.before_begin(), ref2, ref2.before_begin());
+  ifl1.splice_after(ifl1.before_begin(), ifl2, ifl2.before_begin());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  // Move second element (2) to ref1/ifl1 after the first element (3).
+  ref1.splice_after(ref1.begin(), ref2, ref2.begin());
+  ifl1.splice_after(ifl1.begin(), ifl2, ifl2.begin());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  // Move everything from ref2/ifl2 between the 2 elements now in ref1/ifl1.
+  ref1.splice_after(ref1.begin(), ref2);
+  ifl1.splice_after(ifl1.begin(), ifl2);
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  std::forward_list<int> check({ 3, 1, 7, 4, 5, 4, 8, 7, 2 });
+  ASSERT_LISTS_EQUAL(check, ifl1);
+  ASSERT_TRUE(ifl2.empty());
+
+  // Empty splice_after().
+  ref2.splice_after(
+      ref2.before_begin(), ref1, ref1.before_begin(), ref1.begin());
+  ifl2.splice_after(ifl2.before_begin(), ifl1, ifl1.before_begin(), ifl1.begin());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  // Move { 1, 7 } to ref2/ifl2.
+  auto ref_it = ref1.begin();
+  auto ifl_it = ifl1.begin();
+  std::advance(ref_it, 3);
+  std::advance(ifl_it, 3);
+  ref2.splice_after(ref2.before_begin(), ref1, ref1.begin(), ref_it);
+  ifl2.splice_after(ifl2.before_begin(), ifl1, ifl1.begin(), ifl_it);
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+  // Move { 8, 7, 2 } to the beginning of ref1/ifl1.
+  ref_it = ref1.begin();
+  ifl_it = ifl1.begin();
+  std::advance(ref_it, 3);
+  std::advance(ifl_it, 3);
+  ref1.splice_after(ref1.before_begin(), ref1, ref_it, ref1.end());
+  ifl1.splice_after(ifl1.before_begin(), ifl1, ifl_it, ifl1.end());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+
+  check.assign({ 8, 7, 2, 3, 4, 5, 4 });
+  ASSERT_LISTS_EQUAL(check, ifl1);
+  check.assign({ 1, 7 });
+  ASSERT_LISTS_EQUAL(check, ifl2);
+
+  // Move all but the first element to ref2/ifl2.
+  ref_it = ref2.begin();
+  ifl_it = ifl2.begin();
+  std::advance(ref_it, 1);
+  std::advance(ifl_it, 1);
+  ref2.splice_after(ref_it, ref1, ref1.begin(), ref1.end());
+  ifl2.splice_after(ifl_it, ifl1, ifl1.begin(), ifl1.end());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+
+    check.assign({ 8 });
+  ASSERT_LISTS_EQUAL(check, ifl1);
+
+  // Move the first element of ref1/ifl1 to the beginning of ref1/ifl1 (do nothing).
+  ref1.splice_after(ref1.before_begin(), ref1, ref1.before_begin());
+  ifl1.splice_after(ifl1.before_begin(), ifl1, ifl1.before_begin());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(check, ifl1);
+
+  // Move the first element of ref1/ifl1 after itself (do nothing).
+  ref1.splice_after(ref1.begin(), ref1, ref1.before_begin());
+  ifl1.splice_after(ifl1.begin(), ifl1, ifl1.before_begin());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(check, ifl1);
+
+  check.assign({ 1, 7, 7, 2, 3, 4, 5, 4 });
+  ASSERT_LISTS_EQUAL(check, ifl2);
+
+  // Move the first element of ref2/ifl2 to the beginning of ref2/ifl2 (do nothing).
+  ref2.splice_after(ref2.before_begin(), ref2, ref2.before_begin());
+  ifl2.splice_after(ifl2.before_begin(), ifl2, ifl2.before_begin());
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  ASSERT_LISTS_EQUAL(check, ifl2);
+
+  // Move the first element of ref2/ifl2 after itself (do nothing).
+  ref2.splice_after(ref2.begin(), ref2, ref2.before_begin());
+  ifl2.splice_after(ifl2.begin(), ifl2, ifl2.before_begin());
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  ASSERT_LISTS_EQUAL(check, ifl2);
+}
+
+TEST(IntrusiveForwardList, Remove) {
+  std::forward_list<int> ref({ 3, 1, 2, 7, 4, 5, 4, 8, 7 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ref.remove(1);
+  ifl.remove(1);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ref.remove(4);
+  ifl.remove(4);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  auto odd = [](IFLTestValue value) { return (value.value & 1) != 0; };  // NOLINT(readability/braces)
+  ref.remove_if(odd);
+  ifl.remove_if(odd);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  auto all = [](IFLTestValue value ATTRIBUTE_UNUSED) { return true; };  // NOLINT(readability/braces)
+  ref.remove_if(all);
+  ifl.remove_if(all);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+}
+
+TEST(IntrusiveForwardList, Unique) {
+  std::forward_list<int> ref({ 3, 1, 1, 2, 3, 3, 7, 7, 4, 4, 5, 7 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  ref.unique();
+  ifl.unique();
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  std::forward_list<int> check({ 3, 1, 2, 3, 7, 4, 5, 7 });
+  ASSERT_LISTS_EQUAL(check, ifl);
+
+  auto bin_pred = [](IFLTestValue lhs, IFLTestValue rhs) {
+    return (lhs.value & ~1) == (rhs.value & ~1);
+  };
+  ref.unique(bin_pred);
+  ifl.unique(bin_pred);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  check.assign({ 3, 1, 2, 7, 4, 7 });
+  ASSERT_LISTS_EQUAL(check, ifl);
+}
+
+TEST(IntrusiveForwardList, Merge) {
+  std::forward_list<int> ref1({ 1, 4, 8, 8, 12 });
+  std::vector<IFLTestValue> storage1(ref1.begin(), ref1.end());
+  IntrusiveForwardList<IFLTestValue> ifl1(storage1.begin(), storage1.end());
+  std::forward_list<int> ref2({ 3, 5, 6, 7, 9 });
+  std::vector<IFLTestValue> storage2(ref2.begin(), ref2.end());
+  IntrusiveForwardList<IFLTestValue> ifl2(storage2.begin(), storage2.end());
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  CHECK(std::is_sorted(ref1.begin(), ref1.end()));
+  CHECK(std::is_sorted(ref2.begin(), ref2.end()));
+  ref1.merge(ref2);
+  ifl1.merge(ifl2);
+  ASSERT_LISTS_EQUAL(ref1, ifl1);
+  ASSERT_LISTS_EQUAL(ref2, ifl2);
+  CHECK(ref2.empty());
+  std::forward_list<int> check({ 1, 3, 4, 5, 6, 7, 8, 8, 9, 12 });
+  ASSERT_LISTS_EQUAL(check, ifl1);
+}
+
+TEST(IntrusiveForwardList, Sort1) {
+  std::forward_list<int> ref({ 2, 9, 8, 3, 7, 4, 1, 5, 3, 0 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK(!std::is_sorted(ref.begin(), ref.end()));
+  ref.sort();
+  ifl.sort();
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  std::forward_list<int> check({ 0, 1, 2, 3, 3, 4, 5, 7, 8, 9 });
+  ASSERT_LISTS_EQUAL(check, ifl);
+}
+
+TEST(IntrusiveForwardList, Sort2) {
+  std::forward_list<int> ref({ 2, 9, 8, 3, 7, 4, 1, 5, 3, 0 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  auto cmp = [](IFLTestValue lhs, IFLTestValue rhs) {
+    return (lhs.value & ~1) < (rhs.value & ~1);
+  };
+  CHECK(!std::is_sorted(ref.begin(), ref.end(), cmp));
+  ref.sort(cmp);
+  ifl.sort(cmp);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  std::forward_list<int> check({ 1, 0, 2, 3, 3, 4, 5, 7, 9, 8 });
+  ASSERT_LISTS_EQUAL(check, ifl);
+}
+
+TEST(IntrusiveForwardList, Reverse) {
+  std::forward_list<int> ref({ 8, 3, 5, 4, 1, 3 });
+  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
+  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  CHECK(!std::is_sorted(ref.begin(), ref.end()));
+  ref.reverse();
+  ifl.reverse();
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  std::forward_list<int> check({ 3, 1, 4, 5, 3, 8 });
+  ASSERT_LISTS_EQUAL(check, ifl);
+}
+
+}  // namespace art
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index ffac4c4..ecb67bd 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -102,8 +102,10 @@
 
 class MipsAssembler FINAL : public Assembler {
  public:
-  explicit MipsAssembler(const MipsInstructionSetFeatures* instruction_set_features = nullptr)
-      : overwriting_(false),
+  explicit MipsAssembler(ArenaAllocator* arena,
+                         const MipsInstructionSetFeatures* instruction_set_features = nullptr)
+      : Assembler(arena),
+        overwriting_(false),
         overwrite_location_(0),
         last_position_adjustment_(0),
         last_old_position_(0),
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 71f5e00..8acc38a 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -102,8 +102,9 @@
 
 class Mips64Assembler FINAL : public Assembler {
  public:
-  Mips64Assembler()
-      : overwriting_(false),
+  explicit Mips64Assembler(ArenaAllocator* arena)
+      : Assembler(arena),
+        overwriting_(false),
         overwrite_location_(0),
         last_position_adjustment_(0),
         last_old_position_(0),
diff --git a/compiler/utils/string_reference.h b/compiler/utils/string_reference.h
new file mode 100644
index 0000000..72552f2
--- /dev/null
+++ b/compiler/utils/string_reference.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_STRING_REFERENCE_H_
+#define ART_COMPILER_UTILS_STRING_REFERENCE_H_
+
+#include <stdint.h>
+
+#include "base/logging.h"
+#include "utf-inl.h"
+
+namespace art {
+
+class DexFile;
+
+// A string is uniquely located by its DexFile and the string_ids_ table index into that DexFile.
+struct StringReference {
+  StringReference(const DexFile* file, uint32_t index) : dex_file(file), string_index(index) { }
+
+  const DexFile* dex_file;
+  uint32_t string_index;
+};
+
+// Compare the actual referenced string values. Used for string reference deduplication.
+struct StringReferenceValueComparator {
+  bool operator()(StringReference sr1, StringReference sr2) const {
+    // Note that we want to deduplicate identical strings even if they are referenced
+    // by different dex files, so we need some (any) total ordering of strings, rather
+    // than references. However, the references should usually be from the same dex file,
+    // so we choose the dex file string ordering so that we can simply compare indexes
+    // and avoid the costly string comparison in the most common case.
+    if (sr1.dex_file == sr2.dex_file) {
+      // Use the string order enforced by the dex file verifier.
+      DCHECK_EQ(
+          sr1.string_index < sr2.string_index,
+          CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(
+              sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index)),
+              sr2.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index))) < 0);
+      return sr1.string_index < sr2.string_index;
+    } else {
+      // Cannot compare indexes, so do the string comparison.
+      return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(
+          sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index)),
+          sr2.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index))) < 0;
+    }
+  }
+};
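+
+// Illustrative usage (an assumption, not part of this change): the comparator
+// is meant as the ordering for associative containers keyed by StringReference,
+// e.g.
+//   std::map<StringReference, uint32_t, StringReferenceValueComparator> strings;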
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_STRING_REFERENCE_H_
diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h
index 2958dc6..fb37804 100644
--- a/compiler/utils/test_dex_file_builder.h
+++ b/compiler/utils/test_dex_file_builder.h
@@ -88,7 +88,7 @@
     std::memset(header_data.data, 0, sizeof(header_data.data));
     DexFile::Header* header = reinterpret_cast<DexFile::Header*>(&header_data.data);
     std::copy_n(DexFile::kDexMagic, 4u, header->magic_);
-    std::copy_n(DexFile::kDexMagicVersion, 4u, header->magic_ + 4u);
+    std::copy_n(DexFile::kDexMagicVersions[0], 4u, header->magic_ + 4u);
     header->header_size_ = sizeof(DexFile::Header);
     header->endian_tag_ = DexFile::kDexEndianConstant;
     header->link_size_ = 0u;  // Unused.
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 3efef70..84cdb7d 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1030,6 +1030,14 @@
 }
 
 
+void X86Assembler::cmpb(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
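+  // Opcode 0x80 with register field /7 encodes CMP r/m8, imm8.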
+  EmitUint8(0x80);
+  EmitOperand(7, address);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86Assembler::cmpw(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -2379,7 +2387,7 @@
 }
 
 void X86Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
-  X86ExceptionSlowPath* slow = new X86ExceptionSlowPath(stack_adjust);
+  X86ExceptionSlowPath* slow = new (GetArena()) X86ExceptionSlowPath(stack_adjust);
   buffer_.EnqueueSlowPath(slow);
   fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<4>()), Immediate(0));
   j(kNotEqual, slow->Entry());
@@ -2402,7 +2410,7 @@
 }
 
 void X86Assembler::AddConstantArea() {
-  const std::vector<int32_t>& area = constant_area_.GetBuffer();
+  ArrayRef<const int32_t> area = constant_area_.GetBuffer();
   // Generate the data for the literal area.
   for (size_t i = 0, e = area.size(); i < e; i++) {
     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 00ff7bd..bc46e9f 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -18,12 +18,15 @@
 #define ART_COMPILER_UTILS_X86_ASSEMBLER_X86_H_
 
 #include <vector>
+
+#include "base/arena_containers.h"
 #include "base/bit_utils.h"
 #include "base/macros.h"
 #include "constants_x86.h"
 #include "globals.h"
 #include "managed_register_x86.h"
 #include "offsets.h"
+#include "utils/array_ref.h"
 #include "utils/assembler.h"
 
 namespace art {
@@ -260,7 +263,7 @@
  */
 class ConstantArea {
  public:
-  ConstantArea() {}
+  explicit ConstantArea(ArenaAllocator* arena) : buffer_(arena->Adapter(kArenaAllocAssembler)) {}
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
@@ -290,18 +293,18 @@
     return buffer_.size() * elem_size_;
   }
 
-  const std::vector<int32_t>& GetBuffer() const {
-    return buffer_;
+  ArrayRef<const int32_t> GetBuffer() const {
+    return ArrayRef<const int32_t>(buffer_);
   }
 
  private:
   static constexpr size_t elem_size_ = sizeof(int32_t);
-  std::vector<int32_t> buffer_;
+  ArenaVector<int32_t> buffer_;
 };
 
 class X86Assembler FINAL : public Assembler {
  public:
-  X86Assembler() {}
+  explicit X86Assembler(ArenaAllocator* arena) : Assembler(arena), constant_area_(arena) {}
   virtual ~X86Assembler() {}
 
   /*
@@ -476,6 +479,7 @@
   void xchgl(Register dst, Register src);
   void xchgl(Register reg, const Address& address);
 
+  void cmpb(const Address& address, const Immediate& imm);
   void cmpw(const Address& address, const Immediate& imm);
 
   void cmpl(Register reg, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index d0d5147..28043c9 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -16,13 +16,16 @@
 
 #include "assembler_x86.h"
 
+#include "base/arena_allocator.h"
 #include "base/stl_util.h"
 #include "utils/assembler_test.h"
 
 namespace art {
 
 TEST(AssemblerX86, CreateBuffer) {
-  AssemblerBuffer buffer;
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  AssemblerBuffer buffer(&arena);
   AssemblerBuffer::EnsureCapacity ensured(&buffer);
   buffer.Emit<uint8_t>(0x42);
   ASSERT_EQ(static_cast<size_t>(1), buffer.Size());
@@ -386,4 +389,10 @@
   DriverStr(expected, "near_label");
 }
 
+TEST_F(AssemblerX86Test, Cmpb) {
+  GetAssembler()->cmpb(x86::Address(x86::EDI, 128), x86::Immediate(0));
+  const char* expected = "cmpb $0, 128(%EDI)\n";
+  DriverStr(expected, "cmpb");
+}
+
 }  // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index d86ad1b..5e7b587 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1224,6 +1224,16 @@
 }
 
 
+void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
+  EmitOptionalRex32(address);
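+  // Opcode 0x80 with register field /7 encodes CMP r/m8, imm8.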
+  EmitUint8(0x80);
+  EmitOperand(7, address);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());
@@ -3144,7 +3154,7 @@
 };
 
 void X86_64Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
-  X86_64ExceptionSlowPath* slow = new X86_64ExceptionSlowPath(stack_adjust);
+  X86_64ExceptionSlowPath* slow = new (GetArena()) X86_64ExceptionSlowPath(stack_adjust);
   buffer_.EnqueueSlowPath(slow);
   gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<8>(), true), Immediate(0));
   j(kNotEqual, slow->Entry());
@@ -3167,7 +3177,7 @@
 }
 
 void X86_64Assembler::AddConstantArea() {
-  const std::vector<int32_t>& area = constant_area_.GetBuffer();
+  ArrayRef<const int32_t> area = constant_area_.GetBuffer();
   for (size_t i = 0, e = area.size(); i < e; i++) {
     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
     EmitInt32(area[i]);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index f00cb12..720a402 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -19,12 +19,14 @@
 
 #include <vector>
 
+#include "base/arena_containers.h"
 #include "base/bit_utils.h"
 #include "base/macros.h"
 #include "constants_x86_64.h"
 #include "globals.h"
 #include "managed_register_x86_64.h"
 #include "offsets.h"
+#include "utils/array_ref.h"
 #include "utils/assembler.h"
 
 namespace art {
@@ -270,7 +272,7 @@
  */
 class ConstantArea {
  public:
-  ConstantArea() {}
+  explicit ConstantArea(ArenaAllocator* arena) : buffer_(arena->Adapter(kArenaAllocAssembler)) {}
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
@@ -296,13 +298,13 @@
     return buffer_.size() * elem_size_;
   }
 
-  const std::vector<int32_t>& GetBuffer() const {
-    return buffer_;
+  ArrayRef<const int32_t> GetBuffer() const {
+    return ArrayRef<const int32_t>(buffer_);
   }
 
  private:
   static constexpr size_t elem_size_ = sizeof(int32_t);
-  std::vector<int32_t> buffer_;
+  ArenaVector<int32_t> buffer_;
 };
 
 
@@ -332,7 +334,7 @@
 
 class X86_64Assembler FINAL : public Assembler {
  public:
-  X86_64Assembler() {}
+  explicit X86_64Assembler(ArenaAllocator* arena) : Assembler(arena), constant_area_(arena) {}
   virtual ~X86_64Assembler() {}
 
   /*
@@ -504,6 +506,7 @@
   void xchgq(CpuRegister dst, CpuRegister src);
   void xchgl(CpuRegister reg, const Address& address);
 
+  void cmpb(const Address& address, const Immediate& imm);
   void cmpw(const Address& address, const Immediate& imm);
 
   void cmpl(CpuRegister reg, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 4f65709..9dccc9f 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -27,7 +27,9 @@
 namespace art {
 
 TEST(AssemblerX86_64, CreateBuffer) {
-  AssemblerBuffer buffer;
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  AssemblerBuffer buffer(&arena);
   AssemblerBuffer::EnsureCapacity ensured(&buffer);
   buffer.Emit<uint8_t>(0x42);
   ASSERT_EQ(static_cast<size_t>(1), buffer.Size());
@@ -35,7 +37,7 @@
   ASSERT_EQ(static_cast<size_t>(5), buffer.Size());
 }
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 static constexpr size_t kRandomIterations = 1000;  // Devices might be puny, don't stress them...
 #else
 static constexpr size_t kRandomIterations = 100000;  // Hosts are pretty powerful.
@@ -1635,4 +1637,11 @@
   DriverStr(expected, "Repecmpsq");
 }
 
+TEST_F(AssemblerX86_64Test, Cmpb) {
+  GetAssembler()->cmpb(x86_64::Address(x86_64::CpuRegister(x86_64::RDI), 128),
+                       x86_64::Immediate(0));
+  const char* expected = "cmpb $0, 128(%RDI)\n";
+  DriverStr(expected, "cmpb");
+}
+
 }  // namespace art
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index ec6f96f..be38336 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -68,6 +68,7 @@
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "oat_file_assistant.h"
 #include "oat_writer.h"
 #include "os.h"
 #include "runtime.h"
@@ -1030,6 +1031,9 @@
         compiler_options_->GetNativeDebuggable() ? OatHeader::kTrueValue : OatHeader::kFalseValue);
     key_value_store_->Put(OatHeader::kCompilerFilter,
         CompilerFilter::NameOfFilter(compiler_options_->GetCompilerFilter()));
+    key_value_store_->Put(OatHeader::kHasPatchInfoKey,
+        compiler_options_->GetIncludePatchInformation() ? OatHeader::kTrueValue
+                                                        : OatHeader::kFalseValue);
   }
 
   // Parse the arguments from the command line. In case of an unrecognized option or impossible
@@ -1322,7 +1326,7 @@
         TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
         std::vector<gc::space::ImageSpace*> image_spaces =
             Runtime::Current()->GetHeap()->GetBootImageSpaces();
-        image_file_location_oat_checksum_ = image_spaces[0]->GetImageHeader().GetOatChecksum();
+        image_file_location_oat_checksum_ = OatFileAssistant::CalculateCombinedImageChecksum();
         image_file_location_oat_data_begin_ =
             reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
         image_patch_delta_ = image_spaces[0]->GetImageHeader().GetPatchDelta();
@@ -1522,6 +1526,7 @@
                                      instruction_set_,
                                      instruction_set_features_.get(),
                                      IsBootImage(),
+                                     IsAppImage(),
                                      image_classes_.release(),
                                      compiled_classes_.release(),
                                      /* compiled_methods */ nullptr,
diff --git a/disassembler/disassembler.h b/disassembler/disassembler.h
index b99e5c2..b080315 100644
--- a/disassembler/disassembler.h
+++ b/disassembler/disassembler.h
@@ -31,16 +31,23 @@
   // Should the disassembler print absolute or relative addresses.
   const bool absolute_addresses_;
 
-  // Base addess for calculating relative code offsets when absolute_addresses_ is false.
+  // Base address for calculating relative code offsets when absolute_addresses_ is false.
   const uint8_t* const base_address_;
 
+  // End address (exclusive).
+  const uint8_t* const end_address_;
+
   // If set, the disassembler is allowed to look at load targets in literal
   // pools.
   const bool can_read_literals_;
 
-  DisassemblerOptions(bool absolute_addresses, const uint8_t* base_address,
+  DisassemblerOptions(bool absolute_addresses,
+                      const uint8_t* base_address,
+                      const uint8_t* end_address,
                       bool can_read_literals)
-      : absolute_addresses_(absolute_addresses), base_address_(base_address),
+      : absolute_addresses_(absolute_addresses),
+        base_address_(base_address),
+        end_address_(end_address),
         can_read_literals_(can_read_literals) {}
 
  private:
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 0e709eb..286faf2 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -418,7 +418,12 @@
   return os << static_cast<int>(type);
 }
 
-void DumpThumb2Literal(std::ostream& args, const uint8_t* instr_ptr, uint32_t U, uint32_t imm32,
+void DumpThumb2Literal(std::ostream& args,
+                       const uint8_t* instr_ptr,
+                       const uintptr_t lo_adr,
+                       const uintptr_t hi_adr,
+                       uint32_t U,
+                       uint32_t imm32,
                        T2LitType type) {
   // Literal offsets (imm32) are not required to be aligned so we may need unaligned access.
   typedef const int16_t unaligned_int16_t __attribute__ ((aligned (1)));
@@ -428,8 +433,16 @@
   typedef const int64_t unaligned_int64_t __attribute__ ((aligned (1)));
   typedef const uint64_t unaligned_uint64_t __attribute__ ((aligned (1)));
 
+  // Get address of literal. Bail if not within expected buffer range to
+  // avoid trying to fetch invalid literals (we can encounter this when
+  // interpreting raw data as instructions).
   uintptr_t pc = RoundDown(reinterpret_cast<intptr_t>(instr_ptr) + 4, 4);
   uintptr_t lit_adr = U ? pc + imm32 : pc - imm32;
+  if (lit_adr < lo_adr || lit_adr >= hi_adr) {
+    args << "  ; (?)";
+    return;
+  }
+
   args << "  ; ";
   switch (type) {
     case kT2LitUByte:
@@ -482,6 +495,10 @@
     return DumpThumb16(os, instr_ptr);
   }
 
+  // Set valid address range of backing buffer.
+  const uintptr_t lo_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->base_address_);
+  const uintptr_t hi_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->end_address_);
+
   uint32_t op2 = (instr >> 20) & 0x7F;
   std::ostringstream opcode;
   std::ostringstream args;
@@ -776,7 +793,7 @@
               if (imm5 == 0) {
                 args << "rrx";
               } else {
-                args << "ror";
+                args << "ror #" << imm5;
               }
               break;
           }
@@ -824,7 +841,7 @@
                 args << d << ", [" << Rn << ", #" << ((U == 1) ? "" : "-")
                      << (imm8 << 2) << "]";
                 if (Rn.r == 15 && U == 1) {
-                  DumpThumb2Literal(args, instr_ptr, U, imm8 << 2, kT2LitHexLong);
+                  DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, U, imm8 << 2, kT2LitHexLong);
                 }
               } else if (Rn.r == 13 && W == 1 && U == L) {  // VPUSH/VPOP
                 opcode << (L == 1 ? "vpop" : "vpush");
@@ -1262,10 +1279,10 @@
               imm32 = (S << 20) | (J2 << 19) | (J1 << 18) | (imm6 << 12) | (imm11 << 1);
               imm32 = (imm32 << 11) >> 11;  // sign extend 21 bit immediate.
             } else {
-              uint32_t I1 = ~(J1 ^ S);
-              uint32_t I2 = ~(J2 ^ S);
+              uint32_t I1 = (J1 ^ S) ^ 1;
+              uint32_t I2 = (J2 ^ S) ^ 1;
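+              // XOR with 1 inverts only bit 0; the previous operator~ also set
+              // all the high bits of I1/I2.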
               imm32 = (S << 24) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1);
-              imm32 = (imm32 << 8) >> 8;  // sign extend 24 bit immediate.
+              imm32 = (imm32 << 7) >> 7;  // sign extend 25 bit immediate.
             }
             opcode << ".w";
             DumpBranchTarget(args, instr_ptr + 4, imm32);
@@ -1410,7 +1427,7 @@
               };
               DCHECK_LT(op2 >> 1, arraysize(lit_type));
               DCHECK_NE(lit_type[op2 >> 1], kT2LitInvalid);
-              DumpThumb2Literal(args, instr_ptr, U, imm12, lit_type[op2 >> 1]);
+              DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, U, imm12, lit_type[op2 >> 1]);
             }
           } else if ((instr & 0xFC0) == 0) {
             opcode << ldr_str << sign << type << ".w";
@@ -1711,10 +1728,13 @@
           break;
       }
     } else if (opcode1 == 0x12 || opcode1 == 0x13) {  // 01001x
+      const uintptr_t lo_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->base_address_);
+      const uintptr_t hi_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->end_address_);
       ThumbRegister Rt(instr, 8);
       uint16_t imm8 = instr & 0xFF;
       opcode << "ldr";
       args << Rt << ", [pc, #" << (imm8 << 2) << "]";
+      DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, /*U*/ 1u, imm8 << 2, kT2LitHexWord);
     } else if ((opcode1 >= 0x14 && opcode1 <= 0x17) ||  // 0101xx
                (opcode1 >= 0x18 && opcode1 <= 0x1f) ||  // 011xxx
                (opcode1 >= 0x20 && opcode1 <= 0x27)) {  // 100xxx
diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc
index 5f88714..6a9afe5 100644
--- a/disassembler/disassembler_arm64.cc
+++ b/disassembler/disassembler_arm64.cc
@@ -63,9 +63,17 @@
     return;
   }
 
+  // Get address of literal. Bail if not within expected buffer range to
+  // avoid trying to fetch invalid literals (we can encounter this when
+  // interpreting raw data as instructions).
   void* data_address = instr->LiteralAddress<void*>();
-  vixl::Instr op = instr->Mask(vixl::LoadLiteralMask);
+  if (data_address < base_address_ || data_address >= end_address_) {
+    AppendToOutput(" (?)");
+    return;
+  }
 
+  // Output information on literal.
+  vixl::Instr op = instr->Mask(vixl::LoadLiteralMask);
   switch (op) {
     case vixl::LDR_w_lit:
     case vixl::LDR_x_lit:
diff --git a/disassembler/disassembler_arm64.h b/disassembler/disassembler_arm64.h
index 44fa53f..a4e5ee8 100644
--- a/disassembler/disassembler_arm64.h
+++ b/disassembler/disassembler_arm64.h
@@ -30,8 +30,11 @@
 
 class CustomDisassembler FINAL : public vixl::Disassembler {
  public:
-  explicit CustomDisassembler(DisassemblerOptions* options) :
-      vixl::Disassembler(), read_literals_(options->can_read_literals_) {
+  explicit CustomDisassembler(DisassemblerOptions* options)
+      : vixl::Disassembler(),
+        read_literals_(options->can_read_literals_),
+        base_address_(options->base_address_),
+        end_address_(options->end_address_) {
     if (!options->absolute_addresses_) {
       MapCodeAddress(0, reinterpret_cast<const vixl::Instruction*>(options->base_address_));
     }
@@ -55,6 +58,10 @@
   //           true | 0x72681558: 1c000acb  ldr s11, pc+344 (addr 0x726816b0)
   //          false | 0x72681558: 1c000acb  ldr s11, pc+344 (addr 0x726816b0) (3.40282e+38)
   const bool read_literals_;
+
+  // Valid address range: [base_address_, end_address_)
+  const void* const base_address_;
+  const void* const end_address_;
 };
 
 class DisassemblerArm64 FINAL : public Disassembler {
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index 93a0974..214222d 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -18,11 +18,13 @@
 #include <stdlib.h>
 
 #include <fstream>
+#include <functional>
 #include <iostream>
 #include <string>
 #include <vector>
 #include <set>
 #include <map>
+#include <unordered_set>
 
 #include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
@@ -34,7 +36,6 @@
 #include "image.h"
 #include "scoped_thread_state_change.h"
 #include "os.h"
-#include "gc_map.h"
 
 #include "cmdline.h"
 #include "backtrace/BacktraceMap.h"
@@ -48,16 +49,20 @@
 class ImgDiagDumper {
  public:
   explicit ImgDiagDumper(std::ostream* os,
-                       const ImageHeader& image_header,
-                       const std::string& image_location,
-                       pid_t image_diff_pid)
+                         const ImageHeader& image_header,
+                         const std::string& image_location,
+                         pid_t image_diff_pid,
+                         pid_t zygote_diff_pid)
       : os_(os),
         image_header_(image_header),
         image_location_(image_location),
-        image_diff_pid_(image_diff_pid) {}
+        image_diff_pid_(image_diff_pid),
+        zygote_diff_pid_(zygote_diff_pid) {}
 
   bool Dump() SHARED_REQUIRES(Locks::mutator_lock_) {
     std::ostream& os = *os_;
+    os << "IMAGE LOCATION: " << image_location_ << "\n\n";
+
     os << "MAGIC: " << image_header_.GetMagic() << "\n\n";
 
     os << "IMAGE BEGIN: " << reinterpret_cast<void*>(image_header_.GetImageBegin()) << "\n\n";
@@ -65,7 +70,7 @@
     bool ret = true;
     if (image_diff_pid_ >= 0) {
       os << "IMAGE DIFF PID (" << image_diff_pid_ << "): ";
-      ret = DumpImageDiff(image_diff_pid_);
+      ret = DumpImageDiff(image_diff_pid_, zygote_diff_pid_);
       os << "\n\n";
     } else {
       os << "IMAGE DIFF PID: disabled\n\n";
@@ -92,7 +97,8 @@
     return str.substr(idx + 1);
   }
 
-  bool DumpImageDiff(pid_t image_diff_pid) SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool DumpImageDiff(pid_t image_diff_pid, pid_t zygote_diff_pid)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
     std::ostream& os = *os_;
 
     {
@@ -135,11 +141,148 @@
     }
 
     // Future idea: diff against zygote so we can ignore the shared dirty pages.
-    return DumpImageDiffMap(image_diff_pid, boot_map);
+    return DumpImageDiffMap(image_diff_pid, zygote_diff_pid, boot_map);
   }
 
-    // Look at /proc/$pid/mem and only diff the things from there
-  bool DumpImageDiffMap(pid_t image_diff_pid, const backtrace_map_t& boot_map)
+  static std::string PrettyFieldValue(ArtField* field, mirror::Object* obj)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    std::ostringstream oss;
+    switch (field->GetTypeAsPrimitiveType()) {
+      case Primitive::kPrimNot: {
+        oss << obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(
+            field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimBoolean: {
+        oss << static_cast<bool>(obj->GetFieldBoolean<kVerifyNone>(field->GetOffset()));
+        break;
+      }
+      case Primitive::kPrimByte: {
+        oss << static_cast<int32_t>(obj->GetFieldByte<kVerifyNone>(field->GetOffset()));
+        break;
+      }
+      case Primitive::kPrimChar: {
+        oss << obj->GetFieldChar<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimShort: {
+        oss << obj->GetFieldShort<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimInt: {
+        oss << obj->GetField32<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimLong: {
+        oss << obj->GetField64<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimFloat: {
+        oss << obj->GetField32<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimDouble: {
+        oss << obj->GetField64<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimVoid: {
+        oss << "void";
+        break;
+      }
+    }
+    return oss.str();
+  }
+
+  // Aggregate and detail class data from an image diff.
+  struct ClassData {
+    int dirty_object_count = 0;
+
+    // Track only the byte-per-byte dirtiness (in bytes)
+    int dirty_object_byte_count = 0;
+
+    // Track the object-by-object dirtiness (in bytes)
+    int dirty_object_size_in_bytes = 0;
+
+    int clean_object_count = 0;
+
+    std::string descriptor;
+
+    int false_dirty_byte_count = 0;
+    int false_dirty_object_count = 0;
+    std::vector<mirror::Object*> false_dirty_objects;
+
+    // Remote pointers to dirty objects
+    std::vector<mirror::Object*> dirty_objects;
+  };
+
+  void DiffObjectContents(mirror::Object* obj,
+                          uint8_t* remote_bytes,
+                          std::ostream& os) SHARED_REQUIRES(Locks::mutator_lock_) {
+    const char* tabs = "    ";
+    // Attempt to find fields for all dirty bytes.
+    mirror::Class* klass = obj->GetClass();
+    if (obj->IsClass()) {
+      os << tabs << "Class " << PrettyClass(obj->AsClass()) << " " << obj << "\n";
+    } else {
+      os << tabs << "Instance of " << PrettyClass(klass) << " " << obj << "\n";
+    }
+
+    std::unordered_set<ArtField*> dirty_instance_fields;
+    std::unordered_set<ArtField*> dirty_static_fields;
+    const uint8_t* obj_bytes = reinterpret_cast<const uint8_t*>(obj);
+    mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(remote_bytes);
+    for (size_t i = 0, count = obj->SizeOf(); i < count; ++i) {
+      if (obj_bytes[i] != remote_bytes[i]) {
+        ArtField* field = ArtField::FindInstanceFieldWithOffset</*exact*/false>(klass, i);
+        if (field != nullptr) {
+          dirty_instance_fields.insert(field);
+        } else if (obj->IsClass()) {
+          field = ArtField::FindStaticFieldWithOffset</*exact*/false>(obj->AsClass(), i);
+          if (field != nullptr) {
+            dirty_static_fields.insert(field);
+          }
+        }
+        if (field == nullptr) {
+          if (klass->IsArrayClass()) {
+            mirror::Class* component_type = klass->GetComponentType();
+            Primitive::Type primitive_type = component_type->GetPrimitiveType();
+            size_t component_size = Primitive::ComponentSize(primitive_type);
+            size_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
+            if (i >= data_offset) {
+              os << tabs << "Dirty array element " << (i - data_offset) / component_size << "\n";
+              // Skip to next element to prevent spam.
+              i += component_size - 1;
+              continue;
+            }
+          }
+          os << tabs << "No field for byte offset " << i << "\n";
+        }
+      }
+    }
+    // Dump different fields. TODO: Dump field contents.
+    if (!dirty_instance_fields.empty()) {
+      os << tabs << "Dirty instance fields " << dirty_instance_fields.size() << "\n";
+      for (ArtField* field : dirty_instance_fields) {
+        os << tabs << PrettyField(field)
+           << " original=" << PrettyFieldValue(field, obj)
+           << " remote=" << PrettyFieldValue(field, remote_obj) << "\n";
+      }
+    }
+    if (!dirty_static_fields.empty()) {
+      os << tabs << "Dirty static fields " << dirty_static_fields.size() << "\n";
+      for (ArtField* field : dirty_static_fields) {
+        os << tabs << PrettyField(field)
+           << " original=" << PrettyFieldValue(field, obj)
+           << " remote=" << PrettyFieldValue(field, remote_obj) << "\n";
+      }
+    }
+    os << "\n";
+  }
+
+  // Look at /proc/$pid/mem and only diff the things from there
+  bool DumpImageDiffMap(pid_t image_diff_pid,
+                        pid_t zygote_diff_pid,
+                        const backtrace_map_t& boot_map)
     SHARED_REQUIRES(Locks::mutator_lock_) {
     std::ostream& os = *os_;
     const size_t pointer_size = InstructionSetPointerSize(
@@ -198,6 +341,20 @@
       return false;
     }
 
+    std::vector<uint8_t> zygote_contents;
+    std::unique_ptr<File> zygote_map_file;
+    if (zygote_diff_pid != -1) {
+      std::string zygote_file_name =
+          StringPrintf("/proc/%ld/mem", static_cast<long>(zygote_diff_pid));  // NOLINT [runtime/int]
+      zygote_map_file.reset(OS::OpenFileForReading(zygote_file_name.c_str()));
+      // The boot map should be at the same address.
+      zygote_contents.resize(boot_map_size);
+      if (!zygote_map_file->PreadFully(&zygote_contents[0], boot_map_size, boot_map.start)) {
+        LOG(WARNING) << "Could not fully read zygote file " << zygote_file_name;
+        zygote_contents.clear();
+      }
+    }
+
     std::string page_map_file_name = StringPrintf(
         "/proc/%ld/pagemap", static_cast<long>(image_diff_pid));  // NOLINT [runtime/int]
     auto page_map_file = std::unique_ptr<File>(OS::OpenFileForReading(page_map_file_name.c_str()));
@@ -322,16 +479,10 @@
       }
     }
 
+    std::map<mirror::Class*, ClassData> class_data;
+
     // Walk each object in the remote image space and compare it against ours
     size_t different_objects = 0;
-    std::map<mirror::Class*, int /*count*/> dirty_object_class_map;
-    // Track only the byte-per-byte dirtiness (in bytes)
-    std::map<mirror::Class*, int /*byte_count*/> dirty_object_byte_count;
-    // Track the object-by-object dirtiness (in bytes)
-    std::map<mirror::Class*, int /*byte_count*/> dirty_object_size_in_bytes;
-    std::map<mirror::Class*, int /*count*/> clean_object_class_map;
-
-    std::map<mirror::Class*, std::string> class_to_descriptor_map;
 
     std::map<off_t /* field offset */, int /* count */> art_method_field_dirty_count;
     std::vector<ArtMethod*> art_method_dirty_objects;
@@ -341,131 +492,139 @@
 
     // List of local objects that are clean, but located on dirty pages.
     std::vector<mirror::Object*> false_dirty_objects;
-    std::map<mirror::Class*, int /*byte_count*/> false_dirty_byte_count;
-    std::map<mirror::Class*, int /*object_count*/> false_dirty_object_count;
-    std::map<mirror::Class*, std::vector<mirror::Object*>> false_dirty_objects_map;
     size_t false_dirty_object_bytes = 0;
 
-    // Remote pointers to dirty objects
-    std::map<mirror::Class*, std::vector<mirror::Object*>> dirty_objects_by_class;
     // Look up remote classes by their descriptor
     std::map<std::string, mirror::Class*> remote_class_map;
     // Look up local classes by their descriptor
     std::map<std::string, mirror::Class*> local_class_map;
 
+    // Objects that are dirty against the image (possibly shared or private dirty).
+    std::set<mirror::Object*> image_dirty_objects;
+
+    // Objects that are dirty against the zygote (probably private dirty).
+    std::set<mirror::Object*> zygote_dirty_objects;
+
     size_t dirty_object_bytes = 0;
-    {
-      const uint8_t* begin_image_ptr = image_begin_unaligned;
-      const uint8_t* end_image_ptr = image_mirror_end_unaligned;
+    const uint8_t* begin_image_ptr = image_begin_unaligned;
+    const uint8_t* end_image_ptr = image_mirror_end_unaligned;
 
-      const uint8_t* current = begin_image_ptr + RoundUp(sizeof(ImageHeader), kObjectAlignment);
-      while (reinterpret_cast<const uintptr_t>(current)
-             < reinterpret_cast<const uintptr_t>(end_image_ptr)) {
-        CHECK_ALIGNED(current, kObjectAlignment);
-        mirror::Object* obj = reinterpret_cast<mirror::Object*>(const_cast<uint8_t*>(current));
+    const uint8_t* current = begin_image_ptr + RoundUp(sizeof(ImageHeader), kObjectAlignment);
+    while (reinterpret_cast<uintptr_t>(current) < reinterpret_cast<uintptr_t>(end_image_ptr)) {
+      CHECK_ALIGNED(current, kObjectAlignment);
+      mirror::Object* obj = reinterpret_cast<mirror::Object*>(const_cast<uint8_t*>(current));
 
-        // Sanity check that we are reading a real object
-        CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
-        if (kUseBakerOrBrooksReadBarrier) {
-          obj->AssertReadBarrierPointer();
+      // Sanity check that we are reading a real object
+      CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
+      if (kUseBakerOrBrooksReadBarrier) {
+        obj->AssertReadBarrierPointer();
+      }
+
+      // Iterate every page this object belongs to
+      bool on_dirty_page = false;
+      size_t page_off = 0;
+      size_t current_page_idx;
+      uintptr_t object_address;
+      do {
+        object_address = reinterpret_cast<uintptr_t>(current);
+        current_page_idx = object_address / kPageSize + page_off;
+
+        if (dirty_page_set_local.find(current_page_idx) != dirty_page_set_local.end()) {
+          // This object is on a dirty page
+          on_dirty_page = true;
         }
 
-        // Iterate every page this object belongs to
-        bool on_dirty_page = false;
-        size_t page_off = 0;
-        size_t current_page_idx;
-        uintptr_t object_address;
-        do {
-          object_address = reinterpret_cast<uintptr_t>(current);
-          current_page_idx = object_address / kPageSize + page_off;
+        page_off++;
+      } while ((current_page_idx * kPageSize) <
+               RoundUp(object_address + obj->SizeOf(), kObjectAlignment));
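+      // E.g., a 24-byte object starting 8 bytes before a page boundary straddles
+      // two pages, and the loop above tests each page index the object overlaps
+      // (plus one page past its end, which only makes the check conservative).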
 
-          if (dirty_page_set_local.find(current_page_idx) != dirty_page_set_local.end()) {
-            // This object is on a dirty page
-            on_dirty_page = true;
+      mirror::Class* klass = obj->GetClass();
+
+      // Check against the other object and see if they are different
+      ptrdiff_t offset = current - begin_image_ptr;
+      const uint8_t* current_remote = &remote_contents[offset];
+      mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(
+          const_cast<uint8_t*>(current_remote));
+
+      bool different_image_object = memcmp(current, current_remote, obj->SizeOf()) != 0;
+      if (different_image_object) {
+        bool different_zygote_object = false;
+        if (!zygote_contents.empty()) {
+          const uint8_t* zygote_ptr = &zygote_contents[offset];
+          different_zygote_object = memcmp(current, zygote_ptr, obj->SizeOf()) != 0;
+        }
+        if (different_zygote_object) {
+          // Different from zygote.
+          zygote_dirty_objects.insert(obj);
+        } else {
+          // Just different from the image.
+          image_dirty_objects.insert(obj);
+        }
+
+        different_objects++;
+        dirty_object_bytes += obj->SizeOf();
+
+        ++class_data[klass].dirty_object_count;
+
+        // Go byte-by-byte and figure out what exactly got dirtied
+        size_t dirty_byte_count_per_object = 0;
+        for (size_t i = 0; i < obj->SizeOf(); ++i) {
+          if (current[i] != current_remote[i]) {
+            dirty_byte_count_per_object++;
           }
+        }
+        class_data[klass].dirty_object_byte_count += dirty_byte_count_per_object;
+        class_data[klass].dirty_object_size_in_bytes += obj->SizeOf();
+        class_data[klass].dirty_objects.push_back(remote_obj);
+      } else {
+        ++class_data[klass].clean_object_count;
+      }
 
-          page_off++;
-        } while ((current_page_idx * kPageSize) <
-                 RoundUp(object_address + obj->SizeOf(), kObjectAlignment));
+      std::string descriptor = GetClassDescriptor(klass);
+      if (different_image_object) {
+        if (klass->IsClassClass()) {
+          // this is a "Class"
+          mirror::Class* obj_as_class = reinterpret_cast<mirror::Class*>(remote_obj);
 
-        mirror::Class* klass = obj->GetClass();
-
-        bool different_object = false;
-
-        // Check against the other object and see if they are different
-        ptrdiff_t offset = current - begin_image_ptr;
-        const uint8_t* current_remote = &remote_contents[offset];
-        mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(
-            const_cast<uint8_t*>(current_remote));
-        if (memcmp(current, current_remote, obj->SizeOf()) != 0) {
-          different_objects++;
-          dirty_object_bytes += obj->SizeOf();
-
-          ++dirty_object_class_map[klass];
-
-          // Go byte-by-byte and figure out what exactly got dirtied
-          size_t dirty_byte_count_per_object = 0;
+          // print the fields that are dirty
           for (size_t i = 0; i < obj->SizeOf(); ++i) {
             if (current[i] != current_remote[i]) {
-              dirty_byte_count_per_object++;
+              class_field_dirty_count[i]++;
             }
           }
-          dirty_object_byte_count[klass] += dirty_byte_count_per_object;
-          dirty_object_size_in_bytes[klass] += obj->SizeOf();
 
-          different_object = true;
+          class_dirty_objects.push_back(obj_as_class);
+        } else if (strcmp(descriptor.c_str(), "Ljava/lang/reflect/ArtMethod;") == 0) {
+          // this is an ArtMethod
+          ArtMethod* art_method = reinterpret_cast<ArtMethod*>(remote_obj);
 
-          dirty_objects_by_class[klass].push_back(remote_obj);
-        } else {
-          ++clean_object_class_map[klass];
-        }
-
-        std::string descriptor = GetClassDescriptor(klass);
-        if (different_object) {
-          if (strcmp(descriptor.c_str(), "Ljava/lang/Class;") == 0) {
-            // this is a "Class"
-            mirror::Class* obj_as_class  = reinterpret_cast<mirror::Class*>(remote_obj);
-
-            // print the fields that are dirty
-            for (size_t i = 0; i < obj->SizeOf(); ++i) {
-              if (current[i] != current_remote[i]) {
-                class_field_dirty_count[i]++;
-              }
+          // print the fields that are dirty
+          for (size_t i = 0; i < obj->SizeOf(); ++i) {
+            if (current[i] != current_remote[i]) {
+              art_method_field_dirty_count[i]++;
             }
-
-            class_dirty_objects.push_back(obj_as_class);
-          } else if (strcmp(descriptor.c_str(), "Ljava/lang/reflect/ArtMethod;") == 0) {
-            // this is an ArtMethod
-            ArtMethod* art_method = reinterpret_cast<ArtMethod*>(remote_obj);
-
-            // print the fields that are dirty
-            for (size_t i = 0; i < obj->SizeOf(); ++i) {
-              if (current[i] != current_remote[i]) {
-                art_method_field_dirty_count[i]++;
-              }
-            }
-
-            art_method_dirty_objects.push_back(art_method);
           }
-        } else if (on_dirty_page) {
-          // This object was either never mutated or got mutated back to the same value.
-          // TODO: Do I want to distinguish a "different" vs a "dirty" page here?
-          false_dirty_objects.push_back(obj);
-          false_dirty_objects_map[klass].push_back(obj);
-          false_dirty_object_bytes += obj->SizeOf();
-          false_dirty_byte_count[obj->GetClass()] += obj->SizeOf();
-          false_dirty_object_count[obj->GetClass()] += 1;
-        }
 
-        if (strcmp(descriptor.c_str(), "Ljava/lang/Class;") == 0) {
-          local_class_map[descriptor] = reinterpret_cast<mirror::Class*>(obj);
-          remote_class_map[descriptor] = reinterpret_cast<mirror::Class*>(remote_obj);
+          art_method_dirty_objects.push_back(art_method);
         }
-
-        // Unconditionally store the class descriptor in case we need it later
-        class_to_descriptor_map[klass] = descriptor;
-        current += RoundUp(obj->SizeOf(), kObjectAlignment);
+      } else if (on_dirty_page) {
+        // This object was either never mutated or got mutated back to the same value.
+        // TODO: Do I want to distinguish a "different" vs a "dirty" page here?
+        false_dirty_objects.push_back(obj);
+        class_data[klass].false_dirty_objects.push_back(obj);
+        false_dirty_object_bytes += obj->SizeOf();
+        class_data[obj->GetClass()].false_dirty_byte_count += obj->SizeOf();
+        class_data[obj->GetClass()].false_dirty_object_count += 1;
       }
+
+      if (strcmp(descriptor.c_str(), "Ljava/lang/Class;") == 0) {
+        local_class_map[descriptor] = reinterpret_cast<mirror::Class*>(obj);
+        remote_class_map[descriptor] = reinterpret_cast<mirror::Class*>(remote_obj);
+      }
+
+      // Unconditionally store the class descriptor in case we need it later
+      class_data[klass].descriptor = descriptor;
+      current += RoundUp(obj->SizeOf(), kObjectAlignment);
     }
 
     // Looking at only dirty pages, figure out how many of those bytes belong to dirty objects.
@@ -489,17 +648,39 @@
        << "";
 
     // vector of pairs (int count, Class*)
-    auto dirty_object_class_values = SortByValueDesc(dirty_object_class_map);
-    auto clean_object_class_values = SortByValueDesc(clean_object_class_map);
+    auto dirty_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
+        class_data, [](const ClassData& d) { return d.dirty_object_count; });
+    auto clean_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
+        class_data, [](const ClassData& d) { return d.clean_object_count; });
+
+    if (!zygote_dirty_objects.empty()) {
+      os << "\n" << "  Dirty objects compared to zygote (probably private dirty): "
+         << zygote_dirty_objects.size() << "\n";
+      for (mirror::Object* obj : zygote_dirty_objects) {
+        const uint8_t* obj_bytes = reinterpret_cast<const uint8_t*>(obj);
+        ptrdiff_t offset = obj_bytes - begin_image_ptr;
+        uint8_t* remote_bytes = &zygote_contents[offset];
+        DiffObjectContents(obj, remote_bytes, os);
+      }
+    }
+    os << "\n" << "  Dirty objects compared to image (private or shared dirty): "
+       << image_dirty_objects.size() << "\n";
+    for (mirror::Object* obj : image_dirty_objects) {
+      const uint8_t* obj_bytes = reinterpret_cast<const uint8_t*>(obj);
+      ptrdiff_t offset = obj_bytes - begin_image_ptr;
+      uint8_t* remote_bytes = &remote_contents[offset];
+      DiffObjectContents(obj, remote_bytes, os);
+    }
 
     os << "\n" << "  Dirty object count by class:\n";
     for (const auto& vk_pair : dirty_object_class_values) {
       int dirty_object_count = vk_pair.first;
       mirror::Class* klass = vk_pair.second;
-      int object_sizes = dirty_object_size_in_bytes[klass];
-      float avg_dirty_bytes_per_class = dirty_object_byte_count[klass] * 1.0f / object_sizes;
+      int object_sizes = class_data[klass].dirty_object_size_in_bytes;
+      float avg_dirty_bytes_per_class =
+          class_data[klass].dirty_object_byte_count * 1.0f / object_sizes;
       float avg_object_size = object_sizes * 1.0f / dirty_object_count;
-      const std::string& descriptor = class_to_descriptor_map[klass];
+      const std::string& descriptor = class_data[klass].descriptor;
       os << "    " << PrettyClass(klass) << " ("
          << "objects: " << dirty_object_count << ", "
          << "avg dirty bytes: " << avg_dirty_bytes_per_class << ", "
@@ -518,7 +699,8 @@
         os << "\n";
 
         os << "      dirty byte +offset:count list = ";
-        auto art_method_field_dirty_count_sorted = SortByValueDesc(art_method_field_dirty_count);
+        auto art_method_field_dirty_count_sorted =
+            SortByValueDesc<off_t, int, int>(art_method_field_dirty_count);
         for (auto pair : art_method_field_dirty_count_sorted) {
           off_t offset = pair.second;
           int count = pair.first;
@@ -529,7 +711,7 @@
         os << "\n";
 
         os << "      field contents:\n";
-        const auto& dirty_objects_list = dirty_objects_by_class[klass];
+        const auto& dirty_objects_list = class_data[klass].dirty_objects;
         for (mirror::Object* obj : dirty_objects_list) {
           // remote method
           auto art_method = reinterpret_cast<ArtMethod*>(obj);
@@ -568,7 +750,8 @@
         os << "\n";
 
         os << "       dirty byte +offset:count list = ";
-        auto class_field_dirty_count_sorted = SortByValueDesc(class_field_dirty_count);
+        auto class_field_dirty_count_sorted =
+            SortByValueDesc<off_t, int, int>(class_field_dirty_count);
         for (auto pair : class_field_dirty_count_sorted) {
           off_t offset = pair.second;
           int count = pair.first;
@@ -578,7 +761,7 @@
         os << "\n";
 
         os << "      field contents:\n";
-        const auto& dirty_objects_list = dirty_objects_by_class[klass];
+        const auto& dirty_objects_list = class_data[klass].dirty_objects;
         for (mirror::Object* obj : dirty_objects_list) {
           // remote class object
           auto remote_klass = reinterpret_cast<mirror::Class*>(obj);
@@ -596,15 +779,16 @@
       }
     }
 
-    auto false_dirty_object_class_values = SortByValueDesc(false_dirty_object_count);
+    auto false_dirty_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
+        class_data, [](const ClassData& d) { return d.false_dirty_object_count; });
 
     os << "\n" << "  False-dirty object count by class:\n";
     for (const auto& vk_pair : false_dirty_object_class_values) {
       int object_count = vk_pair.first;
       mirror::Class* klass = vk_pair.second;
-      int object_sizes = false_dirty_byte_count[klass];
+      int object_sizes = class_data[klass].false_dirty_byte_count;
       float avg_object_size = object_sizes * 1.0f / object_count;
-      const std::string& descriptor = class_to_descriptor_map[klass];
+      const std::string& descriptor = class_data[klass].descriptor;
       os << "    " << PrettyClass(klass) << " ("
          << "objects: " << object_count << ", "
          << "avg object size: " << avg_object_size << ", "
@@ -613,7 +797,7 @@
          << ")\n";
 
       if (strcmp(descriptor.c_str(), "Ljava/lang/reflect/ArtMethod;") == 0) {
-        auto& art_method_false_dirty_objects = false_dirty_objects_map[klass];
+        auto& art_method_false_dirty_objects = class_data[klass].false_dirty_objects;
 
         os << "      field contents:\n";
         for (mirror::Object* obj : art_method_false_dirty_objects) {
@@ -692,14 +876,16 @@
     return std::string(descriptor_str);
   }
 
-  template <typename K, typename V>
-  static std::vector<std::pair<V, K>> SortByValueDesc(const std::map<K, V> map) {
+  template <typename K, typename V, typename D>
+  static std::vector<std::pair<V, K>> SortByValueDesc(
+      const std::map<K, D>& map,
+      std::function<V(const D&)> value_mapper = [](const D& d) { return static_cast<V>(d); }) {
     // Store value->key so that we can use the default sort from pair which
     // sorts by value first and then key
     std::vector<std::pair<V, K>> value_key_vector;
 
     for (const auto& kv_pair : map) {
-      value_key_vector.push_back(std::make_pair(kv_pair.second, kv_pair.first));
+      value_key_vector.push_back(std::make_pair(value_mapper(kv_pair.second), kv_pair.first));
     }
 
     // Sort in reverse (descending order)
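
For reference, the two call patterns this patch uses (types copied from the call sites above):

    // With an explicit projection out of the aggregated ClassData:
    auto dirty = SortByValueDesc<mirror::Class*, int, ClassData>(
        class_data, [](const ClassData& d) { return d.dirty_object_count; });
    // With the default mapper, which simply casts the mapped value:
    auto counts = SortByValueDesc<off_t, int, int>(art_method_field_dirty_count);
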
@@ -822,11 +1008,15 @@
   const ImageHeader& image_header_;
   const std::string image_location_;
   pid_t image_diff_pid_;  // Dump image diff against boot.art if pid is non-negative
+  pid_t zygote_diff_pid_;  // Dump image diff against the zygote's boot.art if pid is non-negative
 
   DISALLOW_COPY_AND_ASSIGN(ImgDiagDumper);
 };
 
-static int DumpImage(Runtime* runtime, std::ostream* os, pid_t image_diff_pid) {
+static int DumpImage(Runtime* runtime,
+                     std::ostream* os,
+                     pid_t image_diff_pid,
+                     pid_t zygote_diff_pid) {
   ScopedObjectAccess soa(Thread::Current());
   gc::Heap* heap = runtime->GetHeap();
   std::vector<gc::space::ImageSpace*> image_spaces = heap->GetBootImageSpaces();
@@ -838,8 +1028,11 @@
       return EXIT_FAILURE;
     }
 
-    ImgDiagDumper img_diag_dumper(
-        os, image_header, image_space->GetImageLocation(), image_diff_pid);
+    ImgDiagDumper img_diag_dumper(os,
+                                  image_header,
+                                  image_space->GetImageLocation(),
+                                  image_diff_pid,
+                                  zygote_diff_pid);
     if (!img_diag_dumper.Dump()) {
       return EXIT_FAILURE;
     }
@@ -867,6 +1060,13 @@
         *error_msg = "Image diff pid out of range";
         return kParseError;
       }
+    } else if (option.starts_with("--zygote-diff-pid=")) {
+      const char* zygote_diff_pid = option.substr(strlen("--zygote-diff-pid=")).data();
+
+      if (!ParseInt(zygote_diff_pid, &zygote_diff_pid_)) {
+        *error_msg = "Zygote diff pid out of range";
+        return kParseError;
+      }
     } else {
       return kParseUnknownArgument;
     }
@@ -916,6 +1116,9 @@
     usage +=  // Optional.
         "  --image-diff-pid=<pid>: provide the PID of a process whose boot.art you want to diff.\n"
         "      Example: --image-diff-pid=$(pid zygote)\n"
+        "  --zygote-diff-pid=<pid>: provide the PID of the zygote whose boot.art you want to diff "
+        "against.\n"
+        "      Example: --zygote-diff-pid=$(pid zygote)\n"
         "\n";
 
     return usage;
@@ -923,6 +1126,7 @@
 
  public:
   pid_t image_diff_pid_ = -1;
+  pid_t zygote_diff_pid_ = -1;
 };
 
 struct ImgDiagMain : public CmdlineMain<ImgDiagArgs> {
@@ -931,7 +1135,8 @@
 
     return DumpImage(runtime,
                      args_->os_,
-                     args_->image_diff_pid_) == EXIT_SUCCESS;
+                     args_->image_diff_pid_,
+                     args_->zygote_diff_pid_) == EXIT_SUCCESS;
   }
 };
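
With both options wired through, a combined invocation follows the usage examples above (process names and image path illustrative):

    imgdiag --image-diff-pid=$(pid system_server) --zygote-diff-pid=$(pid zygote) --boot-image=/system/framework/boot.art
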
 
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index dc101e5..9f771ba 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -36,6 +36,8 @@
 static const char* kImgDiagBootImage = "--boot-image";
 static const char* kImgDiagBinaryName = "imgdiag";
 
+static const char* kImgDiagZygoteDiffPid = "--zygote-diff-pid";
+
 // from kernel <include/linux/threads.h>
 #define PID_MAX_LIMIT (4*1024*1024)  // Upper bound. Most kernel configs will have smaller max pid.
 
@@ -90,17 +92,25 @@
 
     // Run imgdiag --image-diff-pid=$image_diff_pid and wait until it's done with a 0 exit code.
     std::string diff_pid_args;
+    std::string zygote_diff_pid_args;
     {
       std::stringstream diff_pid_args_ss;
       diff_pid_args_ss << kImgDiagDiffPid << "=" << image_diff_pid;
       diff_pid_args = diff_pid_args_ss.str();
     }
-    std::string boot_image_args;
     {
-      boot_image_args = boot_image_args + kImgDiagBootImage + "=" + boot_image;
+      std::stringstream zygote_pid_args_ss;
+      zygote_pid_args_ss << kImgDiagZygoteDiffPid << "=" << image_diff_pid;
+      zygote_diff_pid_args = zygote_pid_args_ss.str();
     }
+    std::string boot_image_args = std::string(kImgDiagBootImage) + "=" + boot_image;
 
-    std::vector<std::string> exec_argv = { file_path, diff_pid_args, boot_image_args };
+    std::vector<std::string> exec_argv = {
+        file_path,
+        diff_pid_args,
+        zygote_diff_pid_args,
+        boot_image_args
+    };
 
     return ::art::Exec(exec_argv, error_msg);
   }
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 48a9d91..f5458c0 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -38,7 +38,6 @@
 #include "dex_instruction.h"
 #include "disassembler.h"
 #include "elf_builder.h"
-#include "gc_map.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
@@ -46,7 +45,6 @@
 #include "indenter.h"
 #include "linker/buffered_output_stream.h"
 #include "linker/file_output_stream.h"
-#include "mapping_table.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
@@ -61,9 +59,8 @@
 #include "stack_map.h"
 #include "ScopedLocalRef.h"
 #include "thread_list.h"
-#include "verifier/dex_gc_map.h"
+#include "type_lookup_table.h"
 #include "verifier/method_verifier.h"
-#include "vmap_table.h"
 #include "well_known_classes.h"
 
 #include <sys/stat.h>
@@ -248,6 +245,10 @@
       return;
     }
 
+    uint32_t entry_point = oat_method.GetCodeOffset() - oat_header.GetExecutableOffset();
+    // Clear the Thumb2 bit: on ARM, bit 0 of an entry point encodes the
+    // instruction set mode and is not part of the code address.
+    const void* code_address = EntryPointToCodePointer(reinterpret_cast<void*>(entry_point));
+
     debug::MethodDebugInfo info = debug::MethodDebugInfo();
     info.trampoline_name = nullptr;
     info.dex_file = &dex_file;
@@ -260,7 +261,7 @@
     info.is_native_debuggable = oat_header.IsNativeDebuggable();
     info.is_optimized = method_header->IsOptimized();
     info.is_code_address_text_relative = true;
-    info.code_address = oat_method.GetCodeOffset() - oat_header.GetExecutableOffset();
+    info.code_address = reinterpret_cast<uintptr_t>(code_address);
     info.code_size = method_header->GetCodeSize();
     info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
     info.code_info = info.is_optimized ? method_header->GetOptimizedCodeInfoPtr() : nullptr;
@@ -279,9 +280,7 @@
 
 class OatDumperOptions {
  public:
-  OatDumperOptions(bool dump_raw_mapping_table,
-                   bool dump_raw_gc_map,
-                   bool dump_vmap,
+  OatDumperOptions(bool dump_vmap,
                    bool dump_code_info_stack_maps,
                    bool disassemble_code,
                    bool absolute_addresses,
@@ -294,9 +293,7 @@
                    const char* app_image,
                    const char* app_oat,
                    uint32_t addr2instr)
-    : dump_raw_mapping_table_(dump_raw_mapping_table),
-      dump_raw_gc_map_(dump_raw_gc_map),
-      dump_vmap_(dump_vmap),
+    : dump_vmap_(dump_vmap),
       dump_code_info_stack_maps_(dump_code_info_stack_maps),
       disassemble_code_(disassemble_code),
       absolute_addresses_(absolute_addresses),
@@ -311,8 +308,6 @@
       addr2instr_(addr2instr),
       class_loader_(nullptr) {}
 
-  const bool dump_raw_mapping_table_;
-  const bool dump_raw_gc_map_;
   const bool dump_vmap_;
   const bool dump_code_info_stack_maps_;
   const bool disassemble_code_;
@@ -340,6 +335,7 @@
       disassembler_(Disassembler::Create(instruction_set_,
                                          new DisassemblerOptions(options_.absolute_addresses_,
                                                                  oat_file.Begin(),
+                                                                 oat_file.End(),
                                                                  true /* can_read_literals_ */))) {
     CHECK(options_.class_loader_ != nullptr);
     CHECK(options_.class_filter_ != nullptr);
@@ -569,9 +565,7 @@
       code_offset &= ~0x1;
     }
     offsets_.insert(code_offset);
-    offsets_.insert(oat_method.GetMappingTableOffset());
     offsets_.insert(oat_method.GetVmapTableOffset());
-    offsets_.insert(oat_method.GetGcMapOffset());
   }
 
   bool DumpOatDexFile(std::ostream& os, const OatFile::OatDexFile& oat_dex_file) {
@@ -581,8 +575,15 @@
     os << StringPrintf("location: %s\n", oat_dex_file.GetDexFileLocation().c_str());
     os << StringPrintf("checksum: 0x%08x\n", oat_dex_file.GetDexFileLocationChecksum());
 
-    // Create the verifier early.
+    // Print embedded dex file data range.
+    const uint8_t* const oat_file_begin = oat_dex_file.GetOatFile()->Begin();
+    const uint8_t* const dex_file_pointer = oat_dex_file.GetDexFilePointer();
+    uint32_t dex_offset = dchecked_integral_cast<uint32_t>(dex_file_pointer - oat_file_begin);
+    os << StringPrintf("dex-file: 0x%08x..0x%08x\n",
+                       dex_offset,
+                       dchecked_integral_cast<uint32_t>(dex_offset + oat_dex_file.FileSize() - 1));
 
+    // Create the dex file early. A lot of print-out things depend on it.
     std::string error_msg;
     const DexFile* const dex_file = OpenDexFile(&oat_dex_file, &error_msg);
     if (dex_file == nullptr) {
@@ -591,6 +592,16 @@
       return false;
     }
 
+    // Print lookup table, if it exists.
+    if (oat_dex_file.GetLookupTableData() != nullptr) {
+      uint32_t table_offset = dchecked_integral_cast<uint32_t>(
+          oat_dex_file.GetLookupTableData() - oat_file_begin);
+      uint32_t table_size = TypeLookupTable::RawDataLength(*dex_file);
+      os << StringPrintf("type-table: 0x%08x..0x%08x\n",
+                         table_offset,
+                         table_offset + table_size - 1);
+    }
+
     VariableIndentationOutputStream vios(&os);
     ScopedIndentation indent1(&vios);
     for (size_t class_def_index = 0;
@@ -840,22 +851,6 @@
         success = false;
       }
       vios->Stream() << "\n";
-
-      vios->Stream() << "gc_map: ";
-      if (options_.absolute_addresses_) {
-        vios->Stream() << StringPrintf("%p ", oat_method.GetGcMap());
-      }
-      uint32_t gc_map_offset = oat_method.GetGcMapOffset();
-      vios->Stream() << StringPrintf("(offset=0x%08x)\n", gc_map_offset);
-      if (gc_map_offset > oat_file_.Size()) {
-        vios->Stream() << StringPrintf("WARNING: "
-                           "gc map table offset 0x%08x is past end of file 0x%08zx.\n",
-                           gc_map_offset, oat_file_.Size());
-        success = false;
-      } else if (options_.dump_raw_gc_map_) {
-        ScopedIndentation indent3(vios);
-        DumpGcMap(vios->Stream(), oat_method, code_item);
-      }
     }
     {
       vios->Stream() << "OatQuickMethodHeader ";
@@ -876,24 +871,6 @@
       }
 
       ScopedIndentation indent2(vios);
-      vios->Stream() << "mapping_table: ";
-      if (options_.absolute_addresses_) {
-        vios->Stream() << StringPrintf("%p ", oat_method.GetMappingTable());
-      }
-      uint32_t mapping_table_offset = oat_method.GetMappingTableOffset();
-      vios->Stream() << StringPrintf("(offset=0x%08x)\n", oat_method.GetMappingTableOffset());
-      if (mapping_table_offset > oat_file_.Size()) {
-        vios->Stream() << StringPrintf("WARNING: "
-                                       "mapping table offset 0x%08x is past end of file 0x%08zx. "
-                                       "mapping table offset was loaded from offset 0x%08x.\n",
-                                       mapping_table_offset, oat_file_.Size(),
-                                       oat_method.GetMappingTableOffsetOffset());
-        success = false;
-      } else if (options_.dump_raw_mapping_table_) {
-        ScopedIndentation indent3(vios);
-        DumpMappingTable(vios, oat_method);
-      }
-
       vios->Stream() << "vmap_table: ";
       if (options_.absolute_addresses_) {
         vios->Stream() << StringPrintf("%p ", oat_method.GetVmapTable());
@@ -970,7 +947,7 @@
           success = false;
           if (options_.disassemble_code_) {
             if (code_size_offset + kPrologueBytes <= oat_file_.Size()) {
-              DumpCode(vios, verifier.get(), oat_method, code_item, true, kPrologueBytes);
+              DumpCode(vios, oat_method, code_item, true, kPrologueBytes);
             }
           }
         } else if (code_size > kMaxCodeSize) {
@@ -983,11 +960,11 @@
           success = false;
           if (options_.disassemble_code_) {
             if (code_size_offset + kPrologueBytes <= oat_file_.Size()) {
-              DumpCode(vios, verifier.get(), oat_method, code_item, true, kPrologueBytes);
+              DumpCode(vios, oat_method, code_item, true, kPrologueBytes);
             }
           }
         } else if (options_.disassemble_code_) {
-          DumpCode(vios, verifier.get(), oat_method, code_item, !success, 0);
+          DumpCode(vios, oat_method, code_item, !success, 0);
         }
       }
     }
@@ -1037,12 +1014,7 @@
       ScopedIndentation indent(vios);
       vios->Stream() << "quickened data\n";
     } else {
-      // Otherwise, display the vmap table.
-      const uint8_t* raw_table = oat_method.GetVmapTable();
-      if (raw_table != nullptr) {
-        VmapTable vmap_table(raw_table);
-        DumpVmapTable(vios->Stream(), oat_method, vmap_table);
-      }
+      // Otherwise, there is nothing to display.
     }
   }
 
@@ -1057,32 +1029,6 @@
                    options_.dump_code_info_stack_maps_);
   }
 
-  // Display a vmap table.
-  void DumpVmapTable(std::ostream& os,
-                     const OatFile::OatMethod& oat_method,
-                     const VmapTable& vmap_table) {
-    bool first = true;
-    bool processing_fp = false;
-    uint32_t spill_mask = oat_method.GetCoreSpillMask();
-    for (size_t i = 0; i < vmap_table.Size(); i++) {
-      uint16_t dex_reg = vmap_table[i];
-      uint32_t cpu_reg = vmap_table.ComputeRegister(spill_mask, i,
-                                                    processing_fp ? kFloatVReg : kIntVReg);
-      os << (first ? "v" : ", v")  << dex_reg;
-      if (!processing_fp) {
-        os << "/r" << cpu_reg;
-      } else {
-        os << "/fr" << cpu_reg;
-      }
-      first = false;
-      if (!processing_fp && dex_reg == 0xFFFF) {
-        processing_fp = true;
-        spill_mask = oat_method.GetFpSpillMask();
-      }
-    }
-    os << "\n";
-  }
-
   void DumpVregLocations(std::ostream& os, const OatFile::OatMethod& oat_method,
                          const DexFile::CodeItem* code_item) {
     if (code_item != nullptr) {
@@ -1125,207 +1071,6 @@
     }
   }
 
-  void DescribeVReg(std::ostream& os, const OatFile::OatMethod& oat_method,
-                    const DexFile::CodeItem* code_item, size_t reg, VRegKind kind) {
-    const uint8_t* raw_table = oat_method.GetVmapTable();
-    if (raw_table != nullptr) {
-      const VmapTable vmap_table(raw_table);
-      uint32_t vmap_offset;
-      if (vmap_table.IsInContext(reg, kind, &vmap_offset)) {
-        bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-        uint32_t spill_mask = is_float ? oat_method.GetFpSpillMask()
-                                       : oat_method.GetCoreSpillMask();
-        os << (is_float ? "fr" : "r") << vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
-      } else {
-        uint32_t offset = StackVisitor::GetVRegOffsetFromQuickCode(
-            code_item,
-            oat_method.GetCoreSpillMask(),
-            oat_method.GetFpSpillMask(),
-            oat_method.GetFrameSizeInBytes(),
-            reg,
-            GetInstructionSet());
-        os << "[sp + #" << offset << "]";
-      }
-    }
-  }
-
-  void DumpGcMapRegisters(std::ostream& os, const OatFile::OatMethod& oat_method,
-                          const DexFile::CodeItem* code_item,
-                          size_t num_regs, const uint8_t* reg_bitmap) {
-    bool first = true;
-    for (size_t reg = 0; reg < num_regs; reg++) {
-      if (((reg_bitmap[reg / 8] >> (reg % 8)) & 0x01) != 0) {
-        if (first) {
-          os << "  v" << reg << " (";
-          DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
-          os << ")";
-          first = false;
-        } else {
-          os << ", v" << reg << " (";
-          DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
-          os << ")";
-        }
-      }
-    }
-    if (first) {
-      os << "No registers in GC map\n";
-    } else {
-      os << "\n";
-    }
-  }
-  void DumpGcMap(std::ostream& os, const OatFile::OatMethod& oat_method,
-                 const DexFile::CodeItem* code_item) {
-    const uint8_t* gc_map_raw = oat_method.GetGcMap();
-    if (gc_map_raw == nullptr) {
-      return;  // No GC map.
-    }
-    const void* quick_code = oat_method.GetQuickCode();
-    NativePcOffsetToReferenceMap map(gc_map_raw);
-    for (size_t entry = 0; entry < map.NumEntries(); entry++) {
-      const uint8_t* native_pc = reinterpret_cast<const uint8_t*>(quick_code) +
-          map.GetNativePcOffset(entry);
-      os << StringPrintf("%p", native_pc);
-      DumpGcMapRegisters(os, oat_method, code_item, map.RegWidth() * 8, map.GetBitMap(entry));
-    }
-  }
-
-  void DumpMappingTable(VariableIndentationOutputStream* vios,
-                        const OatFile::OatMethod& oat_method) {
-    const void* quick_code = oat_method.GetQuickCode();
-    if (quick_code == nullptr) {
-      return;
-    }
-    MappingTable table(oat_method.GetMappingTable());
-    if (table.TotalSize() != 0) {
-      if (table.PcToDexSize() != 0) {
-        typedef MappingTable::PcToDexIterator It;
-        vios->Stream() << "suspend point mappings {\n";
-        for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-          ScopedIndentation indent1(vios);
-          vios->Stream() << StringPrintf("0x%04x -> 0x%04x\n", cur.NativePcOffset(), cur.DexPc());
-        }
-        vios->Stream() << "}\n";
-      }
-      if (table.DexToPcSize() != 0) {
-        typedef MappingTable::DexToPcIterator It;
-        vios->Stream() << "catch entry mappings {\n";
-        for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-          ScopedIndentation indent1(vios);
-          vios->Stream() << StringPrintf("0x%04x -> 0x%04x\n", cur.NativePcOffset(), cur.DexPc());
-        }
-        vios->Stream() << "}\n";
-      }
-    }
-  }
-
-  uint32_t DumpInformationAtOffset(VariableIndentationOutputStream* vios,
-                                   const OatFile::OatMethod& oat_method,
-                                   const DexFile::CodeItem* code_item,
-                                   size_t offset,
-                                   bool suspend_point_mapping) {
-    if (IsMethodGeneratedByOptimizingCompiler(oat_method, code_item)) {
-      if (suspend_point_mapping) {
-        ScopedIndentation indent1(vios);
-        DumpDexRegisterMapAtOffset(vios, oat_method, code_item, offset);
-      }
-      // The return value is not used in the case of a method compiled
-      // with the optimizing compiler.
-      return DexFile::kDexNoIndex;
-    } else {
-      return DumpMappingAtOffset(vios->Stream(), oat_method, offset, suspend_point_mapping);
-    }
-  }
-
-  uint32_t DumpMappingAtOffset(std::ostream& os, const OatFile::OatMethod& oat_method,
-                               size_t offset, bool suspend_point_mapping) {
-    MappingTable table(oat_method.GetMappingTable());
-    if (suspend_point_mapping && table.PcToDexSize() > 0) {
-      typedef MappingTable::PcToDexIterator It;
-      for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-        if (offset == cur.NativePcOffset()) {
-          os << StringPrintf("suspend point dex PC: 0x%04x\n", cur.DexPc());
-          return cur.DexPc();
-        }
-      }
-    } else if (!suspend_point_mapping && table.DexToPcSize() > 0) {
-      typedef MappingTable::DexToPcIterator It;
-      for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-        if (offset == cur.NativePcOffset()) {
-          os << StringPrintf("catch entry dex PC: 0x%04x\n", cur.DexPc());
-          return cur.DexPc();
-        }
-      }
-    }
-    return DexFile::kDexNoIndex;
-  }
-
-  void DumpGcMapAtNativePcOffset(std::ostream& os, const OatFile::OatMethod& oat_method,
-                                 const DexFile::CodeItem* code_item, size_t native_pc_offset) {
-    const uint8_t* gc_map_raw = oat_method.GetGcMap();
-    if (gc_map_raw != nullptr) {
-      NativePcOffsetToReferenceMap map(gc_map_raw);
-      if (map.HasEntry(native_pc_offset)) {
-        size_t num_regs = map.RegWidth() * 8;
-        const uint8_t* reg_bitmap = map.FindBitMap(native_pc_offset);
-        bool first = true;
-        for (size_t reg = 0; reg < num_regs; reg++) {
-          if (((reg_bitmap[reg / 8] >> (reg % 8)) & 0x01) != 0) {
-            if (first) {
-              os << "GC map objects:  v" << reg << " (";
-              DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
-              os << ")";
-              first = false;
-            } else {
-              os << ", v" << reg << " (";
-              DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
-              os << ")";
-            }
-          }
-        }
-        if (!first) {
-          os << "\n";
-        }
-      }
-    }
-  }
-
-  void DumpVRegsAtDexPc(std::ostream& os, verifier::MethodVerifier* verifier,
-                        const OatFile::OatMethod& oat_method,
-                        const DexFile::CodeItem* code_item, uint32_t dex_pc) {
-    DCHECK(verifier != nullptr);
-    std::vector<int32_t> kinds = verifier->DescribeVRegs(dex_pc);
-    bool first = true;
-    for (size_t reg = 0; reg < code_item->registers_size_; reg++) {
-      VRegKind kind = static_cast<VRegKind>(kinds.at(reg * 2));
-      if (kind != kUndefined) {
-        if (first) {
-          os << "VRegs:  v";
-          first = false;
-        } else {
-          os << ", v";
-        }
-        os << reg << " (";
-        switch (kind) {
-          case kImpreciseConstant:
-            os << "Imprecise Constant: " << kinds.at((reg * 2) + 1) << ", ";
-            DescribeVReg(os, oat_method, code_item, reg, kind);
-            break;
-          case kConstant:
-            os << "Constant: " << kinds.at((reg * 2) + 1);
-            break;
-          default:
-            DescribeVReg(os, oat_method, code_item, reg, kind);
-            break;
-        }
-        os << ")";
-      }
-    }
-    if (!first) {
-      os << "\n";
-    }
-  }
-
-
   void DumpDexCode(std::ostream& os, const DexFile& dex_file, const DexFile::CodeItem* code_item) {
     if (code_item != nullptr) {
       size_t i = 0;
@@ -1346,7 +1091,7 @@
     // null, then this method has been compiled with the optimizing
     // compiler.
     return oat_method.GetQuickCode() != nullptr &&
-           oat_method.GetGcMap() == nullptr &&
+           oat_method.GetVmapTable() != nullptr &&
            code_item != nullptr;
   }
 
@@ -1362,27 +1107,6 @@
            code_item != nullptr;
   }
 
-  void DumpDexRegisterMapAtOffset(VariableIndentationOutputStream* vios,
-                                  const OatFile::OatMethod& oat_method,
-                                  const DexFile::CodeItem* code_item,
-                                  size_t offset) {
-    // This method is only relevant for oat methods compiled with the
-    // optimizing compiler.
-    DCHECK(IsMethodGeneratedByOptimizingCompiler(oat_method, code_item));
-
-    // The optimizing compiler outputs its CodeInfo data in the vmap table.
-    const void* raw_code_info = oat_method.GetVmapTable();
-    if (raw_code_info != nullptr) {
-      CodeInfo code_info(raw_code_info);
-      StackMapEncoding encoding = code_info.ExtractEncoding();
-      StackMap stack_map = code_info.GetStackMapForNativePcOffset(offset, encoding);
-      if (stack_map.IsValid()) {
-        stack_map.Dump(vios, code_info, encoding, oat_method.GetCodeOffset(),
-                       code_item->registers_size_);
-      }
-    }
-  }
-
   verifier::MethodVerifier* DumpVerifier(VariableIndentationOutputStream* vios,
                                          StackHandleScope<1>* hs,
                                          uint32_t dex_method_idx,
@@ -1405,8 +1129,92 @@
     return nullptr;
   }
 
+  // The StackMapsHelper provides the stack maps in native PC order.
+  // For identical native PCs, the order from the CodeInfo is preserved.
+  class StackMapsHelper {
+   public:
+    explicit StackMapsHelper(const uint8_t* raw_code_info)
+        : code_info_(raw_code_info),
+          encoding_(code_info_.ExtractEncoding()),
+          number_of_stack_maps_(code_info_.GetNumberOfStackMaps(encoding_)),
+          indexes_(),
+          offset_(static_cast<size_t>(-1)),
+          stack_map_index_(0u) {
+      if (number_of_stack_maps_ != 0u) {
+        // Check if native PCs are ordered.
+        bool ordered = true;
+        StackMap last = code_info_.GetStackMapAt(0u, encoding_);
+        for (size_t i = 1; i != number_of_stack_maps_; ++i) {
+          StackMap current = code_info_.GetStackMapAt(i, encoding_);
+          if (last.GetNativePcOffset(encoding_.stack_map_encoding) >
+              current.GetNativePcOffset(encoding_.stack_map_encoding)) {
+            ordered = false;
+            break;
+          }
+          last = current;
+        }
+        if (!ordered) {
+          // Create indirection indexes for access in native PC order. We do not optimize
+          // for the fact that there can currently be only two separately ordered ranges,
+          // namely normal stack maps and catch-point stack maps.
+          indexes_.resize(number_of_stack_maps_);
+          std::iota(indexes_.begin(), indexes_.end(), 0u);
+          std::sort(indexes_.begin(),
+                    indexes_.end(),
+                    [this](size_t lhs, size_t rhs) {
+                      StackMap left = code_info_.GetStackMapAt(lhs, encoding_);
+                      uint32_t left_pc = left.GetNativePcOffset(encoding_.stack_map_encoding);
+                      StackMap right = code_info_.GetStackMapAt(rhs, encoding_);
+                      uint32_t right_pc = right.GetNativePcOffset(encoding_.stack_map_encoding);
+                      // If the PCs are the same, compare indexes to preserve the original order.
+                      return (left_pc < right_pc) || (left_pc == right_pc && lhs < rhs);
+                    });
+        }
+        offset_ = GetStackMapAt(0).GetNativePcOffset(encoding_.stack_map_encoding);
+      }
+    }
+
+    const CodeInfo& GetCodeInfo() const {
+      return code_info_;
+    }
+
+    const CodeInfoEncoding& GetEncoding() const {
+      return encoding_;
+    }
+
+    size_t GetOffset() const {
+      return offset_;
+    }
+
+    StackMap GetStackMap() const {
+      return GetStackMapAt(stack_map_index_);
+    }
+
+    void Next() {
+      ++stack_map_index_;
+      offset_ = (stack_map_index_ == number_of_stack_maps_)
+          ? static_cast<size_t>(-1)
+          : GetStackMapAt(stack_map_index_).GetNativePcOffset(encoding_.stack_map_encoding);
+    }
+
+   private:
+    StackMap GetStackMapAt(size_t i) const {
+      if (!indexes_.empty()) {
+        i = indexes_[i];
+      }
+      DCHECK_LT(i, number_of_stack_maps_);
+      return code_info_.GetStackMapAt(i, encoding_);
+    }
+
+    const CodeInfo code_info_;
+    const CodeInfoEncoding encoding_;
+    const size_t number_of_stack_maps_;
+    dchecked_vector<size_t> indexes_;  // Used if stack map native PCs are not ordered.
+    size_t offset_;
+    size_t stack_map_index_;
+  };
+
   void DumpCode(VariableIndentationOutputStream* vios,
-                verifier::MethodVerifier* verifier,
                 const OatFile::OatMethod& oat_method, const DexFile::CodeItem* code_item,
                 bool bad_input, size_t code_size) {
     const void* quick_code = oat_method.GetQuickCode();
@@ -1417,24 +1225,34 @@
     if (code_size == 0 || quick_code == nullptr) {
       vios->Stream() << "NO CODE!\n";
       return;
+    } else if (!bad_input && IsMethodGeneratedByOptimizingCompiler(oat_method, code_item)) {
+      // The optimizing compiler outputs its CodeInfo data in the vmap table.
+      StackMapsHelper helper(oat_method.GetVmapTable());
+      const uint8_t* quick_native_pc = reinterpret_cast<const uint8_t*>(quick_code);
+      size_t offset = 0;
+      while (offset < code_size) {
+        offset += disassembler_->Dump(vios->Stream(), quick_native_pc + offset);
+        if (offset == helper.GetOffset()) {
+          ScopedIndentation indent1(vios);
+          StackMap stack_map = helper.GetStackMap();
+          DCHECK(stack_map.IsValid());
+          stack_map.Dump(vios,
+                         helper.GetCodeInfo(),
+                         helper.GetEncoding(),
+                         oat_method.GetCodeOffset(),
+                         code_item->registers_size_);
+          do {
+            helper.Next();
+            // There may be multiple stack maps at a given PC. We display only the first one.
+          } while (offset == helper.GetOffset());
+        }
+        DCHECK_LT(offset, helper.GetOffset());
+      }
     } else {
       const uint8_t* quick_native_pc = reinterpret_cast<const uint8_t*>(quick_code);
       size_t offset = 0;
       while (offset < code_size) {
-        if (!bad_input) {
-          DumpInformationAtOffset(vios, oat_method, code_item, offset, false);
-        }
         offset += disassembler_->Dump(vios->Stream(), quick_native_pc + offset);
-        if (!bad_input) {
-          uint32_t dex_pc =
-              DumpInformationAtOffset(vios, oat_method, code_item, offset, true);
-          if (dex_pc != DexFile::kDexNoIndex) {
-            DumpGcMapAtNativePcOffset(vios->Stream(), oat_method, code_item, offset);
-            if (verifier != nullptr) {
-              DumpVRegsAtDexPc(vios->Stream(), verifier, oat_method, code_item, dex_pc);
-            }
-          }
-        }
       }
     }
   }
@@ -1617,11 +1435,10 @@
       indent_os << "\n";
       // TODO: Dump fields.
       // Dump methods after.
-      const auto& methods_section = image_header_.GetMethodsSection();
       DumpArtMethodVisitor visitor(this);
-      methods_section.VisitPackedArtMethods(&visitor,
-                                            image_space_.Begin(),
-                                            image_header_.GetPointerSize());
+      image_header_.VisitPackedArtMethods(&visitor,
+                                          image_space_.Begin(),
+                                          image_header_.GetPointerSize());
       // Dump the large objects separately.
       heap->GetLargeObjectsSpace()->GetLiveBitmap()->Walk(ImageDumper::Callback, this);
       indent_os << "\n";
@@ -1980,13 +1797,10 @@
     DCHECK(method != nullptr);
     const void* quick_oat_code_begin = GetQuickOatCodeBegin(method);
     const void* quick_oat_code_end = GetQuickOatCodeEnd(method);
+    const size_t pointer_size = image_header_.GetPointerSize();
     OatQuickMethodHeader* method_header = reinterpret_cast<OatQuickMethodHeader*>(
         reinterpret_cast<uintptr_t>(quick_oat_code_begin) - sizeof(OatQuickMethodHeader));
     if (method->IsNative()) {
-      if (!Runtime::Current()->GetClassLinker()->IsQuickGenericJniStub(quick_oat_code_begin)) {
-        DCHECK(method_header->GetNativeGcMap() == nullptr) << PrettyMethod(method);
-        DCHECK(method_header->GetMappingTable() == nullptr) << PrettyMethod(method);
-      }
       bool first_occurrence;
       uint32_t quick_oat_code_size = GetQuickOatCodeSize(method);
       ComputeOatSize(quick_oat_code_begin, &first_occurrence);
@@ -1997,30 +1811,22 @@
           image_header_.GetPointerSize())) {
         indent_os << StringPrintf("OAT CODE: %p\n", quick_oat_code_begin);
       }
-    } else if (method->IsAbstract() ||
-               method->IsCalleeSaveMethod() ||
-               method->IsResolutionMethod() ||
-               (method == Runtime::Current()->GetImtConflictMethod()) ||
-               method->IsImtUnimplementedMethod() ||
-               method->IsClassInitializer()) {
+    } else if (method->IsAbstract() || method->IsClassInitializer()) {
       // Don't print information for these.
+    } else if (method->IsRuntimeMethod()) {
+      ImtConflictTable* table = method->GetImtConflictTable(image_header_.GetPointerSize());
+      if (table != nullptr) {
+        indent_os << "IMT conflict table " << table << " method: ";
+        for (size_t i = 0, count = table->NumEntries(pointer_size); i < count; ++i) {
+          indent_os << PrettyMethod(table->GetImplementationMethod(i, pointer_size)) << " ";
+        }
+      }
     } else {
       const DexFile::CodeItem* code_item = method->GetCodeItem();
       size_t dex_instruction_bytes = code_item->insns_size_in_code_units_ * 2;
       stats_.dex_instruction_bytes += dex_instruction_bytes;
 
       bool first_occurrence;
-      size_t gc_map_bytes = ComputeOatSize(method_header->GetNativeGcMap(), &first_occurrence);
-      if (first_occurrence) {
-        stats_.gc_map_bytes += gc_map_bytes;
-      }
-
-      size_t pc_mapping_table_bytes = ComputeOatSize(
-          method_header->GetMappingTable(), &first_occurrence);
-      if (first_occurrence) {
-        stats_.pc_mapping_table_bytes += pc_mapping_table_bytes;
-      }
-
       size_t vmap_table_bytes = 0u;
       if (!method_header->IsOptimized()) {
          // Methods compiled with the optimizing compiler have no vmap table.
@@ -2049,11 +1855,12 @@
       uint32_t method_access_flags = method->GetAccessFlags();
 
       indent_os << StringPrintf("OAT CODE: %p-%p\n", quick_oat_code_begin, quick_oat_code_end);
-      indent_os << StringPrintf("SIZE: Dex Instructions=%zd GC=%zd Mapping=%zd AccessFlags=0x%x\n",
-                                dex_instruction_bytes, gc_map_bytes, pc_mapping_table_bytes,
+      indent_os << StringPrintf("SIZE: Dex Instructions=%zd StackMaps=%zd AccessFlags=0x%x\n",
+                                dex_instruction_bytes,
+                                vmap_table_bytes,
                                 method_access_flags);
 
-      size_t total_size = dex_instruction_bytes + gc_map_bytes + pc_mapping_table_bytes +
+      size_t total_size = dex_instruction_bytes +
           vmap_table_bytes + quick_oat_code_size + ArtMethod::Size(image_header_.GetPointerSize());
 
       double expansion =
@@ -2098,8 +1905,6 @@
     size_t large_initializer_code_bytes;
     size_t large_method_code_bytes;
 
-    size_t gc_map_bytes;
-    size_t pc_mapping_table_bytes;
     size_t vmap_table_bytes;
 
     size_t dex_instruction_bytes;
@@ -2128,8 +1933,6 @@
           class_initializer_code_bytes(0),
           large_initializer_code_bytes(0),
           large_method_code_bytes(0),
-          gc_map_bytes(0),
-          pc_mapping_table_bytes(0),
           vmap_table_bytes(0),
           dex_instruction_bytes(0) {}
 
@@ -2348,11 +2151,7 @@
                            PercentOfOatBytes(oat_dex_file_size.second));
       }
 
-      os << "\n" << StringPrintf("gc_map_bytes           = %7zd (%2.0f%% of oat file bytes)\n"
-                                 "pc_mapping_table_bytes = %7zd (%2.0f%% of oat file bytes)\n"
-                                 "vmap_table_bytes       = %7zd (%2.0f%% of oat file bytes)\n\n",
-                                 gc_map_bytes, PercentOfOatBytes(gc_map_bytes),
-                                 pc_mapping_table_bytes, PercentOfOatBytes(pc_mapping_table_bytes),
+      os << "\n" << StringPrintf("vmap_table_bytes       = %7zd (%2.0f%% of oat file bytes)\n\n",
                                  vmap_table_bytes, PercentOfOatBytes(vmap_table_bytes))
          << std::flush;
 
@@ -2587,10 +2386,6 @@
       oat_filename_ = option.substr(strlen("--oat-file=")).data();
     } else if (option.starts_with("--image=")) {
       image_location_ = option.substr(strlen("--image=")).data();
-    } else if (option =="--dump:raw_mapping_table") {
-      dump_raw_mapping_table_ = true;
-    } else if (option == "--dump:raw_gc_map") {
-      dump_raw_gc_map_ = true;
     } else if (option == "--no-dump:vmap") {
       dump_vmap_ = false;
     } else if (option =="--dump:code_info_stack_maps") {
@@ -2680,12 +2475,6 @@
     usage += Base::GetUsage();
 
     usage +=  // Optional.
-        "  --dump:raw_mapping_table enables dumping of the mapping table.\n"
-        "      Example: --dump:raw_mapping_table\n"
-        "\n"
-        "  --dump:raw_gc_map enables dumping of the GC map.\n"
-        "      Example: --dump:raw_gc_map\n"
-        "\n"
         "  --no-dump:vmap may be used to disable vmap dumping.\n"
         "      Example: --no-dump:vmap\n"
         "\n"
@@ -2736,8 +2525,6 @@
   const char* method_filter_ = "";
   const char* image_location_ = nullptr;
   std::string elf_filename_prefix_;
-  bool dump_raw_mapping_table_ = false;
-  bool dump_raw_gc_map_ = false;
   bool dump_vmap_ = true;
   bool dump_code_info_stack_maps_ = false;
   bool disassemble_code_ = true;
@@ -2760,8 +2547,6 @@
     bool absolute_addresses = (args_->oat_filename_ == nullptr);
 
     oat_dumper_options_.reset(new OatDumperOptions(
-        args_->dump_raw_mapping_table_,
-        args_->dump_raw_gc_map_,
         args_->dump_vmap_,
         args_->dump_code_info_stack_maps_,
         args_->disassemble_code_,
diff --git a/oatdump/oatdump_test.cc b/oatdump/oatdump_test.cc
index 3e420ad..c7ced8a 100644
--- a/oatdump/oatdump_test.cc
+++ b/oatdump/oatdump_test.cc
@@ -69,6 +69,7 @@
       exec_argv.push_back("--output=" + core_oat_location_ + ".symbolize");
     } else if (mode == kModeArt) {
       exec_argv.push_back("--image=" + core_art_location_);
+      exec_argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(kRuntimeISA)));
       exec_argv.push_back("--output=/dev/null");
     } else {
       CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
@@ -84,8 +85,8 @@
   std::string core_oat_location_;
 };
 
-// Disable tests on arm as they are taking too long to run for hammerhead. b/27824283.
-#ifndef __arm__
+// Disable tests on arm and mips as they are taking too long to run. b/27824283.
+#if !defined(__arm__) && !defined(__mips__)
 TEST_F(OatDumpTest, TestImage) {
   std::string error_msg;
   ASSERT_TRUE(Exec(kModeArt, {}, &error_msg)) << error_msg;
@@ -96,16 +97,6 @@
   ASSERT_TRUE(Exec(kModeOat, {}, &error_msg)) << error_msg;
 }
 
-TEST_F(OatDumpTest, TestDumpRawMappingTable) {
-  std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--dump:raw_mapping_table"}, &error_msg)) << error_msg;
-}
-
-TEST_F(OatDumpTest, TestDumpRawGcMap) {
-  std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--dump:raw_gc_map"}, &error_msg)) << error_msg;
-}
-
 TEST_F(OatDumpTest, TestNoDumpVmap) {
   std::string error_msg;
   ASSERT_TRUE(Exec(kModeArt, {"--no-dump:vmap"}, &error_msg)) << error_msg;
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 63ae342..0a7ffda 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -320,11 +320,11 @@
 
     PatchOat& p = space_to_patchoat_map.find(space)->second;
 
-    if (!p.WriteImage(output_image_file.get())) {
-      LOG(ERROR) << "Failed to write image file " << output_image_file->GetPath();
+    bool success = p.WriteImage(output_image_file.get());
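+    // FinishFile flushes and closes the output, folding any I/O failure into
+    // the combined status (assumed semantics of the helper).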
+    success = FinishFile(output_image_file.get(), success);
+    if (!success) {
       return false;
     }
-    FinishFile(output_image_file.get(), true);
 
     bool skip_patching_oat = space_to_skip_patching_map.find(space)->second;
     if (!skip_patching_oat) {
@@ -336,11 +336,11 @@
         LOG(ERROR) << "Failed to open output oat file at " << output_oat_filename;
         return false;
       }
-      if (!p.WriteElf(output_oat_file.get())) {
-        LOG(ERROR) << "Failed to write oat file " << output_oat_file->GetPath();
+      success = p.WriteElf(output_oat_file.get());
+      success = FinishFile(output_oat_file.get(), success);
+      if (!success) {
         return false;
       }
-      FinishFile(output_oat_file.get(), true);
     }
   }
   return true;
@@ -472,8 +472,7 @@
 
 void PatchOat::PatchArtFields(const ImageHeader* image_header) {
   PatchOatArtFieldVisitor visitor(this);
-  const auto& section = image_header->GetImageSection(ImageHeader::kSectionArtFields);
-  section.VisitPackedArtFields(&visitor, heap_->Begin());
+  image_header->VisitPackedArtFields(&visitor, heap_->Begin());
 }
 
 class PatchOatArtMethodVisitor : public ArtMethodVisitor {
@@ -490,10 +489,20 @@
 };
 
 void PatchOat::PatchArtMethods(const ImageHeader* image_header) {
-  const auto& section = image_header->GetMethodsSection();
   const size_t pointer_size = InstructionSetPointerSize(isa_);
   PatchOatArtMethodVisitor visitor(this);
-  section.VisitPackedArtMethods(&visitor, heap_->Begin(), pointer_size);
+  image_header->VisitPackedArtMethods(&visitor, heap_->Begin(), pointer_size);
+}
+
+void PatchOat::PatchImtConflictTables(const ImageHeader* image_header) {
+  const size_t pointer_size = InstructionSetPointerSize(isa_);
+  // We can safely walk the target image since the conflict tables are independent.
+  image_header->VisitPackedImtConflictTables(
+      [this](ArtMethod* method) {
+        return RelocatedAddressOfPointer(method);
+      },
+      image_->Begin(),
+      pointer_size);
 }
 
 class FixupRootVisitor : public RootVisitor {
@@ -627,6 +636,7 @@
 
   PatchArtFields(image_header);
   PatchArtMethods(image_header);
+  PatchImtConflictTables(image_header);
   PatchInternedStrings(image_header);
   PatchClassTable(image_header);
   // Patch dex file int/long arrays which point to ArtFields.
@@ -650,12 +660,6 @@
   return true;
 }
 
-bool PatchOat::InHeap(mirror::Object* o) {
-  uintptr_t begin = reinterpret_cast<uintptr_t>(heap_->Begin());
-  uintptr_t end = reinterpret_cast<uintptr_t>(heap_->End());
-  uintptr_t obj = reinterpret_cast<uintptr_t>(o);
-  return o == nullptr || (begin <= obj && obj < end);
-}
 
 void PatchOat::PatchVisitor::operator() (mirror::Object* obj, MemberOffset off,
                                          bool is_static_unused ATTRIBUTE_UNUSED) const {
@@ -668,7 +672,8 @@
                                          mirror::Reference* ref) const {
   MemberOffset off = mirror::Reference::ReferentOffset();
   mirror::Object* referent = ref->GetReferent();
-  DCHECK(patcher_->InHeap(referent)) << "Referent is not in the heap.";
+  DCHECK(referent == nullptr ||
+         Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(referent)) << referent;
   mirror::Object* moved_object = patcher_->RelocatedAddressOfPointer(referent);
   copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(off, moved_object);
 }
@@ -730,6 +735,7 @@
       RelocatedAddressOfPointer(object->GetDexCacheResolvedTypes(pointer_size)), pointer_size);
   copy->SetEntryPointFromQuickCompiledCodePtrSize(RelocatedAddressOfPointer(
       object->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size)), pointer_size);
+  // No special handling for IMT conflict table since all pointers are moved by the same offset.
   copy->SetEntryPointFromJniPtrSize(RelocatedAddressOfPointer(
       object->GetEntryPointFromJniPtrSize(pointer_size)), pointer_size);
 }
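
A minimal sketch of the new write pattern in the patchoat.cc hunks above, assuming
FinishFile() returns false when flushing or closing fails (WriteAndFinish is a
hypothetical helper, not part of the change):

    // Thread the write result through FinishFile() so that a failed flush or
    // close fails the whole operation even when the write itself succeeded.
    bool WriteAndFinish(PatchOat& p, File* file) {
      bool success = p.WriteImage(file);    // write the relocated image
      success = FinishFile(file, success);  // flush + close; may also fail
      return success;                       // caller bails out on false
    }
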
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index a6a8fee..3ef837f 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -106,7 +106,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupMethod(ArtMethod* object, ArtMethod* copy)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool InHeap(mirror::Object*);
 
   // Patches oat in place, modifying the oat_file given to the constructor.
   bool PatchElf();
@@ -118,6 +117,8 @@
   bool PatchImage(bool primary_image) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtFields(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtMethods(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
+  void PatchImtConflictTables(const ImageHeader* image_header)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchInternedStrings(const ImageHeader* image_header)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchClassTable(const ImageHeader* image_header)
diff --git a/profman/profile_assistant.cc b/profman/profile_assistant.cc
index 58e8a3a..a25460e 100644
--- a/profman/profile_assistant.cc
+++ b/profman/profile_assistant.cc
@@ -21,44 +21,41 @@
 
 namespace art {
 
-// Minimum number of new methods that profiles must contain to enable recompilation.
+// Minimum number of new methods/classes that profiles
+// must contain to enable recompilation.
 static constexpr const uint32_t kMinNewMethodsForCompilation = 10;
+static constexpr const uint32_t kMinNewClassesForCompilation = 10;
 
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfilesInternal(
         const std::vector<ScopedFlock>& profile_files,
         const ScopedFlock& reference_profile_file) {
   DCHECK(!profile_files.empty());
 
-  std::vector<ProfileCompilationInfo> new_info(profile_files.size());
-  bool should_compile = false;
-  // Read the main profile files.
-  for (size_t i = 0; i < new_info.size(); i++) {
-    if (!new_info[i].Load(profile_files[i].GetFile()->Fd())) {
-      LOG(WARNING) << "Could not load profile file at index " << i;
-      return kErrorBadProfiles;
-    }
-    // Do we have enough new profiled methods that will make the compilation worthwhile?
-    should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation);
-  }
-
-  if (!should_compile) {
-    return kSkipCompilation;
-  }
-
-  // Merge information.
   ProfileCompilationInfo info;
+  // Load the reference profile.
   if (!info.Load(reference_profile_file.GetFile()->Fd())) {
     LOG(WARNING) << "Could not load reference profile file";
     return kErrorBadProfiles;
   }
 
-  for (size_t i = 0; i < new_info.size(); i++) {
-    // Merge all data into a single object.
-    if (!info.Load(new_info[i])) {
-      LOG(WARNING) << "Could not merge profile data at index " << i;
+  // Store the current state of the reference profile before merging with the current profiles.
+  uint32_t number_of_methods = info.GetNumberOfMethods();
+  uint32_t number_of_classes = info.GetNumberOfResolvedClasses();
+
+  // Merge all current profiles.
+  for (size_t i = 0; i < profile_files.size(); i++) {
+    if (!info.Load(profile_files[i].GetFile()->Fd())) {
+      LOG(WARNING) << "Could not load profile file at index " << i;
       return kErrorBadProfiles;
     }
   }
+
+  // Check if there is enough new information added by the current profiles.
+  if (((info.GetNumberOfMethods() - number_of_methods) < kMinNewMethodsForCompilation) &&
+      ((info.GetNumberOfResolvedClasses() - number_of_classes) < kMinNewClassesForCompilation)) {
+    return kSkipCompilation;
+  }
+
   // We were successful in merging all profile information. Update the reference profile.
   if (!reference_profile_file.GetFile()->ClearContent()) {
     PLOG(WARNING) << "Could not clear reference profile file";
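
The rewritten ProcessProfilesInternal() above now decides after merging rather than
before: it records the reference profile's method/class counts, merges every current
profile into it, and only then compares the deltas against the thresholds. The
decision rule, restated as a sketch (ShouldRecompile is a hypothetical helper):

    // Compile only if the merged profiles added enough new methods OR enough
    // new classes on top of what the reference profile already contained.
    bool ShouldRecompile(uint32_t ref_methods, uint32_t ref_classes,
                         uint32_t merged_methods, uint32_t merged_classes) {
      return (merged_methods - ref_methods) >= kMinNewMethodsForCompilation ||
             (merged_classes - ref_classes) >= kMinNewClassesForCompilation;
    }
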
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index b0d5df2..462c397 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -29,6 +29,7 @@
   void SetupProfile(const std::string& id,
                     uint32_t checksum,
                     uint16_t number_of_methods,
+                    uint16_t number_of_classes,
                     const ScratchFile& profile,
                     ProfileCompilationInfo* info,
                     uint16_t start_method_index = 0) {
@@ -40,6 +41,10 @@
       ASSERT_TRUE(info->AddMethodIndex(dex_location1, dex_location_checksum1, i));
       ASSERT_TRUE(info->AddMethodIndex(dex_location2, dex_location_checksum2, i));
     }
+    for (uint16_t i = 0; i < number_of_classes; i++) {
+      ASSERT_TRUE(info->AddClassIndex(dex_location1, dex_location_checksum1, i));
+    }
+
     ASSERT_TRUE(info->Save(GetFd(profile)));
     ASSERT_EQ(0, profile.GetFile()->Flush());
     ASSERT_TRUE(profile.GetFile()->ResetOffset());
@@ -89,9 +94,9 @@
 
   const uint16_t kNumberOfMethodsToEnableCompilation = 100;
   ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
   ProfileCompilationInfo info2;
-  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, 0, profile2, &info2);
 
   // We should advise compilation.
   ASSERT_EQ(ProfileAssistant::kCompile,
@@ -102,8 +107,8 @@
   ASSERT_TRUE(result.Load(reference_profile_fd));
 
   ProfileCompilationInfo expected;
-  ASSERT_TRUE(expected.Load(info1));
-  ASSERT_TRUE(expected.Load(info2));
+  ASSERT_TRUE(expected.MergeWith(info1));
+  ASSERT_TRUE(expected.MergeWith(info2));
   ASSERT_TRUE(expected.Equals(result));
 
   // The information from profiles must remain the same.
@@ -111,6 +116,35 @@
   CheckProfileInfo(profile2, info2);
 }
 
+// TODO(calin): Add more tests for classes.
+TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferencesBecauseOfClasses) {
+  ScratchFile profile1;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({
+      GetFd(profile1)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  const uint16_t kNumberOfClassesToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, 0, kNumberOfClassesToEnableCompilation, profile1, &info1);
+
+  // We should advise compilation.
+  ASSERT_EQ(ProfileAssistant::kCompile,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo result;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(result.Load(reference_profile_fd));
+
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.MergeWith(info1));
+  ASSERT_TRUE(expected.Equals(result));
+
+  // The information from profiles must remain the same.
+  CheckProfileInfo(profile1, info1);
+}
+
 TEST_F(ProfileAssistantTest, AdviseCompilationNonEmptyReferences) {
   ScratchFile profile1;
   ScratchFile profile2;
@@ -124,15 +158,15 @@
   // The new profile info will contain the methods with indices 0-100.
   const uint16_t kNumberOfMethodsToEnableCompilation = 100;
   ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
   ProfileCompilationInfo info2;
-  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, 0, profile2, &info2);
 
 
   // The reference profile info will contain the methods with indices 50-150.
   const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
   ProfileCompilationInfo reference_info;
-  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, reference_profile,
+  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, 0, reference_profile,
       &reference_info, kNumberOfMethodsToEnableCompilation / 2);
 
   // We should advise compilation.
@@ -145,9 +179,9 @@
   ASSERT_TRUE(result.Load(reference_profile_fd));
 
   ProfileCompilationInfo expected;
-  ASSERT_TRUE(expected.Load(info1));
-  ASSERT_TRUE(expected.Load(info2));
-  ASSERT_TRUE(expected.Load(reference_info));
+  ASSERT_TRUE(expected.MergeWith(info1));
+  ASSERT_TRUE(expected.MergeWith(info2));
+  ASSERT_TRUE(expected.MergeWith(reference_info));
   ASSERT_TRUE(expected.Equals(result));
 
   // The information from profiles must remain the same.
@@ -167,9 +201,9 @@
 
   const uint16_t kNumberOfMethodsToSkipCompilation = 1;
   ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, profile1, &info1);
+  SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, 0, profile1, &info1);
   ProfileCompilationInfo info2;
-  SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, profile2, &info2);
+  SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, 0, profile2, &info2);
 
   // We should not advise compilation.
   ASSERT_EQ(ProfileAssistant::kSkipCompilation,
@@ -207,9 +241,9 @@
   const uint16_t kNumberOfMethodsToEnableCompilation = 100;
   // Assign different hashes for the same dex file. This will make merging of information fail.
   ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
   ProfileCompilationInfo info2;
-  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, 0, profile2, &info2);
 
   // We should fail processing.
   ASSERT_EQ(ProfileAssistant::kErrorBadProfiles,
@@ -234,9 +268,9 @@
   const uint16_t kNumberOfMethodsToEnableCompilation = 100;
   // Assign different hashes for the same dex file. This will make merging of information fail.
   ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
   ProfileCompilationInfo reference_info;
-  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, reference_profile, &reference_info);
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, 0, reference_profile, &reference_info);
 
   // We should not advise compilation.
   ASSERT_TRUE(profile1.GetFile()->ResetOffset());
diff --git a/profman/profman.cc b/profman/profman.cc
index 7c9e449..37a560d 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -14,12 +14,14 @@
  * limitations under the License.
  */
 
+#include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <unistd.h>
 
+#include <iostream>
 #include <string>
 #include <vector>
 
@@ -68,6 +70,9 @@
   UsageError("Command: %s", CommandLine().c_str());
   UsageError("Usage: profman [options]...");
   UsageError("");
+  UsageError("  --dump-info-for=<filename>: dumps the content of the profile file");
+  UsageError("      to standard output in a human-readable form.");
+  UsageError("");
   UsageError("  --profile-file=<filename>: specify profiler output file to use for compilation.");
   UsageError("      Can be specified multiple times, in which case the data from the different");
   UsageError("      profiles will be aggregated.");
@@ -117,9 +122,11 @@
       const StringPiece option(argv[i]);
       const bool log_options = false;
       if (log_options) {
-        LOG(INFO) << "patchoat: option[" << i << "]=" << argv[i];
+        LOG(INFO) << "profman: option[" << i << "]=" << argv[i];
       }
-      if (option.starts_with("--profile-file=")) {
+      if (option.starts_with("--dump-info-for=")) {
+        dump_info_for_ = option.substr(strlen("--dump-info-for=")).ToString();
+      } else if (option.starts_with("--profile-file=")) {
         profile_files_.push_back(option.substr(strlen("--profile-file=")).ToString());
       } else if (option.starts_with("--profile-file-fd=")) {
         ParseFdForCollection(option, "--profile-file-fd", &profile_files_fd_);
@@ -132,13 +139,23 @@
       }
     }
 
-    if (profile_files_.empty() && profile_files_fd_.empty()) {
+    bool has_profiles = !profile_files_.empty() || !profile_files_fd_.empty();
+    bool has_reference_profile = !reference_profile_file_.empty() ||
+        (reference_profile_file_fd_ != -1);
+
+    if (!dump_info_for_.empty()) {
+      if (has_profiles || has_reference_profile) {
+        Usage("dump-info-for cannot be specified together with other options");
+      }
+      return;
+    }
+    if (!has_profiles) {
       Usage("No profile files specified.");
     }
     if (!profile_files_.empty() && !profile_files_fd_.empty()) {
       Usage("Profile files should not be specified with both --profile-file-fd and --profile-file");
     }
-    if (!reference_profile_file_.empty() && (reference_profile_file_fd_ != -1)) {
+    if (!has_reference_profile) {
       Usage("--reference-profile-file-fd should only be supplied with --profile-file-fd");
     }
     if (reference_profile_file_.empty() && (reference_profile_file_fd_ == -1)) {
@@ -160,6 +177,27 @@
     return result;
   }
 
+  int DumpProfileInfo() {
+    int fd = open(dump_info_for_.c_str(), O_RDWR);
+    if (fd < 0) {
+      std::cerr << "Cannot open " << dump_info_for_ << ": " << strerror(errno) << "\n";
+      return -1;
+    }
+    ProfileCompilationInfo info;
+    if (!info.Load(fd)) {
+      std::cerr << "Cannot load profile info from " << dump_info_for_ << "\n";
+      return -1;
+    }
+    std::string dump = info.DumpInfo(/*dex_files*/ nullptr);
+    info.Save(fd);
+    std::cout << dump << "\n";
+    return 0;
+  }
+
+  bool ShouldOnlyDumpProfile() {
+    return !dump_info_for_.empty();
+  }
+
  private:
   static void ParseFdForCollection(const StringPiece& option,
                                    const char* arg_name,
@@ -178,7 +216,11 @@
   }
 
   void LogCompletionTime() {
-    LOG(INFO) << "profman took " << PrettyDuration(NanoTime() - start_ns_);
+    static constexpr uint64_t kLogThresholdTime = MsToNs(100);  // 100ms
+    uint64_t time_taken = NanoTime() - start_ns_;
+    if (time_taken > kLogThresholdTime) {
+      LOG(WARNING) << "profman took " << PrettyDuration(time_taken);
+    }
   }
 
   std::vector<std::string> profile_files_;
@@ -186,6 +228,7 @@
   std::string reference_profile_file_;
   int reference_profile_file_fd_;
   uint64_t start_ns_;
+  std::string dump_info_for_;
 };
 
 // See ProfileAssistant::ProcessingResult for return codes.
@@ -195,6 +238,9 @@
   // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
   profman.ParseArgs(argc, argv);
 
+  if (profman.ShouldOnlyDumpProfile()) {
+    return profman.DumpProfileInfo();
+  }
   // Process profile information and assess if we need to do a profile guided compilation.
   // This operation involves I/O.
   return profman.ProcessProfiles();
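
For reference, the new profman flag is used on its own, e.g. (the profile path here
is purely illustrative):

    profman --dump-info-for=/data/misc/profiles/cur/0/com.example/primary.prof

profman then loads the profile and prints the DumpInfo() text to standard output,
exiting before the profile-assistant processing step.
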
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 0c6541e..aa12c83 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -106,7 +106,6 @@
   jit/debugger_interface.cc \
   jit/jit.cc \
   jit/jit_code_cache.cc \
-  jit/jit_instrumentation.cc \
   jit/offline_profiling_info.cc \
   jit/profiling_info.cc \
   jit/profile_saver.cc  \
@@ -149,6 +148,7 @@
   native/java_lang_VMClassLoader.cc \
   native/java_lang_ref_FinalizerReference.cc \
   native/java_lang_ref_Reference.cc \
+  native/java_lang_reflect_AbstractMethod.cc \
   native/java_lang_reflect_Array.cc \
   native/java_lang_reflect_Constructor.cc \
   native/java_lang_reflect_Field.cc \
@@ -187,7 +187,6 @@
   type_lookup_table.cc \
   utf.cc \
   utils.cc \
-  verifier/dex_gc_map.cc \
   verifier/instruction_flags.cc \
   verifier/method_verifier.cc \
   verifier/reg_type.cc \
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index e358ff8..f0e9ac5 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -17,6 +17,7 @@
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
+#include "entrypoints/quick/quick_default_init_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/math_entrypoints.h"
@@ -47,67 +48,12 @@
 extern "C" int64_t __aeabi_ldivmod(int64_t, int64_t);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
-  // JNI
-  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
-
-  // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
   qpoints->pCheckCast = art_quick_check_cast;
 
-  // DexCache
-  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
-  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
-  qpoints->pInitializeType = art_quick_initialize_type;
-  qpoints->pResolveString = art_quick_resolve_string;
-
-  // Field
-  qpoints->pSet8Instance = art_quick_set8_instance;
-  qpoints->pSet8Static = art_quick_set8_static;
-  qpoints->pSet16Instance = art_quick_set16_instance;
-  qpoints->pSet16Static = art_quick_set16_static;
-  qpoints->pSet32Instance = art_quick_set32_instance;
-  qpoints->pSet32Static = art_quick_set32_static;
-  qpoints->pSet64Instance = art_quick_set64_instance;
-  qpoints->pSet64Static = art_quick_set64_static;
-  qpoints->pSetObjInstance = art_quick_set_obj_instance;
-  qpoints->pSetObjStatic = art_quick_set_obj_static;
-  qpoints->pGetByteInstance = art_quick_get_byte_instance;
-  qpoints->pGetBooleanInstance = art_quick_get_boolean_instance;
-  qpoints->pGetShortInstance = art_quick_get_short_instance;
-  qpoints->pGetCharInstance = art_quick_get_char_instance;
-  qpoints->pGet32Instance = art_quick_get32_instance;
-  qpoints->pGet64Instance = art_quick_get64_instance;
-  qpoints->pGetObjInstance = art_quick_get_obj_instance;
-  qpoints->pGetByteStatic = art_quick_get_byte_static;
-  qpoints->pGetBooleanStatic = art_quick_get_boolean_static;
-  qpoints->pGetShortStatic = art_quick_get_short_static;
-  qpoints->pGetCharStatic = art_quick_get_char_static;
-  qpoints->pGet32Static = art_quick_get32_static;
-  qpoints->pGet64Static = art_quick_get64_static;
-  qpoints->pGetObjStatic = art_quick_get_obj_static;
-
-  // Array
-  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
-  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
-  qpoints->pAputObject = art_quick_aput_obj;
-  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
-
-  // JNI
-  qpoints->pJniMethodStart = JniMethodStart;
-  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
-  qpoints->pJniMethodEnd = JniMethodEnd;
-  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
-  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
-  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
-  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
-
-  // Locks
-  qpoints->pLockObject = art_quick_lock_object;
-  qpoints->pUnlockObject = art_quick_unlock_object;
-
   // Math
   qpoints->pIdivmod = __aeabi_idivmod;
   qpoints->pLdiv = __aeabi_ldivmod;
@@ -154,35 +100,6 @@
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = memcpy;
 
-  // Invocation
-  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
-  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
-  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
-  qpoints->pInvokeDirectTrampolineWithAccessCheck =
-      art_quick_invoke_direct_trampoline_with_access_check;
-  qpoints->pInvokeInterfaceTrampolineWithAccessCheck =
-      art_quick_invoke_interface_trampoline_with_access_check;
-  qpoints->pInvokeStaticTrampolineWithAccessCheck =
-      art_quick_invoke_static_trampoline_with_access_check;
-  qpoints->pInvokeSuperTrampolineWithAccessCheck =
-      art_quick_invoke_super_trampoline_with_access_check;
-  qpoints->pInvokeVirtualTrampolineWithAccessCheck =
-      art_quick_invoke_virtual_trampoline_with_access_check;
-
-  // Thread
-  qpoints->pTestSuspend = art_quick_test_suspend;
-
-  // Throws
-  qpoints->pDeliverException = art_quick_deliver_exception;
-  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
-  qpoints->pThrowDivZero = art_quick_throw_div_zero;
-  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
-  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
-  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
-
-  // Deoptimization from compiled code.
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
-
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   qpoints->pReadBarrierMark = artReadBarrierMark;
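
InitEntryPoints() above (and its arm64 and mips64 counterparts later in this diff)
now delegates the shared boilerplate to the new quick_default_init_entrypoints.h
header. The helper's body is not shown in this diff; inferred from the deleted
lines, it presumably looks like:

    static void DefaultInitEntryPoints(JniEntryPoints* jpoints,
                                       QuickEntryPoints* qpoints) {
      // JNI
      jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
      // Alloc
      ResetQuickAllocEntryPoints(qpoints);
      // ... followed by the DexCache, Field, Array, JNI method start/end,
      // Locks, Invocation, Thread, Throws and Deoptimize assignments that
      // each architecture file used to duplicate (see the removed lines).
    }
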
diff --git a/runtime/arch/arm/instruction_set_features_arm.cc b/runtime/arch/arm/instruction_set_features_arm.cc
index 51f992b..ffac030 100644
--- a/runtime/arch/arm/instruction_set_features_arm.cc
+++ b/runtime/arch/arm/instruction_set_features_arm.cc
@@ -16,7 +16,7 @@
 
 #include "instruction_set_features_arm.h"
 
-#if defined(__ANDROID__) && defined(__arm__)
+#if defined(ART_TARGET_ANDROID) && defined(__arm__)
 #include <sys/auxv.h>
 #include <asm/hwcap.h>
 #endif
@@ -166,7 +166,7 @@
   bool has_div = false;
   bool has_lpae = false;
 
-#if defined(__ANDROID__) && defined(__arm__)
+#if defined(ART_TARGET_ANDROID) && defined(__arm__)
   uint64_t hwcaps = getauxval(AT_HWCAP);
   LOG(INFO) << "hwcaps=" << hwcaps;
   if ((hwcaps & HWCAP_IDIVT) != 0) {
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 5a901f1..321b9d2 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -544,6 +544,15 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_lock_object
 
+ENTRY art_quick_lock_object_no_inline
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case we block
+    mov    r1, r9                     @ pass Thread::Current
+    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_lock_object_no_inline
+
     /*
      * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
      * r0 holds the possibly null object to lock.
@@ -601,6 +610,16 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_unlock_object
 
+ENTRY art_quick_unlock_object_no_inline
+    @ save callee saves in case exception allocation triggers GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2
+    mov    r1, r9                     @ pass Thread::Current
+    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_unlock_object_no_inline
+
     /*
      * Entry from managed code that calls artIsAssignableFromCode and on failure calls
      * artThrowClassCastException.
@@ -1247,7 +1266,7 @@
     ldr r4, [r0, #(2 * __SIZEOF_POINTER__)]!
     b .Limt_table_iterate
 .Limt_table_found:
-    // We successuflly hit an entry in the table. Load the target method
+    // We successfully hit an entry in the table. Load the target method
     // and jump to it.
     ldr r0, [r0, #__SIZEOF_POINTER__]
     ldr pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
@@ -1832,7 +1851,7 @@
     add   sp, #4
     .cfi_adjust_cfa_offset -4
     pop   {pc}
-END art_quick_fmod
+END art_quick_fmodf
 
     /* int64_t art_d2l(double d) */
     .extern art_d2l
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 4db9411..bf0f647 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -17,6 +17,7 @@
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
+#include "entrypoints/quick/quick_default_init_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/math_entrypoints.h"
@@ -30,67 +31,12 @@
                                             const mirror::Class* ref_class);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
-  // JNI
-  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
-
-  // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
   qpoints->pCheckCast = art_quick_check_cast;
 
-  // DexCache
-  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
-  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
-  qpoints->pInitializeType = art_quick_initialize_type;
-  qpoints->pResolveString = art_quick_resolve_string;
-
-  // Field
-  qpoints->pSet8Instance = art_quick_set8_instance;
-  qpoints->pSet8Static = art_quick_set8_static;
-  qpoints->pSet16Instance = art_quick_set16_instance;
-  qpoints->pSet16Static = art_quick_set16_static;
-  qpoints->pSet32Instance = art_quick_set32_instance;
-  qpoints->pSet32Static = art_quick_set32_static;
-  qpoints->pSet64Instance = art_quick_set64_instance;
-  qpoints->pSet64Static = art_quick_set64_static;
-  qpoints->pSetObjInstance = art_quick_set_obj_instance;
-  qpoints->pSetObjStatic = art_quick_set_obj_static;
-  qpoints->pGetBooleanInstance = art_quick_get_boolean_instance;
-  qpoints->pGetByteInstance = art_quick_get_byte_instance;
-  qpoints->pGetCharInstance = art_quick_get_char_instance;
-  qpoints->pGetShortInstance = art_quick_get_short_instance;
-  qpoints->pGet32Instance = art_quick_get32_instance;
-  qpoints->pGet64Instance = art_quick_get64_instance;
-  qpoints->pGetObjInstance = art_quick_get_obj_instance;
-  qpoints->pGetBooleanStatic = art_quick_get_boolean_static;
-  qpoints->pGetByteStatic = art_quick_get_byte_static;
-  qpoints->pGetCharStatic = art_quick_get_char_static;
-  qpoints->pGetShortStatic = art_quick_get_short_static;
-  qpoints->pGet32Static = art_quick_get32_static;
-  qpoints->pGet64Static = art_quick_get64_static;
-  qpoints->pGetObjStatic = art_quick_get_obj_static;
-
-  // Array
-  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
-  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
-  qpoints->pAputObject = art_quick_aput_obj;
-  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
-
-  // JNI
-  qpoints->pJniMethodStart = JniMethodStart;
-  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
-  qpoints->pJniMethodEnd = JniMethodEnd;
-  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
-  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
-  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
-  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
-
-  // Locks
-  qpoints->pLockObject = art_quick_lock_object;
-  qpoints->pUnlockObject = art_quick_unlock_object;
-
   // Math
   // TODO null entrypoints not needed for ARM64 - generate inline.
   qpoints->pCmpgDouble = nullptr;
@@ -134,38 +80,10 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pStringCompareTo = art_quick_string_compareto;
+  // The ARM64 StringCompareTo intrinsic does not call the runtime.
+  qpoints->pStringCompareTo = nullptr;
   qpoints->pMemcpy = memcpy;
 
-  // Invocation
-  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
-  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
-  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
-  qpoints->pInvokeDirectTrampolineWithAccessCheck =
-      art_quick_invoke_direct_trampoline_with_access_check;
-  qpoints->pInvokeInterfaceTrampolineWithAccessCheck =
-      art_quick_invoke_interface_trampoline_with_access_check;
-  qpoints->pInvokeStaticTrampolineWithAccessCheck =
-      art_quick_invoke_static_trampoline_with_access_check;
-  qpoints->pInvokeSuperTrampolineWithAccessCheck =
-      art_quick_invoke_super_trampoline_with_access_check;
-  qpoints->pInvokeVirtualTrampolineWithAccessCheck =
-      art_quick_invoke_virtual_trampoline_with_access_check;
-
-  // Thread
-  qpoints->pTestSuspend = art_quick_test_suspend;
-
-  // Throws
-  qpoints->pDeliverException = art_quick_deliver_exception;
-  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
-  qpoints->pThrowDivZero = art_quick_throw_div_zero;
-  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
-  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
-  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
-
-  // Deoptimization from compiled code.
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
-
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   qpoints->pReadBarrierMark = artReadBarrierMark;
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.cc b/runtime/arch/arm64/instruction_set_features_arm64.cc
index 613bb5c..cad13b2 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64.cc
@@ -39,7 +39,7 @@
   if (!needs_a53_835769_fix) {
     // Check to see if this is an expected variant.
     static const char* arm64_known_variants[] = {
-        "denver64", "kryo"
+        "denver64", "kryo", "exynos-m1"
     };
     if (!FindVariantInArray(arm64_known_variants, arraysize(arm64_known_variants), variant)) {
       std::ostringstream os;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 8b497fe..1fba09b 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1113,6 +1113,14 @@
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_lock_object
 
+ENTRY art_quick_lock_object_no_inline
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case we block
+    mov    x1, xSELF                  // pass Thread::Current
+    bl     artLockObjectFromCode      // (Object* obj, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_W0_IS_ZERO_OR_DELIVER
+END art_quick_lock_object_no_inline
+
     /*
      * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
      * x0 holds the possibly null object to lock.
@@ -1171,6 +1179,14 @@
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_unlock_object
 
+ENTRY art_quick_unlock_object_no_inline
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case exception allocation triggers GC
+    mov    x1, xSELF                  // pass Thread::Current
+    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_W0_IS_ZERO_OR_DELIVER
+END art_quick_unlock_object_no_inline
+
     /*
      * Entry from managed code that calls artIsAssignableFromCode and on failure calls
      * artThrowClassCastException.
@@ -1638,23 +1654,17 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_rosalloc
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
-ENTRY art_quick_alloc_object_tlab
-    // Fast path tlab allocation.
-    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
-    // x2-x7: free.
-#if defined(USE_READ_BARRIER)
-    mvn    x0, xzr                                            // Read barrier not supported here.
-    ret                                                       // Return -1.
-#endif
-    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
-                                                              // Load the class (x2)
-    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
-    cbz    x2, .Lart_quick_alloc_object_tlab_slow_path        // Check null class
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current
+// x3-x7: free.
+// Need to preserve x0 and x1 for the slow path.
+.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
+    cbz    x2, \slowPathLabel                                 // Check null class
                                                               // Check class status.
     ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]
     cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
-    bne    .Lart_quick_alloc_object_tlab_slow_path
+    bne    \slowPathLabel
                                                               // Add a fake dependence from the
                                                               // following access flag and size
                                                               // loads to the status load.
@@ -1668,7 +1678,7 @@
                                                               // Check access flags has
                                                               // kAccClassIsFinalizable.
     ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
-    tbnz   x3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT, .Lart_quick_alloc_object_tlab_slow_path
+    tbnz   x3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT, \slowPathLabel
                                                               // Load thread_local_pos (x4) and
                                                               // thread_local_end (x5).
     ldr    x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
@@ -1678,7 +1688,7 @@
     cmp    x7, x6                                             // Check if it fits. OK to do this
                                                               // before rounding up the object size
                                                               // assuming the buf size alignment.
-    bhi    .Lart_quick_alloc_object_tlab_slow_path
+    bhi    \slowPathLabel
     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
                                                               // Round up the object size by the
                                                               // object alignment. (addr + 7) & ~7.
@@ -1703,6 +1713,21 @@
                                                               // class status load.)
     dmb    ish
     ret
+.endm
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+ENTRY art_quick_alloc_object_tlab
+    // Fast path tlab allocation.
+    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x2-x7: free.
+#if defined(USE_READ_BARRIER)
+    mvn    x0, xzr                                            // Read barrier not supported here.
+    ret                                                       // Return -1.
+#endif
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
 .Lart_quick_alloc_object_tlab_slow_path:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // Save callee saves in case of GC.
     mov    x2, xSELF                     // Pass Thread::Current.
@@ -1711,7 +1736,42 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_tlab
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+ENTRY art_quick_alloc_object_region_tlab
+    // Fast path region tlab allocation.
+    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x2-x7: free.
+#if !defined(USE_READ_BARRIER)
+    mvn    x0, xzr                                            // Read barrier must be enabled here.
+    ret                                                       // Return -1.
+#endif
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+                                                              // Read barrier for class load.
+    ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   x3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+                                                              // The read barrier slow path. Mark
+                                                              // the class.
+    stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, lr).
+    str    xLR, [sp, #16]                                     // Align sp by 16 bytes.
+    mov    x0, x2                                             // Pass the class as the first param.
+    bl     artReadBarrierMark
+    mov    x2, x0                                             // Get the (marked) class back.
+    ldp    x0, x1, [sp, #0]                                   // Restore registers.
+    ldr    xLR, [sp, #16]
+    add    sp, sp, #32
+    b      .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          // Save callee saves in case of GC.
+    mov    x2, xSELF                           // Pass Thread::Current.
+    bl     artAllocObjectFromCodeRegionTLAB    // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_region_tlab
 
     /*
      * Called by managed code when the thread has been asked to suspend.
@@ -1780,7 +1840,7 @@
     ldr x0, [xIP1, #(2 * __SIZEOF_POINTER__)]!
     b .Limt_table_iterate
 .Limt_table_found:
-    // We successuflly hit an entry in the table. Load the target method
+    // We successfully hit an entry in the table. Load the target method
     // and jump to it.
     ldr x0, [xIP1, #__SIZEOF_POINTER__]
     ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
@@ -2143,108 +2203,3 @@
     asr   x0, x0, #1
     ret
 END art_quick_indexof
-
-   /*
-     * String's compareTo.
-     *
-     * TODO: Not very optimized.
-     *
-     * On entry:
-     *    x0:   this object pointer
-     *    x1:   comp object pointer
-     *
-     */
-    .extern __memcmp16
-ENTRY art_quick_string_compareto
-    mov    x2, x0         // x0 is return, use x2 for first input.
-    sub    x0, x2, x1     // Same string object?
-    cbnz   x0,1f
-    ret
-1:                        // Different string objects.
-
-    ldr    w4, [x2, #MIRROR_STRING_COUNT_OFFSET]
-    ldr    w3, [x1, #MIRROR_STRING_COUNT_OFFSET]
-    add    x2, x2, #MIRROR_STRING_VALUE_OFFSET
-    add    x1, x1, #MIRROR_STRING_VALUE_OFFSET
-
-    /*
-     * Now:           Data*  Count
-     *    first arg    x2      w4
-     *   second arg    x1      w3
-     */
-
-    // x0 := str1.length(w4) - str2.length(w3). ldr zero-extended w3/w4 into x3/x4.
-    subs x0, x4, x3
-    // Min(count1, count2) into w3.
-    csel x3, x3, x4, ge
-
-    // TODO: Tune this value.
-    // Check for long string, do memcmp16 for them.
-    cmp w3, #28  // Constant from arm32.
-    bgt .Ldo_memcmp16
-
-    /*
-     * Now:
-     *   x2: *first string data
-     *   x1: *second string data
-     *   w3: iteration count
-     *   x0: return value if comparison equal
-     *   x4, x5, x6, x7: free
-     */
-
-    // Do a simple unrolled loop.
-.Lloop:
-    // At least two more elements?
-    subs w3, w3, #2
-    b.lt .Lremainder_or_done
-
-    ldrh w4, [x2], #2
-    ldrh w5, [x1], #2
-
-    ldrh w6, [x2], #2
-    ldrh w7, [x1], #2
-
-    subs w4, w4, w5
-    b.ne .Lw4_result
-
-    subs w6, w6, w7
-    b.ne .Lw6_result
-
-    b .Lloop
-
-.Lremainder_or_done:
-    adds w3, w3, #1
-    b.eq .Lremainder
-    ret
-
-.Lremainder:
-    ldrh w4, [x2], #2
-    ldrh w5, [x1], #2
-    subs w4, w4, w5
-    b.ne .Lw4_result
-    ret
-
-// Result is in w4
-.Lw4_result:
-    sxtw x0, w4
-    ret
-
-// Result is in w6
-.Lw6_result:
-    sxtw x0, w6
-    ret
-
-.Ldo_memcmp16:
-    mov x14, x0                  // Save x0 and LR. __memcmp16 does not use these temps.
-    mov x15, xLR                 //                 TODO: Codify and check that?
-
-    mov x0, x2
-    uxtw x2, w3
-    bl __memcmp16
-
-    mov xLR, x15                 // Restore LR.
-
-    cmp x0, #0                   // Check the memcmp difference.
-    csel x0, x0, x14, ne         // x0 := x0 != 0 ? x14(prev x0=length diff) : x1.
-    ret
-END art_quick_string_compareto
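
The ALLOC_OBJECT_TLAB_FAST_PATH macro factored out above runs the same checks for
both TLAB flavors; the region-TLAB entry point additionally marks the loaded class
via artReadBarrierMark when GC is marking. In rough C-like pseudocode (field and
helper names are illustrative, not the real runtime API):

    // Fast-path bump allocation from the thread-local allocation buffer.
    Object* AllocObjectTlabFastPath(Class* klass, Thread* self) {
      if (klass == nullptr) return SlowPath();          // unresolved type
      if (klass->status != kStatusInitialized) return SlowPath();
      if (klass->IsFinalizable()) return SlowPath();    // needs runtime help
      size_t size = klass->object_size;
      if (self->tlab_end - self->tlab_pos < size) return SlowPath();
      // "Point of no slow path" from here on.
      size = RoundUp(size, kObjectAlignment);
      Object* obj = reinterpret_cast<Object*>(self->tlab_pos);
      self->tlab_pos += size;
      obj->klass = klass;                               // then dmb ish; return
      return obj;
    }
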
diff --git a/runtime/arch/instruction_set_features_test.cc b/runtime/arch/instruction_set_features_test.cc
index 99c2d4d..fb38b47 100644
--- a/runtime/arch/instruction_set_features_test.cc
+++ b/runtime/arch/instruction_set_features_test.cc
@@ -18,7 +18,7 @@
 
 #include <gtest/gtest.h>
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "cutils/properties.h"
 #endif
 
@@ -26,7 +26,7 @@
 
 namespace art {
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #if defined(__aarch64__)
 TEST(InstructionSetFeaturesTest, DISABLED_FeaturesFromSystemPropertyVariant) {
   LOG(WARNING) << "Test disabled due to no CPP define for A53 erratum 835769";
@@ -111,7 +111,7 @@
 }
 #endif
 
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 TEST(InstructionSetFeaturesTest, HostFeaturesFromCppDefines) {
   std::string error_msg;
   std::unique_ptr<const InstructionSetFeatures> default_features(
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 51eb77f..45e33a8 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -59,6 +59,9 @@
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
+  // Note: MIPS has asserts checking the entrypoint types. Don't move this
+  //       initialization to DefaultInitEntryPoints().
+
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
@@ -167,9 +170,14 @@
                 "Non-direct C stub marked direct.");
 
   // Locks
-  qpoints->pLockObject = art_quick_lock_object;
+  if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) {
+    qpoints->pLockObject = art_quick_lock_object_no_inline;
+    qpoints->pUnlockObject = art_quick_unlock_object_no_inline;
+  } else {
+    qpoints->pLockObject = art_quick_lock_object;
+    qpoints->pUnlockObject = art_quick_unlock_object;
+  }
   static_assert(!IsDirectEntrypoint(kQuickLockObject), "Non-direct C stub marked direct.");
-  qpoints->pUnlockObject = art_quick_unlock_object;
   static_assert(!IsDirectEntrypoint(kQuickUnlockObject), "Non-direct C stub marked direct.");
 
   // Math
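
The lock/unlock selection above (MIPS stays out of the shared helper because of its
IsDirectEntrypoint asserts) installs the new *_no_inline stubs whenever systrace
lock logging is verbose-enabled, so every lock operation takes the traceable
runtime path. Conceptually the no-inline stubs reduce to a direct runtime call
(a sketch, not the actual assembly stub):

    // art_quick_lock_object_no_inline minus the calling-convention glue:
    // skip the inlined fast path and always call into the runtime.
    extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self);
    int LockObjectNoInline(mirror::Object* obj, Thread* self) {
      // Zero on success; otherwise the stub delivers the pending exception.
      return artLockObjectFromCode(obj, self);
    }
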
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index fd1851f..3ee26af 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -319,6 +319,111 @@
 .endm
 
     /*
+     * On stack replacement stub.
+     * On entry:
+     *   a0 = stack to copy
+     *   a1 = size of stack
+     *   a2 = pc to call
+     *   a3 = JValue* result
+     *   [sp + 16] = shorty
+     *   [sp + 20] = thread
+     */
+ENTRY art_quick_osr_stub
+    // Save callee general purpose registers, RA and GP.
+    addiu  $sp, $sp, -48
+    .cfi_adjust_cfa_offset 48
+    sw     $ra, 44($sp)
+    .cfi_rel_offset 31, 44
+    sw     $s8, 40($sp)
+    .cfi_rel_offset 30, 40
+    sw     $gp, 36($sp)
+    .cfi_rel_offset 28, 36
+    sw     $s7, 32($sp)
+    .cfi_rel_offset 23, 32
+    sw     $s6, 28($sp)
+    .cfi_rel_offset 22, 28
+    sw     $s5, 24($sp)
+    .cfi_rel_offset 21, 24
+    sw     $s4, 20($sp)
+    .cfi_rel_offset 20, 20
+    sw     $s3, 16($sp)
+    .cfi_rel_offset 19, 16
+    sw     $s2, 12($sp)
+    .cfi_rel_offset 18, 12
+    sw     $s1, 8($sp)
+    .cfi_rel_offset 17, 8
+    sw     $s0, 4($sp)
+    .cfi_rel_offset 16, 4
+
+    move   $s8, $sp                        # Save the stack pointer
+    move   $s7, $a1                        # Save size of stack
+    move   $s6, $a2                        # Save the pc to call
+    lw     rSELF, 48+20($sp)               # Save managed thread pointer into rSELF
+    addiu  $t0, $sp, -12                   # Reserve space for stack pointer,
+                                           #    JValue* result, and ArtMethod* slot.
+    srl    $t0, $t0, 4                     # Align stack pointer to 16 bytes
+    sll    $sp, $t0, 4                     # Update stack pointer
+    sw     $s8, 4($sp)                     # Save old stack pointer
+    sw     $a3, 8($sp)                     # Save JValue* result
+    sw     $zero, 0($sp)                   # Store null for ArtMethod* at bottom of frame
+    subu   $sp, $a1                        # Reserve space for callee stack
+    move   $a2, $a1
+    move   $a1, $a0
+    move   $a0, $sp
+    la     $t9, memcpy
+    jalr   $t9                             # memcpy (dest a0, src a1, bytes a2)
+    addiu  $sp, $sp, -16                   # make space for argument slots for memcpy
+    bal    .Losr_entry                     # Call the method
+    addiu  $sp, $sp, 16                    # restore stack after memcpy
+    lw     $a2, 8($sp)                     # Restore JValue* result
+    lw     $sp, 4($sp)                     # Restore saved stack pointer
+    lw     $a0, 48+16($sp)                 # load shorty
+    lbu    $a0, 0($a0)                     # load return type
+    li     $a1, 'D'                        # put char 'D' into a1
+    beq    $a0, $a1, .Losr_fp_result       # Test if result type char == 'D'
+    li     $a1, 'F'                        # put char 'F' into a1
+    beq    $a0, $a1, .Losr_fp_result       # Test if result type char == 'F'
+    nop
+    sw     $v0, 0($a2)
+    b      .Losr_exit
+    sw     $v1, 4($a2)                     # store v0/v1 into result
+.Losr_fp_result:
+    SDu    $f0, $f1, 0, $a2, $t0           # store f0/f1 into result
+.Losr_exit:
+    lw     $ra, 44($sp)
+    .cfi_restore 31
+    lw     $s8, 40($sp)
+    .cfi_restore 30
+    lw     $gp, 36($sp)
+    .cfi_restore 28
+    lw     $s7, 32($sp)
+    .cfi_restore 23
+    lw     $s6, 28($sp)
+    .cfi_restore 22
+    lw     $s5, 24($sp)
+    .cfi_restore 21
+    lw     $s4, 20($sp)
+    .cfi_restore 20
+    lw     $s3, 16($sp)
+    .cfi_restore 19
+    lw     $s2, 12($sp)
+    .cfi_restore 18
+    lw     $s1, 8($sp)
+    .cfi_restore 17
+    lw     $s0, 4($sp)
+    .cfi_restore 16
+    jalr   $zero, $ra
+    addiu  $sp, $sp, 48
+    .cfi_adjust_cfa_offset -48
+.Losr_entry:
+    addiu  $s7, $s7, -4
+    addu   $t0, $s7, $sp
+    move   $t9, $s6
+    jalr   $zero, $t9
+    sw     $ra, 0($t0)                     # Store RA per the compiler ABI
+END art_quick_osr_stub
+
+    /*
      * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
      * FIXME: just guessing about the shape of the jmpbuf.  Where will pc be?
      */
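
The OSR (on-stack replacement) stub added above follows the contract documented in
its header comment; in outline (pseudocode over the argument list given there):

    // Transfer control into JIT-compiled code in the middle of a method.
    void art_quick_osr_stub(void* stack, size_t stack_size, void* pc,
                            JValue* result, const char* shorty, Thread* self) {
      // 1. Save the callee-saved registers and the original stack pointer.
      // 2. Reserve stack_size bytes (16-byte aligned) and memcpy the
      //    interpreter frame there.
      // 3. Call pc; the callee returns through the stored RA.
      // 4. Store the return value into *result, as FP or GPR data depending
      //    on shorty[0] ('F'/'D' means floating point).
      // 5. Restore registers and return.
    }
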
@@ -801,6 +906,16 @@
     RETURN_IF_ZERO
 END art_quick_lock_object
 
+ENTRY art_quick_lock_object_no_inline
+    beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
+    nop
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
+    la      $t9, artLockObjectFromCode
+    jalr    $t9                           # (Object* obj, Thread*)
+    move    $a1, rSELF                    # pass Thread::Current
+    RETURN_IF_ZERO
+END art_quick_lock_object_no_inline
+
     /*
      * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
      */
@@ -815,6 +930,16 @@
     RETURN_IF_ZERO
 END art_quick_unlock_object
 
+ENTRY art_quick_unlock_object_no_inline
+    beqz    $a0, .Lart_quick_throw_null_pointer_exception_gp_set
+    nop
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case exception allocation triggers GC
+    la      $t9, artUnlockObjectFromCode
+    jalr    $t9                       # (Object* obj, Thread*)
+    move    $a1, rSELF                # pass Thread::Current
+    RETURN_IF_ZERO
+END art_quick_unlock_object_no_inline
+
     /*
      * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
      */
@@ -1540,11 +1665,41 @@
 END art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict. t0 is a hidden argument that holds the target method's
-     * dex method index.
+     * Called to resolve an imt conflict.
+     * a0 is the conflict ArtMethod.
+     * t0 is a hidden argument that holds the target interface method's dex method index.
+     *
+     * Note that this stub writes to a0, t0 and t1.
      */
 ENTRY art_quick_imt_conflict_trampoline
-    move    $a0, $t0
+    lw      $t1, 0($sp)                                      # Load referrer.
+    lw      $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_32($t1) # Load dex cache methods array.
+    sll     $t0, $t0, POINTER_SIZE_SHIFT                     # Calculate offset.
+    addu    $t0, $t1, $t0                                    # Add offset to base.
+    lw      $t0, 0($t0)                                      # Load interface method.
+    lw      $a0, ART_METHOD_JNI_OFFSET_32($a0)               # Load ImtConflictTable.
+
+.Limt_table_iterate:
+    lw      $t1, 0($a0)                                      # Load next entry in ImtConflictTable.
+    # Branch if found.
+    beq     $t1, $t0, .Limt_table_found
+    nop
+    # If the entry is null, the interface method is not in the ImtConflictTable.
+    beqz    $t1, .Lconflict_trampoline
+    nop
+    # Iterate over the entries of the ImtConflictTable.
+    b       .Limt_table_iterate
+    addiu   $a0, $a0, 2 * __SIZEOF_POINTER__                 # Iterate to the next entry.
+
+.Limt_table_found:
+    # We successfully hit an entry in the table. Load the target method and jump to it.
+    lw      $a0, __SIZEOF_POINTER__($a0)
+    lw      $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0)
+    jr      $t9
+    nop
+
+.Lconflict_trampoline:
+    # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
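
The new MIPS conflict trampoline above assumes the same ImtConflictTable layout as
the arm/arm64 versions: null-terminated (interface method, target method) pointer
pairs. The lookup it implements, restated as a C++ sketch (not runtime code):

    // table points at pairs {interface_method, target_method}, terminated by
    // an entry whose first pointer is null.
    ArtMethod* LookupImtConflictTable(ArtMethod** table,
                                      ArtMethod* interface_method) {
      for (; table[0] != nullptr; table += 2) {
        if (table[0] == interface_method) {
          return table[1];  // found: tail-call its quick compiled code
        }
      }
      // Miss: the trampoline falls back to artInvokeInterfaceTrampoline to
      // resolve the method and (re)populate the table.
      return nullptr;
    }
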
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 4bdb38e..030c127 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -18,6 +18,7 @@
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
+#include "entrypoints/quick/quick_default_init_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/math_entrypoints.h"
@@ -57,67 +58,12 @@
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
-  // JNI
-  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
-
-  // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
   qpoints->pCheckCast = art_quick_check_cast;
 
-  // DexCache
-  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
-  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
-  qpoints->pInitializeType = art_quick_initialize_type;
-  qpoints->pResolveString = art_quick_resolve_string;
-
-  // Field
-  qpoints->pSet8Instance = art_quick_set8_instance;
-  qpoints->pSet8Static = art_quick_set8_static;
-  qpoints->pSet16Instance = art_quick_set16_instance;
-  qpoints->pSet16Static = art_quick_set16_static;
-  qpoints->pSet32Instance = art_quick_set32_instance;
-  qpoints->pSet32Static = art_quick_set32_static;
-  qpoints->pSet64Instance = art_quick_set64_instance;
-  qpoints->pSet64Static = art_quick_set64_static;
-  qpoints->pSetObjInstance = art_quick_set_obj_instance;
-  qpoints->pSetObjStatic = art_quick_set_obj_static;
-  qpoints->pGetBooleanInstance = art_quick_get_boolean_instance;
-  qpoints->pGetByteInstance = art_quick_get_byte_instance;
-  qpoints->pGetCharInstance = art_quick_get_char_instance;
-  qpoints->pGetShortInstance = art_quick_get_short_instance;
-  qpoints->pGet32Instance = art_quick_get32_instance;
-  qpoints->pGet64Instance = art_quick_get64_instance;
-  qpoints->pGetObjInstance = art_quick_get_obj_instance;
-  qpoints->pGetBooleanStatic = art_quick_get_boolean_static;
-  qpoints->pGetByteStatic = art_quick_get_byte_static;
-  qpoints->pGetCharStatic = art_quick_get_char_static;
-  qpoints->pGetShortStatic = art_quick_get_short_static;
-  qpoints->pGet32Static = art_quick_get32_static;
-  qpoints->pGet64Static = art_quick_get64_static;
-  qpoints->pGetObjStatic = art_quick_get_obj_static;
-
-  // Array
-  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
-  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
-  qpoints->pAputObject = art_quick_aput_obj;
-  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
-
-  // JNI
-  qpoints->pJniMethodStart = JniMethodStart;
-  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
-  qpoints->pJniMethodEnd = JniMethodEnd;
-  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
-  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
-  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
-  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
-
-  // Locks
-  qpoints->pLockObject = art_quick_lock_object;
-  qpoints->pUnlockObject = art_quick_unlock_object;
-
   // Math
   qpoints->pCmpgDouble = CmpgDouble;
   qpoints->pCmpgFloat = CmpgFloat;
@@ -144,35 +90,6 @@
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = memcpy;
 
-  // Invocation
-  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
-  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
-  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
-  qpoints->pInvokeDirectTrampolineWithAccessCheck =
-      art_quick_invoke_direct_trampoline_with_access_check;
-  qpoints->pInvokeInterfaceTrampolineWithAccessCheck =
-      art_quick_invoke_interface_trampoline_with_access_check;
-  qpoints->pInvokeStaticTrampolineWithAccessCheck =
-      art_quick_invoke_static_trampoline_with_access_check;
-  qpoints->pInvokeSuperTrampolineWithAccessCheck =
-      art_quick_invoke_super_trampoline_with_access_check;
-  qpoints->pInvokeVirtualTrampolineWithAccessCheck =
-      art_quick_invoke_virtual_trampoline_with_access_check;
-
-  // Thread
-  qpoints->pTestSuspend = art_quick_test_suspend;
-
-  // Throws
-  qpoints->pDeliverException = art_quick_deliver_exception;
-  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
-  qpoints->pThrowDivZero = art_quick_throw_div_zero;
-  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
-  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
-  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
-
-  // Deoptimization from compiled code.
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
-
   // TODO - use lld/scd instructions for Mips64
   // Atomic 64-bit load/store
   qpoints->pA64Load = QuasiAtomic::Read64;
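
The blocks deleted above are folded into a shared helper, DefaultInitEntryPoints(), declared in entrypoints/quick/quick_default_init_entrypoints.h. A minimal sketch of what that helper presumably contains, reconstructed from the assignments this patch removes (the elided groups follow the same pattern; the authoritative version lives in the new header):

    // Sketch only: reconstructed from the per-architecture assignments
    // deleted above, not copied from the actual header.
    static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
      // JNI
      jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;

      // Alloc
      ResetQuickAllocEntryPoints(qpoints);

      // DexCache
      qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
      qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
      qpoints->pInitializeType = art_quick_initialize_type;
      qpoints->pResolveString = art_quick_resolve_string;

      // Field, array, JNI method, lock, invocation, thread, throw and
      // deoptimization entrypoints are assigned the same way, ending with:
      qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
    }

Architecture-specific entrypoints (cast, math, string, read barriers) stay in each InitEntryPoints().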
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index f1e605a..8f1a35a 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -358,6 +358,138 @@
 .endm
 
     /*
+     * On stack replacement stub.
+     * On entry:
+     *   a0 = stack to copy
+     *   a1 = size of stack
+     *   a2 = pc to call
+     *   a3 = JValue* result
+     *   a4 = shorty
+     *   a5 = thread
+     */
+ENTRY art_quick_osr_stub
+    move   $t0, $sp               # save stack pointer
+    daddiu $t1, $sp, -112         # reserve stack space
+    dsrl   $t1, $t1, 4            # enforce 16 byte stack alignment
+    dsll   $sp, $t1, 4            # update stack pointer
+
+    // Save callee-saved general purpose registers, SP, T8(GP), RA, A3, and A4 (14 x 8 bytes)
+    sd     $ra, 104($sp)
+    .cfi_rel_offset 31, 104
+    sd     $s8, 96($sp)
+    .cfi_rel_offset 30, 96
+    sd     $t0, 88($sp)           # save original stack pointer stored in t0
+    .cfi_rel_offset 29, 88
+    sd     $t8, 80($sp)           # t8 holds caller's gp, now save it to the stack.
+    .cfi_rel_offset 28, 80        # Value from gp is pushed, so set the cfi offset accordingly.
+    sd     $s7, 72($sp)
+    .cfi_rel_offset 23, 72
+    sd     $s6, 64($sp)
+    .cfi_rel_offset 22, 64
+    sd     $s5, 56($sp)
+    .cfi_rel_offset 21, 56
+    sd     $s4, 48($sp)
+    .cfi_rel_offset 20, 48
+    sd     $s3, 40($sp)
+    .cfi_rel_offset 19, 40
+    sd     $s2, 32($sp)
+    .cfi_rel_offset 18, 32
+    sd     $s1, 24($sp)
+    .cfi_rel_offset 17, 24
+    sd     $s0, 16($sp)
+    .cfi_rel_offset 16, 16
+    sd     $a4, 8($sp)
+    .cfi_rel_offset 8, 8
+    sd     $a3, 0($sp)
+    .cfi_rel_offset 7, 0
+    move   rSELF, $a5                      # Save managed thread pointer into rSELF
+
+    daddiu $sp, $sp, -16
+    jal    .Losr_entry
+    sd     $zero, 0($sp)                   # Store null for ArtMethod* at bottom of frame
+    daddiu $sp, $sp, 16
+
+    // Restore return value address and shorty address
+    ld     $a4, 8($sp)                     # shorty address
+    .cfi_restore 8
+    ld     $a3, 0($sp)                     # result value address
+    .cfi_restore 7
+
+    lbu    $t1, 0($a4)                     # load return type
+    li     $t2, 'D'                        # put char 'D' into t2
+    beq    $t1, $t2, .Losr_fp_result       # branch if result type char == 'D'
+    li     $t2, 'F'                        # put char 'F' into t2
+    beq    $t1, $t2, .Losr_fp_result       # branch if result type char == 'F'
+    nop
+    b      .Losr_exit
+    dsrl   $v1, $v0, 32                    # put high half of result in v1
+.Losr_fp_result:
+    mfc1   $v0, $f0
+    mfhc1  $v1, $f0                        # put high half of FP result in v1
+.Losr_exit:
+    sw     $v0, 0($a3)                     # store low half of result
+    sw     $v1, 4($a3)                     # store high half of result
+
+    // Restore callee-saved registers
+    ld     $ra, 104($sp)
+    .cfi_restore 31
+    ld     $s8, 96($sp)
+    .cfi_restore 30
+    ld     $t0, 88($sp)                    # save SP into t0 for now
+    .cfi_restore 29
+    ld     $t8, 80($sp)                    # Restore gp back to its temp storage.
+    .cfi_restore 28
+    ld     $s7, 72($sp)
+    .cfi_restore 23
+    ld     $s6, 64($sp)
+    .cfi_restore 22
+    ld     $s5, 56($sp)
+    .cfi_restore 21
+    ld     $s4, 48($sp)
+    .cfi_restore 20
+    ld     $s3, 40($sp)
+    .cfi_restore 19
+    ld     $s2, 32($sp)
+    .cfi_restore 18
+    ld     $s1, 24($sp)
+    .cfi_restore 17
+    ld     $s0, 16($sp)
+    .cfi_restore 16
+    jalr   $zero, $ra
+    move   $sp, $t0
+
+.Losr_entry:
+    dsubu  $sp, $sp, $a1                   # Reserve space for callee stack
+    daddiu $a1, $a1, -8
+    daddu  $t0, $a1, $sp
+    sw     $ra, 0($t0)                     # Store low half of RA per compiler ABI
+    dsrl   $t1, $ra, 32
+    sw     $t1, 4($t0)                     # Store high half of RA per compiler ABI
+
+    // Copy arguments into callee stack
+    // Use simple copy routine for now.
+    // 4 bytes per slot.
+    // a0 = source address
+    // a1 = args length in bytes (does not include 8 bytes for RA)
+    // sp = destination address
+    beqz   $a1, .Losr_loop_exit
+    daddiu $a1, $a1, -4
+    daddu  $t1, $a0, $a1
+    daddu  $t2, $sp, $a1
+.Losr_loop_entry:
+    lw     $t0, 0($t1)
+    daddiu $t1, $t1, -4
+    sw     $t0, 0($t2)
+    bne    $sp, $t2, .Losr_loop_entry
+    daddiu $t2, $t2, -4
+
+.Losr_loop_exit:
+    move   $t9, $a2
+    jalr   $zero, $t9                      # Jump to the OSR entry point.
+    nop
+END art_quick_osr_stub
+
+    /*
      * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
      * FIXME: just guessing about the shape of the jmpbuf.  Where will pc be?
      */
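
As context for the register contract documented at the top of art_quick_osr_stub, here is a hedged C++ view of how the runtime presumably invokes it. The declaration mirrors the a0..a5 assignments in the stub's comment; CallOsr and its parameter names are illustrative:

    // Illustrative caller for the OSR stub; matches the a0..a5 contract above.
    // The stub copies `stack` into the callee frame, jumps to `native_pc`, and
    // writes the return value into `result` according to shorty[0] ('F'/'D'
    // select the floating-point result registers).
    extern "C" void art_quick_osr_stub(void* stack,
                                       uint32_t stack_size_in_bytes,
                                       const uint8_t* native_pc,
                                       JValue* result,
                                       const char* shorty,
                                       Thread* self);

    void CallOsr(void* frame, uint32_t frame_size, const uint8_t* entry,
                 const char* shorty, Thread* self) {
      JValue result;
      art_quick_osr_stub(frame, frame_size, entry, &result, shorty, self);
    }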
@@ -839,6 +971,15 @@
     RETURN_IF_ZERO
 END art_quick_lock_object
 
+ENTRY art_quick_lock_object_no_inline
+    beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
+    nop
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     # save callee saves in case we block
+    jal     artLockObjectFromCode         # (Object* obj, Thread*)
+    move    $a1, rSELF                    # pass Thread::Current
+    RETURN_IF_ZERO
+END art_quick_lock_object_no_inline
+
     /*
      * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
      */
@@ -852,6 +993,15 @@
     RETURN_IF_ZERO
 END art_quick_unlock_object
 
+ENTRY art_quick_unlock_object_no_inline
+    beq     $a0, $zero, .Lart_quick_throw_null_pointer_exception_gp_set
+    nop
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
+    jal     artUnlockObjectFromCode    # (Object* obj, Thread*)
+    move    $a1, rSELF                 # pass Thread::Current
+    RETURN_IF_ZERO
+END art_quick_unlock_object_no_inline
+
     /*
      * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
      */
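
The _no_inline lock and unlock entrypoints added above skip the inlined fast path and always call into the runtime. A hedged sketch of how selection between the two flavors presumably works (SetLockEntryPoints is illustrative, not the actual ART function):

    // Illustrative only: choosing between inlined and no-inline monitor
    // entrypoints, e.g. when lock inlining is disabled or instrumentation
    // needs every lock operation to reach the runtime.
    void SetLockEntryPoints(QuickEntryPoints* qpoints, bool inline_fast_path) {
      qpoints->pLockObject = inline_fast_path
          ? art_quick_lock_object
          : art_quick_lock_object_no_inline;
      qpoints->pUnlockObject = inline_fast_path
          ? art_quick_unlock_object
          : art_quick_unlock_object_no_inline;
    }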
@@ -1534,11 +1684,40 @@
 END art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict. t0 is a hidden argument that holds the target method's
-     * dex method index.
+     * Called to resolve an imt conflict.
+     * a0 is the conflict ArtMethod.
+     * t0 is a hidden argument that holds the target interface method's dex method index.
+     *
+     * Note that this stub writes to a0, t0 and t1.
      */
 ENTRY art_quick_imt_conflict_trampoline
-    move    $a0, $t0
+    ld      $t1, 0($sp)                                      # Load referrer.
+    ld      $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_64($t1) # Load dex cache methods array.
+    dsll    $t0, $t0, POINTER_SIZE_SHIFT                     # Calculate offset.
+    daddu   $t0, $t1, $t0                                    # Add offset to base.
+    ld      $t0, 0($t0)                                      # Load interface method.
+    ld      $a0, ART_METHOD_JNI_OFFSET_64($a0)               # Load ImtConflictTable.
+
+.Limt_table_iterate:
+    ld      $t1, 0($a0)                                      # Load next entry in ImtConflictTable.
+    # Branch if found.
+    beq     $t1, $t0, .Limt_table_found
+    nop
+    # If the entry is null, the interface method is not in the ImtConflictTable.
+    beqzc   $t1, .Lconflict_trampoline
+    # Iterate over the entries of the ImtConflictTable.
+    daddiu  $a0, $a0, 2 * __SIZEOF_POINTER__                 # Iterate to the next entry.
+    bc      .Limt_table_iterate
+
+.Limt_table_found:
+    # We successfully hit an entry in the table. Load the target method and jump to it.
+    ld      $a0, __SIZEOF_POINTER__($a0)
+    ld      $t9, ART_METHOD_QUICK_CODE_OFFSET_64($a0)
+    jr      $t9
+    .cpreturn                      # Restore gp from t8 in branch delay slot.
+
+.Lconflict_trampoline:
+    # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
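
The assembly above does a linear scan over the ImtConflictTable. The same lookup, rendered as C++ for readability (a sketch; the layout is taken from the code itself: pairs of pointers, with a null interface-method slot terminating the table):

    // Sketch of the lookup performed by art_quick_imt_conflict_trampoline.
    // Each entry is an (interface ArtMethod*, target ArtMethod*) pair.
    ArtMethod* LookupImtConflictTable(void** table, ArtMethod* interface_method) {
      for (size_t i = 0; table[2 * i] != nullptr; ++i) {
        if (table[2 * i] == interface_method) {
          return reinterpret_cast<ArtMethod*>(table[2 * i + 1]);
        }
      }
      return nullptr;  // Not found: fall back to artInvokeInterfaceTrampoline.
    }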
 
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 1c4b470..02629e8 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -21,6 +21,7 @@
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "linear_alloc.h"
 #include "mirror/class-inl.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change.h"
@@ -1204,7 +1205,8 @@
 
 
 TEST_F(StubTest, StringCompareTo) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || \
+  // There is no StringCompareTo runtime entrypoint for __aarch64__.
+#if defined(__i386__) || defined(__arm__) || \
     defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
@@ -1933,7 +1935,12 @@
   TestFields(self, this, Primitive::Type::kPrimLong);
 }
 
-TEST_F(StubTest, IMT) {
+// Disabled, b/27991555.
+// FIXME: Hacking the entry point to point to art_quick_to_interpreter_bridge is broken.
+// The bridge calls through to GetCalleeSaveMethodCaller() which looks up the pre-header
+// and gets a bogus OatQuickMethodHeader* pointing into our assembly code just before
+// the bridge and uses that to check for inlined frames, crashing in the process.
+TEST_F(StubTest, DISABLED_IMT) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
@@ -1999,14 +2006,27 @@
   // 1. imt_conflict
 
   // Contains.
-  // TODO(ngeoffray): Re-enable this test. They are now broken with the ImtConflictTable.
-  // b/27794971
-/*
+
+  // We construct the ImtConflictTable ourselves, as we cannot go into the runtime stub
+  // that will create it: the runtime stub expects to be called by compiled code.
+  LinearAlloc* linear_alloc = Runtime::Current()->GetLinearAlloc();
+  ArtMethod* conflict_method = Runtime::Current()->CreateImtConflictMethod(linear_alloc);
+  ImtConflictTable* empty_conflict_table =
+      Runtime::Current()->GetClassLinker()->CreateImtConflictTable(/*count*/0u, linear_alloc);
+  void* data = linear_alloc->Alloc(
+      self,
+      ImtConflictTable::ComputeSizeWithOneMoreEntry(empty_conflict_table, sizeof(void*)));
+  ImtConflictTable* new_table = new (data) ImtConflictTable(
+      empty_conflict_table, inf_contains, contains_amethod, sizeof(void*));
+  conflict_method->SetImtConflictTable(new_table, sizeof(void*));
+
   size_t result =
-      Invoke3WithReferrerAndHidden(0U, reinterpret_cast<size_t>(array_list.Get()),
+      Invoke3WithReferrerAndHidden(reinterpret_cast<size_t>(conflict_method),
+                                   reinterpret_cast<size_t>(array_list.Get()),
                                    reinterpret_cast<size_t>(obj.Get()),
                                    StubTest::GetEntrypoint(self, kQuickQuickImtConflictTrampoline),
-                                   self, contains_amethod,
+                                   self,
+                                   contains_amethod,
                                    static_cast<size_t>(inf_contains->GetDexMethodIndex()));
 
   ASSERT_FALSE(self->IsExceptionPending());
@@ -2020,25 +2040,29 @@
 
   // Contains.
 
-  result = Invoke3WithReferrerAndHidden(
-      0U, reinterpret_cast<size_t>(array_list.Get()), reinterpret_cast<size_t>(obj.Get()),
-      StubTest::GetEntrypoint(self, kQuickQuickImtConflictTrampoline), self, contains_amethod,
-      static_cast<size_t>(inf_contains->GetDexMethodIndex()));
+  result =
+      Invoke3WithReferrerAndHidden(reinterpret_cast<size_t>(conflict_method),
+                                   reinterpret_cast<size_t>(array_list.Get()),
+                                   reinterpret_cast<size_t>(obj.Get()),
+                                   StubTest::GetEntrypoint(self, kQuickQuickImtConflictTrampoline),
+                                   self,
+                                   contains_amethod,
+                                   static_cast<size_t>(inf_contains->GetDexMethodIndex()));
 
   ASSERT_FALSE(self->IsExceptionPending());
   EXPECT_EQ(static_cast<size_t>(JNI_TRUE), result);
-*/
+
   // 2. regular interface trampoline
 
-  size_t result = Invoke3WithReferrer(static_cast<size_t>(inf_contains->GetDexMethodIndex()),
-                                      reinterpret_cast<size_t>(array_list.Get()),
-                                      reinterpret_cast<size_t>(obj.Get()),
-                                      StubTest::GetEntrypoint(self,
-                                         kQuickInvokeInterfaceTrampolineWithAccessCheck),
-                                      self, contains_amethod);
+  result = Invoke3WithReferrer(static_cast<size_t>(inf_contains->GetDexMethodIndex()),
+                               reinterpret_cast<size_t>(array_list.Get()),
+                               reinterpret_cast<size_t>(obj.Get()),
+                               StubTest::GetEntrypoint(self,
+                                   kQuickInvokeInterfaceTrampolineWithAccessCheck),
+                               self, contains_amethod);
 
   ASSERT_FALSE(self->IsExceptionPending());
-  EXPECT_EQ(static_cast<size_t>(JNI_FALSE), result);
+  EXPECT_EQ(static_cast<size_t>(JNI_TRUE), result);
 
   result = Invoke3WithReferrer(
       static_cast<size_t>(inf_contains->GetDexMethodIndex()),
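
The test-side table construction above relies on placement new into LinearAlloc memory. The pattern in isolation (a sketch; old_table and the extra_* names are illustrative):

    // Grow an ImtConflictTable by one (interface, target) pair, in place.
    void* raw = linear_alloc->Alloc(
        self, ImtConflictTable::ComputeSizeWithOneMoreEntry(old_table, sizeof(void*)));
    ImtConflictTable* grown = new (raw) ImtConflictTable(
        old_table, extra_interface_method, extra_target_method, sizeof(void*));
    conflict_method->SetImtConflictTable(grown, sizeof(void*));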
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index e593f39..15a8571 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -17,6 +17,7 @@
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
+#include "entrypoints/quick/quick_default_init_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "interpreter/interpreter.h"
@@ -33,67 +34,12 @@
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
-  // JNI
-  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
-
-  // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = art_quick_is_assignable;
   qpoints->pCheckCast = art_quick_check_cast;
 
-  // DexCache
-  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
-  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
-  qpoints->pInitializeType = art_quick_initialize_type;
-  qpoints->pResolveString = art_quick_resolve_string;
-
-  // Field
-  qpoints->pSet8Instance = art_quick_set8_instance;
-  qpoints->pSet8Static = art_quick_set8_static;
-  qpoints->pSet16Instance = art_quick_set16_instance;
-  qpoints->pSet16Static = art_quick_set16_static;
-  qpoints->pSet32Instance = art_quick_set32_instance;
-  qpoints->pSet32Static = art_quick_set32_static;
-  qpoints->pSet64Instance = art_quick_set64_instance;
-  qpoints->pSet64Static = art_quick_set64_static;
-  qpoints->pSetObjInstance = art_quick_set_obj_instance;
-  qpoints->pSetObjStatic = art_quick_set_obj_static;
-  qpoints->pGetByteInstance = art_quick_get_byte_instance;
-  qpoints->pGetBooleanInstance = art_quick_get_boolean_instance;
-  qpoints->pGetShortInstance = art_quick_get_short_instance;
-  qpoints->pGetCharInstance = art_quick_get_char_instance;
-  qpoints->pGet32Instance = art_quick_get32_instance;
-  qpoints->pGet64Instance = art_quick_get64_instance;
-  qpoints->pGetObjInstance = art_quick_get_obj_instance;
-  qpoints->pGetByteStatic = art_quick_get_byte_static;
-  qpoints->pGetBooleanStatic = art_quick_get_boolean_static;
-  qpoints->pGetShortStatic = art_quick_get_short_static;
-  qpoints->pGetCharStatic = art_quick_get_char_static;
-  qpoints->pGet32Static = art_quick_get32_static;
-  qpoints->pGet64Static = art_quick_get64_static;
-  qpoints->pGetObjStatic = art_quick_get_obj_static;
-
-  // Array
-  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
-  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
-  qpoints->pAputObject = art_quick_aput_obj;
-  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
-
-  // JNI
-  qpoints->pJniMethodStart = JniMethodStart;
-  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
-  qpoints->pJniMethodEnd = JniMethodEnd;
-  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
-  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
-  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
-  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
-
-  // Locks
-  qpoints->pLockObject = art_quick_lock_object;
-  qpoints->pUnlockObject = art_quick_unlock_object;
-
   // More math.
   qpoints->pCos = cos;
   qpoints->pSin = sin;
@@ -128,35 +74,6 @@
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = art_quick_memcpy;
 
-  // Invocation
-  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
-  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
-  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
-  qpoints->pInvokeDirectTrampolineWithAccessCheck =
-      art_quick_invoke_direct_trampoline_with_access_check;
-  qpoints->pInvokeInterfaceTrampolineWithAccessCheck =
-      art_quick_invoke_interface_trampoline_with_access_check;
-  qpoints->pInvokeStaticTrampolineWithAccessCheck =
-      art_quick_invoke_static_trampoline_with_access_check;
-  qpoints->pInvokeSuperTrampolineWithAccessCheck =
-      art_quick_invoke_super_trampoline_with_access_check;
-  qpoints->pInvokeVirtualTrampolineWithAccessCheck =
-      art_quick_invoke_virtual_trampoline_with_access_check;
-
-  // Thread
-  qpoints->pTestSuspend = art_quick_test_suspend;
-
-  // Throws
-  qpoints->pDeliverException = art_quick_deliver_exception;
-  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
-  qpoints->pThrowDivZero = art_quick_throw_div_zero;
-  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
-  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
-  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
-
-  // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
-
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   qpoints->pReadBarrierMark = art_quick_read_barrier_mark;
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index b97a8db..0093e82 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -45,11 +45,6 @@
     "silvermont",
 };
 
-static constexpr const char* x86_variants_prefer_locked_add_sync[] = {
-    "atom",
-    "silvermont",
-};
-
 static constexpr const char* x86_variants_with_popcnt[] = {
     "silvermont",
 };
@@ -69,10 +64,6 @@
   bool has_AVX = false;
   bool has_AVX2 = false;
 
-  bool prefers_locked_add = FindVariantInArray(x86_variants_prefer_locked_add_sync,
-                                               arraysize(x86_variants_prefer_locked_add_sync),
-                                               variant);
-
   bool has_POPCNT = FindVariantInArray(x86_variants_with_popcnt,
                                        arraysize(x86_variants_with_popcnt),
                                        variant);
@@ -86,10 +77,10 @@
 
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add, has_POPCNT);
+                                            has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add, has_POPCNT);
+                                            has_AVX2, has_POPCNT);
   }
 }
 
@@ -101,16 +92,13 @@
   bool has_SSE4_2 = (bitmap & kSse4_2Bitfield) != 0;
   bool has_AVX = (bitmap & kAvxBitfield) != 0;
   bool has_AVX2 = (bitmap & kAvxBitfield) != 0;
-  bool prefers_locked_add = (bitmap & kPrefersLockedAdd) != 0;
   bool has_POPCNT = (bitmap & kPopCntBitfield) != 0;
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                            has_AVX, has_AVX2, prefers_locked_add,
-                                            has_POPCNT);
+                                            has_AVX, has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                         has_AVX, has_AVX2, prefers_locked_add,
-                                         has_POPCNT);
+                                         has_AVX, has_AVX2, has_POPCNT);
   }
 }
 
@@ -147,9 +135,6 @@
   const bool has_AVX2 = true;
 #endif
 
-  // No #define for memory synchronization preference.
-  const bool prefers_locked_add = false;
-
 #ifndef __POPCNT__
   const bool has_POPCNT = false;
 #else
@@ -158,10 +143,10 @@
 
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add, has_POPCNT);
+                                            has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2, prefers_locked_add, has_POPCNT);
+                                         has_AVX2, has_POPCNT);
   }
 }
 
@@ -174,8 +159,6 @@
   bool has_SSE4_2 = false;
   bool has_AVX = false;
   bool has_AVX2 = false;
-  // No cpuinfo for memory synchronization preference.
-  const bool prefers_locked_add = false;
   bool has_POPCNT = false;
 
   std::ifstream in("/proc/cpuinfo");
@@ -217,10 +200,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add, has_POPCNT);
+                                            has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2, prefers_locked_add, has_POPCNT);
+                                         has_AVX2, has_POPCNT);
   }
 }
 
@@ -245,7 +228,6 @@
       (has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
       (has_AVX_ == other_as_x86->has_AVX_) &&
       (has_AVX2_ == other_as_x86->has_AVX2_) &&
-      (prefers_locked_add_ == other_as_x86->prefers_locked_add_) &&
       (has_POPCNT_ == other_as_x86->has_POPCNT_);
 }
 
@@ -256,7 +238,6 @@
       (has_SSE4_2_ ? kSse4_2Bitfield : 0) |
       (has_AVX_ ? kAvxBitfield : 0) |
       (has_AVX2_ ? kAvx2Bitfield : 0) |
-      (prefers_locked_add_ ? kPrefersLockedAdd : 0) |
       (has_POPCNT_ ? kPopCntBitfield : 0);
 }
 
@@ -292,11 +273,6 @@
   } else {
     result += ",-avx2";
   }
-  if (prefers_locked_add_) {
-    result += ",lock_add";
-  } else {
-    result += ",-lock_add";
-  }
   if (has_POPCNT_) {
     result += ",popcnt";
   } else {
@@ -313,7 +289,6 @@
   bool has_SSE4_2 = has_SSE4_2_;
   bool has_AVX = has_AVX_;
   bool has_AVX2 = has_AVX2_;
-  bool prefers_locked_add = prefers_locked_add_;
   bool has_POPCNT = has_POPCNT_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = Trim(*i);
@@ -337,10 +312,6 @@
       has_AVX2 = true;
     } else if (feature == "-avx2") {
       has_AVX2 = false;
-    } else if (feature == "lock_add") {
-      prefers_locked_add = true;
-    } else if (feature == "-lock_add") {
-      prefers_locked_add = false;
     } else if (feature == "popcnt") {
       has_POPCNT = true;
     } else if (feature == "-popcnt") {
@@ -352,10 +323,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, prefers_locked_add, has_POPCNT);
+                                            has_AVX2, has_POPCNT);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2, prefers_locked_add, has_POPCNT);
+                                         has_AVX2, has_POPCNT);
   }
 }
 
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 1819654..2aa8ae6 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -60,8 +60,6 @@
 
   bool HasSSE4_1() const { return has_SSE4_1_; }
 
-  bool PrefersLockedAddSynchronization() const { return prefers_locked_add_; }
-
   bool HasPopCnt() const { return has_POPCNT_; }
 
  protected:
@@ -77,16 +75,13 @@
                                  bool x86_64, std::string* error_msg) const;
 
   X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                            bool has_AVX, bool has_AVX2,
-                            bool prefers_locked_add,
-                            bool has_POPCNT)
+                            bool has_AVX, bool has_AVX2, bool has_POPCNT)
       : InstructionSetFeatures(smp),
         has_SSSE3_(has_SSSE3),
         has_SSE4_1_(has_SSE4_1),
         has_SSE4_2_(has_SSE4_2),
         has_AVX_(has_AVX),
         has_AVX2_(has_AVX2),
-        prefers_locked_add_(prefers_locked_add),
         has_POPCNT_(has_POPCNT) {
   }
 
@@ -99,8 +94,7 @@
     kSse4_2Bitfield = 8,
     kAvxBitfield = 16,
     kAvx2Bitfield = 32,
-    kPrefersLockedAdd = 64,
-    kPopCntBitfield = 128,
+    kPopCntBitfield = 64,
   };
 
   const bool has_SSSE3_;   // x86 128bit SIMD - Supplemental SSE.
@@ -108,7 +102,6 @@
   const bool has_SSE4_2_;  // x86 128bit SIMD SSE4.2.
   const bool has_AVX_;     // x86 256bit SIMD AVX.
   const bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.
-  const bool prefers_locked_add_;  // x86 use locked add for memory synchronization.
   const bool has_POPCNT_;  // x86 population count
 
   DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index a062c12..9e154c6 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 1U);
 }
@@ -40,9 +40,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 67U);
+  EXPECT_EQ(x86_features->AsBitmap(), 3U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -50,7 +50,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -60,9 +60,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 67U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 3U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
@@ -77,9 +77,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,popcnt",
                x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 207U);
+  EXPECT_EQ(x86_features->AsBitmap(), 79U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -87,7 +87,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -97,9 +97,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,popcnt",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 207U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 79U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
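
The new expected bitmaps follow directly from the renumbered bitfields: with kPrefersLockedAdd (64) removed, kPopCntBitfield drops from 128 to 64. A quick check, with enumerator values copied from the updated header:

    enum {
      kSmp = 1, kSsse3 = 2, kSse4_1 = 4, kSse4_2 = 8,
      kAvx = 16, kAvx2 = 32, kPopCnt = 64,
    };
    static_assert((kSmp | kSsse3) == 3,
                  "silvermont: was 67 when lock_add occupied bit 64");
    static_assert((kSmp | kSsse3 | kSse4_1 | kSse4_2 | kPopCnt) == 79,
                  "was 207 when lock_add=64 and popcnt=128");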
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 82ac574..485da9f 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -897,8 +897,123 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER      // return or deliver exception
 END_FUNCTION art_quick_alloc_object_rosalloc
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// EAX: type_idx/return_value, ECX: ArtMethod*, EDX: the class.
+MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
+    testl %edx, %edx                                           // Check null class
+    jz   VAR(slowPathLabel)
+                                                               // Check class status.
+    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%edx)
+    jne  VAR(slowPathLabel)
+                                                               // No fake dependence needed on x86
+                                                               // between status and flags load,
+                                                               // since each load is a load-acquire,
+                                                               // so loads are not reordered.
+                                                               // Check access flags has
+                                                               // kAccClassIsFinalizable
+    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%edx)
+    jnz  VAR(slowPathLabel)
+    movl %fs:THREAD_SELF_OFFSET, %ebx                          // ebx = thread
+    movl THREAD_LOCAL_END_OFFSET(%ebx), %edi                   // Load thread_local_end.
+    subl THREAD_LOCAL_POS_OFFSET(%ebx), %edi                   // Compute the remaining buffer size.
+    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%edx), %esi           // Load the object size.
+    cmpl %edi, %esi                                            // Check if it fits. OK to do this
+                                                               // before rounding up the object size
+                                                               // assuming the buf size alignment.
+    ja   VAR(slowPathLabel)
+    addl LITERAL(OBJECT_ALIGNMENT_MASK), %esi                  // Align the size by 8. (addr + 7) & ~7.
+    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %esi
+    movl THREAD_LOCAL_POS_OFFSET(%ebx), %eax                   // Load thread_local_pos
+                                                               // as allocated object.
+    addl %eax, %esi                                            // Add the object size.
+    movl %esi, THREAD_LOCAL_POS_OFFSET(%ebx)                   // Update thread_local_pos.
+    addl LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%ebx)         // Increase thread_local_objects.
+                                                               // Store the class pointer in the header.
+                                                               // No fence needed for x86.
+    POISON_HEAP_REF edx
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%eax)
+    POP edi
+    POP esi
+    ret                                                        // Fast path succeeded.
+END_MACRO
+
+// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
+    POP edi
+    POP esi
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx                 // save ref containing registers for GC
+    // Outgoing argument set up
+    PUSH eax                                                   // alignment padding
+    pushl %fs:THREAD_SELF_OFFSET                               // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH ecx
+    PUSH eax
+    call CALLVAR(cxx_name)                                     // cxx_name(arg0, arg1, Thread*)
+    addl LITERAL(16), %esp
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                        // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
+END_MACRO
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+DEFINE_FUNCTION art_quick_alloc_object_tlab
+    // Fast path tlab allocation.
+    // EAX: uint32_t type_idx/return value, ECX: ArtMethod*.
+    // EBX, EDX: free.
+#if defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    PUSH esi
+    PUSH edi
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx      // Load dex cache resolved types array
+    // Might need to break down into multiple instructions to get the base address in a register.
+                                                               // Load the class
+    movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB
+END_FUNCTION art_quick_alloc_object_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_object_region_tlab
+    // Fast path region tlab allocation.
+    // EAX: uint32_t type_idx/return value, ECX: ArtMethod*.
+    // EBX, EDX: free.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    PUSH esi
+    PUSH edi
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx      // Load dex cache resolved types array
+    // Might need to break down into multiple instructions to get the base address in a register.
+                                                               // Load the class
+    movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
+                                                               // Read barrier for class load.
+    cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH eax
+    PUSH ecx
+    // Outgoing argument set up
+    subl MACRO_LITERAL(8), %esp                                // Alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    PUSH edx                                                   // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // artReadBarrierMark(mirror::Object* obj)
+    movl %eax, %edx
+    addl MACRO_LITERAL(12), %esp
+    CFI_ADJUST_CFA_OFFSET(-12)
+    POP ecx
+    POP eax
+    jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB
+END_FUNCTION art_quick_alloc_object_region_tlab
 
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
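
The ALLOC_OBJECT_TLAB_FAST_PATH macro above is a classic bump-pointer allocation. A hedged C++ equivalent (field and helper names are illustrative, not the exact ART accessors):

    // Sketch of the TLAB fast path: reject null, uninitialized, or
    // finalizable classes; check fit against the remaining buffer; bump the
    // position pointer; install the class pointer (no fence needed on x86).
    mirror::Object* AllocObjectTlabFastPath(mirror::Class* klass, Thread* self) {
      if (klass == nullptr ||
          klass->GetStatus() != mirror::Class::kStatusInitialized ||
          klass->IsFinalizable()) {
        return nullptr;  // Slow path.
      }
      size_t size = klass->GetObjectSize();
      if (size > self->TlabRemainingCapacity()) {  // OK pre-rounding: buffer end is aligned.
        return nullptr;  // Slow path.
      }
      size = (size + kObjectAlignment - 1) & ~(kObjectAlignment - 1);  // Align to 8.
      uint8_t* pos = self->TlabPos();
      self->SetTlabPos(pos + size);            // Bump the pointer.
      self->IncrementTlabObjects();
      auto* obj = reinterpret_cast<mirror::Object*>(pos);
      obj->SetClass(klass);
      return obj;
    }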
@@ -960,6 +1075,22 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
 
+DEFINE_FUNCTION art_quick_lock_object_no_inline
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    // Outgoing argument set up
+    subl LITERAL(8), %esp                 // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    pushl %fs:THREAD_SELF_OFFSET          // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH eax                              // pass object
+    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
+    addl LITERAL(16), %esp                // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_lock_object_no_inline
+
 DEFINE_FUNCTION art_quick_unlock_object
     testl %eax, %eax                      // null check object/eax
     jz   .Lslow_unlock
@@ -1015,6 +1146,21 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
+DEFINE_FUNCTION art_quick_unlock_object_no_inline
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    // Outgoing argument set up
+    subl LITERAL(8), %esp                 // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    pushl %fs:THREAD_SELF_OFFSET          // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH eax                              // pass object
+    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
+    addl LITERAL(16), %esp                // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_unlock_object_no_inline
+
 DEFINE_FUNCTION art_quick_is_assignable
     PUSH eax                              // alignment padding
     PUSH ecx                              // pass arg2 - obj->klass
@@ -1422,7 +1568,7 @@
 .Limt_table_iterate:
     cmpl %edi, 0(%eax)
     jne .Limt_table_next_entry
-    // We successuflly hit an entry in the table. Load the target method
+    // We successfully hit an entry in the table. Load the target method
     // and jump to it.
     POP EDI
     movl __SIZEOF_POINTER__(%eax), %eax
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 3d19f06..c39d122 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -45,16 +45,17 @@
   MutexLock mu(nullptr, *Locks::modify_ldt_lock_);
 
   const uintptr_t base = reinterpret_cast<uintptr_t>(this);
-  const size_t limit = kPageSize;
+  const size_t limit = sizeof(Thread);
 
   const int contents = MODIFY_LDT_CONTENTS_DATA;
   const int seg_32bit = 1;
   const int read_exec_only = 0;
-  const int limit_in_pages = 0;
+  const int limit_in_pages = 1;
   const int seg_not_present = 0;
   const int useable = 1;
 
-  int entry_number = -1;
+  int entry_number;
+  uint16_t table_indicator;
 
 #if defined(__APPLE__)
   descriptor_table_entry_t entry;
@@ -77,41 +78,52 @@
   if (entry_number == -1) {
     PLOG(FATAL) << "i386_set_ldt failed";
   }
+
+  table_indicator = 1 << 2;  // LDT
 #else
-  // Read current LDT entries.
-  static_assert(static_cast<size_t>(LDT_ENTRY_SIZE) == sizeof(uint64_t),
-                "LDT_ENTRY_SIZE is different from sizeof(uint64_t).");
-  std::vector<uint64_t> ldt(LDT_ENTRIES);
-  size_t ldt_size(sizeof(uint64_t) * ldt.size());
-  memset(&ldt[0], 0, ldt_size);
-  // TODO: why doesn't this return LDT_ENTRY_SIZE * LDT_ENTRIES for the main thread?
-  syscall(__NR_modify_ldt, 0, &ldt[0], ldt_size);
+  // We use a GDT entry on Linux.
+  user_desc gdt_entry;
+  memset(&gdt_entry, 0, sizeof(gdt_entry));
 
-  // Find the first empty slot.
-  for (entry_number = 0; entry_number < LDT_ENTRIES && ldt[entry_number] != 0; ++entry_number) {
-  }
-  if (entry_number >= LDT_ENTRIES) {
-    LOG(FATAL) << "Failed to find a free LDT slot";
-  }
+  // On Linux, there are 3 TLS GDT entries. We use one of those to store our segment descriptor
+  // data.
+  //
+  // This entry must be shared, as the kernel only guarantees three TLS entries. For simplicity
+  // (and locality), use this local global, which practically becomes readonly after the first
+  // (startup) thread of the runtime has been initialized (during Runtime::Start()).
+  //
+  // We also share this between all runtimes in the process. This is both for simplicity (one
+  // well-known slot) as well as to avoid the three-slot limitation. The downside is that we
+  // cannot free the slot when a runtime shuts down.
+  static unsigned int gdt_entry_number = -1;
 
-  // Update LDT entry.
-  user_desc ldt_entry;
-  memset(&ldt_entry, 0, sizeof(ldt_entry));
-  ldt_entry.entry_number = entry_number;
-  ldt_entry.base_addr = base;
-  ldt_entry.limit = limit;
-  ldt_entry.seg_32bit = seg_32bit;
-  ldt_entry.contents = contents;
-  ldt_entry.read_exec_only = read_exec_only;
-  ldt_entry.limit_in_pages = limit_in_pages;
-  ldt_entry.seg_not_present = seg_not_present;
-  ldt_entry.useable = useable;
-  CHECK_EQ(0, syscall(__NR_modify_ldt, 1, &ldt_entry, sizeof(ldt_entry)));
-  entry_number = ldt_entry.entry_number;
+  if (gdt_entry_number == static_cast<unsigned int>(-1)) {
+    gdt_entry.entry_number = -1;  // Let the kernel choose.
+  } else {
+    gdt_entry.entry_number = gdt_entry_number;
+  }
+  gdt_entry.base_addr = base;
+  gdt_entry.limit = limit;
+  gdt_entry.seg_32bit = seg_32bit;
+  gdt_entry.contents = contents;
+  gdt_entry.read_exec_only = read_exec_only;
+  gdt_entry.limit_in_pages = limit_in_pages;
+  gdt_entry.seg_not_present = seg_not_present;
+  gdt_entry.useable = useable;
+  int rc = syscall(__NR_set_thread_area, &gdt_entry);
+  if (rc != -1) {
+    entry_number = gdt_entry.entry_number;
+    if (gdt_entry_number == static_cast<unsigned int>(-1)) {
+      gdt_entry_number = entry_number;  // Save the kernel-assigned entry number.
+    }
+  } else {
+    PLOG(FATAL) << "set_thread_area failed";
+    UNREACHABLE();
+  }
+  table_indicator = 0;  // GDT
 #endif
 
-  // Change %fs to be new LDT entry.
-  uint16_t table_indicator = 1 << 2;  // LDT
+  // Change %fs to point to the new descriptor table entry.
   uint16_t rpl = 3;  // Requested privilege level
   uint16_t selector = (entry_number << 3) | table_indicator | rpl;
   __asm__ __volatile__("movw %w0, %%fs"
@@ -163,13 +175,18 @@
   UNUSED(selector);
   // i386_set_ldt(selector >> 3, 0, 1);
 #else
-  user_desc ldt_entry;
-  memset(&ldt_entry, 0, sizeof(ldt_entry));
-  ldt_entry.entry_number = selector >> 3;
-  ldt_entry.contents = MODIFY_LDT_CONTENTS_DATA;
-  ldt_entry.seg_not_present = 1;
-
-  syscall(__NR_modify_ldt, 1, &ldt_entry, sizeof(ldt_entry));
+  // Note: if we wanted to clean up the GDT entry, we would do that here, when the *last* thread
+  // is being deleted. But see the comment on gdt_entry_number. Code would look like this:
+  //
+  // user_desc gdt_entry;
+  // memset(&gdt_entry, 0, sizeof(gdt_entry));
+  // gdt_entry.entry_number = selector >> 3;
+  // gdt_entry.contents = MODIFY_LDT_CONTENTS_DATA;
+  // // "Empty" = Delete = seg_not_present==1 && read_exec_only==1.
+  // gdt_entry.seg_not_present = 1;
+  // gdt_entry.read_exec_only = 1;
+  // syscall(__NR_set_thread_area, &gdt_entry);
+  UNUSED(selector);
 #endif
 }
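
The selector written to %fs above is assembled from three fields. A small worked example (MakeSelector is illustrative):

    // Segment selector layout: bits 3..15 = descriptor index,
    // bit 2 = table indicator (0 = GDT, 1 = LDT), bits 0..1 = RPL.
    uint16_t MakeSelector(unsigned entry_number, bool ldt) {
      const uint16_t table_indicator = ldt ? (1u << 2) : 0u;
      const uint16_t rpl = 3;  // User-mode requested privilege level.
      return static_cast<uint16_t>((entry_number << 3) | table_indicator | rpl);
    }
    // E.g. if set_thread_area hands back TLS GDT entry 6 (typically the first
    // TLS slot on 32-bit Linux), the selector is (6 << 3) | 0 | 3 == 0x33.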
 
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 0a5d14a..bd6df70 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -17,6 +17,9 @@
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
+#if !defined(__APPLE__)
+#include "entrypoints/quick/quick_default_init_entrypoints.h"
+#endif
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/math_entrypoints.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
@@ -38,67 +41,12 @@
   UNUSED(jpoints, qpoints);
   UNIMPLEMENTED(FATAL);
 #else
-  // JNI
-  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
-
-  // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
   qpoints->pCheckCast = art_quick_check_cast;
 
-  // DexCache
-  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
-  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
-  qpoints->pInitializeType = art_quick_initialize_type;
-  qpoints->pResolveString = art_quick_resolve_string;
-
-  // Field
-  qpoints->pSet8Instance = art_quick_set8_instance;
-  qpoints->pSet8Static = art_quick_set8_static;
-  qpoints->pSet16Instance = art_quick_set16_instance;
-  qpoints->pSet16Static = art_quick_set16_static;
-  qpoints->pSet32Instance = art_quick_set32_instance;
-  qpoints->pSet32Static = art_quick_set32_static;
-  qpoints->pSet64Instance = art_quick_set64_instance;
-  qpoints->pSet64Static = art_quick_set64_static;
-  qpoints->pSetObjInstance = art_quick_set_obj_instance;
-  qpoints->pSetObjStatic = art_quick_set_obj_static;
-  qpoints->pGetByteInstance = art_quick_get_byte_instance;
-  qpoints->pGetBooleanInstance = art_quick_get_boolean_instance;
-  qpoints->pGetShortInstance = art_quick_get_short_instance;
-  qpoints->pGetCharInstance = art_quick_get_char_instance;
-  qpoints->pGet32Instance = art_quick_get32_instance;
-  qpoints->pGet64Instance = art_quick_get64_instance;
-  qpoints->pGetObjInstance = art_quick_get_obj_instance;
-  qpoints->pGetByteStatic = art_quick_get_byte_static;
-  qpoints->pGetBooleanStatic = art_quick_get_boolean_static;
-  qpoints->pGetShortStatic = art_quick_get_short_static;
-  qpoints->pGetCharStatic = art_quick_get_char_static;
-  qpoints->pGet32Static = art_quick_get32_static;
-  qpoints->pGet64Static = art_quick_get64_static;
-  qpoints->pGetObjStatic = art_quick_get_obj_static;
-
-  // Array
-  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
-  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
-  qpoints->pAputObject = art_quick_aput_obj;
-  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
-
-  // JNI
-  qpoints->pJniMethodStart = JniMethodStart;
-  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
-  qpoints->pJniMethodEnd = JniMethodEnd;
-  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
-  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
-  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
-  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
-
-  // Locks
-  qpoints->pLockObject = art_quick_lock_object;
-  qpoints->pUnlockObject = art_quick_unlock_object;
-
   // More math.
   qpoints->pCos = cos;
   qpoints->pSin = sin;
@@ -132,35 +80,6 @@
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = art_quick_memcpy;
 
-  // Invocation
-  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
-  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
-  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
-  qpoints->pInvokeDirectTrampolineWithAccessCheck =
-      art_quick_invoke_direct_trampoline_with_access_check;
-  qpoints->pInvokeInterfaceTrampolineWithAccessCheck =
-      art_quick_invoke_interface_trampoline_with_access_check;
-  qpoints->pInvokeStaticTrampolineWithAccessCheck =
-      art_quick_invoke_static_trampoline_with_access_check;
-  qpoints->pInvokeSuperTrampolineWithAccessCheck =
-      art_quick_invoke_super_trampoline_with_access_check;
-  qpoints->pInvokeVirtualTrampolineWithAccessCheck =
-      art_quick_invoke_virtual_trampoline_with_access_check;
-
-  // Thread
-  qpoints->pTestSuspend = art_quick_test_suspend;
-
-  // Throws
-  qpoints->pDeliverException = art_quick_deliver_exception;
-  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
-  qpoints->pThrowDivZero = art_quick_throw_div_zero;
-  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
-  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
-  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
-
-  // Deoptimize
-  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
-
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   qpoints->pReadBarrierMark = art_quick_read_barrier_mark;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index aba7234..0840f89 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -74,10 +74,9 @@
 
  private:
   X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                               bool has_AVX, bool has_AVX2, bool prefers_locked_add,
-                               bool has_POPCNT)
+                               bool has_AVX, bool has_AVX2, bool has_POPCNT)
       : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                  has_AVX2, prefers_locked_add, has_POPCNT) {
+                                  has_AVX2, has_POPCNT) {
   }
 
   friend class X86InstructionSetFeatures;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
index 78aeacf..f2b2cd8 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
 }
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 90049cc..8064ed6 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -894,57 +894,107 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                   // return or deliver exception
 END_FUNCTION art_quick_alloc_object_rosalloc
 
-// A handle-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value.
+// RCX: scratch, r8: Thread::Current().
+MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
+    testl %edx, %edx                                           // Check null class
+    jz   RAW_VAR(slowPathLabel)
+                                                               // Check class status.
+    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
+    jne  RAW_VAR(slowPathLabel)
+                                                               // No fake dependence needed on x86
+                                                               // between status and flags load,
+                                                               // since each load is a load-acquire,
+                                                               // so loads are not reordered.
+                                                               // Check access flags has
+                                                               // kAccClassIsFinalizable
+    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
+    jnz  RAW_VAR(slowPathLabel)
+    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
+    movq THREAD_LOCAL_END_OFFSET(%r8), %rax                    // Load thread_local_end.
+    subq THREAD_LOCAL_POS_OFFSET(%r8), %rax                    // Compute the remaining buffer size.
+    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx           // Load the object size.
+    cmpq %rax, %rcx                                            // Check if it fits. OK to do this
+                                                               // before rounding up the object size
+                                                               // assuming the buf size alignment.
+    ja   RAW_VAR(slowPathLabel)
+    addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx                  // Align the size by 8. (addr + 7) & ~7.
+    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx
+    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax                    // Load thread_local_pos
+                                                               // as the allocated object.
+    addq %rax, %rcx                                            // Add the object size.
+    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
+    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)          // Increase thread_local_objects.
+                                                               // Store the class pointer in the header.
+                                                               // No fence needed for x86.
+    POISON_HEAP_REF edx
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+    ret                                                        // Fast path succeeded.
+END_MACRO
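
The fast path above is a standard bump-pointer allocation from the thread-local
allocation buffer (TLAB). A rough C++ sketch of the same control flow follows;
the struct layouts, field names, and constant values are illustrative stand-ins,
not the actual ART declarations:

    #include <cstddef>
    #include <cstdint>

    struct Class { uint32_t status; uint32_t access_flags; uint32_t object_size; };
    struct Object { Class* klass; };
    struct Thread { uint8_t* tlab_pos; uint8_t* tlab_end; size_t tlab_objects; };

    constexpr uint32_t kStatusInitialized = 10;             // assumed status value
    constexpr uint32_t kAccClassIsFinalizable = 1u << 31;   // assumed flag bit
    constexpr size_t kObjectAlignmentMask = 7;              // align object sizes to 8

    // Returns nullptr when the slow path must run: null or uninitialized class,
    // finalizable class, or not enough room left in the buffer.
    inline Object* AllocObjectTlabFastPath(Thread* self, Class* klass) {
      if (klass == nullptr) return nullptr;
      if (klass->status != kStatusInitialized) return nullptr;
      if ((klass->access_flags & kAccClassIsFinalizable) != 0) return nullptr;
      size_t remaining = static_cast<size_t>(self->tlab_end - self->tlab_pos);
      size_t size = klass->object_size;
      if (size > remaining) return nullptr;  // Safe before rounding, given an aligned buffer size.
      size = (size + kObjectAlignmentMask) & ~kObjectAlignmentMask;
      Object* obj = reinterpret_cast<Object*>(self->tlab_pos);
      self->tlab_pos += size;
      ++self->tlab_objects;
      obj->klass = klass;  // Store the class pointer last; no fence needed on x86.
      return obj;
    }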
+
+// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                          // save ref containing registers for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
+    call VAR(cxx_name)                                         // cxx_name(arg0, arg1, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                        // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
+END_MACRO
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
     // RDI: uint32_t type_idx, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-    // TODO: Add read barrier when this function is used.
-    // Note this function can/should implement read barrier fast path only
-    // (no read barrier slow path) because this is the fast path of tlab allocation.
-    // We can fall back to the allocation slow path to do the read barrier slow path.
 #if defined(USE_READ_BARRIER)
     int3
     int3
 #endif
     // Might need a special macro since rsi and edx are 32b/64b mismatched.
     movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
-    // TODO: Add read barrier when this function is used.
     // Might need to break down into multiple instructions to get the base address in a register.
                                                                // Load the class
     movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
-    testl %edx, %edx                                           // Check null class
-    jz   .Lart_quick_alloc_object_tlab_slow_path
-                                                               // Check class status.
-    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
-    jne  .Lart_quick_alloc_object_tlab_slow_path
-                                                               // Check access flags has kAccClassIsFinalizable
-    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
-    jnz  .Lart_quick_alloc_object_tlab_slow_path
-    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx           // Load the object size.
-    addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx                  // Align the size by 8. (addr + 7) & ~7.
-    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx
-    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
-    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax                    // Load thread_local_pos.
-    addq %rax, %rcx                                            // Add the object size.
-    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
-    ja   .Lart_quick_alloc_object_tlab_slow_path
-    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
-    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)          // Increment thread_local_objects.
-                                                               // Store the class pointer in the header.
-                                                               // No fence needed for x86.
-    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
-    ret                                                        // Fast path succeeded.
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
 .Lart_quick_alloc_object_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                          // save ref containing registers for GC
-    // Outgoing argument set up
-    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
-    call SYMBOL(artAllocObjectFromCodeTLAB)                    // cxx_name(arg0, arg1, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                        // restore frame up to return address
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB
 END_FUNCTION art_quick_alloc_object_tlab
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_object_region_tlab
+    // Fast path region tlab allocation.
+    // RDI: uint32_t type_idx, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    // Might need a special macro since rsi and edx are 32b/64b mismatched.
+    movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
+    // Might need to break down into multiple instructions to get the base address in a register.
+                                                               // Load the class
+    movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    // Outgoing argument set up
+    movq %rdx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rdx
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB
+END_FUNCTION art_quick_alloc_object_region_tlab
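
The region-TLAB variant differs from the plain TLAB one in two ways: the int3
guard is inverted (this entry point is only expected when read barriers are
enabled), and the freshly loaded class reference must pass through the read
barrier's mark routine while the GC is concurrently marking. A hedged C++
sketch of that extra step, with assumed names standing in for
artReadBarrierMark and the thread's marking flag:

    struct Class;
    struct Thread { bool is_gc_marking; };   // THREAD_IS_GC_MARKING_OFFSET stand-in
    Class* ReadBarrierMark(Class* ref);      // stand-in for artReadBarrierMark

    inline Class* LoadClassWithReadBarrier(Thread* self, Class* klass) {
      if (self->is_gc_marking) {
        // Slow path: mark the class; the returned reference may be a
        // to-space copy, so use it from here on.
        klass = ReadBarrierMark(klass);
      }
      return klass;  // Fall through to ALLOC_OBJECT_TLAB_FAST_PATH.
    }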
 
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -994,6 +1044,14 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
 
+DEFINE_FUNCTION art_quick_lock_object_no_inline
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
+    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_lock_object_no_inline
+
 DEFINE_FUNCTION art_quick_unlock_object
     testl %edi, %edi                      // null check object/edi
     jz   .Lslow_unlock
@@ -1037,6 +1095,14 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
+DEFINE_FUNCTION art_quick_unlock_object_no_inline
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
+    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
+    RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_unlock_object_no_inline
+
 DEFINE_FUNCTION art_quick_check_cast
     PUSH rdi                          // Save args for exc
     PUSH rsi
@@ -1331,7 +1397,7 @@
 .Limt_table_iterate:
     cmpq %r10, 0(%rdi)
     jne .Limt_table_next_entry
-    // We successuflly hit an entry in the table. Load the target method
+    // We successfully hit an entry in the table. Load the target method
     // and jump to it.
     movq __SIZEOF_POINTER__(%rdi), %rdi
     jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index 3463b0d..d911497 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -343,6 +343,49 @@
   }
 }
 
+// If kExactOffset is true then we only find the matching offset, not the field containing the
+// offset.
+template <bool kExactOffset>
+static inline ArtField* FindFieldWithOffset(
+    const IterationRange<StrideIterator<ArtField>>& fields,
+    uint32_t field_offset) SHARED_REQUIRES(Locks::mutator_lock_) {
+  for (ArtField& field : fields) {
+    if (kExactOffset) {
+      if (field.GetOffset().Uint32Value() == field_offset) {
+        return &field;
+      }
+    } else {
+      const uint32_t offset = field.GetOffset().Uint32Value();
+      Primitive::Type type = field.GetTypeAsPrimitiveType();
+      const size_t field_size = Primitive::ComponentSize(type);
+      DCHECK_GT(field_size, 0u);
+      if (offset <= field_offset && field_offset < offset + field_size) {
+        return &field;
+      }
+    }
+  }
+  return nullptr;
+}
+
+template <bool kExactOffset>
+inline ArtField* ArtField::FindInstanceFieldWithOffset(mirror::Class* klass,
+                                                       uint32_t field_offset) {
+  DCHECK(klass != nullptr);
+  ArtField* field = FindFieldWithOffset<kExactOffset>(klass->GetIFields(), field_offset);
+  if (field != nullptr) {
+    return field;
+  }
+  // We did not find field in the class: look into superclass.
+  return (klass->GetSuperClass() != nullptr) ?
+      FindInstanceFieldWithOffset<kExactOffset>(klass->GetSuperClass(), field_offset) : nullptr;
+}
+
+template <bool kExactOffset>
+inline ArtField* ArtField::FindStaticFieldWithOffset(mirror::Class* klass, uint32_t field_offset) {
+  DCHECK(klass != nullptr);
+  return FindFieldWithOffset<kExactOffset>(klass->GetSFields(), field_offset);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ART_FIELD_INL_H_
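
The kExactOffset template parameter distinguishes two lookups: the old exact
match on a field's declared offset, and a containment check for an arbitrary
byte offset that may land in the middle of a wide field. A hedged usage sketch
(lock annotations omitted):

    // Find which instance field an arbitrary byte offset falls into.
    ArtField* FieldAtOffset(mirror::Class* klass, uint32_t offset) {
      // Default (kExactOffset = true) keeps the previous behavior.
      ArtField* exact = ArtField::FindInstanceFieldWithOffset(klass, offset);
      if (exact != nullptr) {
        return exact;
      }
      // kExactOffset = false also matches offsets inside a wider field,
      // e.g. byte 2 of an 8-byte long field.
      return ArtField::FindInstanceFieldWithOffset<false>(klass, offset);
    }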
diff --git a/runtime/art_field.cc b/runtime/art_field.cc
index 3737e0d..ea5078e 100644
--- a/runtime/art_field.cc
+++ b/runtime/art_field.cc
@@ -47,28 +47,6 @@
   offset_ = num_bytes.Uint32Value();
 }
 
-ArtField* ArtField::FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset) {
-  DCHECK(klass != nullptr);
-  for (ArtField& field : klass->GetIFields()) {
-    if (field.GetOffset().Uint32Value() == field_offset) {
-      return &field;
-    }
-  }
-  // We did not find field in the class: look into superclass.
-  return (klass->GetSuperClass() != nullptr) ?
-      FindInstanceFieldWithOffset(klass->GetSuperClass(), field_offset) : nullptr;
-}
-
-ArtField* ArtField::FindStaticFieldWithOffset(mirror::Class* klass, uint32_t field_offset) {
-  DCHECK(klass != nullptr);
-  for (ArtField& field : klass->GetSFields()) {
-    if (field.GetOffset().Uint32Value() == field_offset) {
-      return &field;
-    }
-  }
-  return nullptr;
-}
-
 mirror::Class* ArtField::ProxyFindSystemClass(const char* descriptor) {
   DCHECK(GetDeclaringClass()->IsProxyClass());
   return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(), descriptor);
diff --git a/runtime/art_field.h b/runtime/art_field.h
index ee1ba1f..b64b70f 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -159,9 +159,16 @@
   }
 
   // Returns an instance field with this offset in the given class or null if not found.
+  // If kExactOffset is true then we only find the matching offset, not the field containing the
+  // offset.
+  template <bool kExactOffset = true>
   static ArtField* FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Returns a static field with this offset in the given class or null if not found.
+  // If kExactOffset is true then we only find the matching offset, not the field containing the
+  // offset.
+  template <bool kExactOffset = true>
   static ArtField* FindStaticFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 6449efa..7647ad6 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -456,13 +456,18 @@
       interface_method->VisitRoots(visitor, pointer_size);
     }
     visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
-    // Runtime methods and native methods use the same field as the profiling info for
-    // storing their own data (jni entrypoint for native methods, and ImtConflictTable for
-    // some runtime methods).
-    if (!IsNative() && !IsRuntimeMethod()) {
-      ProfilingInfo* profiling_info = GetProfilingInfo(pointer_size);
-      if (profiling_info != nullptr) {
-        profiling_info->VisitRoots(visitor);
+    // We know we don't have profiling information if the class hasn't been verified. Note
+    // that this check also ensures the IsNative call can be made, as IsNative expects a fully
+    // created class (and not a retired one).
+    if (klass->IsVerified()) {
+      // Runtime methods and native methods use the same field as the profiling info for
+      // storing their own data (jni entrypoint for native methods, and ImtConflictTable for
+      // some runtime methods).
+      if (!IsNative() && !IsRuntimeMethod()) {
+        ProfilingInfo* profiling_info = GetProfilingInfo(pointer_size);
+        if (profiling_info != nullptr) {
+          profiling_info->VisitRoots(visitor);
+        }
       }
     }
   }
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index f97ad51..1790df6 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -31,7 +31,6 @@
 #include "jit/jit_code_cache.h"
 #include "jit/profiling_info.h"
 #include "jni_internal.h"
-#include "mapping_table.h"
 #include "mirror/abstract_method.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
@@ -254,14 +253,17 @@
   Runtime* runtime = Runtime::Current();
   // Call the invoke stub, passing everything as arguments.
   // If the runtime is not yet started or it is required by the debugger, then perform the
-  // Invocation by the interpreter.
+  // Invocation by the interpreter, explicitly forcing interpretation over JIT to prevent
+  // cycling around the various JIT/Interpreter methods that handle method invocation.
   if (UNLIKELY(!runtime->IsStarted() || Dbg::IsForcedInterpreterNeededForCalling(self, this))) {
     if (IsStatic()) {
-      art::interpreter::EnterInterpreterFromInvoke(self, this, nullptr, args, result);
+      art::interpreter::EnterInterpreterFromInvoke(
+          self, this, nullptr, args, result, /*stay_in_interpreter*/ true);
     } else {
       mirror::Object* receiver =
           reinterpret_cast<StackReference<mirror::Object>*>(&args[0])->AsMirrorPtr();
-      art::interpreter::EnterInterpreterFromInvoke(self, this, receiver, args + 1, result);
+      art::interpreter::EnterInterpreterFromInvoke(
+          self, this, receiver, args + 1, result, /*stay_in_interpreter*/ true);
     }
   } else {
     DCHECK_EQ(runtime->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
@@ -277,7 +279,7 @@
 
       // Ensure that we won't be accidentally calling quick compiled code when -Xint.
       if (kIsDebugBuild && runtime->GetInstrumentation()->IsForcedInterpretOnly()) {
-        CHECK(!runtime->UseJit());
+        CHECK(!runtime->UseJitCompilation());
         const void* oat_quick_code = runtime->GetClassLinker()->GetOatMethodQuickCodeFor(this);
         CHECK(oat_quick_code == nullptr || oat_quick_code != GetEntryPointFromQuickCompiledCode())
             << "Don't call compiled code when -Xint " << PrettyMethod(this);
@@ -482,7 +484,7 @@
   // to the JIT code, but this would require taking the JIT code cache lock to notify
   // it, which we do not want at this level.
   Runtime* runtime = Runtime::Current();
-  if (runtime->GetJit() != nullptr) {
+  if (runtime->UseJitCompilation()) {
     if (runtime->GetJit()->GetCodeCache()->ContainsPc(GetEntryPointFromQuickCompiledCode())) {
       SetEntryPointFromQuickCompiledCodePtrSize(GetQuickToInterpreterBridge(), image_pointer_size);
     }
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 3dbcd58..a012a5a 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -41,6 +41,7 @@
 namespace mirror {
 class Array;
 class Class;
+class IfTable;
 class PointerArray;
 }  // namespace mirror
 
@@ -50,66 +51,151 @@
 // with the last entry being null to make an assembly implementation of a lookup
 // faster.
 class ImtConflictTable {
+  enum MethodIndex {
+    kMethodInterface,
+    kMethodImplementation,
+    kMethodCount,  // Number of elements in enum.
+  };
+
  public:
   // Build a new table copying `other` and adding the new entry formed of
   // the pair { `interface_method`, `implementation_method` }
   ImtConflictTable(ImtConflictTable* other,
                    ArtMethod* interface_method,
-                   ArtMethod* implementation_method) {
-    size_t index = 0;
-    while (other->entries_[index].interface_method != nullptr) {
-      entries_[index] = other->entries_[index];
-      index++;
+                   ArtMethod* implementation_method,
+                   size_t pointer_size) {
+    const size_t count = other->NumEntries(pointer_size);
+    for (size_t i = 0; i < count; ++i) {
+      SetInterfaceMethod(i, pointer_size, other->GetInterfaceMethod(i, pointer_size));
+      SetImplementationMethod(i, pointer_size, other->GetImplementationMethod(i, pointer_size));
     }
-    entries_[index].interface_method = interface_method;
-    entries_[index].implementation_method = implementation_method;
+    SetInterfaceMethod(count, pointer_size, interface_method);
+    SetImplementationMethod(count, pointer_size, implementation_method);
     // Add the null marker.
-    entries_[index + 1].interface_method = nullptr;
-    entries_[index + 1].implementation_method = nullptr;
+    SetInterfaceMethod(count + 1, pointer_size, nullptr);
+    SetImplementationMethod(count + 1, pointer_size, nullptr);
+  }
+
+  // num_entries excludes the null terminator entry.
+  ImtConflictTable(size_t num_entries, size_t pointer_size) {
+    SetInterfaceMethod(num_entries, pointer_size, nullptr);
+    SetImplementationMethod(num_entries, pointer_size, nullptr);
+  }
+
+  // Set an entry at an index.
+  void SetInterfaceMethod(size_t index, size_t pointer_size, ArtMethod* method) {
+    SetMethod(index * kMethodCount + kMethodInterface, pointer_size, method);
+  }
+
+  void SetImplementationMethod(size_t index, size_t pointer_size, ArtMethod* method) {
+    SetMethod(index * kMethodCount + kMethodImplementation, pointer_size, method);
+  }
+
+  ArtMethod* GetInterfaceMethod(size_t index, size_t pointer_size) const {
+    return GetMethod(index * kMethodCount + kMethodInterface, pointer_size);
+  }
+
+  ArtMethod* GetImplementationMethod(size_t index, size_t pointer_size) const {
+    return GetMethod(index * kMethodCount + kMethodImplementation, pointer_size);
+  }
+
+  // Visit all of the entries.
+  // NO_THREAD_SAFETY_ANALYSIS for calling with held locks. Visitor is passed a pair of ArtMethod*
+  // and also returns one. The order is <interface, implementation>.
+  template<typename Visitor>
+  void Visit(const Visitor& visitor, size_t pointer_size) NO_THREAD_SAFETY_ANALYSIS {
+    uint32_t table_index = 0;
+    for (;;) {
+      ArtMethod* interface_method = GetInterfaceMethod(table_index, pointer_size);
+      if (interface_method == nullptr) {
+        break;
+      }
+      ArtMethod* implementation_method = GetImplementationMethod(table_index, pointer_size);
+      auto input = std::make_pair(interface_method, implementation_method);
+      std::pair<ArtMethod*, ArtMethod*> updated = visitor(input);
+      if (input.first != updated.first) {
+        SetInterfaceMethod(table_index, pointer_size, updated.first);
+      }
+      if (input.second != updated.second) {
+        SetImplementationMethod(table_index, pointer_size, updated.second);
+      }
+      ++table_index;
+    }
   }
 
   // Lookup the implementation ArtMethod associated to `interface_method`. Return null
   // if not found.
-  ArtMethod* Lookup(ArtMethod* interface_method) const {
+  ArtMethod* Lookup(ArtMethod* interface_method, size_t pointer_size) const {
     uint32_t table_index = 0;
-    ArtMethod* current_interface_method;
-    while ((current_interface_method = entries_[table_index].interface_method) != nullptr) {
-      if (current_interface_method == interface_method) {
-        return entries_[table_index].implementation_method;
+    for (;;) {
+      ArtMethod* current_interface_method = GetInterfaceMethod(table_index, pointer_size);
+      if (current_interface_method == nullptr) {
+        break;
       }
-      table_index++;
+      if (current_interface_method == interface_method) {
+        return GetImplementationMethod(table_index, pointer_size);
+      }
+      ++table_index;
     }
     return nullptr;
   }
 
-  // Compute the size in bytes taken by this table.
-  size_t ComputeSize() const {
+  // Compute the number of entries in this table.
+  size_t NumEntries(size_t pointer_size) const {
     uint32_t table_index = 0;
-    size_t total_size = 0;
-    while ((entries_[table_index].interface_method) != nullptr) {
-      total_size += sizeof(Entry);
-      table_index++;
+    while (GetInterfaceMethod(table_index, pointer_size) != nullptr) {
+      ++table_index;
     }
+    return table_index;
+  }
+
+  // Compute the size in bytes taken by this table.
+  size_t ComputeSize(size_t pointer_size) const {
     // Add the end marker.
-    return total_size + sizeof(Entry);
+    return ComputeSize(NumEntries(pointer_size), pointer_size);
   }
 
   // Compute the size in bytes needed for copying the given `table` and add
   // one more entry.
-  static size_t ComputeSizeWithOneMoreEntry(ImtConflictTable* table) {
-    return table->ComputeSize() + sizeof(Entry);
+  static size_t ComputeSizeWithOneMoreEntry(ImtConflictTable* table, size_t pointer_size) {
+    return table->ComputeSize(pointer_size) + EntrySize(pointer_size);
   }
 
-  struct Entry {
-    ArtMethod* interface_method;
-    ArtMethod* implementation_method;
-  };
+  // Compute size with a fixed number of entries.
+  static size_t ComputeSize(size_t num_entries, size_t pointer_size) {
+    return (num_entries + 1) * EntrySize(pointer_size);  // Add one for null terminator.
+  }
+
+  static size_t EntrySize(size_t pointer_size) {
+    return pointer_size * static_cast<size_t>(kMethodCount);
+  }
 
  private:
+  ArtMethod* GetMethod(size_t index, size_t pointer_size) const {
+    if (pointer_size == 8) {
+      return reinterpret_cast<ArtMethod*>(static_cast<uintptr_t>(data64_[index]));
+    } else {
+      DCHECK_EQ(pointer_size, 4u);
+      return reinterpret_cast<ArtMethod*>(static_cast<uintptr_t>(data32_[index]));
+    }
+  }
+
+  void SetMethod(size_t index, size_t pointer_size, ArtMethod* method) {
+    if (pointer_size == 8) {
+      data64_[index] = dchecked_integral_cast<uint64_t>(reinterpret_cast<uintptr_t>(method));
+    } else {
+      DCHECK_EQ(pointer_size, 4u);
+      data32_[index] = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(method));
+    }
+  }
+
   // Array of entries that the assembly stubs will iterate over. Note that this is
   // not fixed size, and we allocate data prior to calling the constructor
   // of ImtConflictTable.
-  Entry entries_[0];
+  union {
+    uint32_t data32_[0];
+    uint64_t data64_[0];
+  };
 
   DISALLOW_COPY_AND_ASSIGN(ImtConflictTable);
 };
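
With this change the table stores raw 32- or 64-bit slots instead of
ArtMethod* pairs, so its layout depends only on the image's pointer size and
not on the pointer size of the tool writing it. The layout is a
null-terminated interleaving, [iface0, impl0, iface1, impl1, ..., null, null],
which is why every accessor now takes pointer_size. A hedged usage sketch of
Visit (Relocate is a hypothetical helper, e.g. for image relocation; the
declarations above are assumed in scope):

    #include <utility>

    class ArtMethod;
    ArtMethod* Relocate(ArtMethod* m);  // hypothetical

    void RelocateConflictTable(ImtConflictTable* table, size_t pointer_size) {
      table->Visit([](const std::pair<ArtMethod*, ArtMethod*>& entry) {
        // Return the updated <interface, implementation> pair; Visit writes
        // back only the slots that actually changed.
        return std::make_pair(Relocate(entry.first), Relocate(entry.second));
      }, pointer_size);
    }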
@@ -220,6 +306,10 @@
     return !IsAbstract() && !IsDefaultConflicting();
   }
 
+  bool IsCompilable() {
+    return (GetAccessFlags() & kAccCompileDontBother) == 0;
+  }
+
   // A default conflict method is a special sentinel method that stands for a conflict between
   // multiple default methods. It cannot be invoked, throwing an IncompatibleClassChangeError if one
   // attempts to do so.
@@ -261,6 +351,12 @@
     SetAccessFlags(GetAccessFlags() | kAccSkipAccessChecks);
   }
 
+  // Should this method be run in the interpreter and count locks (e.g., failed structured-
+  // locking verification)?
+  bool MustCountLocks() {
+    return (GetAccessFlags() & kAccMustCountLocks) != 0;
+  }
+
   // Returns true if this method could be overridden by a default method.
   bool IsOverridableByDefaultMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -347,7 +443,6 @@
 
   // Find the method that this method overrides.
   ArtMethod* FindOverriddenMethod(size_t pointer_size)
-      REQUIRES(Roles::uninterruptible_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Find the method index for this method within other_dexfile. If this method isn't present then
@@ -413,8 +508,8 @@
     return reinterpret_cast<ImtConflictTable*>(GetEntryPointFromJniPtrSize(pointer_size));
   }
 
-  ALWAYS_INLINE void SetImtConflictTable(ImtConflictTable* table) {
-    SetEntryPointFromJniPtrSize(table, sizeof(void*));
+  ALWAYS_INLINE void SetImtConflictTable(ImtConflictTable* table, size_t pointer_size) {
+    SetEntryPointFromJniPtrSize(table, pointer_size);
   }
 
   ALWAYS_INLINE void SetProfilingInfo(ProfilingInfo* info) {
@@ -545,6 +640,9 @@
   ALWAYS_INLINE GcRoot<mirror::Class>* GetDexCacheResolvedTypes(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Note: hotness_count_ updates are non-atomic, but the count doesn't need to be precise.
+  // Also, given that the counter is only 16 bits wide, we can expect wrap-around in some
+  // situations. Consumers of hotness_count_ must be able to deal with that.
   uint16_t IncrementCounter() {
     return ++hotness_count_;
   }
@@ -553,6 +651,14 @@
     hotness_count_ = 0;
   }
 
+  void SetCounter(int16_t hotness_count) {
+    hotness_count_ = hotness_count;
+  }
+
+  uint16_t GetCounter() const {
+    return hotness_count_;
+  }
+
   const uint8_t* GetQuickenedInfo() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns the method header for the compiled code containing 'pc'. Note that runtime
@@ -597,7 +703,7 @@
   // ifTable.
   uint16_t method_index_;
 
-  // The hotness we measure for this method. Incremented by the interpreter. Not atomic, as we allow
+  // The hotness we measure for this method. Managed by the interpreter. Not atomic, as we allow
   // missing increments: if the method is hot, we will see it eventually.
   uint16_t hotness_count_;
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 942f9de..21725d3 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -20,6 +20,7 @@
 #if defined(__cplusplus)
 #include "art_method.h"
 #include "gc/allocator/rosalloc.h"
+#include "jit/jit.h"
 #include "lock_word.h"
 #include "mirror/class.h"
 #include "mirror/string.h"
@@ -188,7 +189,13 @@
 #define SHADOWFRAME_DEX_PC_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 4)
 ADD_TEST_EQ(SHADOWFRAME_DEX_PC_OFFSET,
             static_cast<int32_t>(art::ShadowFrame::DexPCOffset()))
-#define SHADOWFRAME_VREGS_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 8)
+#define SHADOWFRAME_CACHED_HOTNESS_COUNTDOWN_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 8)
+ADD_TEST_EQ(SHADOWFRAME_CACHED_HOTNESS_COUNTDOWN_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::CachedHotnessCountdownOffset()))
+#define SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 10)
+ADD_TEST_EQ(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::HotnessCountdownOffset()))
+#define SHADOWFRAME_VREGS_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 12)
 ADD_TEST_EQ(SHADOWFRAME_VREGS_OFFSET,
             static_cast<int32_t>(art::ShadowFrame::VRegsOffset()))
 
@@ -389,6 +396,12 @@
 #define THREAD_CHECKPOINT_REQUEST 2
 ADD_TEST_EQ(THREAD_CHECKPOINT_REQUEST, static_cast<int32_t>(art::kCheckpointRequest))
 
+#define JIT_CHECK_OSR -1
+ADD_TEST_EQ(JIT_CHECK_OSR, static_cast<int32_t>(art::jit::kJitCheckForOSR))
+
+#define JIT_HOTNESS_DISABLE -2
+ADD_TEST_EQ(JIT_HOTNESS_DISABLE, static_cast<int32_t>(art::jit::kJitHotnessDisabled))
+
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
 #endif
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 70ff60f..b84e29f 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -77,9 +77,11 @@
   "RegAllocVldt ",
   "StackMapStm  ",
   "CodeGen      ",
+  "Assembler    ",
   "ParallelMove ",
   "GraphChecker ",
   "Verifier     ",
+  "CallingConv  ",
 };
 
 template <bool kCount>
@@ -160,6 +162,7 @@
 
 MallocArena::MallocArena(size_t size) {
   memory_ = reinterpret_cast<uint8_t*>(calloc(1, size));
+  CHECK(memory_ != nullptr);  // Abort on OOM.
   size_ = size;
 }
 
@@ -317,15 +320,27 @@
   // mark only the actually allocated memory as defined. That leaves red zones
   // and padding between allocations marked as inaccessible.
   size_t rounded_bytes = RoundUp(bytes + kMemoryToolRedZoneBytes, 8);
-  if (UNLIKELY(ptr_ + rounded_bytes > end_)) {
-    // Obtain a new block.
-    ObtainNewArenaForAllocation(rounded_bytes);
-    CHECK(ptr_ != nullptr);
-    MEMORY_TOOL_MAKE_NOACCESS(ptr_, end_ - ptr_);
-  }
   ArenaAllocatorStats::RecordAlloc(rounded_bytes, kind);
-  uint8_t* ret = ptr_;
-  ptr_ += rounded_bytes;
+  uint8_t* ret;
+  if (UNLIKELY(rounded_bytes > static_cast<size_t>(end_ - ptr_))) {
+    ret = AllocFromNewArena(rounded_bytes);
+    uint8_t* noaccess_begin = ret + bytes;
+    uint8_t* noaccess_end;
+    if (ret == arena_head_->Begin()) {
+      DCHECK(ptr_ - rounded_bytes == ret);
+      noaccess_end = end_;
+    } else {
+      // We're still using the old arena but `ret` comes from a new one just after it.
+      DCHECK(arena_head_->next_ != nullptr);
+      DCHECK(ret == arena_head_->next_->Begin());
+      DCHECK_EQ(rounded_bytes, arena_head_->next_->GetBytesAllocated());
+      noaccess_end = arena_head_->next_->End();
+    }
+    MEMORY_TOOL_MAKE_NOACCESS(noaccess_begin, noaccess_end - noaccess_begin);
+  } else {
+    ret = ptr_;
+    ptr_ += rounded_bytes;
+  }
   MEMORY_TOOL_MAKE_DEFINED(ret, bytes);
   // Check that the memory is already zeroed out.
   DCHECK(std::all_of(ret, ret + bytes, [](uint8_t val) { return val == 0u; }));
@@ -338,14 +353,27 @@
   pool_->FreeArenaChain(arena_head_);
 }
 
-void ArenaAllocator::ObtainNewArenaForAllocation(size_t allocation_size) {
-  UpdateBytesAllocated();
-  Arena* new_arena = pool_->AllocArena(std::max(Arena::kDefaultSize, allocation_size));
-  new_arena->next_ = arena_head_;
-  arena_head_ = new_arena;
-  // Update our internal data structures.
-  ptr_ = begin_ = new_arena->Begin();
-  end_ = new_arena->End();
+uint8_t* ArenaAllocator::AllocFromNewArena(size_t bytes) {
+  Arena* new_arena = pool_->AllocArena(std::max(Arena::kDefaultSize, bytes));
+  DCHECK(new_arena != nullptr);
+  DCHECK_LE(bytes, new_arena->Size());
+  if (static_cast<size_t>(end_ - ptr_) > new_arena->Size() - bytes) {
+    // The old arena has more space remaining than the new one, so keep using it.
+    // This can happen when the requested size is over half of the default size.
+    DCHECK(arena_head_ != nullptr);
+    new_arena->bytes_allocated_ = bytes;  // UpdateBytesAllocated() on the new_arena.
+    new_arena->next_ = arena_head_->next_;
+    arena_head_->next_ = new_arena;
+  } else {
+    UpdateBytesAllocated();
+    new_arena->next_ = arena_head_;
+    arena_head_ = new_arena;
+    // Update our internal data structures.
+    begin_ = new_arena->Begin();
+    ptr_ = begin_ + bytes;
+    end_ = new_arena->End();
+  }
+  return new_arena->Begin();
 }
 
 bool ArenaAllocator::Contains(const void* ptr) const {
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 697f7e0..6c1a898 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -88,9 +88,11 @@
   kArenaAllocRegisterAllocatorValidate,
   kArenaAllocStackMapStream,
   kArenaAllocCodeGenerator,
+  kArenaAllocAssembler,
   kArenaAllocParallelMoveResolver,
   kArenaAllocGraphChecker,
   kArenaAllocVerifier,
+  kArenaAllocCallingConvention,
   kNumArenaAllocKinds
 };
 
@@ -232,6 +234,8 @@
   friend class ScopedArenaAllocator;
   template <bool kCount> friend class ArenaAllocatorStatsImpl;
 
+  friend class ArenaAllocatorTest;
+
  private:
   DISALLOW_COPY_AND_ASSIGN(Arena);
 };
@@ -301,14 +305,10 @@
       return AllocWithMemoryTool(bytes, kind);
     }
     bytes = RoundUp(bytes, kAlignment);
-    if (UNLIKELY(ptr_ + bytes > end_)) {
-      // Obtain a new block.
-      ObtainNewArenaForAllocation(bytes);
-      if (UNLIKELY(ptr_ == nullptr)) {
-        return nullptr;
-      }
-    }
     ArenaAllocatorStats::RecordAlloc(bytes, kind);
+    if (UNLIKELY(bytes > static_cast<size_t>(end_ - ptr_))) {
+      return AllocFromNewArena(bytes);
+    }
     uint8_t* ret = ptr_;
     ptr_ += bytes;
     return ret;
@@ -348,10 +348,6 @@
     return static_cast<T*>(Alloc(length * sizeof(T), kind));
   }
 
-  void* AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind);
-
-  void ObtainNewArenaForAllocation(size_t allocation_size);
-
   size_t BytesAllocated() const;
 
   MemStats GetMemStats() const;
@@ -367,6 +363,9 @@
   bool Contains(const void* ptr) const;
 
  private:
+  void* AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind);
+  uint8_t* AllocFromNewArena(size_t bytes);
+
   static constexpr size_t kAlignment = 8;
 
   void UpdateBytesAllocated();
@@ -380,6 +379,8 @@
   template <typename U>
   friend class ArenaAllocatorAdapter;
 
+  friend class ArenaAllocatorTest;
+
   DISALLOW_COPY_AND_ASSIGN(ArenaAllocator);
 };  // ArenaAllocator
 
diff --git a/runtime/base/arena_allocator_test.cc b/runtime/base/arena_allocator_test.cc
new file mode 100644
index 0000000..9de3cc4
--- /dev/null
+++ b/runtime/base/arena_allocator_test.cc
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/arena_allocator.h"
+#include "base/arena_bit_vector.h"
+#include "gtest/gtest.h"
+
+namespace art {
+
+class ArenaAllocatorTest : public testing::Test {
+ protected:
+  size_t NumberOfArenas(ArenaAllocator* arena) {
+    size_t result = 0u;
+    for (Arena* a = arena->arena_head_; a != nullptr; a = a->next_) {
+      ++result;
+    }
+    return result;
+  }
+};
+
+TEST_F(ArenaAllocatorTest, Test) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  ArenaBitVector bv(&arena, 10, true);
+  bv.SetBit(5);
+  EXPECT_EQ(1U, bv.GetStorageSize());
+  bv.SetBit(35);
+  EXPECT_EQ(2U, bv.GetStorageSize());
+}
+
+TEST_F(ArenaAllocatorTest, MakeDefined) {
+  // Regression test to make sure we mark the allocated area defined.
+  ArenaPool pool;
+  static constexpr size_t kSmallArraySize = 10;
+  static constexpr size_t kLargeArraySize = 50;
+  uint32_t* small_array;
+  {
+    // Allocate a small array from an arena and release it.
+    ArenaAllocator arena(&pool);
+    small_array = arena.AllocArray<uint32_t>(kSmallArraySize);
+    ASSERT_EQ(0u, small_array[kSmallArraySize - 1u]);
+  }
+  {
+    // Reuse the previous arena and allocate more than previous allocation including red zone.
+    ArenaAllocator arena(&pool);
+    uint32_t* large_array = arena.AllocArray<uint32_t>(kLargeArraySize);
+    ASSERT_EQ(0u, large_array[kLargeArraySize - 1u]);
+    // Verify that the allocation was made on the same arena.
+    ASSERT_EQ(small_array, large_array);
+  }
+}
+
+TEST_F(ArenaAllocatorTest, LargeAllocations) {
+  {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    // Note: Leaving some space for memory tool red zones.
+    void* alloc1 = arena.Alloc(Arena::kDefaultSize * 5 / 8);
+    void* alloc2 = arena.Alloc(Arena::kDefaultSize * 2 / 8);
+    ASSERT_NE(alloc1, alloc2);
+    ASSERT_EQ(1u, NumberOfArenas(&arena));
+  }
+  {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    void* alloc1 = arena.Alloc(Arena::kDefaultSize * 13 / 16);
+    void* alloc2 = arena.Alloc(Arena::kDefaultSize * 11 / 16);
+    ASSERT_NE(alloc1, alloc2);
+    ASSERT_EQ(2u, NumberOfArenas(&arena));
+    void* alloc3 = arena.Alloc(Arena::kDefaultSize * 7 / 16);
+    ASSERT_NE(alloc1, alloc3);
+    ASSERT_NE(alloc2, alloc3);
+    ASSERT_EQ(3u, NumberOfArenas(&arena));
+  }
+  {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    void* alloc1 = arena.Alloc(Arena::kDefaultSize * 13 / 16);
+    void* alloc2 = arena.Alloc(Arena::kDefaultSize * 9 / 16);
+    ASSERT_NE(alloc1, alloc2);
+    ASSERT_EQ(2u, NumberOfArenas(&arena));
+    // Note: Leaving some space for memory tool red zones.
+    void* alloc3 = arena.Alloc(Arena::kDefaultSize * 5 / 16);
+    ASSERT_NE(alloc1, alloc3);
+    ASSERT_NE(alloc2, alloc3);
+    ASSERT_EQ(2u, NumberOfArenas(&arena));
+  }
+  {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    void* alloc1 = arena.Alloc(Arena::kDefaultSize * 9 / 16);
+    void* alloc2 = arena.Alloc(Arena::kDefaultSize * 13 / 16);
+    ASSERT_NE(alloc1, alloc2);
+    ASSERT_EQ(2u, NumberOfArenas(&arena));
+    // Note: Leaving some space for memory tool red zones.
+    void* alloc3 = arena.Alloc(Arena::kDefaultSize * 5 / 16);
+    ASSERT_NE(alloc1, alloc3);
+    ASSERT_NE(alloc2, alloc3);
+    ASSERT_EQ(2u, NumberOfArenas(&arena));
+  }
+  {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    // Note: Leaving some space for memory tool red zones.
+    for (size_t i = 0; i != 15; ++i) {
+      arena.Alloc(Arena::kDefaultSize * 1 / 16);    // Allocate 15 times from the same arena.
+      ASSERT_EQ(i + 1u, NumberOfArenas(&arena));
+      arena.Alloc(Arena::kDefaultSize * 17 / 16);   // Allocate a separate arena.
+      ASSERT_EQ(i + 2u, NumberOfArenas(&arena));
+    }
+  }
+}
+
+}  // namespace art
diff --git a/runtime/base/arena_object.h b/runtime/base/arena_object.h
index 56e35d8..2d8e7d8 100644
--- a/runtime/base/arena_object.h
+++ b/runtime/base/arena_object.h
@@ -48,7 +48,6 @@
 
 
 // Parent for arena allocated objects that get deleted, gives appropriate new and delete operators.
-// Currently this is used by the quick compiler for debug reference counting arena allocations.
 template<enum ArenaAllocKind kAllocKind>
 class DeletableArenaObject {
  public:
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 8430d68..f279f45 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -269,7 +269,7 @@
 template <typename T>
 static constexpr T MaxInt(size_t bits) {
   return
-      DCHECK_CONSTEXPR(bits > 0, "bits cannot be zero", 0)
+      DCHECK_CONSTEXPR(std::is_unsigned<T>::value || bits > 0, "bits cannot be zero for signed", 0)
       DCHECK_CONSTEXPR(bits <= BitSizeOf<T>(), "kBits must be < max.", 0)
       bits == BitSizeOf<T>()
           ? std::numeric_limits<T>::max()
@@ -283,7 +283,7 @@
 template <typename T>
 static constexpr T MinInt(size_t bits) {
   return
-      DCHECK_CONSTEXPR(bits > 0, "bits cannot be zero", 0)
+      DCHECK_CONSTEXPR(std::is_unsigned<T>::value || bits > 0, "bits cannot be zero for signed", 0)
       DCHECK_CONSTEXPR(bits <= BitSizeOf<T>(), "kBits must be < max.", 0)
       bits == BitSizeOf<T>()
           ? std::numeric_limits<T>::min()
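
The relaxed check permits bits == 0 for unsigned types, where an empty bit
range legitimately yields [0, 0]; signed types still need at least the sign
bit. Assuming the full definitions behave as the visible fragments suggest,
for example:

    static_assert(MaxInt<uint32_t>(0) == 0u, "empty unsigned range has max 0");
    static_assert(MinInt<uint32_t>(0) == 0u, "empty unsigned range has min 0");
    static_assert(MaxInt<int32_t>(8) == 127, "8-bit signed range");
    static_assert(MinInt<int32_t>(8) == -128, "8-bit signed range");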
diff --git a/runtime/base/casts.h b/runtime/base/casts.h
index f884649..6b67864 100644
--- a/runtime/base/casts.h
+++ b/runtime/base/casts.h
@@ -19,6 +19,7 @@
 
 #include <assert.h>
 #include <limits>
+#include <stdint.h>
 #include <string.h>
 #include <type_traits>
 
@@ -34,7 +35,7 @@
 // When you use implicit_cast, the compiler checks that the cast is safe.
 // Such explicit implicit_casts are necessary in surprisingly many
 // situations where C++ demands an exact type match instead of an
-// argument type convertable to a target type.
+// argument type convertible to a target type.
 //
 // The From type can be inferred, so the preferred syntax for using
 // implicit_cast is the same as for static_cast etc.:
@@ -102,6 +103,29 @@
   return static_cast<Dest>(source);
 }
 
+// A version of reinterpret_cast<>() between pointers and int64_t/uint64_t
+// that goes through uintptr_t to avoid treating the pointer as "signed."
+
+template <typename Dest, typename Source>
+inline Dest reinterpret_cast64(Source source) {
+  // This is the overload for casting from int64_t/uint64_t to a pointer.
+  static_assert(std::is_same<Source, int64_t>::value || std::is_same<Source, uint64_t>::value,
+                "Source must be int64_t or uint64_t.");
+  static_assert(std::is_pointer<Dest>::value, "Dest must be a pointer.");
+  // Check that we don't lose any non-0 bits here.
+  DCHECK_EQ(static_cast<Source>(static_cast<uintptr_t>(source)), source);
+  return reinterpret_cast<Dest>(static_cast<uintptr_t>(source));
+}
+
+template <typename Dest, typename Source>
+inline Dest reinterpret_cast64(Source* ptr) {
+  // This is the overload for casting from a pointer to int64_t/uint64_t.
+  static_assert(std::is_same<Dest, int64_t>::value || std::is_same<Dest, uint64_t>::value,
+                "Dest must be int64_t or uint64_t.");
+  static_assert(sizeof(uintptr_t) <= sizeof(Dest), "Expecting at most 64-bit pointers.");
+  return static_cast<Dest>(reinterpret_cast<uintptr_t>(ptr));
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_CASTS_H_
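
A typical use of reinterpret_cast64 is round-tripping a native pointer through
a 64-bit field such as a Java long, where the uintptr_t detour guarantees that
32-bit pointers are zero-extended rather than sign-extended. A minimal sketch
(NativeState is an arbitrary example type):

    #include <cstdint>

    struct NativeState { int value; };

    void RoundTrip(NativeState* state) {
      int64_t handle = reinterpret_cast64<int64_t>(state);   // pointer -> int64_t
      NativeState* back = reinterpret_cast64<NativeState*>(handle);
      // back == state; the DCHECK in the int64_t -> pointer overload verifies
      // that no non-zero high bits were dropped on the way back.
    }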
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index fc1a52f..12d3be7 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -140,7 +140,7 @@
 
   HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
 
-  HashSet(double min_load_factor, double max_load_factor)
+  HashSet(double min_load_factor, double max_load_factor) noexcept
       : num_elements_(0u),
         num_buckets_(0u),
         elements_until_expand_(0u),
@@ -152,7 +152,7 @@
     DCHECK_LT(max_load_factor, 1.0);
   }
 
-  explicit HashSet(const allocator_type& alloc)
+  explicit HashSet(const allocator_type& alloc) noexcept
       : allocfn_(alloc),
         hashfn_(),
         emptyfn_(),
@@ -166,7 +166,7 @@
         max_load_factor_(kDefaultMaxLoadFactor) {
   }
 
-  HashSet(const HashSet& other)
+  HashSet(const HashSet& other) noexcept
       : allocfn_(other.allocfn_),
         hashfn_(other.hashfn_),
         emptyfn_(other.emptyfn_),
@@ -184,7 +184,9 @@
     }
   }
 
-  HashSet(HashSet&& other)
+  // noexcept required so that the move constructor is used instead of copy constructor.
+  // b/27860101
+  HashSet(HashSet&& other) noexcept
       : allocfn_(std::move(other.allocfn_)),
         hashfn_(std::move(other.hashfn_)),
         emptyfn_(std::move(other.emptyfn_)),
@@ -206,7 +208,7 @@
   // Construct from existing data.
   // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the
   // passed in ptr_.
-  HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) {
+  HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
     uint64_t temp;
     size_t offset = 0;
     offset = ReadFromBytes(ptr, offset, &temp);
@@ -256,12 +258,12 @@
     DeallocateStorage();
   }
 
-  HashSet& operator=(HashSet&& other) {
+  HashSet& operator=(HashSet&& other) noexcept {
     HashSet(std::move(other)).swap(*this);
     return *this;
   }
 
-  HashSet& operator=(const HashSet& other) {
+  HashSet& operator=(const HashSet& other) noexcept {
     HashSet(other).swap(*this);  // NOLINT(runtime/explicit) - a case of lint gone mad.
     return *this;
   }
@@ -298,6 +300,11 @@
     return Size() == 0;
   }
 
+  // Return true if the hash set has ownership of the underlying data.
+  bool OwnsData() const {
+    return owns_data_;
+  }
+
   // Erase algorithm:
   // Make an empty slot where the iterator is pointing.
   // Scan forwards until we hit another empty slot.
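
The noexcept qualifiers matter because standard containers fall back to
copying when a move constructor might throw: std::vector, for instance,
transfers elements with std::move_if_noexcept during reallocation to preserve
the strong exception guarantee. A minimal demonstration of the rule, outside
of HashSet itself:

    #include <type_traits>

    struct MayThrow { MayThrow(MayThrow&&); };          // not noexcept
    struct NoThrow  { NoThrow(NoThrow&&) noexcept; };

    // std::vector<MayThrow> copies its elements on reallocation (if copyable),
    // while std::vector<NoThrow> moves them.
    static_assert(!std::is_nothrow_move_constructible<MayThrow>::value,
                  "reallocation would copy");
    static_assert(std::is_nothrow_move_constructible<NoThrow>::value,
                  "reallocation would move");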
diff --git a/runtime/base/histogram.h b/runtime/base/histogram.h
index bcb7b3b..0e3bc8e 100644
--- a/runtime/base/histogram.h
+++ b/runtime/base/histogram.h
@@ -85,6 +85,10 @@
     return max_value_added_;
   }
 
+  Value BucketWidth() const {
+    return bucket_width_;
+  }
+
   const std::string& Name() const {
     return name_;
   }
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 7a620e3..3ee15a2 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -26,7 +26,7 @@
 #include "utils.h"
 
 // Headers for LogMessage::LogLine.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "cutils/log.h"
 #else
 #include <sys/types.h>
@@ -47,7 +47,7 @@
 // Print INTERNAL_FATAL messages directly instead of at destruction time. This only works on the
 // host right now: for the device, a stream buf collating output into lines and calling LogLine or
 // lower-level logging is necessary.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 static constexpr bool kPrintInternalFatalDirectly = false;
 #else
 static constexpr bool kPrintInternalFatalDirectly = !kIsTargetBuild;
@@ -185,14 +185,15 @@
 LogMessage::LogMessage(const char* file, unsigned int line, LogSeverity severity, int error)
   : data_(new LogMessageData(file, line, severity, error)) {
   if (PrintDirectly(severity)) {
-    static const char* log_characters = "VDIWEFF";
-    CHECK_EQ(strlen(log_characters), INTERNAL_FATAL + 1U);
-    stream() << ProgramInvocationShortName() << " " << log_characters[static_cast<size_t>(severity)]
+    static constexpr char kLogCharacters[] = { 'N', 'V', 'D', 'I', 'W', 'E', 'F', 'F' };
+    static_assert(arraysize(kLogCharacters) == static_cast<size_t>(INTERNAL_FATAL) + 1,
+                  "Wrong character array size");
+    stream() << ProgramInvocationShortName() << " " << kLogCharacters[static_cast<size_t>(severity)]
              << " " << getpid() << " " << ::art::GetTid() << " " << file << ":" <<  line << "]";
   }
 }
 LogMessage::~LogMessage() {
-  if (!PrintDirectly(data_->GetSeverity())) {
+  if (!PrintDirectly(data_->GetSeverity()) && data_->GetSeverity() != LogSeverity::NONE) {
     if (data_->GetSeverity() < gMinimumLogSeverity) {
       return;  // No need to format something we're not going to output.
     }
@@ -234,8 +235,9 @@
   return data_->GetBuffer();
 }
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 static const android_LogPriority kLogSeverityToAndroidLogPriority[] = {
+  ANDROID_LOG_VERBOSE,  // NONE, use verbose as stand-in, will never be printed.
   ANDROID_LOG_VERBOSE, ANDROID_LOG_DEBUG, ANDROID_LOG_INFO, ANDROID_LOG_WARN,
   ANDROID_LOG_ERROR, ANDROID_LOG_FATAL, ANDROID_LOG_FATAL
 };
@@ -245,16 +247,20 @@
 
 void LogMessage::LogLine(const char* file, unsigned int line, LogSeverity log_severity,
                          const char* message) {
-#ifdef __ANDROID__
+  if (log_severity == LogSeverity::NONE) {
+    return;
+  }
+
+#ifdef ART_TARGET_ANDROID
   const char* tag = ProgramInvocationShortName();
-  int priority = kLogSeverityToAndroidLogPriority[log_severity];
+  int priority = kLogSeverityToAndroidLogPriority[static_cast<size_t>(log_severity)];
   if (priority == ANDROID_LOG_FATAL) {
     LOG_PRI(priority, tag, "%s:%u] %s", file, line, message);
   } else {
     LOG_PRI(priority, tag, "%s", message);
   }
 #else
-  static const char* log_characters = "VDIWEFF";
+  static const char* log_characters = "NVDIWEFF";
   CHECK_EQ(strlen(log_characters), INTERNAL_FATAL + 1U);
   char severity = log_characters[log_severity];
   fprintf(stderr, "%s %c %5d %5d %s:%u] %s\n",
@@ -264,10 +270,14 @@
 
 void LogMessage::LogLineLowStack(const char* file, unsigned int line, LogSeverity log_severity,
                                  const char* message) {
-#ifdef __ANDROID__
+  if (log_severity == LogSeverity::NONE) {
+    return;
+  }
+
+#ifdef ART_TARGET_ANDROID
   // Use android_writeLog() to avoid stack-based buffers used by android_printLog().
   const char* tag = ProgramInvocationShortName();
-  int priority = kLogSeverityToAndroidLogPriority[log_severity];
+  int priority = kLogSeverityToAndroidLogPriority[static_cast<size_t>(log_severity)];
   char* buf = nullptr;
   size_t buf_size = 0u;
   if (priority == ANDROID_LOG_FATAL) {
@@ -285,13 +295,14 @@
     android_writeLog(priority, tag, message);
   }
 #else
-  static const char* log_characters = "VDIWEFF";
-  CHECK_EQ(strlen(log_characters), INTERNAL_FATAL + 1U);
+  static constexpr char kLogCharacters[] = { 'N', 'V', 'D', 'I', 'W', 'E', 'F', 'F' };
+  static_assert(arraysize(kLogCharacters) == static_cast<size_t>(INTERNAL_FATAL) + 1,
+                "Wrong character array size");
 
   const char* program_name = ProgramInvocationShortName();
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, program_name, strlen(program_name)));
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, " ", 1));
-  TEMP_FAILURE_RETRY(write(STDERR_FILENO, &log_characters[log_severity], 1));
+  TEMP_FAILURE_RETRY(write(STDERR_FILENO, &kLogCharacters[static_cast<size_t>(log_severity)], 1));
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, " ", 1));
   // TODO: pid and tid.
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, file, strlen(file)));
@@ -300,7 +311,7 @@
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, "] ", 2));
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, message, strlen(message)));
   TEMP_FAILURE_RETRY(write(STDERR_FILENO, "\n", 1));
-#endif
+#endif  // ART_TARGET_ANDROID
 }
 
 ScopedLogSeverity::ScopedLogSeverity(LogSeverity level) {
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 8aaeaac..3b5b8b5 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -24,6 +24,7 @@
 namespace art {
 
 enum LogSeverity {
+  NONE,            // Fake level, don't log at all.
   VERBOSE,
   DEBUG,
   INFO,
@@ -55,6 +56,7 @@
   bool threads;
   bool verifier;
   bool image;
+  bool systrace_lock_logging;  // Enabled with "-verbose:sys-locks".
 };
 
 // Global log verbosity setting, initialized by InitLogging.
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index dc692d2..7a293c7 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -138,10 +138,10 @@
 #define SIZEOF_MEMBER(t, f) sizeof((reinterpret_cast<t*>(4096))->f)
 
 #define OFFSETOF_MEMBER(t, f) \
-  (reinterpret_cast<const char*>(&reinterpret_cast<t*>(16)->f) - reinterpret_cast<const char*>(16)) // NOLINT
+  (reinterpret_cast<uintptr_t>(&reinterpret_cast<t*>(16)->f) - static_cast<uintptr_t>(16u)) // NOLINT
 
-#define OFFSETOF_VOLATILE_MEMBER(t, f) \
-  (reinterpret_cast<volatile char*>(&reinterpret_cast<t*>(16)->f) - reinterpret_cast<volatile char*>(16)) // NOLINT
+#define OFFSETOF_MEMBERPTR(t, f) \
+  (reinterpret_cast<uintptr_t>(&(reinterpret_cast<t*>(16)->*f)) - static_cast<uintptr_t>(16)) // NOLINT
 
 #define PACKED(x) __attribute__ ((__aligned__(x), __packed__))
 
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 293451c..3dca12a 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -76,7 +76,6 @@
   kReferenceQueueClearedReferencesLock,
   kReferenceProcessorLock,
   kJitDebugInterfaceLock,
-  kJitCodeCacheLock,
   kAllocSpaceLock,
   kBumpPointerSpaceBlockLock,
   kArenaPoolLock,
@@ -88,6 +87,9 @@
   kOatFileManagerLock,
   kTracingUniqueMethodsLock,
   kTracingStreamingLock,
+  kDeoptimizedMethodsLock,
+  kJitCodeCacheLock,
+  kClassLoaderClassesLock,
   kDefaultMutexLevel,
   kMarkSweepLargeObjectLock,
   kPinTableLock,
@@ -96,7 +98,7 @@
   kAllocatedThreadIdsLock,
   kMonitorPoolLock,
   kMethodVerifiersLock,
-  kClassLinkerClassesLock,
+  kClassLinkerClassesLock,  // TODO: Rename.
   kBreakpointLock,
   kMonitorLock,
   kMonitorListLock,
diff --git a/runtime/base/time_utils.cc b/runtime/base/time_utils.cc
index b7cf207..3e5bac8 100644
--- a/runtime/base/time_utils.cc
+++ b/runtime/base/time_utils.cc
@@ -15,6 +15,7 @@
  */
 
 #include <inttypes.h>
+#include <limits>
 #include <sstream>
 
 #include "time_utils.h"
@@ -190,9 +191,16 @@
   }
 
   int64_t end_sec = ts->tv_sec + ms / 1000;
-  if (UNLIKELY(end_sec >= 0x7fffffff)) {
-    LOG(INFO) << "Note: end time exceeds INT32_MAX: " << end_sec;
-    end_sec = 0x7ffffffe;
+  constexpr int32_t int32_max = std::numeric_limits<int32_t>::max();
+  if (UNLIKELY(end_sec >= int32_max)) {
+    // Either ms was intended to denote an infinite timeout, or we have a
+    // problem. The former generally uses the largest possible millisecond
+    // or nanosecond value.  Log only in the latter case.
+    constexpr int64_t int64_max = std::numeric_limits<int64_t>::max();
+    if (ms != int64_max && ms != int64_max / (1000 * 1000)) {
+      LOG(INFO) << "Note: end time exceeds INT32_MAX: " << end_sec;
+    }
+    end_sec = int32_max - 1;  // Allow for increment below.
   }
   ts->tv_sec = end_sec;
   ts->tv_nsec = (ts->tv_nsec + (ms % 1000) * 1000000) + ns;
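
The two sentinel values checked above correspond to the usual encodings of an
infinite timeout: the largest possible millisecond count, or the largest
possible nanosecond count already divided down to milliseconds. A hedged
sketch of that test in isolation:

    #include <cstdint>
    #include <limits>

    bool IsLikelyInfiniteTimeoutMs(int64_t ms) {
      constexpr int64_t int64_max = std::numeric_limits<int64_t>::max();
      return ms == int64_max || ms == int64_max / (1000 * 1000);
    }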
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index beabce3..639f913 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -1176,14 +1176,16 @@
       return false;
     }
 
-    // Get the *correct* JNIEnv by going through our TLS pointer.
+    // Get the current thread's JNIEnv by going through our TLS pointer.
     JNIEnvExt* threadEnv = self->GetJniEnv();
 
     // Verify that the current thread is (a) attached and (b) associated with
     // this particular instance of JNIEnv.
     if (env != threadEnv) {
+      // Get the thread owning the JNIEnv that's being used.
+      Thread* envThread = reinterpret_cast<JNIEnvExt*>(env)->self;
       AbortF("thread %s using JNIEnv* from thread %s",
-             ToStr<Thread>(*self).c_str(), ToStr<Thread>(*self).c_str());
+             ToStr<Thread>(*self).c_str(), ToStr<Thread>(*envThread).c_str());
       return false;
     }
 
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index fcf3326..0e2f9f2 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -18,7 +18,6 @@
 #define ART_RUNTIME_CHECK_REFERENCE_MAP_VISITOR_H_
 
 #include "art_method-inl.h"
-#include "gc_map.h"
 #include "oat_quick_method_header.h"
 #include "scoped_thread_state_change.h"
 #include "stack_map.h"
@@ -54,11 +53,8 @@
 
   void CheckReferences(int* registers, int number_of_references, uint32_t native_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (GetCurrentOatQuickMethodHeader()->IsOptimized()) {
-      CheckOptimizedMethod(registers, number_of_references, native_pc_offset);
-    } else {
-      CheckQuickMethod(registers, number_of_references, native_pc_offset);
-    }
+    CHECK(GetCurrentOatQuickMethodHeader()->IsOptimized());
+    CheckOptimizedMethod(registers, number_of_references, native_pc_offset);
   }
 
  private:
@@ -66,13 +62,12 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     ArtMethod* m = GetMethod();
     CodeInfo code_info = GetCurrentOatQuickMethodHeader()->GetOptimizedCodeInfo();
-    StackMapEncoding encoding = code_info.ExtractEncoding();
+    CodeInfoEncoding encoding = code_info.ExtractEncoding();
     StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
     uint16_t number_of_dex_registers = m->GetCodeItem()->registers_size_;
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-    uint32_t register_mask = stack_map.GetRegisterMask(encoding);
+    uint32_t register_mask = stack_map.GetRegisterMask(encoding.stack_map_encoding);
     for (int i = 0; i < number_of_references; ++i) {
       int reg = registers[i];
       CHECK(reg < m->GetCodeItem()->registers_size_);
@@ -85,7 +80,8 @@
           break;
         case DexRegisterLocation::Kind::kInStack:
           DCHECK_EQ(location.GetValue() % kFrameSlotSize, 0);
-          CHECK(stack_mask.LoadBit(location.GetValue() / kFrameSlotSize));
+          CHECK(stack_map.GetStackMaskBit(encoding.stack_map_encoding,
+                                          location.GetValue() / kFrameSlotSize));
           break;
         case DexRegisterLocation::Kind::kInRegister:
         case DexRegisterLocation::Kind::kInRegisterHigh:
@@ -104,20 +100,6 @@
       }
     }
   }
-
-  void CheckQuickMethod(int* registers, int number_of_references, uint32_t native_pc_offset)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    ArtMethod* m = GetMethod();
-    NativePcOffsetToReferenceMap map(GetCurrentOatQuickMethodHeader()->GetNativeGcMap());
-    const uint8_t* ref_bitmap = map.FindBitMap(native_pc_offset);
-    CHECK(ref_bitmap);
-    for (int i = 0; i < number_of_references; ++i) {
-      int reg = registers[i];
-      CHECK(reg < m->GetCodeItem()->registers_size_);
-      CHECK((*((ref_bitmap) + reg / 8) >> (reg % 8) ) & 0x01)
-          << "Error: Reg @" << i << " is not in GC map";
-    }
-  }
 };
 
 }  // namespace art
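
The deleted CheckQuickMethod path boiled down to probing a byte-array bitmap; a standalone restatement of that bit arithmetic (hypothetical helper name):

    #include <cstdint>

    // Bit 'reg' lives in byte reg / 8, at bit position reg % 8 (LSB first).
    inline bool IsRegisterInGcMap(const uint8_t* ref_bitmap, int reg) {
      return ((ref_bitmap[reg / 8] >> (reg % 8)) & 0x01) != 0;
    }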
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 7e8a4a4..f3e260b 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -116,9 +116,10 @@
   return resolved_method;
 }
 
-inline mirror::Class* ClassLinker::ResolveReferencedClassOfMethod(Thread* self,
-                                                                  uint32_t method_idx,
-                                                                  ArtMethod* referrer) {
+inline mirror::Class* ClassLinker::ResolveReferencedClassOfMethod(
+    uint32_t method_idx,
+    Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader) {
   // NB: We cannot simply use `GetResolvedMethod(method_idx, ...)->GetDeclaringClass()`. This is
   // because if we did so then an invoke-super could be incorrectly dispatched in cases where
   // GetMethodId(method_idx).class_idx_ refers to a non-interface, non-direct-superclass
@@ -127,15 +128,11 @@
   // interface (either miranda, default or conflict) we would incorrectly assume that is what we
   // want to invoke on, instead of the 'concrete' implementation that the direct superclass
   // contains.
-  mirror::Class* declaring_class = referrer->GetDeclaringClass();
-  StackHandleScope<2> hs(self);
-  Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
-  const DexFile* dex_file = h_dex_cache->GetDexFile();
+  const DexFile* dex_file = dex_cache->GetDexFile();
   const DexFile::MethodId& method = dex_file->GetMethodId(method_idx);
-  mirror::Class* resolved_type = h_dex_cache->GetResolvedType(method.class_idx_);
+  mirror::Class* resolved_type = dex_cache->GetResolvedType(method.class_idx_);
   if (UNLIKELY(resolved_type == nullptr)) {
-    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
-    resolved_type = ResolveType(*dex_file, method.class_idx_, h_dex_cache, class_loader);
+    resolved_type = ResolveType(*dex_file, method.class_idx_, dex_cache, class_loader);
   }
   return resolved_type;
 }
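
A hedged sketch of what a call site looks like after this change, reconstructed from the removed lines; it uses runtime types, so it is illustrative rather than a standalone program:

    // The handles the resolver used to build internally from 'referrer' are now
    // materialized by the caller (assumed caller shape).
    StackHandleScope<2> hs(self);
    Handle<mirror::DexCache> dex_cache(
        hs.NewHandle(referrer->GetDeclaringClass()->GetDexCache()));
    Handle<mirror::ClassLoader> class_loader(
        hs.NewHandle(referrer->GetDeclaringClass()->GetClassLoader()));
    mirror::Class* klass =
        class_linker->ResolveReferencedClassOfMethod(method_idx, dex_cache, class_loader);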
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 99e38d9..e9b8643 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -50,7 +50,7 @@
 #include "experimental_flags.h"
 #include "gc_root-inl.h"
 #include "gc/accounting/card_table-inl.h"
-#include "gc/accounting/heap_bitmap.h"
+#include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/heap.h"
 #include "gc/space/image_space.h"
 #include "handle_scope-inl.h"
@@ -96,6 +96,7 @@
 namespace art {
 
 static constexpr bool kSanityCheckObjects = kIsDebugBuild;
+static constexpr bool kVerifyArtMethodDeclaringClasses = kIsDebugBuild;
 
 static void ThrowNoClassDefFoundError(const char* fmt, ...)
     __attribute__((__format__(__printf__, 1, 2)))
@@ -686,6 +687,9 @@
     self->AssertNoPendingException();
   }
 
+  // Create conflict tables that depend on the class linker.
+  runtime->FixupConflictTables();
+
   FinishInit(self);
 
   VLOG(startup) << "ClassLinker::InitFromCompiler exiting";
@@ -772,9 +776,13 @@
     bool contains = false;
     for (gc::space::ImageSpace* space : spaces) {
       auto& header = space->GetImageHeader();
-      auto& methods = header.GetMethodsSection();
-      auto offset = reinterpret_cast<uint8_t*>(m) - space->Begin();
-      contains |= methods.Contains(offset);
+      size_t offset = reinterpret_cast<uint8_t*>(m) - space->Begin();
+
+      const ImageSection& methods = header.GetMethodsSection();
+      contains = contains || methods.Contains(offset);
+
+      const ImageSection& runtime_methods = header.GetRuntimeMethodsSection();
+      contains = contains || runtime_methods.Contains(offset);
     }
     CHECK(contains) << m << " not found";
   }
@@ -1197,6 +1205,23 @@
   ClassTable* const table_;
 };
 
+class VerifyDeclaringClassVisitor : public ArtMethodVisitor {
+ public:
+  VerifyDeclaringClassVisitor() SHARED_REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_)
+      : live_bitmap_(Runtime::Current()->GetHeap()->GetLiveBitmap()) {}
+
+  virtual void Visit(ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    mirror::Class* klass = method->GetDeclaringClassUnchecked();
+    if (klass != nullptr) {
+      CHECK(live_bitmap_->Test(klass)) << "Image method has unmarked declaring class";
+    }
+  }
+
+ private:
+  gc::accounting::HeapBitmap* const live_bitmap_;
+};
+
 bool ClassLinker::UpdateAppImageClassLoadersAndDexCaches(
     gc::space::ImageSpace* space,
     Handle<mirror::ClassLoader> class_loader,
@@ -1416,15 +1441,18 @@
         }
       }
     }
-    if (*out_forward_dex_cache_array) {
-      ScopedTrace timing("Fixup ArtMethod dex cache arrays");
-      FixupArtMethodArrayVisitor visitor(header);
-      header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
-          &visitor,
-          space->Begin(),
-          sizeof(void*));
-      Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader.Get());
-    }
+  }
+  if (*out_forward_dex_cache_array) {
+    ScopedTrace timing("Fixup ArtMethod dex cache arrays");
+    FixupArtMethodArrayVisitor visitor(header);
+    header.VisitPackedArtMethods(&visitor, space->Begin(), sizeof(void*));
+    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader.Get());
+  }
+  if (kVerifyArtMethodDeclaringClasses) {
+    ScopedTrace timing("Verify declaring classes");
+    ReaderMutexLock rmu(self, *Locks::heap_bitmap_lock_);
+    VerifyDeclaringClassVisitor visitor;
+    header.VisitPackedArtMethods(&visitor, space->Begin(), sizeof(void*));
   }
   return true;
 }
@@ -1702,9 +1730,8 @@
 
   // Set entry point to interpreter if in InterpretOnly mode.
   if (!runtime->IsAotCompiler() && runtime->GetInstrumentation()->InterpretOnly()) {
-    const ImageSection& methods = header.GetMethodsSection();
     SetInterpreterEntrypointArtMethodVisitor visitor(image_pointer_size_);
-    methods.VisitPackedArtMethods(&visitor, space->Begin(), image_pointer_size_);
+    header.VisitPackedArtMethods(&visitor, space->Begin(), image_pointer_size_);
   }
 
   ClassTable* class_table = nullptr;
@@ -1773,10 +1800,7 @@
     // This verification needs to happen after the classes have been added to the class loader,
     // since it ensures classes are in the class table.
     VerifyClassInTableArtMethodVisitor visitor2(class_table);
-    header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
-        &visitor2,
-        space->Begin(),
-        sizeof(void*));
+    header.VisitPackedArtMethods(&visitor2, space->Begin(), sizeof(void*));
   }
   VLOG(class_linker) << "Adding image space took " << PrettyDuration(NanoTime() - start_time);
   return true;
@@ -2008,6 +2032,7 @@
   Runtime* const runtime = Runtime::Current();
   JavaVMExt* const vm = runtime->GetJavaVM();
   vm->DeleteWeakGlobalRef(self, data.weak_root);
+  // Notify the JIT that we need to remove the methods and/or profiling info.
   if (runtime->GetJit() != nullptr) {
     jit::JitCodeCache* code_cache = runtime->GetJit()->GetCodeCache();
     if (code_cache != nullptr) {
@@ -2725,7 +2750,7 @@
   }
 
   if (runtime->IsNativeDebuggable()) {
-    DCHECK(runtime->UseJit() && runtime->GetJit()->JitAtFirstUse());
+    DCHECK(runtime->UseJitCompilation() && runtime->GetJit()->JitAtFirstUse());
     // If we are doing native debugging, ignore application's AOT code,
     // since we want to JIT it with extra stackmaps for native debugging.
     // On the other hand, keep all AOT code from the boot image, since the
@@ -3715,10 +3740,6 @@
   DCHECK(klass.Get() != nullptr);
   DCHECK(supertype.Get() != nullptr);
 
-  StackHandleScope<1> hs(self);
-  // Acquire lock to prevent races on verifying the super class.
-  ObjectLock<mirror::Class> super_lock(self, supertype);
-
   if (!supertype->IsVerified() && !supertype->IsErroneous()) {
     VerifyClass(self, supertype);
   }
@@ -3732,6 +3753,7 @@
                    PrettyDescriptor(klass.Get()).c_str(),
                    PrettyDescriptor(supertype.Get()).c_str());
   LOG(WARNING) << error_msg  << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8();
+  StackHandleScope<1> hs(self);
   Handle<mirror::Throwable> cause(hs.NewHandle(self->GetException()));
   if (cause.Get() != nullptr) {
     // Set during VerifyClass call (if at all).
@@ -3746,44 +3768,59 @@
   if (Runtime::Current()->IsAotCompiler()) {
     Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
   }
+  // Need to grab the lock to change status.
+  ObjectLock<mirror::Class> super_lock(self, klass);
   mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
   return false;
 }
 
-void ClassLinker::VerifyClass(Thread* self, Handle<mirror::Class> klass) {
-  // TODO: assert that the monitor on the Class is held
-  ObjectLock<mirror::Class> lock(self, klass);
+void ClassLinker::VerifyClass(Thread* self, Handle<mirror::Class> klass, LogSeverity log_level) {
+  {
+    // TODO: assert that the monitor on the Class is held
+    ObjectLock<mirror::Class> lock(self, klass);
 
-  // Don't attempt to re-verify if already sufficiently verified.
-  if (klass->IsVerified()) {
-    EnsureSkipAccessChecksMethods(klass);
-    return;
-  }
-  if (klass->IsCompileTimeVerified() && Runtime::Current()->IsAotCompiler()) {
-    return;
-  }
+    // Is somebody verifying this now?
+    mirror::Class::Status old_status = klass->GetStatus();
+    while (old_status == mirror::Class::kStatusVerifying ||
+        old_status == mirror::Class::kStatusVerifyingAtRuntime) {
+      lock.WaitIgnoringInterrupts();
+      CHECK(klass->IsErroneous() || (klass->GetStatus() > old_status))
+          << "Class '" << PrettyClass(klass.Get()) << "' performed an illegal verification state "
+          << "transition from " << old_status << " to " << klass->GetStatus();
+      old_status = klass->GetStatus();
+    }
 
-  // The class might already be erroneous, for example at compile time if we attempted to verify
-  // this class as a parent to another.
-  if (klass->IsErroneous()) {
-    ThrowEarlierClassFailure(klass.Get());
-    return;
-  }
+    // The class might already be erroneous, for example at compile time if we attempted to verify
+    // this class as a parent to another.
+    if (klass->IsErroneous()) {
+      ThrowEarlierClassFailure(klass.Get());
+      return;
+    }
 
-  if (klass->GetStatus() == mirror::Class::kStatusResolved) {
-    mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifying, self);
-  } else {
-    CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime)
-        << PrettyClass(klass.Get());
-    CHECK(!Runtime::Current()->IsAotCompiler());
-    mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifyingAtRuntime, self);
-  }
+    // Don't attempt to re-verify if already sufficiently verified.
+    if (klass->IsVerified()) {
+      EnsureSkipAccessChecksMethods(klass);
+      return;
+    }
+    if (klass->IsCompileTimeVerified() && Runtime::Current()->IsAotCompiler()) {
+      return;
+    }
 
-  // Skip verification if disabled.
-  if (!Runtime::Current()->IsVerificationEnabled()) {
-    mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
-    EnsureSkipAccessChecksMethods(klass);
-    return;
+    if (klass->GetStatus() == mirror::Class::kStatusResolved) {
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifying, self);
+    } else {
+      CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime)
+            << PrettyClass(klass.Get());
+      CHECK(!Runtime::Current()->IsAotCompiler());
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifyingAtRuntime, self);
+    }
+
+    // Skip verification if disabled.
+    if (!Runtime::Current()->IsVerificationEnabled()) {
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
+      EnsureSkipAccessChecksMethods(klass);
+      return;
+    }
   }
 
   // Verify super class.
@@ -3853,9 +3890,13 @@
                                                              klass.Get(),
                                                              runtime->GetCompilerCallbacks(),
                                                              runtime->IsAotCompiler(),
-                                                             runtime->IsAotCompiler(),
+                                                             log_level,
                                                              &error_msg);
   }
+
+  // Verification is done, grab the lock again.
+  ObjectLock<mirror::Class> lock(self, klass);
+
   if (preverified || verifier_failure != verifier::MethodVerifier::kHardFailure) {
     if (!preverified && verifier_failure != verifier::MethodVerifier::kNoFailure) {
       VLOG(class_linker) << "Soft verification failure in class " << PrettyDescriptor(klass.Get())
@@ -4399,7 +4440,20 @@
         // We failed to verify, expect either the klass to be erroneous or verification failed at
         // compile time.
         if (klass->IsErroneous()) {
-          CHECK(self->IsExceptionPending());
+          // The class is erroneous. This may be a verifier error, or another thread attempted
+          // verification and/or initialization and failed. We can distinguish those cases by
+          // whether an exception is already pending.
+          if (self->IsExceptionPending()) {
+            // Check that it's a VerifyError.
+            DCHECK_EQ("java.lang.Class<java.lang.VerifyError>",
+                      PrettyClass(self->GetException()->GetClass()));
+          } else {
+            // Check that another thread attempted initialization.
+            DCHECK_NE(0, klass->GetClinitThreadId());
+            DCHECK_NE(self->GetTid(), klass->GetClinitThreadId());
+            // Need to rethrow the previous failure now.
+            ThrowEarlierClassFailure(klass.Get(), true);
+          }
           VlogClassInitializationFailure(klass);
         } else {
           CHECK(Runtime::Current()->IsAotCompiler());
@@ -4409,6 +4463,14 @@
       } else {
         self->AssertNoPendingException();
       }
+
+      // A separate thread could have moved us all the way to initialized. A "simple" example
+      // involves a subclass of the current class being initialized at the same time (which
+      // will implicitly initialize the superclass, if scheduled that way). b/28254258
+      DCHECK_NE(mirror::Class::kStatusError, klass->GetStatus());
+      if (klass->IsInitialized()) {
+        return true;
+      }
     }
 
     // If the class is kStatusInitializing, either this thread is
@@ -5833,9 +5895,14 @@
           !target_name_comparator.HasSameNameAndSignature(
               current_method->GetInterfaceMethodIfProxy(image_pointer_size_))) {
         continue;
+      } else if (!current_method->IsPublic()) {
+        // The verifier should have caught the non-public method for dex version 37. Just warn and
+        // skip it, since it predates default methods and we therefore do not need to care that it
+        // has code.
+        LOG(WARNING) << "Interface method " << PrettyMethod(current_method) << " is not public! "
+                     << "This will be a fatal error in subsequent versions of Android. "
+                     << "Continuing anyway.";
       }
-      // The verifier should have caught the non-public method.
-      DCHECK(current_method->IsPublic()) << "Interface method is not public!";
       if (UNLIKELY(chosen_iface.Get() != nullptr)) {
         // We have multiple default impls of the same method. This is a potential default conflict.
         // We need to check if this possibly conflicting method is either a superclass of the chosen
@@ -5892,16 +5959,49 @@
   }
 }
 
-// Sets imt_ref appropriately for LinkInterfaceMethods.
-// If there is no method in the imt location of imt_ref it will store the given method there.
-// Otherwise it will set the conflict method which will figure out which method to use during
-// runtime.
-static void SetIMTRef(ArtMethod* unimplemented_method,
-                      ArtMethod* imt_conflict_method,
-                      size_t image_pointer_size,
-                      ArtMethod* current_method,
-                      /*out*/ArtMethod** imt_ref)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+ArtMethod* ClassLinker::AddMethodToConflictTable(mirror::Class* klass,
+                                                 ArtMethod* conflict_method,
+                                                 ArtMethod* interface_method,
+                                                 ArtMethod* method,
+                                                 bool force_new_conflict_method) {
+  ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
+  Runtime* const runtime = Runtime::Current();
+  LinearAlloc* linear_alloc = GetAllocatorForClassLoader(klass->GetClassLoader());
+  bool new_entry = conflict_method == runtime->GetImtConflictMethod() || force_new_conflict_method;
+
+  // Create a new entry if the existing one is the shared conflict method.
+  ArtMethod* new_conflict_method = new_entry
+      ? runtime->CreateImtConflictMethod(linear_alloc)
+      : conflict_method;
+
+  // Allocate a new table. Note that we will leak this table at the next conflict,
+  // but that's a tradeoff compared to making the table fixed size.
+  void* data = linear_alloc->Alloc(
+      Thread::Current(), ImtConflictTable::ComputeSizeWithOneMoreEntry(current_table,
+                                                                       image_pointer_size_));
+  if (data == nullptr) {
+    LOG(ERROR) << "Failed to allocate conflict table";
+    return conflict_method;
+  }
+  ImtConflictTable* new_table = new (data) ImtConflictTable(current_table,
+                                                            interface_method,
+                                                            method,
+                                                            image_pointer_size_);
+
+  // Do a fence to ensure threads see the data in the table before it is assigned
+  // to the conflict method.
+  // Note that there is a race in the presence of multiple threads and we may leak
+  // memory from the LinearAlloc, but that's a tradeoff compared to using
+  // atomic operations.
+  QuasiAtomic::ThreadFenceRelease();
+  new_conflict_method->SetImtConflictTable(new_table, image_pointer_size_);
+  return new_conflict_method;
+}
+
+void ClassLinker::SetIMTRef(ArtMethod* unimplemented_method,
+                            ArtMethod* imt_conflict_method,
+                            ArtMethod* current_method,
+                            /*out*/ArtMethod** imt_ref) {
   // Place the method in the IMT if the entry is empty; place the conflict method otherwise.
   if (*imt_ref == unimplemented_method) {
     *imt_ref = current_method;
@@ -5911,9 +6011,9 @@
     // Note that we have checked IsRuntimeMethod, as there may be multiple different
     // conflict methods.
     MethodNameAndSignatureComparator imt_comparator(
-        (*imt_ref)->GetInterfaceMethodIfProxy(image_pointer_size));
+        (*imt_ref)->GetInterfaceMethodIfProxy(image_pointer_size_));
     if (imt_comparator.HasSameNameAndSignature(
-          current_method->GetInterfaceMethodIfProxy(image_pointer_size))) {
+          current_method->GetInterfaceMethodIfProxy(image_pointer_size_))) {
       *imt_ref = current_method;
     } else {
       *imt_ref = imt_conflict_method;
@@ -5926,6 +6026,151 @@
   }
 }
 
+void ClassLinker::FillIMTAndConflictTables(mirror::Class* klass) {
+  DCHECK(klass->ShouldHaveEmbeddedImtAndVTable()) << PrettyClass(klass);
+  DCHECK(!klass->IsTemp()) << PrettyClass(klass);
+  ArtMethod* imt[mirror::Class::kImtSize];
+  Runtime* const runtime = Runtime::Current();
+  ArtMethod* const unimplemented_method = runtime->GetImtUnimplementedMethod();
+  ArtMethod* const conflict_method = runtime->GetImtConflictMethod();
+  std::fill_n(imt, arraysize(imt), unimplemented_method);
+  if (klass->GetIfTable() != nullptr) {
+    FillIMTFromIfTable(klass->GetIfTable(),
+                       unimplemented_method,
+                       conflict_method,
+                       klass,
+                       true,
+                       false,
+                       &imt[0]);
+  }
+  for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+    klass->SetEmbeddedImTableEntry(i, imt[i], image_pointer_size_);
+  }
+}
+
+static inline uint32_t GetIMTIndex(ArtMethod* interface_method)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
+}
+
+ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count,
+                                                      LinearAlloc* linear_alloc,
+                                                      size_t image_pointer_size) {
+  void* data = linear_alloc->Alloc(Thread::Current(),
+                                   ImtConflictTable::ComputeSize(count,
+                                                                 image_pointer_size));
+  return (data != nullptr) ? new (data) ImtConflictTable(count, image_pointer_size) : nullptr;
+}
+
+ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count, LinearAlloc* linear_alloc) {
+  return CreateImtConflictTable(count, linear_alloc, image_pointer_size_);
+}
+
+void ClassLinker::FillIMTFromIfTable(mirror::IfTable* if_table,
+                                     ArtMethod* unimplemented_method,
+                                     ArtMethod* imt_conflict_method,
+                                     mirror::Class* klass,
+                                     bool create_conflict_tables,
+                                     bool ignore_copied_methods,
+                                     ArtMethod** imt) {
+  uint32_t conflict_counts[mirror::Class::kImtSize] = {};
+  for (size_t i = 0, length = if_table->Count(); i < length; ++i) {
+    mirror::Class* interface = if_table->GetInterface(i);
+    const size_t num_virtuals = interface->NumVirtualMethods();
+    const size_t method_array_count = if_table->GetMethodArrayCount(i);
+    // There can be more virtual methods than if-table methods if there are default methods.
+    DCHECK_GE(num_virtuals, method_array_count);
+    if (kIsDebugBuild) {
+      if (klass->IsInterface()) {
+        DCHECK_EQ(method_array_count, 0u);
+      } else {
+        DCHECK_EQ(interface->NumDeclaredVirtualMethods(), method_array_count);
+      }
+    }
+    if (method_array_count == 0) {
+      continue;
+    }
+    auto* method_array = if_table->GetMethodArray(i);
+    for (size_t j = 0; j < method_array_count; ++j) {
+      ArtMethod* implementation_method =
+          method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
+      if (ignore_copied_methods && implementation_method->IsCopied()) {
+        continue;
+      }
+      DCHECK(implementation_method != nullptr);
+      // Miranda methods cannot be used to implement an interface method, but they are safe to put
+      // in the IMT since their entrypoint is the interface trampoline. If we put any copied methods
+      // or interface methods in the IMT here they will not create extra conflicts since we compare
+      // names and signatures in SetIMTRef.
+      ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
+      const uint32_t imt_index = GetIMTIndex(interface_method);
+
+      // There is a conflict only if the interface methods for an IMT slot do not all
+      // share the same implementation method. Keep track of the counts so we can avoid
+      // creating a conflict table when no real conflict exists.
+
+      // Conflict table size for each IMT slot.
+      ++conflict_counts[imt_index];
+
+      SetIMTRef(unimplemented_method,
+                imt_conflict_method,
+                implementation_method,
+                /*out*/&imt[imt_index]);
+    }
+  }
+
+  if (create_conflict_tables) {
+    // Create the conflict tables.
+    LinearAlloc* linear_alloc = GetAllocatorForClassLoader(klass->GetClassLoader());
+    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+      size_t conflicts = conflict_counts[i];
+      if (imt[i] == imt_conflict_method) {
+        ImtConflictTable* new_table = CreateImtConflictTable(conflicts, linear_alloc);
+        if (new_table != nullptr) {
+          ArtMethod* new_conflict_method =
+              Runtime::Current()->CreateImtConflictMethod(linear_alloc);
+          new_conflict_method->SetImtConflictTable(new_table, image_pointer_size_);
+          imt[i] = new_conflict_method;
+        } else {
+          LOG(ERROR) << "Failed to allocate conflict table";
+          imt[i] = imt_conflict_method;
+        }
+      } else {
+        DCHECK_NE(imt[i], imt_conflict_method);
+      }
+    }
+
+    for (size_t i = 0, length = if_table->Count(); i < length; ++i) {
+      mirror::Class* interface = if_table->GetInterface(i);
+      const size_t method_array_count = if_table->GetMethodArrayCount(i);
+      // There can be more virtual methods than if-table methods if there are default methods.
+      if (method_array_count == 0) {
+        continue;
+      }
+      auto* method_array = if_table->GetMethodArray(i);
+      for (size_t j = 0; j < method_array_count; ++j) {
+        ArtMethod* implementation_method =
+            method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
+        if (ignore_copied_methods && implementation_method->IsCopied()) {
+          continue;
+        }
+        DCHECK(implementation_method != nullptr);
+        ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
+        const uint32_t imt_index = GetIMTIndex(interface_method);
+        if (!imt[imt_index]->IsRuntimeMethod() ||
+            imt[imt_index] == unimplemented_method ||
+            imt[imt_index] == imt_conflict_method) {
+          continue;
+        }
+        ImtConflictTable* table = imt[imt_index]->GetImtConflictTable(image_pointer_size_);
+        const size_t num_entries = table->NumEntries(image_pointer_size_);
+        table->SetInterfaceMethod(num_entries, image_pointer_size_, interface_method);
+        table->SetImplementationMethod(num_entries, image_pointer_size_, implementation_method);
+      }
+    }
+  }
+}
+
 // Simple helper function that checks that no subtypes of 'val' are contained within the 'classes'
 // set.
 static bool NotSubinterfaceOfAny(const std::unordered_set<mirror::Class*>& classes,
@@ -6161,48 +6406,28 @@
   }
 }
 
-static void FillImtFromSuperClass(Handle<mirror::Class> klass,
-                                  Handle<mirror::IfTable> iftable,
-                                  ArtMethod* unimplemented_method,
-                                  ArtMethod* imt_conflict_method,
-                                  ArtMethod** out_imt,
-                                  size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_) {
+void ClassLinker::FillImtFromSuperClass(Handle<mirror::Class> klass,
+                                        ArtMethod* unimplemented_method,
+                                        ArtMethod* imt_conflict_method,
+                                        ArtMethod** imt) {
   DCHECK(klass->HasSuperClass());
   mirror::Class* super_class = klass->GetSuperClass();
   if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
     for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-      out_imt[i] = super_class->GetEmbeddedImTableEntry(i, pointer_size);
+      imt[i] = super_class->GetEmbeddedImTableEntry(i, image_pointer_size_);
     }
   } else {
     // No imt in the super class, need to reconstruct from the iftable.
     mirror::IfTable* if_table = super_class->GetIfTable();
-    const size_t length = super_class->GetIfTableCount();
-    for (size_t i = 0; i < length; ++i) {
-      mirror::Class* interface = iftable->GetInterface(i);
-      const size_t num_virtuals = interface->NumDeclaredVirtualMethods();
-      const size_t method_array_count = if_table->GetMethodArrayCount(i);
-      DCHECK_EQ(num_virtuals, method_array_count);
-      if (method_array_count == 0) {
-        continue;
-      }
-      auto* method_array = if_table->GetMethodArray(i);
-      for (size_t j = 0; j < num_virtuals; ++j) {
-        auto method = method_array->GetElementPtrSize<ArtMethod*>(j, pointer_size);
-        DCHECK(method != nullptr) << PrettyClass(super_class);
-        // Miranda methods cannot be used to implement an interface method and defaults should be
-        // skipped in case we override it.
-        if (method->IsDefault() || method->IsMiranda()) {
-          continue;
-        }
-        ArtMethod* interface_method = interface->GetVirtualMethod(j, pointer_size);
-        uint32_t imt_index = interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
-        auto** imt_ref = &out_imt[imt_index];
-        if (*imt_ref == unimplemented_method) {
-          *imt_ref = method;
-        } else if (*imt_ref != imt_conflict_method) {
-          *imt_ref = imt_conflict_method;
-        }
-      }
+    if (if_table != nullptr) {
+      // Ignore copied methods since we will handle these in LinkInterfaceMethods.
+      FillIMTFromIfTable(if_table,
+                         unimplemented_method,
+                         imt_conflict_method,
+                         klass.Get(),
+                         /*create_conflict_tables*/false,
+                         /*ignore_copied_methods*/true,
+                         /*out*/imt);
     }
   }
 }
@@ -6245,13 +6470,10 @@
   const bool extend_super_iftable = has_superclass;
   if (has_superclass && fill_tables) {
     FillImtFromSuperClass(klass,
-                          iftable,
                           unimplemented_method,
                           imt_conflict_method,
-                          out_imt,
-                          image_pointer_size_);
+                          out_imt);
   }
-
   // Allocate method arrays before since we don't want to miss visiting miranda method roots due to
   // thread suspension.
   if (fill_tables) {
@@ -6335,7 +6557,7 @@
         auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_);
         MethodNameAndSignatureComparator interface_name_comparator(
             interface_method->GetInterfaceMethodIfProxy(image_pointer_size_));
-        uint32_t imt_index = interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
+        uint32_t imt_index = GetIMTIndex(interface_method);
         ArtMethod** imt_ptr = &out_imt[imt_index];
         // For each method listed in the interface's method list, find the
         // matching method in our class's method list.  We want to favor the
@@ -6380,7 +6602,6 @@
                 // Place method in imt if entry is empty, place conflict otherwise.
                 SetIMTRef(unimplemented_method,
                           imt_conflict_method,
-                          image_pointer_size_,
                           vtable_method,
                           /*out*/imt_ptr);
               }
@@ -6414,6 +6635,17 @@
             // The method is not overridable by a default method (i.e. it is directly implemented
             // in some class). Therefore move onto the next interface method.
             continue;
+          } else {
+            // If the superclass's method is overridable by a default method we need to keep
+            // track of it, since being overridable does not guarantee it is actually 'overridden'.
+            // If it turns out not to be overridden and we did not keep track of it we might add it
+            // to the vtable twice, causing corruption (vtable entries having inconsistent and
+            // illegal states, incorrect vtable size, and incorrect or inconsistent iftable entries)
+            // in this class and any subclasses.
+            DCHECK(vtable_impl == nullptr || vtable_impl == supers_method)
+                << "vtable_impl was " << PrettyMethod(vtable_impl) << " and not 'nullptr' or "
+                << PrettyMethod(supers_method) << " as expected. IFTable appears to be corrupt!";
+            vtable_impl = supers_method;
           }
         }
         // If we haven't found it yet we should search through the interfaces for default methods.
@@ -6512,7 +6744,6 @@
             method_array->SetElementPtrSize(j, current_method, image_pointer_size_);
             SetIMTRef(unimplemented_method,
                       imt_conflict_method,
-                      image_pointer_size_,
                       current_method,
                       /*out*/imt_ptr);
           }
@@ -6739,6 +6970,7 @@
     }
     // Put some random garbage in old methods to help find stale pointers.
     if (methods != old_methods && old_methods != nullptr) {
+      WriterMutexLock mu(self, ClassTableForClassLoader(klass->GetClassLoader())->GetLock());
       memset(old_methods, 0xFEu, old_size);
     }
   } else {
@@ -7024,6 +7256,23 @@
   return string;
 }
 
+mirror::String* ClassLinker::LookupString(const DexFile& dex_file,
+                                          uint32_t string_idx,
+                                          Handle<mirror::DexCache> dex_cache) {
+  DCHECK(dex_cache.Get() != nullptr);
+  mirror::String* resolved = dex_cache->GetResolvedString(string_idx);
+  if (resolved != nullptr) {
+    return resolved;
+  }
+  uint32_t utf16_length;
+  const char* utf8_data = dex_file.StringDataAndUtf16LengthByIdx(string_idx, &utf16_length);
+  mirror::String* string = intern_table_->LookupStrong(Thread::Current(), utf16_length, utf8_data);
+  if (string != nullptr) {
+    dex_cache->SetResolvedString(string_idx, string);
+  }
+  return string;
+}
+
 mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file,
                                         uint16_t type_idx,
                                         mirror::Class* referrer) {
@@ -7695,12 +7944,11 @@
 void ClassLinker::InsertDexFileInToClassLoader(mirror::Object* dex_file,
                                                mirror::ClassLoader* class_loader) {
   DCHECK(dex_file != nullptr);
-  DCHECK(class_loader != nullptr);
   Thread* const self = Thread::Current();
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  ClassTable* const table = class_loader->GetClassTable();
+  ClassTable* const table = ClassTableForClassLoader(class_loader);
   DCHECK(table != nullptr);
-  if (table->InsertDexFile(dex_file)) {
+  if (table->InsertDexFile(dex_file) && class_loader != nullptr) {
     // It was not already inserted; perform the write barrier to let the GC know the class loader's
     // class table was modified.
     Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
@@ -7750,6 +7998,7 @@
     VLOG(class_linker) << "Collecting class profile for dex file " << location
                        << " types=" << num_types << " class_defs=" << num_class_defs;
     DexCacheResolvedClasses resolved_classes(dex_file->GetLocation(),
+                                             dex_file->GetBaseLocation(),
                                              dex_file->GetLocationChecksum());
     size_t num_resolved = 0;
     std::unordered_set<uint16_t> class_set;
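
A generic analogue of the IMT conflict machinery added in this file, with hypothetical types standing in for ART's: a method hashes to one of kImtSize slots by its dex method index, and a contended slot grows a conflict table that is published with release semantics so readers only ever see fully initialized entries; the superseded table is leaked, as the diff's comments note:

    #include <atomic>
    #include <cstdint>
    #include <vector>

    constexpr size_t kImtSize = 64;  // Stand-in for mirror::Class::kImtSize.

    inline uint32_t GetImtIndex(uint32_t dex_method_index) {
      return dex_method_index % kImtSize;
    }

    struct ConflictEntry { const void* interface_method; const void* implementation; };
    struct ConflictTable { std::vector<ConflictEntry> entries; };

    // Publish a one-entry-larger copy; concurrent readers keep using the old table.
    void AddToConflictTable(std::atomic<ConflictTable*>& slot,
                            const void* interface_method,
                            const void* implementation) {
      ConflictTable* old_table = slot.load(std::memory_order_acquire);
      ConflictTable* new_table = new ConflictTable(*old_table);
      new_table->entries.push_back({interface_method, implementation});
      slot.store(new_table, std::memory_order_release);  // Fence-then-assign in the diff.
    }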
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index b4fbe1c..d1c8172 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -53,6 +53,7 @@
   class StackTraceElement;
 }  // namespace mirror
 
+class ImtConflictTable;
 template<class T> class Handle;
 template<class T> class MutableHandle;
 class InternTable;
@@ -247,6 +248,12 @@
                                 Handle<mirror::DexCache> dex_cache)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Find a String with the given index from the DexFile, storing the
+  // result in the DexCache if found. Return null if not found.
+  mirror::String* LookupString(const DexFile& dex_file, uint32_t string_idx,
+                               Handle<mirror::DexCache> dex_cache)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identity the
   // target DexCache and ClassLoader to use for resolution.
@@ -304,9 +311,9 @@
   // This returns the class referred to by GetMethodId(method_idx).class_idx_. This might be
   // different from the declaring class of the resolved method due to copied
   // miranda/default/conflict methods.
-  mirror::Class* ResolveReferencedClassOfMethod(Thread* self,
-                                                uint32_t method_idx,
-                                                ArtMethod* referrer)
+  mirror::Class* ResolveReferencedClassOfMethod(uint32_t method_idx,
+                                                Handle<mirror::DexCache> dex_cache,
+                                                Handle<mirror::ClassLoader> class_loader)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
   template <ResolveMode kResolveMode>
@@ -449,7 +456,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
-  void VerifyClass(Thread* self, Handle<mirror::Class> klass)
+  void VerifyClass(Thread* self,
+                   Handle<mirror::Class> klass,
+                   LogSeverity log_level = LogSeverity::NONE)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
   bool VerifyClassUsingOatFile(const DexFile& dex_file,
@@ -587,6 +596,7 @@
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // May be called with null class_loader due to legacy code. b/27954959
   void InsertDexFileInToClassLoader(mirror::Object* dex_file, mirror::ClassLoader* class_loader)
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -601,6 +611,26 @@
       const std::set<DexCacheResolvedClasses>& classes)
       REQUIRES(!dex_lock_);
 
+  ArtMethod* AddMethodToConflictTable(mirror::Class* klass,
+                                      ArtMethod* conflict_method,
+                                      ArtMethod* interface_method,
+                                      ArtMethod* method,
+                                      bool force_new_conflict_method)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Create a conflict table with a specified capacity.
+  ImtConflictTable* CreateImtConflictTable(size_t count, LinearAlloc* linear_alloc);
+
+  // Static version for when the class linker is not yet created.
+  static ImtConflictTable* CreateImtConflictTable(size_t count,
+                                                  LinearAlloc* linear_alloc,
+                                                  size_t pointer_size);
+
+
+  // Create the IMT and conflict tables for a class.
+  void FillIMTAndConflictTables(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
+
   struct DexCacheData {
     // Weak root to the DexCache. Note: Do not decode this unnecessarily or else class unloading may
     // not work properly.
@@ -990,7 +1020,7 @@
 
   // Returns null if not found.
   ClassTable* ClassTableForClassLoader(mirror::ClassLoader* class_loader)
-      SHARED_REQUIRES(Locks::mutator_lock_, Locks::classlinker_classes_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Insert a new class table if not found.
   ClassTable* InsertClassTableForClassLoader(mirror::ClassLoader* class_loader)
@@ -1048,6 +1078,28 @@
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Sets imt_ref appropriately for LinkInterfaceMethods.
+  // If there is no method in the imt location of imt_ref it will store the given method there.
+  // Otherwise it will set the conflict method which will figure out which method to use during
+  // runtime.
+  void SetIMTRef(ArtMethod* unimplemented_method,
+                 ArtMethod* imt_conflict_method,
+                 ArtMethod* current_method,
+                 /*out*/ArtMethod** imt_ref) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void FillIMTFromIfTable(mirror::IfTable* if_table,
+                          ArtMethod* unimplemented_method,
+                          ArtMethod* imt_conflict_method,
+                          mirror::Class* klass,
+                          bool create_conflict_tables,
+                          bool ignore_copied_methods,
+                          ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void FillImtFromSuperClass(Handle<mirror::Class> klass,
+                             ArtMethod* unimplemented_method,
+                             ArtMethod* imt_conflict_method,
+                             ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
+
   std::vector<const DexFile*> boot_class_path_;
   std::vector<std::unique_ptr<const DexFile>> boot_dex_files_;
 
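A generic restatement of the SetIMTRef policy declared above (hypothetical Method type, not ART's): an empty slot takes the method, a slot already holding a method with the same name and signature is re-targeted, and anything else degrades to the conflict method, which resolves the call at runtime:

    #include <string>

    struct Method { std::string name_and_signature; };

    void SetImtRef(const Method* unimplemented, const Method* conflict,
                   const Method* current, const Method** imt_ref) {
      if (*imt_ref == unimplemented) {
        *imt_ref = current;   // First implementation seen for this slot.
      } else if ((*imt_ref)->name_and_signature == current->name_and_signature) {
        *imt_ref = current;   // Same method resolved again; keep a single target.
      } else {
        *imt_ref = conflict;  // Distinct methods share the slot: use the conflict stub.
      }
    }
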
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index e512906..42e320a 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -23,6 +23,7 @@
 
 template<class Visitor>
 void ClassTable::VisitRoots(Visitor& visitor) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     for (GcRoot<mirror::Class>& root : class_set) {
       visitor.VisitRoot(root.AddressWithoutBarrier());
@@ -35,6 +36,7 @@
 
 template<class Visitor>
 void ClassTable::VisitRoots(const Visitor& visitor) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     for (GcRoot<mirror::Class>& root : class_set) {
       visitor.VisitRoot(root.AddressWithoutBarrier());
@@ -47,6 +49,7 @@
 
 template <typename Visitor>
 bool ClassTable::Visit(Visitor& visitor) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     for (GcRoot<mirror::Class>& root : class_set) {
       if (!visitor(root.Read())) {
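
A generic analogue of the locking added to these visitors, with std::shared_mutex standing in for ART's ReaderWriterMutex: each traversal holds the reader lock for its whole duration, so writers (Insert, Remove) cannot race with it:

    #include <mutex>
    #include <shared_mutex>
    #include <unordered_set>
    #include <vector>

    class Table {
     public:
      Table() : sets_(1) {}

      template <typename Visitor>
      bool Visit(Visitor&& visitor) const {
        std::shared_lock<std::shared_mutex> lock(lock_);  // Reader side.
        for (const auto& set : sets_) {
          for (int value : set) {
            if (!visitor(value)) {
              return false;  // Stop early when the visitor returns false.
            }
          }
        }
        return true;
      }

      void Insert(int value) {
        std::unique_lock<std::shared_mutex> lock(lock_);  // Writer side.
        sets_.back().insert(value);
      }

     private:
      mutable std::shared_mutex lock_;
      std::vector<std::unordered_set<int>> sets_;
    };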
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index d815b1a..8267c68 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -20,17 +20,19 @@
 
 namespace art {
 
-ClassTable::ClassTable() {
+ClassTable::ClassTable() : lock_("Class loader classes", kClassLoaderClassesLock) {
   Runtime* const runtime = Runtime::Current();
   classes_.push_back(ClassSet(runtime->GetHashTableMinLoadFactor(),
                               runtime->GetHashTableMaxLoadFactor()));
 }
 
 void ClassTable::FreezeSnapshot() {
+  WriterMutexLock mu(Thread::Current(), lock_);
   classes_.push_back(ClassSet());
 }
 
 bool ClassTable::Contains(mirror::Class* klass) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.Find(GcRoot<mirror::Class>(klass));
     if (it != class_set.end()) {
@@ -41,6 +43,7 @@
 }
 
 mirror::Class* ClassTable::LookupByDescriptor(mirror::Class* klass) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.Find(GcRoot<mirror::Class>(klass));
     if (it != class_set.end()) {
@@ -51,6 +54,7 @@
 }
 
 mirror::Class* ClassTable::UpdateClass(const char* descriptor, mirror::Class* klass, size_t hash) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   // Should only be updating latest table.
   auto existing_it = classes_.back().FindWithHash(descriptor, hash);
   if (kIsDebugBuild && existing_it == classes_.back().end()) {
@@ -74,6 +78,7 @@
 }
 
 size_t ClassTable::NumZygoteClasses() const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   size_t sum = 0;
   for (size_t i = 0; i < classes_.size() - 1; ++i) {
     sum += classes_[i].Size();
@@ -82,10 +87,12 @@
 }
 
 size_t ClassTable::NumNonZygoteClasses() const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   return classes_.back().Size();
 }
 
 mirror::Class* ClassTable::Lookup(const char* descriptor, size_t hash) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.FindWithHash(descriptor, hash);
     if (it != class_set.end()) {
@@ -96,14 +103,17 @@
 }
 
 void ClassTable::Insert(mirror::Class* klass) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   classes_.back().Insert(GcRoot<mirror::Class>(klass));
 }
 
 void ClassTable::InsertWithHash(mirror::Class* klass, size_t hash) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   classes_.back().InsertWithHash(GcRoot<mirror::Class>(klass), hash);
 }
 
 bool ClassTable::Remove(const char* descriptor) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.Find(descriptor);
     if (it != class_set.end()) {
@@ -137,6 +147,7 @@
 }
 
 bool ClassTable::InsertDexFile(mirror::Object* dex_file) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   DCHECK(dex_file != nullptr);
   for (GcRoot<mirror::Object>& root : dex_files_) {
     if (root.Read() == dex_file) {
@@ -148,6 +159,7 @@
 }
 
 size_t ClassTable::WriteToMemory(uint8_t* ptr) const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   ClassSet combined;
   // Combine all the class sets in case there are multiple; this also adjusts the load factor
   // back to the default in case classes were pruned.
@@ -173,6 +185,7 @@
 }
 
 void ClassTable::AddClassSet(ClassSet&& set) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   classes_.insert(classes_.begin(), std::move(set));
 }
 
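A sketch of the snapshot split now guarded by the per-table lock, with a hypothetical ZygoteTable standing in for ART's ClassTable: FreezeSnapshot appends a fresh set, so earlier sets are never rehashed again and their pages stay clean after the zygote forks, while only the last set receives insertions:

    #include <mutex>
    #include <shared_mutex>
    #include <unordered_set>
    #include <vector>

    class ZygoteTable {
     public:
      ZygoteTable() : sets_(1) {}

      void FreezeSnapshot() {
        std::unique_lock<std::shared_mutex> lock(lock_);
        sets_.emplace_back();  // All later inserts land in this new set.
      }

      size_t NumZygoteClasses() const {
        std::shared_lock<std::shared_mutex> lock(lock_);
        size_t sum = 0;
        for (size_t i = 0; i + 1 < sets_.size(); ++i) {  // Every set but the last.
          sum += sets_[i].size();
        }
        return sum;
      }

      size_t NumNonZygoteClasses() const {
        std::shared_lock<std::shared_mutex> lock(lock_);
        return sets_.back().size();
      }

     private:
      mutable std::shared_mutex lock_;
      std::vector<std::unordered_set<int>> sets_;
    };
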
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 0e0e860..686381d 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -71,87 +71,100 @@
 
   // Used by image writer for checking.
   bool Contains(mirror::Class* klass)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Freeze the current class tables by allocating a new table and never updating or modifying the
   // existing table. This helps prevent dirty pages caused by inserting after the zygote fork.
   void FreezeSnapshot()
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns the number of classes in previous snapshots.
-  size_t NumZygoteClasses() const SHARED_REQUIRES(Locks::classlinker_classes_lock_);
+  size_t NumZygoteClasses() const REQUIRES(!lock_);
 
   // Returns the number of classes in the latest snapshot.
-  size_t NumNonZygoteClasses() const SHARED_REQUIRES(Locks::classlinker_classes_lock_);
+  size_t NumNonZygoteClasses() const REQUIRES(!lock_);
 
   // Update a class in the table with the new class. Returns the existing class which was replaced.
   mirror::Class* UpdateClass(const char* descriptor, mirror::Class* new_klass, size_t hash)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // NO_THREAD_SAFETY_ANALYSIS for object marking requiring heap bitmap lock.
   template<class Visitor>
   void VisitRoots(Visitor& visitor)
       NO_THREAD_SAFETY_ANALYSIS
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   template<class Visitor>
   void VisitRoots(const Visitor& visitor)
       NO_THREAD_SAFETY_ANALYSIS
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Stops visit if the visitor returns false.
   template <typename Visitor>
   bool Visit(Visitor& visitor)
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Return the first class that matches the descriptor. Returns null if there are none.
   mirror::Class* Lookup(const char* descriptor, size_t hash)
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Return the first class that matches the descriptor of klass. Returns null if there are none.
   mirror::Class* LookupByDescriptor(mirror::Class* klass)
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   void Insert(mirror::Class* klass)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
   void InsertWithHash(mirror::Class* klass, size_t hash)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns true if the class was found and removed, false otherwise.
   bool Remove(const char* descriptor)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Return true if we inserted the dex file, false if it already exists.
   bool InsertDexFile(mirror::Object* dex_file)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Combines all of the tables into one class set.
   size_t WriteToMemory(uint8_t* ptr) const
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Read a table from ptr and put it at the front of the class set.
   size_t ReadFromMemory(uint8_t* ptr)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Add a class set to the front of classes.
   void AddClassSet(ClassSet&& set)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  ReaderWriterMutex& GetLock() {
+    return lock_;
+  }
+
  private:
-  // TODO: shard lock to have one per class loader.
+  // Lock to guard inserting and removing.
+  mutable ReaderWriterMutex lock_;
   // We have a vector to help prevent dirty pages after the zygote forks by calling FreezeSnapshot.
-  std::vector<ClassSet> classes_ GUARDED_BY(Locks::classlinker_classes_lock_);
+  std::vector<ClassSet> classes_ GUARDED_BY(lock_);
   // Dex files used by the class loader which may not be owned by the class loader. We keep these
   // live so that we do not have issues closing any of the dex files.
-  std::vector<GcRoot<mirror::Object>> dex_files_ GUARDED_BY(Locks::classlinker_classes_lock_);
+  std::vector<GcRoot<mirror::Object>> dex_files_ GUARDED_BY(lock_);
 };
 
 }  // namespace art
diff --git a/runtime/compiler_filter.cc b/runtime/compiler_filter.cc
index 31a1bc1..d617caf 100644
--- a/runtime/compiler_filter.cc
+++ b/runtime/compiler_filter.cc
@@ -83,6 +83,34 @@
   UNREACHABLE();
 }
 
+CompilerFilter::Filter CompilerFilter::GetNonProfileDependentFilterFrom(Filter filter) {
+  switch (filter) {
+    case CompilerFilter::kVerifyNone:
+    case CompilerFilter::kVerifyAtRuntime:
+    case CompilerFilter::kInterpretOnly:
+    case CompilerFilter::kSpace:
+    case CompilerFilter::kBalanced:
+    case CompilerFilter::kTime:
+    case CompilerFilter::kSpeed:
+    case CompilerFilter::kEverything:
+      return filter;
+
+    case CompilerFilter::kVerifyProfile:
+      return CompilerFilter::kInterpretOnly;
+
+    case CompilerFilter::kSpaceProfile:
+      return CompilerFilter::kSpace;
+
+    case CompilerFilter::kSpeedProfile:
+      return CompilerFilter::kSpeed;
+
+    case CompilerFilter::kEverythingProfile:
+      return CompilerFilter::kEverything;
+  }
+  UNREACHABLE();
+}
+
+
 bool CompilerFilter::IsAsGoodAs(Filter current, Filter target) {
   return current >= target;
 }
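
A usage sketch for the mapping above, written as assertions against the switch's own cases; profile-guided filters fall back to their non-profile counterparts, and everything else maps to itself:

    #include <cassert>

    void CheckFilterFallbacks() {
      using F = CompilerFilter;
      assert(F::GetNonProfileDependentFilterFrom(F::kSpeedProfile) == F::kSpeed);
      assert(F::GetNonProfileDependentFilterFrom(F::kVerifyProfile) == F::kInterpretOnly);
      assert(F::GetNonProfileDependentFilterFrom(F::kEverythingProfile) == F::kEverything);
      assert(F::GetNonProfileDependentFilterFrom(F::kSpeed) == F::kSpeed);
    }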
diff --git a/runtime/compiler_filter.h b/runtime/compiler_filter.h
index 1bea8b4..e8d74dd 100644
--- a/runtime/compiler_filter.h
+++ b/runtime/compiler_filter.h
@@ -44,6 +44,8 @@
     kEverything,          // Compile everything capable of being compiled.
   };
 
+  static const Filter kDefaultCompilerFilter = kSpeed;
+
   // Returns true if an oat file with this compiler filter contains
   // compiled executable code.
   static bool IsCompilationEnabled(Filter filter);
@@ -59,6 +61,9 @@
   // profile.
   static bool DependsOnProfile(Filter filter);
 
+  // Returns a non-profile-guided version of the given filter.
+  static Filter GetNonProfileDependentFilterFrom(Filter filter);
+
   // Returns true if the 'current' compiler filter is considered at least as
   // good as the 'target' compilation type.
   // For example: kSpeed is as good as kInterpretOnly, but kInterpretOnly is
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 109e03d..55f68d3 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2362,6 +2362,10 @@
 }
 
 void Dbg::SuspendVM() {
+  // Avoid deadlocking with the GC: do not suspend all threads for the debugger during a GC. b/25800335.
+  gc::ScopedGCCriticalSection gcs(Thread::Current(),
+                                  gc::kGcCauseDebugger,
+                                  gc::kCollectorTypeDebugger);
   Runtime::Current()->GetThreadList()->SuspendAllForDebugger();
 }
 
@@ -4101,6 +4105,8 @@
   // Suspend other threads if the invoke is not single-threaded.
   if ((pReq->options & JDWP::INVOKE_SINGLE_THREADED) == 0) {
     ScopedThreadSuspension sts(soa.Self(), kWaitingForDebuggerSuspension);
+    // Avoid deadlocking with the GC: do not suspend all threads for the debugger during a GC. b/25800335.
+    gc::ScopedGCCriticalSection gcs(soa.Self(), gc::kGcCauseDebugger, gc::kCollectorTypeDebugger);
     VLOG(jdwp) << "      Suspending all threads";
     Runtime::Current()->GetThreadList()->SuspendAllForDebugger();
   }
@@ -4818,7 +4824,7 @@
   LOG(INFO) << "Tracked allocations, (count=" << count << ")";
   for (auto it = records->RBegin(), end = records->REnd();
       count > 0 && it != end; count--, it++) {
-    const gc::AllocRecord* record = it->second;
+    const gc::AllocRecord* record = &it->second;
 
     LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->GetTid(), record->ByteCount())
               << PrettyClass(record->GetClass());
@@ -4957,7 +4963,7 @@
     uint16_t count = capped_count;
     for (auto it = records->RBegin(), end = records->REnd();
          count > 0 && it != end; count--, it++) {
-      const gc::AllocRecord* record = it->second;
+      const gc::AllocRecord* record = &it->second;
       std::string temp;
       class_names.Add(record->GetClassDescriptor(&temp));
       for (size_t i = 0, depth = record->GetDepth(); i < depth; i++) {
@@ -5008,7 +5014,7 @@
       // (2b) thread id
       // (2b) allocated object's class name index
       // (1b) stack depth
-      const gc::AllocRecord* record = it->second;
+      const gc::AllocRecord* record = &it->second;
       size_t stack_depth = record->GetDepth();
       size_t allocated_object_class_name_index =
           class_names.IndexOf(record->GetClassDescriptor(&temp));
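
A minimal analogue of the repeated change in this file, with a hypothetical AllocRecord type: the records container now maps keys to AllocRecord values rather than pointers, so each loop takes the element's address instead of reading a stored pointer:

    #include <list>
    #include <utility>

    struct AllocRecord { int tid; };

    void WalkNewestFirst(const std::list<std::pair<const void*, AllocRecord>>& records) {
      for (auto it = records.rbegin(), end = records.rend(); it != end; ++it) {
        const AllocRecord* record = &it->second;  // Was 'it->second' when values were pointers.
        (void)record->tid;  // ... inspect the record ...
      }
    }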
diff --git a/runtime/dex_cache_resolved_classes.h b/runtime/dex_cache_resolved_classes.h
index 80c12cb..0febbed 100644
--- a/runtime/dex_cache_resolved_classes.h
+++ b/runtime/dex_cache_resolved_classes.h
@@ -26,8 +26,11 @@
 // Data structure for passing around which classes belonging to a dex cache / dex file are resolved.
 class DexCacheResolvedClasses {
  public:
-  DexCacheResolvedClasses(const std::string& dex_location, uint32_t location_checksum)
+  DexCacheResolvedClasses(const std::string& dex_location,
+                          const std::string& base_location,
+                          uint32_t location_checksum)
       : dex_location_(dex_location),
+        base_location_(base_location),
         location_checksum_(location_checksum) {}
 
   // Only compare the key elements, ignore the resolved classes.
@@ -35,6 +38,7 @@
     if (location_checksum_ != other.location_checksum_) {
       return static_cast<int>(location_checksum_ - other.location_checksum_);
     }
+    // Don't need to compare base_location_ since dex_location_ has more info.
     return dex_location_.compare(other.dex_location_);
   }
 
@@ -47,6 +51,10 @@
     return dex_location_;
   }
 
+  const std::string& GetBaseLocation() const {
+    return base_location_;
+  }
+
   uint32_t GetLocationChecksum() const {
     return location_checksum_;
   }
@@ -57,6 +65,7 @@
 
  private:
   const std::string dex_location_;
+  const std::string base_location_;
   const uint32_t location_checksum_;
   // Array of resolved class def indexes.
   mutable std::unordered_set<uint16_t> classes_;
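A hedged usage sketch of the widened constructor: for a multidex entry the full dex location keeps its ":classes2.dex" suffix while the new base_location_ names the containing file, and Compare() still keys only on checksum plus full location. Paths and checksums below are illustrative:

DexCacheResolvedClasses primary("/data/app/example/base.apk",
                                "/data/app/example/base.apk",
                                0x11111111u);
DexCacheResolvedClasses secondary("/data/app/example/base.apk:classes2.dex",
                                  "/data/app/example/base.apk",
                                  0x22222222u);
// Checksum is compared first, then the full dex location; base_location_
// deliberately contributes nothing to the ordering.
bool distinct = primary.Compare(secondary) != 0;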
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 4a0a6fc..63f3f08 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -62,7 +62,12 @@
 namespace art {
 
 const uint8_t DexFile::kDexMagic[] = { 'd', 'e', 'x', '\n' };
-const uint8_t DexFile::kDexMagicVersion[] = { '0', '3', '5', '\0' };
+const uint8_t DexFile::kDexMagicVersions[DexFile::kNumDexVersions][DexFile::kDexVersionLen] = {
+  {'0', '3', '5', '\0'},
+  // Dex version 036 skipped because of an old Dalvik bug on some versions of Android where dex
+  // files with that version number would erroneously be accepted and run.
+  {'0', '3', '7', '\0'}
+};
 
 bool DexFile::GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg) {
   CHECK(checksum != nullptr);
@@ -493,11 +498,16 @@
 
 bool DexFile::IsVersionValid(const uint8_t* magic) {
   const uint8_t* version = &magic[sizeof(kDexMagic)];
-  return (memcmp(version, kDexMagicVersion, sizeof(kDexMagicVersion)) == 0);
+  for (uint32_t i = 0; i < kNumDexVersions; i++) {
+    if (memcmp(version, kDexMagicVersions[i], kDexVersionLen) == 0) {
+      return true;
+    }
+  }
+  return false;
 }
 
-uint32_t DexFile::GetVersion() const {
-  const char* version = reinterpret_cast<const char*>(&GetHeader().magic_[sizeof(kDexMagic)]);
+uint32_t DexFile::Header::GetVersion() const {
+  const char* version = reinterpret_cast<const char*>(&magic_[sizeof(kDexMagic)]);
   return atoi(version);
 }
 
@@ -1341,6 +1351,17 @@
   return ProcessAnnotationSetRefList(method_class, set_ref_list, size);
 }
 
+mirror::ObjectArray<mirror::String>* DexFile::GetSignatureAnnotationForMethod(ArtMethod* method)
+    const {
+  const AnnotationSetItem* annotation_set = FindAnnotationSetForMethod(method);
+  if (annotation_set == nullptr) {
+    return nullptr;
+  }
+  StackHandleScope<1> hs(Thread::Current());
+  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
+  return GetSignatureValue(method_class, annotation_set);
+}
+
 bool DexFile::IsMethodAnnotationPresent(ArtMethod* method, Handle<mirror::Class> annotation_class)
     const {
   const AnnotationSetItem* annotation_set = FindAnnotationSetForMethod(method);
@@ -1538,6 +1559,15 @@
   return true;
 }
 
+mirror::ObjectArray<mirror::String>* DexFile::GetSignatureAnnotationForClass(
+    Handle<mirror::Class> klass) const {
+  const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  if (annotation_set == nullptr) {
+    return nullptr;
+  }
+  return GetSignatureValue(klass, annotation_set);
+}
+
 bool DexFile::IsClassAnnotationPresent(Handle<mirror::Class> klass,
                                        Handle<mirror::Class> annotation_class) const {
   const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
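With the version table above, a header is accepted when its four version bytes match any supported row. An illustrative check against hand-built magic values (layout per the dex format: "dex\n" followed by the version string):

const uint8_t magic_035[8] = {'d', 'e', 'x', '\n', '0', '3', '5', '\0'};
const uint8_t magic_036[8] = {'d', 'e', 'x', '\n', '0', '3', '6', '\0'};
const uint8_t magic_037[8] = {'d', 'e', 'x', '\n', '0', '3', '7', '\0'};
CHECK(DexFile::IsVersionValid(magic_035));   // supported
CHECK(!DexFile::IsVersionValid(magic_036));  // skipped on purpose, see above
CHECK(DexFile::IsVersionValid(magic_037));   // supported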
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index e497e9c..ce7f62a 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -57,8 +57,12 @@
 // TODO: move all of the macro functionality into the DexCache class.
 class DexFile {
  public:
+  static const uint32_t kDefaultMethodsVersion = 37;
   static const uint8_t kDexMagic[];
-  static const uint8_t kDexMagicVersion[];
+  static constexpr size_t kNumDexVersions = 2;
+  static constexpr size_t kDexVersionLen = 4;
+  static const uint8_t kDexMagicVersions[kNumDexVersions][kDexVersionLen];
+
   static constexpr size_t kSha1DigestSize = 20;
   static constexpr uint32_t kDexEndianConstant = 0x12345678;
 
@@ -71,7 +75,7 @@
   // The value of an invalid index.
   static const uint16_t kDexNoIndex16 = 0xFFFF;
 
-  // The separator charactor in MultiDex locations.
+  // The separator character in MultiDex locations.
   static constexpr char kMultiDexSeparator = ':';
 
   // A string version of the previous. This is a define so that we can merge string literals in the
@@ -104,6 +108,9 @@
     uint32_t data_size_;  // unused
     uint32_t data_off_;  // unused
 
+    // Decode the dex magic version
+    uint32_t GetVersion() const;
+
    private:
     DISALLOW_COPY_AND_ASSIGN(Header);
   };
@@ -476,7 +483,9 @@
   }
 
   // Decode the dex magic version
-  uint32_t GetVersion() const;
+  uint32_t GetVersion() const {
+    return GetHeader().GetVersion();
+  }
 
   // Returns true if the byte string points to the magic value.
   static bool IsMagicValid(const uint8_t* magic);
@@ -983,6 +992,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::ObjectArray<mirror::Object>* GetParameterAnnotations(ArtMethod* method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForMethod(ArtMethod* method) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsMethodAnnotationPresent(ArtMethod* method, Handle<mirror::Class> annotation_class) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1005,6 +1016,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   bool GetInnerClassFlags(Handle<mirror::Class> klass, uint32_t* flags) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForClass(Handle<mirror::Class> klass)
+      const SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsClassAnnotationPresent(Handle<mirror::Class> klass, Handle<mirror::Class> annotation_class)
       const SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 9c9b8c5..bbffbbb 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -251,6 +251,14 @@
   return true;
 }
 
+bool DexFileVerifier::CheckSizeLimit(uint32_t size, uint32_t limit, const char* label) {
+  if (size > limit) {
+    ErrorStringPrintf("Size(%u) should not exceed limit(%u) for %s.", size, limit, label);
+    return false;
+  }
+  return true;
+}
+
 bool DexFileVerifier::CheckHeader() {
   // Check file size from the header.
   uint32_t expected_size = header_->file_size_;
@@ -298,10 +306,12 @@
                               header_->type_ids_size_,
                               4,
                               "type-ids") &&
+      CheckSizeLimit(header_->type_ids_size_, DexFile::kDexNoIndex16, "type-ids") &&
       CheckValidOffsetAndSize(header_->proto_ids_off_,
                               header_->proto_ids_size_,
                               4,
                               "proto-ids") &&
+      CheckSizeLimit(header_->proto_ids_size_, DexFile::kDexNoIndex16, "proto-ids") &&
       CheckValidOffsetAndSize(header_->field_ids_off_,
                               header_->field_ids_size_,
                               4,
@@ -1786,13 +1796,8 @@
       while (curr_it.HasNext() && prev_it.HasNext()) {
         uint16_t prev_idx = prev_it.GetTypeIdx();
         uint16_t curr_idx = curr_it.GetTypeIdx();
-        if (prev_idx == DexFile::kDexNoIndex16) {
-          break;
-        }
-        if (UNLIKELY(curr_idx == DexFile::kDexNoIndex16)) {
-          ErrorStringPrintf("Out-of-order proto_id arguments");
-          return false;
-        }
+        DCHECK_NE(prev_idx, DexFile::kDexNoIndex16);
+        DCHECK_NE(curr_idx, DexFile::kDexNoIndex16);
 
         if (prev_idx < curr_idx) {
           break;
@@ -1804,6 +1809,12 @@
         prev_it.Next();
         curr_it.Next();
       }
+      if (!curr_it.HasNext()) {
+        // Either a duplicate ProtoId or a ProtoId with a shorter argument list follows
+        // a ProtoId with a longer one. Both cases are forbidden by the specification.
+        ErrorStringPrintf("Out-of-order proto_id arguments");
+        return false;
+      }
     }
   }
 
@@ -2358,7 +2369,8 @@
 static std::string GetStringOrError(const uint8_t* const begin,
                                     const DexFile::Header* const header,
                                     uint32_t string_idx) {
-  if (header->string_ids_size_ < string_idx) {
+  // The `string_idx` is not guaranteed to be valid yet.
+  if (header->string_ids_size_ <= string_idx) {
     return "(error)";
   }
 
@@ -2375,9 +2387,11 @@
 static std::string GetClassOrError(const uint8_t* const begin,
                                    const DexFile::Header* const header,
                                    uint32_t class_idx) {
-  if (header->type_ids_size_ < class_idx) {
-    return "(error)";
-  }
+  // The `class_idx` is either `FieldId::class_idx_` or `MethodId::class_idx_` and
+  // it has already been checked in `DexFileVerifier::CheckClassDataItemField()`
+  // or `DexFileVerifier::CheckClassDataItemMethod()`, respectively, to match
+  // a valid defining class.
+  CHECK_LT(class_idx, header->type_ids_size_);
 
   const DexFile::TypeId* type_id =
       reinterpret_cast<const DexFile::TypeId*>(begin + header->type_ids_off_) + class_idx;
@@ -2390,9 +2404,8 @@
 static std::string GetFieldDescriptionOrError(const uint8_t* const begin,
                                               const DexFile::Header* const header,
                                               uint32_t idx) {
-  if (header->field_ids_size_ < idx) {
-    return "(error)";
-  }
+  // The `idx` has already been checked in `DexFileVerifier::CheckClassDataItemField()`.
+  CHECK_LT(idx, header->field_ids_size_);
 
   const DexFile::FieldId* field_id =
       reinterpret_cast<const DexFile::FieldId*>(begin + header->field_ids_off_) + idx;
@@ -2408,9 +2421,8 @@
 static std::string GetMethodDescriptionOrError(const uint8_t* const begin,
                                                const DexFile::Header* const header,
                                                uint32_t idx) {
-  if (header->method_ids_size_ < idx) {
-    return "(error)";
-  }
+  // The `idx` has already been checked in `DexFileVerifier::CheckClassDataItemMethod()`.
+  CHECK_LT(idx, header->method_ids_size_);
 
   const DexFile::MethodId* method_id =
       reinterpret_cast<const DexFile::MethodId*>(begin + header->method_ids_off_) + idx;
@@ -2465,7 +2477,13 @@
                                 GetFieldDescriptionOrError(begin_, header_, idx).c_str(),
                                 field_access_flags,
                                 PrettyJavaAccessFlags(field_access_flags).c_str());
-      return false;
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+        return false;
+      } else {
+        // Allow in older versions, but warn.
+        LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                     << *error_msg;
+      }
     }
     // Interface fields may be synthetic, but may not have other flags.
     constexpr uint32_t kDisallowed = ~(kPublicFinalStatic | kAccSynthetic);
@@ -2474,7 +2492,13 @@
                                 GetFieldDescriptionOrError(begin_, header_, idx).c_str(),
                                 field_access_flags,
                                 PrettyJavaAccessFlags(field_access_flags).c_str());
-      return false;
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+        return false;
+      } else {
+        // Allow in older versions, but warn.
+        LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                     << *error_msg;
+      }
     }
     return true;
   }
@@ -2596,7 +2620,13 @@
       *error_msg = StringPrintf("Constructor %" PRIu32 "(%s) is not flagged correctly wrt/ static.",
                                 method_index,
                                 GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
-      return false;
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+        return false;
+      } else {
+        // Allow in older versions, but warn.
+        LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                     << *error_msg;
+      }
     }
   }
   // Check that static and private methods, as well as constructors, are in the direct methods list,
@@ -2614,6 +2644,27 @@
   // From here on out it is easier to mask out the bits we're supposed to ignore.
   method_access_flags &= kMethodAccessFlags;
 
+  // Interfaces are special.
+  if ((class_access_flags & kAccInterface) != 0) {
+    // Non-static interface methods must be public or private.
+    uint32_t desired_flags = (kAccPublic | kAccStatic);
+    if (dex_file_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+      desired_flags |= kAccPrivate;
+    }
+    if ((method_access_flags & desired_flags) == 0) {
+      *error_msg = StringPrintf("Interface virtual method %" PRIu32 "(%s) is not public",
+          method_index,
+          GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+        return false;
+      } else {
+        // Allow in older versions, but warn.
+        LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                      << *error_msg;
+      }
+    }
+  }
+
   // If there aren't any instructions, make sure that's expected.
   if (!has_code) {
     // Only native or abstract methods may not have code.
@@ -2629,7 +2680,13 @@
       *error_msg = StringPrintf("Constructor %u(%s) must not be abstract or native",
                                 method_index,
                                 GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
-      return false;
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+        return false;
+      } else {
+        // Allow in older versions, but warn.
+        LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                      << *error_msg;
+      }
     }
     if ((method_access_flags & kAccAbstract) != 0) {
       // Abstract methods are not allowed to have the following flags.
@@ -2652,12 +2709,18 @@
     }
     // Interfaces are special.
     if ((class_access_flags & kAccInterface) != 0) {
-      // Interface methods must be public and abstract.
+      // Interface methods without code must be abstract.
       if ((method_access_flags & (kAccPublic | kAccAbstract)) != (kAccPublic | kAccAbstract)) {
         *error_msg = StringPrintf("Interface method %" PRIu32 "(%s) is not public and abstract",
             method_index,
             GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
-        return false;
+        if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+          return false;
+        } else {
+          // Allow in older versions, but warn.
+          LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                       << *error_msg;
+        }
       }
       // At this point, we know the method is public and abstract. This means that all the checks
       // for invalid combinations above applies. In addition, interface methods must not be
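The version-gated leniency repeated through these hunks could be factored as below; the helper is hypothetical, not part of the change, and only illustrates the pattern: dex 037+ fails hard, dex 035 logs a warning and continues so that already-shipped apps keep running.

// Hypothetical helper. Returns true if verification may continue despite the
// access-flag error.
static bool AllowWithWarningForOldDex(const DexFile::Header* header,
                                      const std::string& error_msg) {
  if (header->GetVersion() >= DexFile::kDefaultMethodsVersion) {
    return false;  // hard failure for 037 and newer
  }
  LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
               << error_msg;
  return true;  // tolerated, with a warning, for 035
}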
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index be0e6d8..90409db 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -49,6 +49,8 @@
   // Checks whether the offset is zero (when size is zero) or that the offset falls within the area
   // claimed by the file.
   bool CheckValidOffsetAndSize(uint32_t offset, uint32_t size, size_t alignment, const char* label);
+  // Checks that the size does not exceed the given limit.
+  bool CheckSizeLimit(uint32_t size, uint32_t limit, const char* label);
   bool CheckIndex(uint32_t field, uint32_t limit, const char* label);
 
   bool CheckHeader();
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 44cf2ee..3741c1e 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -57,7 +57,14 @@
   255, 255, 255, 255
 };
 
-static inline uint8_t* DecodeBase64(const char* src, size_t* dst_size) {
+// Make the Dex file version 37.
+static void MakeDexVersion37(DexFile* dex_file) {
+  size_t offset = OFFSETOF_MEMBER(DexFile::Header, magic_) + 6;
+  CHECK_EQ(*(dex_file->Begin() + offset), '5');
+  *(const_cast<uint8_t*>(dex_file->Begin()) + offset) = '7';
+}
+
+static inline std::unique_ptr<uint8_t[]> DecodeBase64(const char* src, size_t* dst_size) {
   std::vector<uint8_t> tmp;
   uint32_t t = 0, y = 0;
   int g = 3;
@@ -100,7 +107,7 @@
     *dst_size = 0;
   }
   std::copy(tmp.begin(), tmp.end(), dst.get());
-  return dst.release();
+  return dst;
 }
 
 static void FixUpChecksum(uint8_t* dex_file) {
@@ -113,25 +120,18 @@
   header->checksum_ = adler_checksum;
 }
 
-// Custom deleter. Necessary to clean up the memory we use (to be able to mutate).
-struct DexFileDeleter {
-  void operator()(DexFile* in) {
-    if (in != nullptr) {
-      delete[] in->Begin();
-      delete in;
-    }
-  }
-};
-
-using DexFileUniquePtr = std::unique_ptr<DexFile, DexFileDeleter>;
-
 class DexFileVerifierTest : public CommonRuntimeTest {
  protected:
   void VerifyModification(const char* dex_file_base64_content,
                           const char* location,
                           std::function<void(DexFile*)> f,
                           const char* expected_error) {
-    DexFileUniquePtr dex_file(WrapAsDexFile(dex_file_base64_content));
+    size_t length;
+    std::unique_ptr<uint8_t[]> dex_bytes = DecodeBase64(dex_file_base64_content, &length);
+    CHECK(dex_bytes != nullptr);
+    // Note: `dex_file` will be destroyed before `dex_bytes`.
+    std::unique_ptr<DexFile> dex_file(
+        new DexFile(dex_bytes.get(), length, "tmp", 0, nullptr, nullptr));
     f(dex_file.get());
     FixUpChecksum(const_cast<uint8_t*>(dex_file->Begin()));
 
@@ -150,15 +150,6 @@
       }
     }
   }
-
- private:
-  static DexFile* WrapAsDexFile(const char* dex_file_content_in_base_64) {
-    // Decode base64.
-    size_t length;
-    uint8_t* dex_bytes = DecodeBase64(dex_file_content_in_base_64, &length);
-    CHECK(dex_bytes != nullptr);
-    return new DexFile(dex_bytes, length, "tmp", 0, nullptr, nullptr);
-  }
 };
 
 static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
@@ -290,7 +281,9 @@
 // Find the method data for the first method with the given name (from class 0). Note: the pointer
 // is to the access flags, so that the caller doesn't have to handle the leb128-encoded method-index
 // delta.
-static const uint8_t* FindMethodData(const DexFile* dex_file, const char* name) {
+static const uint8_t* FindMethodData(const DexFile* dex_file,
+                                     const char* name,
+                                     /*out*/ uint32_t* method_idx = nullptr) {
   const DexFile::ClassDef& class_def = dex_file->GetClassDef(0);
   const uint8_t* class_data = dex_file->GetClassData(class_def);
 
@@ -316,6 +309,9 @@
     const DexFile::StringId& string_id = dex_file->GetStringId(name_index);
     const char* str = dex_file->GetStringData(string_id);
     if (strcmp(name, str) == 0) {
+      if (method_idx != nullptr) {
+        *method_idx = method_index;
+      }
       DecodeUnsignedLeb128(&trailing);
       return trailing;
     }
@@ -449,6 +445,7 @@
         kMethodFlagsTestDex,
         "method_flags_constructor_native_nocode",
         [&](DexFile* dex_file) {
+          MakeDexVersion37(dex_file);
           ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
           ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
 
@@ -461,6 +458,7 @@
         kMethodFlagsTestDex,
         "method_flags_constructor_abstract_nocode",
         [&](DexFile* dex_file) {
+          MakeDexVersion37(dex_file);
           ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
           ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
 
@@ -521,6 +519,7 @@
       kMethodFlagsTestDex,
       "init_not_allowed_flags",
       [&](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
         ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
 
@@ -683,6 +682,22 @@
   }
 }
 
+TEST_F(DexFileVerifierTest, B28552165) {
+  // Regression test for bad error string retrieval in different situations.
+  // Using invalid access flags to trigger the error.
+  VerifyModification(
+      kMethodFlagsTestDex,
+      "b28552165",
+      [](DexFile* dex_file) {
+        OrMaskToMethodFlags(dex_file, "foo", kAccPublic | kAccProtected);
+        uint32_t method_idx;
+        FindMethodData(dex_file, "foo", &method_idx);
+        auto* method_id = const_cast<DexFile::MethodId*>(&dex_file->GetMethodId(method_idx));
+        method_id->name_idx_ = dex_file->NumStringIds();
+      },
+      "Method may have only one of public/protected/private, LMethodFlags;.(error)");
+}
+
 // Set of dex files for interface method tests. As it's not as easy to mutate method names, it's
 // just easier to break up bad cases.
 
@@ -733,6 +748,14 @@
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
       },
       nullptr);
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_ok37",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+      },
+      nullptr);
 
   VerifyModification(
       kMethodFlagsInterface,
@@ -742,7 +765,18 @@
 
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
       },
-      "Interface method 1(LInterfaceMethodFlags;.foo) is not public and abstract");
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_non_public",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
+      },
+      "Interface virtual method 1(LInterfaceMethodFlags;.foo) is not public");
+
   VerifyModification(
       kMethodFlagsInterface,
       "method_flags_interface_non_abstract",
@@ -781,7 +815,18 @@
 
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
       },
-      "Interface method 1(LInterfaceMethodFlags;.foo) is not public and abstract");
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_non_public",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
+      },
+      "Interface virtual method 1(LInterfaceMethodFlags;.foo) is not public");
+
   VerifyModification(
       kMethodFlagsInterface,
       "method_flags_interface_protected",
@@ -791,7 +836,18 @@
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
         OrMaskToMethodFlags(dex_file, "foo", kAccProtected);
       },
-      "Interface method 1(LInterfaceMethodFlags;.foo) is not public and abstract");
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_protected",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
+        OrMaskToMethodFlags(dex_file, "foo", kAccProtected);
+      },
+      "Interface virtual method 1(LInterfaceMethodFlags;.foo) is not public");
 
   constexpr uint32_t kAllMethodFlags =
       kAccPublic |
@@ -1070,6 +1126,14 @@
         ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
       },
       nullptr);
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+      },
+      nullptr);
 
   VerifyModification(
       kFieldFlagsInterfaceTestDex,
@@ -1079,7 +1143,18 @@
 
         ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
       },
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_non_public",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
+      },
       "Interface field is not public final static");
+
   VerifyModification(
       kFieldFlagsInterfaceTestDex,
       "field_flags_interface_non_final",
@@ -1088,7 +1163,18 @@
 
         ApplyMaskToFieldFlags(dex_file, "foo", ~kAccFinal);
       },
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_non_final",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccFinal);
+      },
       "Interface field is not public final static");
+
   VerifyModification(
       kFieldFlagsInterfaceTestDex,
       "field_flags_interface_protected",
@@ -1098,7 +1184,19 @@
         ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
         OrMaskToFieldFlags(dex_file, "foo", kAccProtected);
       },
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_protected",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
+        OrMaskToFieldFlags(dex_file, "foo", kAccProtected);
+      },
       "Interface field is not public final static");
+
   VerifyModification(
       kFieldFlagsInterfaceTestDex,
       "field_flags_interface_private",
@@ -1108,6 +1206,17 @@
         ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
         OrMaskToFieldFlags(dex_file, "foo", kAccPrivate);
       },
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_private",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
+        OrMaskToFieldFlags(dex_file, "foo", kAccPrivate);
+      },
       "Interface field is not public final static");
 
   VerifyModification(
@@ -1152,6 +1261,21 @@
           }
           OrMaskToFieldFlags(dex_file, "foo", mask);
         },
+        nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+    VerifyModification(
+        kFieldFlagsInterfaceTestDex,
+        "field_flags_interface_disallowed",
+        [&](DexFile* dex_file) {
+          MakeDexVersion37(dex_file);
+          ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+          uint32_t mask = ApplyMaskShifted(kInterfaceDisallowed, i);
+          if ((mask & kAccProtected) != 0) {
+            mask &= ~kAccProtected;
+            ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
+          }
+          OrMaskToFieldFlags(dex_file, "foo", mask);
+        },
         "Interface field has disallowed flag");
   }
 }
@@ -1180,6 +1304,14 @@
       [](DexFile* dex_file) {
         ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
       },
+      nullptr);  // Should be allowed in older dex versions for backwards compatibility.
+  VerifyModification(
+      kFieldFlagsInterfaceBadTestDex,
+      "field_flags_interface_non_static",
+      [](DexFile* dex_file) {
+        MakeDexVersion37(dex_file);
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+      },
       "Interface field is not public final static");
 }
 
@@ -1312,4 +1444,81 @@
   }
 }
 
+// Generated from
+//
+// .class LOverloading;
+//
+// .super Ljava/lang/Object;
+//
+// .method public static foo()V
+// .registers 1
+//     return-void
+// .end method
+//
+// .method public static foo(I)V
+// .registers 1
+//     return-void
+// .end method
+static const char kProtoOrderingTestDex[] =
+    "ZGV4CjAzNQA1L+ABE6voQ9Lr4Ci//efB53oGnDr5PinsAQAAcAAAAHhWNBIAAAAAAAAAAFgBAAAG"
+    "AAAAcAAAAAQAAACIAAAAAgAAAJgAAAAAAAAAAAAAAAIAAACwAAAAAQAAAMAAAAAMAQAA4AAAAOAA"
+    "AADjAAAA8gAAAAYBAAAJAQAADQEAAAAAAAABAAAAAgAAAAMAAAADAAAAAwAAAAAAAAAEAAAAAwAA"
+    "ABQBAAABAAAABQAAAAEAAQAFAAAAAQAAAAAAAAACAAAAAAAAAP////8AAAAASgEAAAAAAAABSQAN"
+    "TE92ZXJsb2FkaW5nOwASTGphdmEvbGFuZy9PYmplY3Q7AAFWAAJWSQADZm9vAAAAAQAAAAAAAAAA"
+    "AAAAAAAAAAEAAAAAAAAAAAAAAAEAAAAOAAAAAQABAAAAAAAAAAAAAQAAAA4AAAACAAAJpAIBCbgC"
+    "AAAMAAAAAAAAAAEAAAAAAAAAAQAAAAYAAABwAAAAAgAAAAQAAACIAAAAAwAAAAIAAACYAAAABQAA"
+    "AAIAAACwAAAABgAAAAEAAADAAAAAAiAAAAYAAADgAAAAARAAAAEAAAAUAQAAAxAAAAIAAAAcAQAA"
+    "ASAAAAIAAAAkAQAAACAAAAEAAABKAQAAABAAAAEAAABYAQAA";
+
+TEST_F(DexFileVerifierTest, ProtoOrdering) {
+  {
+    // The input dex file should be good before modification.
+    ScratchFile tmp;
+    std::string error_msg;
+    std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kProtoOrderingTestDex,
+                                                         tmp.GetFilename().c_str(),
+                                                         &error_msg));
+    ASSERT_TRUE(raw.get() != nullptr) << error_msg;
+  }
+
+  // Modify the order of the ProtoIds for two overloads of "foo" with the
+  // same return type and one having longer parameter list than the other.
+  for (size_t i = 0; i != 2; ++i) {
+    VerifyModification(
+        kProtoOrderingTestDex,
+        "proto_ordering",
+        [i](DexFile* dex_file) {
+          uint32_t method_idx;
+          const uint8_t* data = FindMethodData(dex_file, "foo", &method_idx);
+          CHECK(data != nullptr);
+          // There should be 2 methods called "foo".
+          CHECK_LT(method_idx + 1u, dex_file->NumMethodIds());
+          CHECK_EQ(dex_file->GetMethodId(method_idx).name_idx_,
+                   dex_file->GetMethodId(method_idx + 1).name_idx_);
+          CHECK_EQ(dex_file->GetMethodId(method_idx).proto_idx_ + 1u,
+                   dex_file->GetMethodId(method_idx + 1).proto_idx_);
+          // Their return types should be the same.
+          uint32_t proto1_idx = dex_file->GetMethodId(method_idx).proto_idx_;
+          const DexFile::ProtoId& proto1 = dex_file->GetProtoId(proto1_idx);
+          const DexFile::ProtoId& proto2 = dex_file->GetProtoId(proto1_idx + 1u);
+          CHECK_EQ(proto1.return_type_idx_, proto2.return_type_idx_);
+          // And the first should not have any parameters while the second should have some.
+          CHECK(!DexFileParameterIterator(*dex_file, proto1).HasNext());
+          CHECK(DexFileParameterIterator(*dex_file, proto2).HasNext());
+          if (i == 0) {
+            // Swap the proto parameters and shorties to break the ordering.
+            std::swap(const_cast<uint32_t&>(proto1.parameters_off_),
+                      const_cast<uint32_t&>(proto2.parameters_off_));
+            std::swap(const_cast<uint32_t&>(proto1.shorty_idx_),
+                      const_cast<uint32_t&>(proto2.shorty_idx_));
+          } else {
+            // Copy the proto parameters and shorty to create duplicate proto id.
+            const_cast<uint32_t&>(proto1.parameters_off_) = proto2.parameters_off_;
+            const_cast<uint32_t&>(proto1.shorty_idx_) = proto2.shorty_idx_;
+          }
+        },
+        "Out-of-order proto_id arguments");
+  }
+}
+
 }  // namespace art
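For context on the ProtoOrdering test: the dex format orders proto_ids by return type, then by the argument type list, where a list that is a prefix of another sorts first and exact duplicates are forbidden. Worked out for the two foo overloads (indexes assumed):

// proto #0: shorty "V",  ()V   -- empty argument list, sorts first
// proto #1: shorty "VI", (I)V  -- same return type, strictly longer list
//
// i == 0 swaps parameters_off_/shorty_idx_ and breaks the prefix rule;
// i == 1 copies them and creates a duplicate proto_id. The rewritten loop in
// dex_file_verifier.cc now reports both as "Out-of-order proto_id arguments".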
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 5344cdd..fc62573 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -39,17 +39,22 @@
 
 namespace art {
 
+template <bool kResolve = true>
 inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method,
                                     const InlineInfo& inline_info,
+                                    const InlineInfoEncoding& encoding,
                                     uint8_t inlining_depth)
   SHARED_REQUIRES(Locks::mutator_lock_) {
-  uint32_t method_index = inline_info.GetMethodIndexAtDepth(inlining_depth);
+  uint32_t method_index = inline_info.GetMethodIndexAtDepth(encoding, inlining_depth);
   InvokeType invoke_type = static_cast<InvokeType>(
-        inline_info.GetInvokeTypeAtDepth(inlining_depth));
+        inline_info.GetInvokeTypeAtDepth(encoding, inlining_depth));
   ArtMethod* caller = outer_method->GetDexCacheResolvedMethod(method_index, sizeof(void*));
   if (!caller->IsRuntimeMethod()) {
     return caller;
   }
+  if (!kResolve) {
+    return nullptr;
+  }
 
   // The method in the dex cache can be the runtime method responsible for invoking
   // the stub that will then update the dex cache. Therefore, we need to do the
@@ -64,7 +69,10 @@
   if (inlining_depth == 0) {
     class_loader.Assign(outer_method->GetClassLoader());
   } else {
-    caller = GetResolvedMethod(outer_method, inline_info, inlining_depth - 1);
+    caller = GetResolvedMethod<kResolve>(outer_method,
+                                         inline_info,
+                                         encoding,
+                                         inlining_depth - 1);
     class_loader.Assign(caller->GetClassLoader());
   }
 
@@ -506,12 +514,18 @@
         CHECK(self->IsExceptionPending());
         return nullptr;
       } else if (!method_reference_class->IsInterface()) {
-        // It is not an interface.
-        mirror::Class* super_class = referring_class->GetSuperClass();
+        // It is not an interface. If the referring class is in the class hierarchy of the
+        // referenced class in the bytecode, we use its super class. Otherwise, we throw
+        // a NoSuchMethodError.
+        mirror::Class* super_class = nullptr;
+        if (method_reference_class->IsAssignableFrom(referring_class)) {
+          super_class = referring_class->GetSuperClass();
+        }
         uint16_t vtable_index = resolved_method->GetMethodIndex();
         if (access_check) {
           // Check existence of super class.
-          if (super_class == nullptr || !super_class->HasVTable() ||
+          if (super_class == nullptr ||
+              !super_class->HasVTable() ||
               vtable_index >= static_cast<uint32_t>(super_class->GetVTableLength())) {
             // Behavior to agree with that of the verifier.
             ThrowNoSuchMethodError(type, resolved_method->GetDeclaringClass(),
@@ -685,8 +699,13 @@
       // Need to do full type resolution...
       return nullptr;
     } else if (!method_reference_class->IsInterface()) {
-      // It is not an interface.
-      mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
+      // It is not an interface. If the referring class is in the class hierarchy of the
+      // referenced class in the bytecode, we use its super class. Otherwise, we cannot
+      // resolve the method.
+      if (!method_reference_class->IsAssignableFrom(referring_class)) {
+        return nullptr;
+      }
+      mirror::Class* super_class = referring_class->GetSuperClass();
       if (resolved_method->GetMethodIndex() >= super_class->GetVTableLength()) {
         // The super class does not have the method.
         return nullptr;
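A sketch of how a caller might use the new kResolve flag: with kResolve = false the walk returns nullptr instead of resolving through the class loader, which suits callers that must not allocate or resolve. The call site below is illustrative:

ArtMethod* caller = GetResolvedMethod</*kResolve=*/ false>(outer_method,
                                                           inline_info,
                                                           encoding,
                                                           inlining_depth);
if (caller == nullptr) {
  // The dex cache held a runtime method and resolution was declined; calling
  // GetResolvedMethod<true>(...) would perform the full resolution instead.
}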
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 3dfad76..197caa1 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -272,16 +272,19 @@
   if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) {
     if (outer_method != nullptr) {
       const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc);
-      if (current_code->IsOptimized()) {
-        uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
-        CodeInfo code_info = current_code->GetOptimizedCodeInfo();
-        StackMapEncoding encoding = code_info.ExtractEncoding();
-        StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
-        DCHECK(stack_map.IsValid());
-        if (stack_map.HasInlineInfo(encoding)) {
-          InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-          caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
-        }
+      DCHECK(current_code != nullptr);
+      DCHECK(current_code->IsOptimized());
+      uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
+      CodeInfo code_info = current_code->GetOptimizedCodeInfo();
+      CodeInfoEncoding encoding = code_info.ExtractEncoding();
+      StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
+      DCHECK(stack_map.IsValid());
+      if (stack_map.HasInlineInfo(encoding.stack_map_encoding)) {
+        InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
+        caller = GetResolvedMethod(outer_method,
+                                   inline_info,
+                                   encoding.inline_info_encoding,
+                                   inline_info.GetDepth(encoding.inline_info_encoding) - 1);
       }
     }
     if (kIsDebugBuild && do_caller_check) {
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index 4e01d80..f3a0d2f 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -77,6 +77,10 @@
 extern "C" void art_quick_lock_object(art::mirror::Object*);
 extern "C" void art_quick_unlock_object(art::mirror::Object*);
 
+// Lock entrypoints that do not inline any behavior (e.g., thin-locks).
+extern "C" void art_quick_lock_object_no_inline(art::mirror::Object*);
+extern "C" void art_quick_unlock_object_no_inline(art::mirror::Object*);
+
 // Math entrypoints.
 extern "C" int64_t art_quick_d2l(double);
 extern "C" int64_t art_quick_f2l(float);
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
new file mode 100644
index 0000000..5dafa8b
--- /dev/null
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_DEFAULT_INIT_ENTRYPOINTS_H_
+#define ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_DEFAULT_INIT_ENTRYPOINTS_H_
+
+#include "base/logging.h"
+#include "entrypoints/jni/jni_entrypoints.h"
+#include "entrypoints/runtime_asm_entrypoints.h"
+#include "quick_alloc_entrypoints.h"
+#include "quick_default_externs.h"
+#include "quick_entrypoints.h"
+
+namespace art {
+
+static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
+  // JNI
+  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
+
+  // Alloc
+  ResetQuickAllocEntryPoints(qpoints);
+
+  // DexCache
+  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
+  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
+  qpoints->pInitializeType = art_quick_initialize_type;
+  qpoints->pResolveString = art_quick_resolve_string;
+
+  // Field
+  qpoints->pSet8Instance = art_quick_set8_instance;
+  qpoints->pSet8Static = art_quick_set8_static;
+  qpoints->pSet16Instance = art_quick_set16_instance;
+  qpoints->pSet16Static = art_quick_set16_static;
+  qpoints->pSet32Instance = art_quick_set32_instance;
+  qpoints->pSet32Static = art_quick_set32_static;
+  qpoints->pSet64Instance = art_quick_set64_instance;
+  qpoints->pSet64Static = art_quick_set64_static;
+  qpoints->pSetObjInstance = art_quick_set_obj_instance;
+  qpoints->pSetObjStatic = art_quick_set_obj_static;
+  qpoints->pGetByteInstance = art_quick_get_byte_instance;
+  qpoints->pGetBooleanInstance = art_quick_get_boolean_instance;
+  qpoints->pGetShortInstance = art_quick_get_short_instance;
+  qpoints->pGetCharInstance = art_quick_get_char_instance;
+  qpoints->pGet32Instance = art_quick_get32_instance;
+  qpoints->pGet64Instance = art_quick_get64_instance;
+  qpoints->pGetObjInstance = art_quick_get_obj_instance;
+  qpoints->pGetByteStatic = art_quick_get_byte_static;
+  qpoints->pGetBooleanStatic = art_quick_get_boolean_static;
+  qpoints->pGetShortStatic = art_quick_get_short_static;
+  qpoints->pGetCharStatic = art_quick_get_char_static;
+  qpoints->pGet32Static = art_quick_get32_static;
+  qpoints->pGet64Static = art_quick_get64_static;
+  qpoints->pGetObjStatic = art_quick_get_obj_static;
+
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
+  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
+
+  // JNI
+  qpoints->pJniMethodStart = JniMethodStart;
+  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
+  qpoints->pJniMethodEnd = JniMethodEnd;
+  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
+  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
+  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
+  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
+
+  // Locks
+  if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) {
+    qpoints->pLockObject = art_quick_lock_object_no_inline;
+    qpoints->pUnlockObject = art_quick_unlock_object_no_inline;
+  } else {
+    qpoints->pLockObject = art_quick_lock_object;
+    qpoints->pUnlockObject = art_quick_unlock_object;
+  }
+
+  // Invocation
+  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
+  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
+  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
+  qpoints->pInvokeDirectTrampolineWithAccessCheck =
+      art_quick_invoke_direct_trampoline_with_access_check;
+  qpoints->pInvokeInterfaceTrampolineWithAccessCheck =
+      art_quick_invoke_interface_trampoline_with_access_check;
+  qpoints->pInvokeStaticTrampolineWithAccessCheck =
+      art_quick_invoke_static_trampoline_with_access_check;
+  qpoints->pInvokeSuperTrampolineWithAccessCheck =
+      art_quick_invoke_super_trampoline_with_access_check;
+  qpoints->pInvokeVirtualTrampolineWithAccessCheck =
+      art_quick_invoke_virtual_trampoline_with_access_check;
+
+  // Thread
+  qpoints->pTestSuspend = art_quick_test_suspend;
+
+  // Throws
+  qpoints->pDeliverException = art_quick_deliver_exception;
+  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
+  qpoints->pThrowDivZero = art_quick_throw_div_zero;
+  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
+  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
+  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
+
+  // Deoptimize
+  qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_DEFAULT_INIT_ENTRYPOINTS_H_
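A plausible consumer of the new header, sketched: an architecture's initializer delegates the shared setup and then overrides only what differs. The override line is illustrative, not part of this change:

#include "entrypoints/quick/quick_default_init_entrypoints.h"

void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
  DefaultInitEntryPoints(jpoints, qpoints);
  // Architecture-specific entrypoints would be assigned here, e.g.
  // (hypothetical): qpoints->pStringCompareTo = art_quick_string_compareto;
}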
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 35f2102..e9cdbb7 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -327,14 +327,15 @@
 
     if (current_code->IsOptimized()) {
       CodeInfo code_info = current_code->GetOptimizedCodeInfo();
-      StackMapEncoding encoding = code_info.ExtractEncoding();
+      CodeInfoEncoding encoding = code_info.ExtractEncoding();
       StackMap stack_map = code_info.GetStackMapForNativePcOffset(outer_pc_offset, encoding);
       DCHECK(stack_map.IsValid());
-      if (stack_map.HasInlineInfo(encoding)) {
+      if (stack_map.HasInlineInfo(encoding.stack_map_encoding)) {
         InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-        return inline_info.GetDexPcAtDepth(inline_info.GetDepth() - 1);
+        return inline_info.GetDexPcAtDepth(encoding.inline_info_encoding,
+                                           inline_info.GetDepth(encoding.inline_info_encoding) - 1);
       } else {
-        return stack_map.GetDexPc(encoding);
+        return stack_map.GetDexPc(encoding.stack_map_encoding);
       }
     } else {
       return current_code->ToDexPc(*caller_sp, outer_pc);
@@ -1037,9 +1038,14 @@
       } else {
         DCHECK_EQ(invoke_type, kSuper);
         CHECK(caller != nullptr) << invoke_type;
+        StackHandleScope<2> hs(self);
+        Handle<mirror::DexCache> dex_cache(
+            hs.NewHandle(caller->GetDeclaringClass()->GetDexCache()));
+        Handle<mirror::ClassLoader> class_loader(
+            hs.NewHandle(caller->GetDeclaringClass()->GetClassLoader()));
         // TODO Maybe put this into a mirror::Class function.
         mirror::Class* ref_class = linker->ResolveReferencedClassOfMethod(
-            self, called_method.dex_method_index, caller);
+            called_method.dex_method_index, dex_cache, class_loader);
         if (ref_class->IsInterface()) {
           called = ref_class->FindVirtualMethodForInterfaceSuper(called, sizeof(void*));
         } else {
@@ -2166,9 +2172,14 @@
     uint32_t imt_index = interface_method->GetDexMethodIndex();
     ArtMethod* conflict_method = cls->GetEmbeddedImTableEntry(
         imt_index % mirror::Class::kImtSize, sizeof(void*));
-    DCHECK(conflict_method->IsRuntimeMethod()) << PrettyMethod(conflict_method);
-    ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
-    method = current_table->Lookup(interface_method);
+    if (LIKELY(conflict_method->IsRuntimeMethod())) {
+      ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
+      DCHECK(current_table != nullptr);
+      method = current_table->Lookup(interface_method, sizeof(void*));
+    } else {
+      // It seems we aren't really a conflict method!
+      method = cls->FindVirtualMethodForInterface(interface_method, sizeof(void*));
+    }
     if (method != nullptr) {
       return GetTwoWordSuccessValue(
           reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCode()),
@@ -2214,39 +2225,20 @@
   uint32_t imt_index = interface_method->GetDexMethodIndex();
   ArtMethod* conflict_method = cls->GetEmbeddedImTableEntry(
       imt_index % mirror::Class::kImtSize, sizeof(void*));
-  ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
-  Runtime* runtime = Runtime::Current();
-  LinearAlloc* linear_alloc = (cls->GetClassLoader() == nullptr)
-      ? runtime->GetLinearAlloc()
-      : cls->GetClassLoader()->GetAllocator();
-  bool is_new_entry = (conflict_method == runtime->GetImtConflictMethod());
-
-  // Create a new entry if the existing one is the shared conflict method.
-  ArtMethod* new_conflict_method = is_new_entry
-      ? runtime->CreateImtConflictMethod(linear_alloc)
-      : conflict_method;
-
-  // Allocate a new table. Note that we will leak this table at the next conflict,
-  // but that's a tradeoff compared to making the table fixed size.
-  void* data = linear_alloc->Alloc(
-      self, ImtConflictTable::ComputeSizeWithOneMoreEntry(current_table));
-  CHECK(data != nullptr) << "Out of memory";
-  ImtConflictTable* new_table = new (data) ImtConflictTable(
-      current_table, interface_method, method);
-
-  // Do a fence to ensure threads see the data in the table before it is assigned
-  // to the conlict method.
-  // Note that there is a race in the presence of multiple threads and we may leak
-  // memory from the LinearAlloc, but that's a tradeoff compared to using
-  // atomic operations.
-  QuasiAtomic::ThreadFenceRelease();
-  new_conflict_method->SetImtConflictTable(new_table);
-  if (is_new_entry) {
-    // Update the IMT if we create a new conflict method. No fence needed here, as the
-    // data is consistent.
-    cls->SetEmbeddedImTableEntry(imt_index % mirror::Class::kImtSize,
-                                 new_conflict_method,
-                                 sizeof(void*));
+  if (conflict_method->IsRuntimeMethod()) {
+    ArtMethod* new_conflict_method = Runtime::Current()->GetClassLinker()->AddMethodToConflictTable(
+        cls.Get(),
+        conflict_method,
+        interface_method,
+        method,
+        /*force_new_conflict_method*/false);
+    if (new_conflict_method != conflict_method) {
+      // Update the IMT if we create a new conflict method. No fence needed here, as the
+      // data is consistent.
+      cls->SetEmbeddedImTableEntry(imt_index % mirror::Class::kImtSize,
+                                   new_conflict_method,
+                                   sizeof(void*));
+    }
   }
 
   const void* code = method->GetEntryPointFromQuickCompiledCode();
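Assumed contract of ClassLinker::AddMethodToConflictTable() as used above: it either appends to the existing conflict method's table or allocates a replacement conflict method whose table includes the new (interface method, target) pair, and only a replacement requires rewriting the IMT slot. Condensed:

ArtMethod* updated = class_linker->AddMethodToConflictTable(
    cls.Get(), conflict_method, interface_method, method,
    /*force_new_conflict_method*/ false);
if (updated != conflict_method) {
  // A new conflict method was allocated; publish it in the IMT.
  cls->SetEmbeddedImTableEntry(imt_index % mirror::Class::kImtSize,
                               updated,
                               sizeof(void*));
}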
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 5c5abeb..9f073a6 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -147,6 +147,10 @@
 }
 
 bool FaultManager::HandleFaultByOtherHandlers(int sig, siginfo_t* info, void* context) {
+  if (other_handlers_.empty()) {
+    return false;
+  }
+
   Thread* self = Thread::Current();
 
   DCHECK(self != nullptr);
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index d16afd9..4e40aea 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -210,7 +210,11 @@
     if (mod_union_table_->ShouldAddReference(root->AsMirrorPtr())) {
       *has_target_reference_ = true;
       // TODO: Add MarkCompressedReference callback here.
-      root->Assign(visitor_->MarkObject(root->AsMirrorPtr()));
+      mirror::Object* old_ref = root->AsMirrorPtr();
+      mirror::Object* new_ref = visitor_->MarkObject(old_ref);
+      if (old_ref != new_ref) {
+        root->Assign(new_ref);
+      }
     }
   }
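The rewritten visitor stores back only when the collector actually moved the object; the presumed motivation (not stated in the change) is that an unconditional Assign() dirties the containing card or page even for no-op updates. The pattern in isolation:

mirror::Object* old_ref = root->AsMirrorPtr();
mirror::Object* new_ref = visitor_->MarkObject(old_ref);
if (old_ref != new_ref) {
  root->Assign(new_ref);  // write back only on a real move
}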
 
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index e3714bb..d9f1507 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -20,7 +20,7 @@
 #include "base/stl_util.h"
 #include "stack.h"
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "cutils/properties.h"
 #endif
 
@@ -38,7 +38,7 @@
 }
 
 void AllocRecordObjectMap::SetProperties() {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   // Check whether there's a system property overriding the max number of records.
   const char* propertyName = "dalvik.vm.allocTrackerMax";
   char allocMaxString[PROPERTY_VALUE_MAX];
@@ -88,7 +88,7 @@
       max_stack_depth_ = value;
     }
   }
-#endif
+#endif  // ART_TARGET_ANDROID
 }
 
 AllocRecordObjectMap::~AllocRecordObjectMap() {
@@ -102,15 +102,15 @@
   // Only visit the last recent_record_max_ number of allocation records in entries_ and mark the
   // klass_ fields as strong roots.
   for (auto it = entries_.rbegin(), end = entries_.rend(); it != end; ++it) {
-    AllocRecord* record = it->second;
+    AllocRecord& record = it->second;
     if (count > 0) {
-      buffered_visitor.VisitRootIfNonNull(record->GetClassGcRoot());
+      buffered_visitor.VisitRootIfNonNull(record.GetClassGcRoot());
       --count;
     }
     // Visit all of the stack frames to make sure no methods in the stack traces get unloaded by
     // class unloading.
-    for (size_t i = 0, depth = record->GetDepth(); i < depth; ++i) {
-      const AllocRecordStackTraceElement& element = record->StackElement(i);
+    for (size_t i = 0, depth = record.GetDepth(); i < depth; ++i) {
+      const AllocRecordStackTraceElement& element = record.StackElement(i);
       DCHECK(element.GetMethod() != nullptr);
       element.GetMethod()->VisitRoots(buffered_visitor, sizeof(void*));
     }
@@ -143,15 +143,14 @@
     ++count;
     // This does not need a read barrier because this is called by GC.
     mirror::Object* old_object = it->first.Read<kWithoutReadBarrier>();
-    AllocRecord* record = it->second;
+    AllocRecord& record = it->second;
     mirror::Object* new_object = old_object == nullptr ? nullptr : visitor->IsMarked(old_object);
     if (new_object == nullptr) {
       if (count > delete_bound) {
         it->first = GcRoot<mirror::Object>(nullptr);
-        SweepClassObject(record, visitor);
+        SweepClassObject(&record, visitor);
         ++it;
       } else {
-        delete record;
         it = entries_.erase(it);
         ++count_deleted;
       }
@@ -160,7 +159,7 @@
         it->first = GcRoot<mirror::Object>(new_object);
         ++count_moved;
       }
-      SweepClassObject(record, visitor);
+      SweepClassObject(&record, visitor);
       ++it;
     }
   }
@@ -184,34 +183,32 @@
   new_record_condition_.Broadcast(Thread::Current());
 }
 
-struct AllocRecordStackVisitor : public StackVisitor {
-  AllocRecordStackVisitor(Thread* thread, AllocRecordStackTrace* trace_in, size_t max)
+class AllocRecordStackVisitor : public StackVisitor {
+ public:
+  AllocRecordStackVisitor(Thread* thread, size_t max_depth, AllocRecordStackTrace* trace_out)
       SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
-        trace(trace_in),
-        max_depth(max) {}
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFramesNoResolve),
+        max_depth_(max_depth),
+        trace_(trace_out) {}
 
   // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
   // annotalysis.
   bool VisitFrame() OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
-    if (depth >= max_depth) {
+    if (trace_->GetDepth() >= max_depth_) {
       return false;
     }
     ArtMethod* m = GetMethod();
-    if (!m->IsRuntimeMethod()) {
-      trace->SetStackElementAt(depth, m, GetDexPc());
-      ++depth;
+    // m may be null if we have inlined methods of unresolved classes. b/27858645
+    if (m != nullptr && !m->IsRuntimeMethod()) {
+      m = m->GetInterfaceMethodIfProxy(sizeof(void*));
+      trace_->AddStackElement(AllocRecordStackTraceElement(m, GetDexPc()));
     }
     return true;
   }
 
-  ~AllocRecordStackVisitor() {
-    trace->SetDepth(depth);
-  }
-
-  AllocRecordStackTrace* trace;
-  size_t depth = 0u;
-  const size_t max_depth;
+ private:
+  const size_t max_depth_;
+  AllocRecordStackTrace* const trace_;
 };
 
 void AllocRecordObjectMap::SetAllocTrackingEnabled(bool enable) {
@@ -235,7 +232,6 @@
       if (self_name == "JDWP") {
         records->alloc_ddm_thread_id_ = self->GetTid();
       }
-      records->scratch_trace_.SetDepth(records->max_stack_depth_);
       size_t sz = sizeof(AllocRecordStackTraceElement) * records->max_stack_depth_ +
                   sizeof(AllocRecord) + sizeof(AllocRecordStackTrace);
       LOG(INFO) << "Enabling alloc tracker (" << records->alloc_record_max_ << " entries of "
@@ -265,27 +261,35 @@
   }
 }
 
-void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, mirror::Class* klass,
+void AllocRecordObjectMap::RecordAllocation(Thread* self,
+                                            mirror::Object** obj,
                                             size_t byte_count) {
+  // Get stack trace outside of lock in case there are allocations during the stack walk.
+  // b/27858645.
+  AllocRecordStackTrace trace;
+  AllocRecordStackVisitor visitor(self, max_stack_depth_, /*out*/ &trace);
+  {
+    StackHandleScope<1> hs(self);
+    auto obj_wrapper = hs.NewHandleWrapper(obj);
+    visitor.WalkStack();
+  }
+
   MutexLock mu(self, *Locks::alloc_tracker_lock_);
-  Heap* heap = Runtime::Current()->GetHeap();
+  Heap* const heap = Runtime::Current()->GetHeap();
   if (!heap->IsAllocTrackingEnabled()) {
     // In the process of shutting down recording, bail.
     return;
   }
 
-  AllocRecordObjectMap* records = heap->GetAllocationRecords();
-  DCHECK(records != nullptr);
-
-  // Do not record for DDM thread
-  if (records->alloc_ddm_thread_id_ == self->GetTid()) {
+  // Do not record for DDM thread.
+  if (alloc_ddm_thread_id_ == self->GetTid()) {
     return;
   }
 
   // Wait for GC's sweeping to complete and allow new records
-  while (UNLIKELY((!kUseReadBarrier && !records->allow_new_record_) ||
+  while (UNLIKELY((!kUseReadBarrier && !allow_new_record_) ||
                   (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
-    records->new_record_condition_.WaitHoldingLocks(self);
+    new_record_condition_.WaitHoldingLocks(self);
   }
 
   if (!heap->IsAllocTrackingEnabled()) {
@@ -294,28 +298,22 @@
     return;
   }
 
-  DCHECK_LE(records->Size(), records->alloc_record_max_);
+  DCHECK_LE(Size(), alloc_record_max_);
 
-  // Get stack trace.
-  // add scope to make "visitor" destroyed promptly, in order to set the scratch_trace_->depth_
-  {
-    AllocRecordStackVisitor visitor(self, &records->scratch_trace_, records->max_stack_depth_);
-    visitor.WalkStack();
-  }
-  records->scratch_trace_.SetTid(self->GetTid());
-  AllocRecordStackTrace* trace = new AllocRecordStackTrace(records->scratch_trace_);
+  // Stamp the trace with the allocating thread's id before storing it.
+  trace.SetTid(self->GetTid());
 
-  // Fill in the basics.
-  AllocRecord* record = new AllocRecord(byte_count, klass, trace);
-
-  records->Put(obj, record);
-  DCHECK_LE(records->Size(), records->alloc_record_max_);
+  // Add the record.
+  Put(*obj, AllocRecord(byte_count, (*obj)->GetClass(), std::move(trace)));
+  DCHECK_LE(Size(), alloc_record_max_);
 }
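
RecordAllocation() now follows a capture-then-validate discipline: the expensive, allocation-prone work (the stack walk) happens before the tracker lock is taken, and the enabled flag is re-checked under the lock because tracking may have been disabled in the meantime. A minimal sketch of that discipline, assuming nothing beyond the standard library:

    #include <mutex>
    #include <string>
    #include <vector>

    class TrackerSketch {
     public:
      void Record(const std::string& input) {
        std::string payload = Expensive(input);  // Lock-free: stands in for the stack walk.
        std::lock_guard<std::mutex> guard(lock_);
        if (!enabled_) {
          return;  // Disabled concurrently; drop the record.
        }
        records_.push_back(std::move(payload));
      }

     private:
      static std::string Expensive(const std::string& s) { return s + s; }

      std::mutex lock_;
      bool enabled_ = true;
      std::vector<std::string> records_;
    };
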
 
 void AllocRecordObjectMap::Clear() {
-  STLDeleteValues(&entries_);
   entries_.clear();
 }
 
+AllocRecordObjectMap::AllocRecordObjectMap()
+    : new_record_condition_("New allocation record condition", *Locks::alloc_tracker_lock_) {}
+
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
index 18cce4d..a2d86cc 100644
--- a/runtime/gc/allocation_record.h
+++ b/runtime/gc/allocation_record.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_ALLOCATION_RECORD_H_
 
 #include <list>
+#include <memory>
 
 #include "base/mutex.h"
 #include "object_callbacks.h"
@@ -37,10 +38,13 @@
 
 class AllocRecordStackTraceElement {
  public:
-  AllocRecordStackTraceElement() : method_(nullptr), dex_pc_(0) {}
-
   int32_t ComputeLineNumber() const SHARED_REQUIRES(Locks::mutator_lock_);
 
+  AllocRecordStackTraceElement() = default;
+  AllocRecordStackTraceElement(ArtMethod* method, uint32_t dex_pc)
+      : method_(method),
+        dex_pc_(dex_pc) {}
+
   ArtMethod* GetMethod() const {
     return method_;
   }
@@ -58,32 +62,27 @@
   }
 
   bool operator==(const AllocRecordStackTraceElement& other) const {
-    if (this == &other) return true;
     return method_ == other.method_ && dex_pc_ == other.dex_pc_;
   }
 
  private:
-  ArtMethod* method_;
-  uint32_t dex_pc_;
+  ArtMethod* method_ = nullptr;
+  uint32_t dex_pc_ = 0;
 };
 
 class AllocRecordStackTrace {
  public:
   static constexpr size_t kHashMultiplier = 17;
 
-  explicit AllocRecordStackTrace(size_t max_depth)
-      : tid_(0), depth_(0), stack_(new AllocRecordStackTraceElement[max_depth]) {}
+  AllocRecordStackTrace() = default;
+
+  AllocRecordStackTrace(AllocRecordStackTrace&& r)
+      : tid_(r.tid_),
+        stack_(std::move(r.stack_)) {}
 
   AllocRecordStackTrace(const AllocRecordStackTrace& r)
-      : tid_(r.tid_), depth_(r.depth_), stack_(new AllocRecordStackTraceElement[r.depth_]) {
-    for (size_t i = 0; i < depth_; ++i) {
-      stack_[i] = r.stack_[i];
-    }
-  }
-
-  ~AllocRecordStackTrace() {
-    delete[] stack_;
-  }
+      : tid_(r.tid_),
+        stack_(r.stack_) {}
 
   pid_t GetTid() const {
     return tid_;
@@ -94,37 +93,32 @@
   }
 
   size_t GetDepth() const {
-    return depth_;
-  }
-
-  void SetDepth(size_t depth) {
-    depth_ = depth;
+    return stack_.size();
   }
 
   const AllocRecordStackTraceElement& GetStackElement(size_t index) const {
-    DCHECK_LT(index, depth_);
+    DCHECK_LT(index, GetDepth());
     return stack_[index];
   }
 
+  void AddStackElement(const AllocRecordStackTraceElement& element) {
+    stack_.push_back(element);
+  }
+
   void SetStackElementAt(size_t index, ArtMethod* m, uint32_t dex_pc) {
+    DCHECK_LT(index, stack_.size());
     stack_[index].SetMethod(m);
     stack_[index].SetDexPc(dex_pc);
   }
 
   bool operator==(const AllocRecordStackTrace& other) const {
     if (this == &other) return true;
-    if (tid_ != other.tid_) return false;
-    if (depth_ != other.depth_) return false;
-    for (size_t i = 0; i < depth_; ++i) {
-      if (!(stack_[i] == other.stack_[i])) return false;
-    }
-    return true;
+    return tid_ == other.tid_ && stack_ == other.stack_;
   }
 
  private:
-  pid_t tid_;
-  size_t depth_;
-  AllocRecordStackTraceElement* const stack_;
+  pid_t tid_ = 0;
+  std::vector<AllocRecordStackTraceElement> stack_;
 };
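
With the trace backed by std::vector, the manual copy loop and delete[] disappear and a record can steal the trace's buffer instead of reallocating it. A sketch of why the move constructor matters here; Trace and Record are stand-ins for AllocRecordStackTrace and AllocRecord:

    #include <utility>
    #include <vector>

    struct Trace {
      int tid = 0;
      std::vector<int> stack;  // Stand-in for the element vector.
    };

    struct Record {
      explicit Record(Trace&& t) : trace(std::move(t)) {}  // Steals the buffer.
      Trace trace;
    };

    // Usage:
    //   Trace t;
    //   t.stack.assign(64, 0);
    //   Record r(std::move(t));  // O(1) in stack depth; no per-element copy.
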
 
 struct HashAllocRecordTypes {
@@ -161,19 +155,15 @@
 class AllocRecord {
  public:
   // All instances of AllocRecord should be managed by an instance of AllocRecordObjectMap.
-  AllocRecord(size_t count, mirror::Class* klass, AllocRecordStackTrace* trace)
-      : byte_count_(count), klass_(klass), trace_(trace) {}
-
-  ~AllocRecord() {
-    delete trace_;
-  }
+  AllocRecord(size_t count, mirror::Class* klass, AllocRecordStackTrace&& trace)
+      : byte_count_(count), klass_(klass), trace_(std::move(trace)) {}
 
   size_t GetDepth() const {
-    return trace_->GetDepth();
+    return trace_.GetDepth();
   }
 
   const AllocRecordStackTrace* GetStackTrace() const {
-    return trace_;
+    return &trace_;
   }
 
   size_t ByteCount() const {
@@ -181,7 +171,7 @@
   }
 
   pid_t GetTid() const {
-    return trace_->GetTid();
+    return trace_.GetTid();
   }
 
   mirror::Class* GetClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -196,16 +186,15 @@
   }
 
   const AllocRecordStackTraceElement& StackElement(size_t index) const {
-    return trace_->GetStackElement(index);
+    return trace_.GetStackElement(index);
   }
 
  private:
   const size_t byte_count_;
   // The klass_ could be a strong or weak root for GC
   GcRoot<mirror::Class> klass_;
-  // TODO: Currently trace_ is like a std::unique_ptr,
-  // but in future with deduplication it could be a std::shared_ptr.
-  const AllocRecordStackTrace* const trace_;
+  // TODO: Share between alloc records with identical stack traces.
+  AllocRecordStackTrace trace_;
 };
 
 class AllocRecordObjectMap {
@@ -215,36 +204,29 @@
   // weak roots). The last recent_record_max_ number of pairs in the list are always kept for DDMS's
   // recent allocation tracking, but GcRoot<mirror::Object> pointers in these pairs can become null.
   // Both types of pointers need read barriers, do not directly access them.
-  typedef std::list<std::pair<GcRoot<mirror::Object>, AllocRecord*>> EntryList;
+  using EntryPair = std::pair<GcRoot<mirror::Object>, AllocRecord>;
+  typedef std::list<EntryPair> EntryList;
 
-  // "static" because it is part of double-checked locking. It needs to check a bool first,
-  // in order to make sure the AllocRecordObjectMap object is not null.
-  static void RecordAllocation(Thread* self, mirror::Object* obj, mirror::Class* klass,
-                               size_t byte_count)
+  // Caller needs to check that it is enabled before calling since we read the stack trace before
+  // checking the enabled boolean.
+  void RecordAllocation(Thread* self,
+                        mirror::Object** obj,
+                        size_t byte_count)
       REQUIRES(!Locks::alloc_tracker_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static void SetAllocTrackingEnabled(bool enabled) REQUIRES(!Locks::alloc_tracker_lock_);
 
-  AllocRecordObjectMap() REQUIRES(Locks::alloc_tracker_lock_)
-      : alloc_record_max_(kDefaultNumAllocRecords),
-        recent_record_max_(kDefaultNumRecentRecords),
-        max_stack_depth_(kDefaultAllocStackDepth),
-        scratch_trace_(kMaxSupportedStackDepth),
-        alloc_ddm_thread_id_(0),
-        allow_new_record_(true),
-        new_record_condition_("New allocation record condition", *Locks::alloc_tracker_lock_) {}
-
+  AllocRecordObjectMap() REQUIRES(Locks::alloc_tracker_lock_);
   ~AllocRecordObjectMap();
 
-  void Put(mirror::Object* obj, AllocRecord* record)
+  void Put(mirror::Object* obj, AllocRecord&& record)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(Locks::alloc_tracker_lock_) {
     if (entries_.size() == alloc_record_max_) {
-      delete entries_.front().second;
       entries_.pop_front();
     }
-    entries_.emplace_back(GcRoot<mirror::Object>(obj), record);
+    entries_.push_back(EntryPair(GcRoot<mirror::Object>(obj), std::move(record)));
   }
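
Put() keeps the map bounded: once the list holds alloc_record_max_ entries, the oldest one is dropped before the new one is appended. The same FIFO eviction policy in isolation:

    #include <cstddef>
    #include <list>
    #include <utility>

    // Minimal bounded FIFO matching Put(): evict the oldest entry once full.
    template <typename T>
    void PutBounded(std::list<T>& entries, size_t max, T&& value) {
      if (entries.size() == max) {
        entries.pop_front();  // Oldest record falls off the front.
      }
      entries.push_back(std::move(value));  // Newest record goes to the back.
    }
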
 
   size_t Size() const SHARED_REQUIRES(Locks::alloc_tracker_lock_) {
@@ -313,12 +295,11 @@
   static constexpr size_t kDefaultNumRecentRecords = 64 * 1024 - 1;
   static constexpr size_t kDefaultAllocStackDepth = 16;
   static constexpr size_t kMaxSupportedStackDepth = 128;
-  size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
-  size_t recent_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
-  size_t max_stack_depth_ GUARDED_BY(Locks::alloc_tracker_lock_);
-  AllocRecordStackTrace scratch_trace_ GUARDED_BY(Locks::alloc_tracker_lock_);
-  pid_t alloc_ddm_thread_id_ GUARDED_BY(Locks::alloc_tracker_lock_);
-  bool allow_new_record_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_) = kDefaultNumAllocRecords;
+  size_t recent_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_) = kDefaultNumRecentRecords;
+  size_t max_stack_depth_ = kDefaultAllocStackDepth;
+  pid_t alloc_ddm_thread_id_ GUARDED_BY(Locks::alloc_tracker_lock_) = 0;
+  bool allow_new_record_ GUARDED_BY(Locks::alloc_tracker_lock_) = true;
   ConditionVariable new_record_condition_ GUARDED_BY(Locks::alloc_tracker_lock_);
   // see the comment in typedef of EntryList
   EntryList entries_ GUARDED_BY(Locks::alloc_tracker_lock_);
diff --git a/runtime/gc/allocator/dlmalloc.h b/runtime/gc/allocator/dlmalloc.h
index 50e2622..c07da5d 100644
--- a/runtime/gc/allocator/dlmalloc.h
+++ b/runtime/gc/allocator/dlmalloc.h
@@ -35,7 +35,7 @@
 #include "../../external/dlmalloc/malloc.h"
 #pragma GCC diagnostic pop
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 // Define dlmalloc routines from bionic that cannot be included directly because of redefining
 // symbols from the include above.
 extern "C" void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), void* arg);
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index bd84d0d..375d869 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1021,7 +1021,7 @@
 
   // First mark slots to free in the bulk free bit map without locking the
   // size bracket locks. On host, unordered_set is faster than vector + flag.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   std::vector<Run*> runs;
 #else
   std::unordered_set<Run*, hash_run, eq_run> runs;
@@ -1088,7 +1088,7 @@
     DCHECK_EQ(run->magic_num_, kMagicNum);
     // Set the bit in the bulk free bit map.
     freed_bytes += run->AddToBulkFreeList(ptr);
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     if (!run->to_be_bulk_freed_) {
       run->to_be_bulk_freed_ = true;
       runs.push_back(run);
@@ -1103,7 +1103,7 @@
   // union the bulk free bit map into the thread-local free bit map
   // (for thread-local runs.)
   for (Run* run : runs) {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     DCHECK(run->to_be_bulk_freed_);
     run->to_be_bulk_freed_ = false;
 #endif
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index b61bef7..c19107a 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -231,7 +231,7 @@
  protected:
   // Returns object if the object is marked in the heap bitmap, otherwise null.
   virtual mirror::Object* IsMarked(mirror::Object* object) OVERRIDE
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_);
+      SHARED_REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   void MarkObjectNonNull(mirror::Object* obj,
                          mirror::Object* holder = nullptr,
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index ae41226..4ffc8af 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -44,6 +44,8 @@
   kCollectorTypeInstrumentation,
   // Fake collector for adding or removing application image spaces.
   kCollectorTypeAddRemoveAppImageSpace,
+  // Fake collector used to implement exclusion between GC and debugger.
+  kCollectorTypeDebugger,
   // A homogeneous space compaction collector used in background transition
   // when both foreground and background collector are CMS.
   kCollectorTypeHomogeneousSpaceCompact,
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index 679432b..18e5703 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -35,6 +35,7 @@
     case kGcCauseTrim: return "HeapTrim";
     case kGcCauseInstrumentation: return "Instrumentation";
     case kGcCauseAddRemoveAppImageSpace: return "AddRemoveAppImageSpace";
+    case kGcCauseDebugger: return "Debugger";
     default:
       LOG(FATAL) << "Unreachable";
       UNREACHABLE();
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index c6b505c..ad67eb7 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -43,6 +43,8 @@
   kGcCauseInstrumentation,
   // Not a real GC cause, used to add or remove app image spaces.
   kGcCauseAddRemoveAppImageSpace,
+  // Not a real GC cause, used to implement exclusion between GC and debugger.
+  kGcCauseDebugger,
   // GC triggered for background transition when both foreground and background collector are CMS.
   kGcCauseHomogeneousSpaceCompact,
 };
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 59fd4a6..6aed61a 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -176,8 +176,10 @@
   }
   if (kInstrumented) {
     if (IsAllocTrackingEnabled()) {
-      // Use obj->GetClass() instead of klass, because PushOnAllocationStack() could move klass
-      AllocRecordObjectMap::RecordAllocation(self, obj, obj->GetClass(), bytes_allocated);
+      // allocation_records_ is not null since it never becomes null after allocation tracking is
+      // enabled.
+      DCHECK(allocation_records_ != nullptr);
+      allocation_records_->RecordAllocation(self, &obj, bytes_allocated);
     }
   } else {
     DCHECK(!IsAllocTrackingEnabled());
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 01db90a..fa540c0 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -119,6 +119,8 @@
 // Dump the rosalloc stats on SIGQUIT.
 static constexpr bool kDumpRosAllocStatsOnSigQuit = false;
 
+static constexpr size_t kNativeAllocationHistogramBuckets = 16;
+
 static inline bool CareAboutPauseTimes() {
   return Runtime::Current()->InJankPerceptibleProcessState();
 }
@@ -186,6 +188,11 @@
       total_objects_freed_ever_(0),
       num_bytes_allocated_(0),
       native_bytes_allocated_(0),
+      native_histogram_lock_("Native allocation lock"),
+      native_allocation_histogram_("Native allocation sizes",
+                                   1U,
+                                   kNativeAllocationHistogramBuckets),
+      native_free_histogram_("Native free sizes", 1U, kNativeAllocationHistogramBuckets),
       num_bytes_freed_revoke_(0),
       verify_missing_card_marks_(false),
       verify_system_weaks_(false),
@@ -1185,6 +1192,20 @@
     rosalloc_space_->DumpStats(os);
   }
 
+  {
+    MutexLock mu(Thread::Current(), native_histogram_lock_);
+    if (native_allocation_histogram_.SampleSize() > 0u) {
+      os << "Histogram of native allocation ";
+      native_allocation_histogram_.DumpBins(os);
+      os << " bucket size " << native_allocation_histogram_.BucketWidth() << "\n";
+    }
+    if (native_free_histogram_.SampleSize() > 0u) {
+      os << "Histogram of native free ";
+      native_free_histogram_.DumpBins(os);
+      os << " bucket size " << native_free_histogram_.BucketWidth() << "\n";
+    }
+  }
+
   BaseMutex::DumpAll(os);
 }
 
@@ -1304,6 +1325,13 @@
 }
 
 void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) {
+  // If we're in a stack overflow, do not create a new exception. Doing so would require running
+  // the exception's constructor, which would itself run into the same stack overflow.
+  if (self->IsHandlingStackOverflow()) {
+    self->SetException(Runtime::Current()->GetPreAllocatedOutOfMemoryError());
+    return;
+  }
+
   std::ostringstream oss;
   size_t total_bytes_free = GetFreeMemory();
   oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free
@@ -2680,8 +2708,8 @@
     concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
   }
 
-  if ((gc_type == collector::kGcTypeFull) && runtime->UseJit()) {
-    // It's time to clear all inline caches, in case some classes can be unloaded.
+  // It's time to clear all inline caches, in case some classes can be unloaded.
+  if ((gc_type == collector::kGcTypeFull) && (runtime->GetJit() != nullptr)) {
     runtime->GetJit()->GetCodeCache()->ClearGcRootsInInlineCaches(self);
   }
 
@@ -3841,6 +3869,10 @@
 
 void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) {
   Thread* self = ThreadForEnv(env);
+  {
+    MutexLock mu(self, native_histogram_lock_);
+    native_allocation_histogram_.AddValue(bytes);
+  }
   if (native_need_to_run_finalization_) {
     RunFinalization(env, kNativeAllocationFinalizeTimeout);
     UpdateMaxNativeFootprint();
@@ -3885,6 +3917,10 @@
 
 void Heap::RegisterNativeFree(JNIEnv* env, size_t bytes) {
   size_t expected_size;
+  {
+    MutexLock mu(Thread::Current(), native_histogram_lock_);
+    native_free_histogram_.AddValue(bytes);
+  }
   do {
     expected_size = native_bytes_allocated_.LoadRelaxed();
     if (UNLIKELY(bytes > expected_size)) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 2925591..2a1a4a1 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -241,9 +241,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void RegisterNativeAllocation(JNIEnv* env, size_t bytes)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !native_histogram_lock_);
   void RegisterNativeFree(JNIEnv* env, size_t bytes)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !native_histogram_lock_);
 
   // Change the allocator, updates entrypoints.
   void ChangeAllocator(AllocatorType allocator)
@@ -532,7 +532,7 @@
   space::Space* FindSpaceFromObject(const mirror::Object*, bool fail_ok) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void DumpForSigQuit(std::ostream& os) REQUIRES(!*gc_complete_lock_);
+  void DumpForSigQuit(std::ostream& os) REQUIRES(!*gc_complete_lock_, !native_histogram_lock_);
 
   // Do a pending collector transition.
   void DoPendingCollectorTransition() REQUIRES(!*gc_complete_lock_);
@@ -654,7 +654,8 @@
   std::string SafePrettyTypeOf(mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
 
   // GC performance measuring
-  void DumpGcPerformanceInfo(std::ostream& os) REQUIRES(!*gc_complete_lock_);
+  void DumpGcPerformanceInfo(std::ostream& os)
+      REQUIRES(!*gc_complete_lock_, !native_histogram_lock_);
   void ResetGcPerformanceInfo() REQUIRES(!*gc_complete_lock_);
 
   // Thread pool.
@@ -1156,6 +1157,11 @@
   // Bytes which are allocated and managed by native code but still need to be accounted for.
   Atomic<size_t> native_bytes_allocated_;
 
+  // Native allocation stats.
+  Mutex native_histogram_lock_;
+  Histogram<uint64_t> native_allocation_histogram_;
+  Histogram<uint64_t> native_free_histogram_;
+
   // Number of bytes freed by thread local buffer revokes. This will
   // cancel out the ahead-of-time bulk counting of bytes allocated in
   // rosalloc thread-local buffers.  It is temporarily accumulated
@@ -1326,8 +1332,7 @@
 
   // Allocation tracking support
   Atomic<bool> alloc_tracking_enabled_;
-  std::unique_ptr<AllocRecordObjectMap> allocation_records_
-      GUARDED_BY(Locks::alloc_tracker_lock_);
+  std::unique_ptr<AllocRecordObjectMap> allocation_records_;
 
   // GC stress related data structures.
   Mutex* backtrace_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 22bf5f9..78c570f 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -407,7 +407,7 @@
                                     &is_global_cache);
   }
 
-  if (Runtime::Current()->IsZygote() && !secondary_image) {
+  if (is_zygote && !secondary_image) {
     MarkZygoteStart(image_isa, Runtime::Current()->GetZygoteMaxFailedBoots());
   }
 
@@ -444,7 +444,7 @@
             // Whether we can write to the cache.
             success = false;
           } else if (secondary_image) {
-            if (Runtime::Current()->IsZygote()) {
+            if (is_zygote) {
               // Secondary image is out of date. Clear cache and exit to let it retry from scratch.
               LOG(ERROR) << "Cannot patch secondary image '" << image_location
                          << "', clearing dalvik_cache and restarting zygote.";
@@ -503,7 +503,16 @@
       // descriptor (and the associated exclusive lock) to be released when
       // we leave Create.
       ScopedFlock image_lock;
-      image_lock.Init(image_filename->c_str(), error_msg);
+      // Should this be an RDWR lock? This is only a defensive measure, as at
+      // this point the image should exist.
+      // However, only the zygote can write into the global dalvik-cache, so
+      // restrict the RW lock to zygote processes, or to any process that isn't
+      // using the global /data/dalvik-cache (such a process is assumed to be
+      // allowed to write to its own cache location).
+      const bool rw_lock = is_zygote || !is_global_cache;
+      image_lock.Init(image_filename->c_str(),
+                      rw_lock ? (O_CREAT | O_RDWR) : O_RDONLY /* flags */,
+                      true /* block */,
+                      error_msg);
       VLOG(startup) << "Using image file " << image_filename->c_str() << " for image location "
                     << image_location;
       // If we are in /system we can assume the image is good. We can also
@@ -694,6 +703,11 @@
     return src;
   }
 
+  // Must be called on pointers that already have been relocated to the destination relocation.
+  ALWAYS_INLINE bool IsInAppImage(mirror::Object* object) const {
+    return app_image_.InDest(reinterpret_cast<uintptr_t>(object));
+  }
+
  protected:
   // Source section.
   const RelocationRange boot_image_;
@@ -708,36 +722,12 @@
   template<typename... Args>
   explicit FixupObjectAdapter(Args... args) : FixupVisitor(args...) {}
 
-  // Must be called on pointers that already have been relocated to the destination relocation.
-  ALWAYS_INLINE bool IsInAppImage(mirror::Object* object) const {
-    return app_image_.InDest(reinterpret_cast<uintptr_t>(object));
-  }
-
   template <typename T>
   T* operator()(T* obj) const {
     return ForwardObject(obj);
   }
 };
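
All of the Forward* helpers reduce to the same rule: if an address lies in a known source range, shift it by that range's delta into the mapped destination; otherwise leave it alone. A standalone sketch of that rule (the RelocationRange fields below are assumptions modeled on the surrounding code; the unsigned-subtraction containment test mirrors ImageSection::Contains):

    #include <cstdint>

    // Assumed shape: [source, source + length) maps to [dest, dest + length).
    struct RelocationRange {
      uintptr_t source;
      uintptr_t dest;
      uintptr_t length;

      // Unsigned wrap makes this a single-comparison range check.
      bool InSource(uintptr_t addr) const { return addr - source < length; }
      intptr_t Delta() const { return static_cast<intptr_t>(dest - source); }
    };

    // Forward an address through whichever range contains it; identity otherwise.
    inline uintptr_t Forward(uintptr_t addr,
                             const RelocationRange& image,
                             const RelocationRange& oat) {
      if (image.InSource(addr)) {
        return addr + image.Delta();
      }
      if (oat.InSource(addr)) {
        return addr + oat.Delta();
      }
      return addr;  // E.g. boot image pointers, which are already in place.
    }
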
 
-class FixupClassVisitor : public FixupVisitor {
- public:
-  template<typename... Args>
-  explicit FixupClassVisitor(Args... args) : FixupVisitor(args...) {}
-
-  // The image space is contained so the GC doesn't need to know about it. Avoid requiring mutator
-  // lock to prevent possible pauses.
-  ALWAYS_INLINE void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
-    mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
-    DCHECK(klass != nullptr) << "Null class in image";
-    // No AsClass since our fields aren't quite fixed up yet.
-    mirror::Class* new_klass = down_cast<mirror::Class*>(ForwardObject(klass));
-    // Keep clean if possible.
-    if (klass != new_klass) {
-      obj->SetClass<kVerifyNone>(new_klass);
-    }
-  }
-};
-
 class FixupRootVisitor : public FixupVisitor {
  public:
   template<typename... Args>
@@ -763,12 +753,12 @@
 class FixupObjectVisitor : public FixupVisitor {
  public:
   template<typename... Args>
-  explicit FixupObjectVisitor(gc::accounting::ContinuousSpaceBitmap* pointer_array_visited,
+  explicit FixupObjectVisitor(gc::accounting::ContinuousSpaceBitmap* visited,
                               const size_t pointer_size,
                               Args... args)
       : FixupVisitor(args...),
         pointer_size_(pointer_size),
-        pointer_array_visited_(pointer_array_visited) {}
+        visited_(visited) {}
 
   // Fix up separately since we also need to fix up method entrypoints.
   ALWAYS_INLINE void VisitRootIfNonNull(
@@ -796,13 +786,20 @@
   // Visit a pointer array and forward corresponding native data. Ignores pointer arrays in the
   // boot image. Uses the bitmap to ensure the same array is not visited multiple times.
   template <typename Visitor>
-  void VisitPointerArray(mirror::PointerArray* array, const Visitor& visitor) const
+  void UpdatePointerArrayContents(mirror::PointerArray* array, const Visitor& visitor) const
       NO_THREAD_SAFETY_ANALYSIS {
-    if (array != nullptr &&
-        visitor.IsInAppImage(array) &&
-        !pointer_array_visited_->Test(array)) {
+    DCHECK(array != nullptr);
+    DCHECK(visitor.IsInAppImage(array));
+    // The bit for the array contents is different from the bit for the array itself, since we
+    // may have already visited the array as a long / int array while walking the bitmap, without
+    // knowing it was a pointer array.
+    static_assert(kObjectAlignment == 8u, "array bit may be in another object");
+    mirror::Object* const contents_bit = reinterpret_cast<mirror::Object*>(
+        reinterpret_cast<uintptr_t>(array) + kObjectAlignment);
+    // If the bit is not set then the contents have not yet been updated.
+    if (!visited_->Test(contents_bit)) {
       array->Fixup<kVerifyNone, kWithoutReadBarrier>(array, pointer_size_, visitor);
-      pointer_array_visited_->Set(array);
+      visited_->Set(contents_bit);
     }
   }
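
The contents bit works because every pointer array is larger than one alignment unit, so the bitmap slot at array + kObjectAlignment is never claimed by another object and can serve as a second mark: "contents updated" tracked independently of "object visited". A sketch of the two-bits-per-array idea, with raw addresses standing in for mirror::Object*:

    #include <cstdint>
    #include <unordered_set>

    constexpr uintptr_t kObjectAlignmentSketch = 8;

    struct MarkSet {
      std::unordered_set<uintptr_t> bits;
      bool Test(uintptr_t addr) const { return bits.count(addr) != 0; }
      void Set(uintptr_t addr) { bits.insert(addr); }
    };

    // Returns true exactly once per array; the caller then updates the contents.
    inline bool TryMarkContents(MarkSet& visited, uintptr_t array) {
      const uintptr_t contents_bit = array + kObjectAlignmentSketch;
      if (visited.Test(contents_bit)) {
        return false;  // Contents already fixed up, possibly via another class.
      }
      visited.Set(contents_bit);
      return true;
    }
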
 
@@ -815,26 +812,61 @@
         ForwardObject(obj));
   }
 
-  ALWAYS_INLINE void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+  void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+    if (visited_->Test(obj)) {
+      // Already visited.
+      return;
+    }
+    visited_->Set(obj);
+
+    // Handle class specially first since we need it to be updated to properly visit the rest of
+    // the instance fields.
+    {
+      mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
+      DCHECK(klass != nullptr) << "Null class in image";
+      // No AsClass since our fields aren't quite fixed up yet.
+      mirror::Class* new_klass = down_cast<mirror::Class*>(ForwardObject(klass));
+      if (klass != new_klass) {
+        obj->SetClass<kVerifyNone>(new_klass);
+      }
+      if (new_klass != klass && IsInAppImage(new_klass)) {
+        // Make sure the klass contents are fixed up since we depend on it to walk the fields.
+        operator()(new_klass);
+      }
+    }
+
     obj->VisitReferences</*visit native roots*/false, kVerifyNone, kWithoutReadBarrier>(
         *this,
         *this);
+    // Note that this code relies on there being no circular dependencies.
     // We want to use our own class loader and not the one in the image.
     if (obj->IsClass<kVerifyNone, kWithoutReadBarrier>()) {
-      mirror::Class* klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
+      mirror::Class* as_klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
       FixupObjectAdapter visitor(boot_image_, boot_oat_, app_image_, app_oat_);
-      klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(klass, pointer_size_, visitor);
+      as_klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(as_klass,
+                                                                      pointer_size_,
+                                                                      visitor);
       // Deal with the pointer arrays. Use the helper function since multiple classes can reference
       // the same arrays.
-      VisitPointerArray(klass->GetVTable<kVerifyNone, kWithoutReadBarrier>(), visitor);
-      mirror::IfTable* iftable = klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
-      if (iftable != nullptr) {
+      mirror::PointerArray* const vtable = as_klass->GetVTable<kVerifyNone, kWithoutReadBarrier>();
+      if (vtable != nullptr && IsInAppImage(vtable)) {
+        operator()(vtable);
+        UpdatePointerArrayContents(vtable, visitor);
+      }
+      mirror::IfTable* iftable = as_klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
+      // Ensure iftable arrays are fixed up since we need GetMethodArray to return the valid
+      // contents.
+      if (iftable != nullptr && IsInAppImage(iftable)) {
+        operator()(iftable);
         for (int32_t i = 0, count = iftable->Count(); i < count; ++i) {
           if (iftable->GetMethodArrayCount<kVerifyNone, kWithoutReadBarrier>(i) > 0) {
             mirror::PointerArray* methods =
                 iftable->GetMethodArray<kVerifyNone, kWithoutReadBarrier>(i);
-            DCHECK(methods != nullptr);
-            VisitPointerArray(methods, visitor);
+            if (visitor.IsInAppImage(methods)) {
+              DCHECK(methods != nullptr);
+              operator()(methods);
+              UpdatePointerArrayContents(methods, visitor);
+            }
           }
         }
       }
@@ -843,12 +875,12 @@
 
  private:
   const size_t pointer_size_;
-  gc::accounting::ContinuousSpaceBitmap* const pointer_array_visited_;
+  gc::accounting::ContinuousSpaceBitmap* const visited_;
 };
 
 class ForwardObjectAdapter {
  public:
-  ALWAYS_INLINE ForwardObjectAdapter(const FixupVisitor* visitor) : visitor_(visitor) {}
+  ALWAYS_INLINE explicit ForwardObjectAdapter(const FixupVisitor* visitor) : visitor_(visitor) {}
 
   template <typename T>
   ALWAYS_INLINE T* operator()(T* src) const {
@@ -861,7 +893,7 @@
 
 class ForwardCodeAdapter {
  public:
-  ALWAYS_INLINE ForwardCodeAdapter(const FixupVisitor* visitor)
+  ALWAYS_INLINE explicit ForwardCodeAdapter(const FixupVisitor* visitor)
       : visitor_(visitor) {}
 
   template <typename T>
@@ -882,10 +914,26 @@
         pointer_size_(pointer_size) {}
 
   virtual void Visit(ArtMethod* method) NO_THREAD_SAFETY_ANALYSIS {
-    if (fixup_heap_objects_) {
-      method->UpdateObjectsForImageRelocation(ForwardObjectAdapter(this), pointer_size_);
+    // TODO: Separate visitor for runtime vs normal methods.
+    if (UNLIKELY(method->IsRuntimeMethod())) {
+      ImtConflictTable* table = method->GetImtConflictTable(pointer_size_);
+      if (table != nullptr) {
+        ImtConflictTable* new_table = ForwardObject(table);
+        if (table != new_table) {
+          method->SetImtConflictTable(new_table, pointer_size_);
+        }
+      }
+      const void* old_code = method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_);
+      const void* new_code = ForwardCode(old_code);
+      if (old_code != new_code) {
+        method->SetEntryPointFromQuickCompiledCodePtrSize(new_code, pointer_size_);
+      }
+    } else {
+      if (fixup_heap_objects_) {
+        method->UpdateObjectsForImageRelocation(ForwardObjectAdapter(this), pointer_size_);
+      }
+      method->UpdateEntrypoints<kWithoutReadBarrier>(ForwardCodeAdapter(this), pointer_size_);
     }
-    method->UpdateEntrypoints<kWithoutReadBarrier>(ForwardCodeAdapter(this), pointer_size_);
   }
 
  private:
@@ -929,9 +977,14 @@
   const size_t pointer_size = image_header.GetPointerSize();
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   heap->GetBootImagesSize(&boot_image_begin, &boot_image_end, &boot_oat_begin, &boot_oat_end);
-  CHECK_NE(boot_image_begin, boot_image_end)
-      << "Can not relocate app image without boot image space";
-  CHECK_NE(boot_oat_begin, boot_oat_end) << "Can not relocate app image without boot oat file";
+  if (boot_image_begin == boot_image_end) {
+    *error_msg = "Can not relocate app image without boot image space";
+    return false;
+  }
+  if (boot_oat_begin == boot_oat_end) {
+    *error_msg = "Can not relocate app image without boot oat file";
+    return false;
+  }
   const uint32_t boot_image_size = boot_image_end - boot_image_begin;
   const uint32_t boot_oat_size = boot_oat_end - boot_oat_begin;
   const uint32_t image_header_boot_image_size = image_header.GetBootImageSize();
@@ -981,11 +1034,12 @@
   const ImageSection& objects_section = image_header.GetImageSection(ImageHeader::kSectionObjects);
   uintptr_t objects_begin = reinterpret_cast<uintptr_t>(target_base + objects_section.Offset());
   uintptr_t objects_end = reinterpret_cast<uintptr_t>(target_base + objects_section.End());
+  FixupObjectAdapter fixup_adapter(boot_image, boot_oat, app_image, app_oat);
   if (fixup_image) {
     // Two pass approach, fix up all classes first, then fix up non class-objects.
     // The visited bitmap is used to ensure that pointer arrays are not forwarded twice.
     std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> visited_bitmap(
-        gc::accounting::ContinuousSpaceBitmap::Create("Pointer array bitmap",
+        gc::accounting::ContinuousSpaceBitmap::Create("Relocate bitmap",
                                                       target_base,
                                                       image_header.GetImageSize()));
     FixupObjectVisitor fixup_object_visitor(visited_bitmap.get(),
@@ -995,16 +1049,11 @@
                                             app_image,
                                             app_oat);
     TimingLogger::ScopedTiming timing("Fixup classes", &logger);
-    // Fixup class only touches app image classes, don't need the mutator lock since the space is
-    // not yet visible to the GC.
-    FixupClassVisitor fixup_class_visitor(boot_image, boot_oat, app_image, app_oat);
-    bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_class_visitor);
     // Fixup objects may read fields in the boot image, so use the mutator lock here for sanity,
     // though it's probably not required.
     ScopedObjectAccess soa(Thread::Current());
     timing.NewTiming("Fixup objects");
     bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_object_visitor);
-    FixupObjectAdapter fixup_adapter(boot_image, boot_oat, app_image, app_oat);
     // Fixup image roots.
     CHECK(app_image.InSource(reinterpret_cast<uintptr_t>(
         image_header.GetImageRoots<kWithoutReadBarrier>())));
@@ -1071,19 +1120,18 @@
                                          boot_oat,
                                          app_image,
                                          app_oat);
-    image_header.GetImageSection(ImageHeader::kSectionArtMethods).VisitPackedArtMethods(
-        &method_visitor,
-        target_base,
-        pointer_size);
+    image_header.VisitPackedArtMethods(&method_visitor, target_base, pointer_size);
   }
   if (fixup_image) {
     {
       // Only touches objects in the app image, no need for mutator lock.
       TimingLogger::ScopedTiming timing("Fixup fields", &logger);
       FixupArtFieldVisitor field_visitor(boot_image, boot_oat, app_image, app_oat);
-      image_header.GetImageSection(ImageHeader::kSectionArtFields).VisitPackedArtFields(
-          &field_visitor,
-          target_base);
+      image_header.VisitPackedArtFields(&field_visitor, target_base);
+    }
+    {
+      TimingLogger::ScopedTiming timing("Fixup conflict tables", &logger);
+      image_header.VisitPackedImtConflictTables(fixup_adapter, target_base, pointer_size);
     }
     // In the app image case, the image methods are actually in the boot image.
     image_header.RelocateImageMethods(boot_image.Delta());
diff --git a/runtime/gc/space/image_space_fs.h b/runtime/gc/space/image_space_fs.h
index 5237466..eac52f7 100644
--- a/runtime/gc/space/image_space_fs.h
+++ b/runtime/gc/space/image_space_fs.h
@@ -62,7 +62,7 @@
         if (recurse) {
           DeleteDirectoryContents(file, recurse);
           // Try to rmdir the directory.
-          if (TEMP_FAILURE_RETRY(rmdir(file.c_str())) != 0) {
+          if (rmdir(file.c_str()) != 0) {
             PLOG(ERROR) << "Unable to rmdir " << file;
           }
         }
@@ -71,12 +71,12 @@
       }
     } else {
       // Try to unlink the file.
-      if (TEMP_FAILURE_RETRY(unlink(file.c_str())) != 0) {
+      if (unlink(file.c_str()) != 0) {
         PLOG(ERROR) << "Unable to unlink " << file;
       }
     }
   }
-  CHECK_EQ(0, TEMP_FAILURE_RETRY(closedir(c_dir))) << "Unable to close directory.";
+  CHECK_EQ(0, closedir(c_dir)) << "Unable to close directory.";
 }
 
 static bool HasContent(const char* dir) {
@@ -95,10 +95,10 @@
       continue;
     }
     // Something here.
-    CHECK_EQ(0, TEMP_FAILURE_RETRY(closedir(c_dir))) << "Unable to close directory.";
+    CHECK_EQ(0, closedir(c_dir)) << "Unable to close directory.";
     return true;
   }
-  CHECK_EQ(0, TEMP_FAILURE_RETRY(closedir(c_dir))) << "Unable to close directory.";
+  CHECK_EQ(0, closedir(c_dir)) << "Unable to close directory.";
   return false;
 }
 
@@ -115,7 +115,7 @@
     }
   }
   if (OS::DirectoryExists(dir.c_str())) {
-    if (TEMP_FAILURE_RETRY(rmdir(dir.c_str())) != 0) {
+    if (rmdir(dir.c_str()) != 0) {
       PLOG(ERROR) << "Unable to rmdir " << dir;
       return;
     }
@@ -136,7 +136,7 @@
     return;
   }
 
-  if (TEMP_FAILURE_RETRY(rename(src, trg)) != 0) {
+  if (rename(src, trg) != 0) {
     PLOG(ERROR) << "Could not rename OTA cache " << src << " to target " << trg;
   }
 }
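
Dropping TEMP_FAILURE_RETRY above is deliberate: the macro just retries a call while it fails with EINTR, which is unsafe for closedir() (after a failure the stream's state is undefined, so a retry risks touching released state) and pointless for rmdir()/unlink()/rename(), which are not documented to fail with EINTR. For reference, the macro is roughly the following GNU statement expression (sketched here under a distinct name to avoid clashing with the bionic/glibc definition):

    #include <cerrno>

    // Retry only calls where repeating after EINTR is actually safe
    // (e.g. read/write), never close-like calls.
    #define TEMP_FAILURE_RETRY_SKETCH(exp)       \
      ({                                         \
        decltype(exp) _rc;                       \
        do {                                     \
          _rc = (exp);                           \
        } while (_rc == -1 && errno == EINTR);   \
        _rc;                                     \
      })
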
diff --git a/runtime/gc_map.h b/runtime/gc_map.h
deleted file mode 100644
index b4ccdd6..0000000
--- a/runtime/gc_map.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_GC_MAP_H_
-#define ART_RUNTIME_GC_MAP_H_
-
-#include <stdint.h>
-
-#include "base/logging.h"
-#include "base/macros.h"
-
-namespace art {
-
-// Lightweight wrapper for native PC offset to reference bit maps.
-class NativePcOffsetToReferenceMap {
- public:
-  explicit NativePcOffsetToReferenceMap(const uint8_t* data) : data_(data) {
-    CHECK(data_ != nullptr);
-  }
-
-  // The number of entries in the table.
-  size_t NumEntries() const {
-    return data_[2] | (data_[3] << 8);
-  }
-
-  // Return address of bitmap encoding what are live references.
-  const uint8_t* GetBitMap(size_t index) const {
-    size_t entry_offset = index * EntryWidth();
-    return &Table()[entry_offset + NativeOffsetWidth()];
-  }
-
-  // Get the native PC encoded in the table at the given index.
-  uintptr_t GetNativePcOffset(size_t index) const {
-    size_t entry_offset = index * EntryWidth();
-    uintptr_t result = 0;
-    for (size_t i = 0; i < NativeOffsetWidth(); ++i) {
-      result |= Table()[entry_offset + i] << (i * 8);
-    }
-    return result;
-  }
-
-  // Does the given offset have an entry?
-  bool HasEntry(uintptr_t native_pc_offset) {
-    for (size_t i = 0; i < NumEntries(); ++i) {
-      if (GetNativePcOffset(i) == native_pc_offset) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  // Finds the bitmap associated with the native pc offset.
-  const uint8_t* FindBitMap(uintptr_t native_pc_offset) {
-    size_t num_entries = NumEntries();
-    size_t index = Hash(native_pc_offset) % num_entries;
-    size_t misses = 0;
-    while (GetNativePcOffset(index) != native_pc_offset) {
-      index = (index + 1) % num_entries;
-      misses++;
-      DCHECK_LT(misses, num_entries) << "Failed to find offset: " << native_pc_offset;
-    }
-    return GetBitMap(index);
-  }
-
-  static uint32_t Hash(uint32_t native_offset) {
-    uint32_t hash = native_offset;
-    hash ^= (hash >> 20) ^ (hash >> 12);
-    hash ^= (hash >> 7) ^ (hash >> 4);
-    return hash;
-  }
-
-  // The number of bytes used to encode registers.
-  size_t RegWidth() const {
-    return (static_cast<size_t>(data_[0]) | (static_cast<size_t>(data_[1]) << 8)) >> 3;
-  }
-
- private:
-  // Skip the size information at the beginning of data.
-  const uint8_t* Table() const {
-    return data_ + 4;
-  }
-
-  // Number of bytes used to encode a native offset.
-  size_t NativeOffsetWidth() const {
-    return data_[0] & 7;
-  }
-
-  // The width of an entry in the table.
-  size_t EntryWidth() const {
-    return NativeOffsetWidth() + RegWidth();
-  }
-
-  const uint8_t* const data_;  // The header and table data
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_GC_MAP_H_
diff --git a/runtime/globals.h b/runtime/globals.h
index e7ea6f3..477cbdf 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -51,11 +51,31 @@
 static constexpr bool kIsDebugBuild = true;
 #endif
 
-// Whether or not this is a target (vs host) build. Useful in conditionals where ART_TARGET isn't.
+// ART_TARGET - Defined for target builds of ART.
+// ART_TARGET_LINUX - Defined for target Linux builds of ART.
+// ART_TARGET_ANDROID - Defined for target Android builds of ART.
+// Note: Either ART_TARGET_LINUX or ART_TARGET_ANDROID needs to be defined when ART_TARGET is set.
+// Note: When ART_TARGET_LINUX is defined, mem_map.h will not use Ashmem for memory mappings
+// (Ashmem is usually only available on Android kernels).
 #if defined(ART_TARGET)
+// Useful in conditionals where ART_TARGET isn't.
 static constexpr bool kIsTargetBuild = true;
+#if defined(ART_TARGET_LINUX)
+static constexpr bool kIsTargetLinux = true;
+#elif defined(ART_TARGET_ANDROID)
+static constexpr bool kIsTargetLinux = false;
+#else
+#error "Either ART_TARGET_LINUX or ART_TARGET_ANDROID needs to be defined for target builds."
+#endif
 #else
 static constexpr bool kIsTargetBuild = false;
+#if defined(ART_TARGET_LINUX)
+#error "ART_TARGET_LINUX defined for host build."
+#elif defined(ART_TARGET_ANDROID)
+#error "ART_TARGET_ANDROID defined for host build."
+#else
+static constexpr bool kIsTargetLinux = false;
+#endif
 #endif
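
The constants let target configuration be tested in ordinary C++ conditionals rather than preprocessor blocks, which keeps both branches compiled and type-checked on every configuration. An illustrative (non-authoritative) use of the globals defined above:

    static inline const char* DescribeTarget() {
      if (!kIsTargetBuild) {
        return "host";
      }
      return kIsTargetLinux ? "target-linux" : "target-android";
    }
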
 
 // Garbage collector constants.
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index bb35ec7..9895395 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -505,6 +505,7 @@
     // Walk the roots and the heap.
     output_->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
 
+    simple_roots_.clear();
     runtime->VisitRoots(this);
     runtime->VisitImageRoots(this);
     runtime->GetHeap()->VisitObjectsPaused(VisitObjectCallback, this);
@@ -828,7 +829,7 @@
         continue;
       }
       ++count;
-      const gc::AllocRecordStackTrace* trace = it->second->GetStackTrace();
+      const gc::AllocRecordStackTrace* trace = it->second.GetStackTrace();
 
       // Copy the pair into a real hash map to speed up look up.
       auto records_result = allocation_records_.emplace(obj, trace);
@@ -884,6 +885,14 @@
                      gc::EqAllocRecordTypesPtr<gc::AllocRecordStackTraceElement>> frames_;
   std::unordered_map<const mirror::Object*, const gc::AllocRecordStackTrace*> allocation_records_;
 
+  // Set used to keep track of what simple root records we have already
+  // emitted, to avoid emitting duplicate entries. The simple root records are
+  // those that contain no other information than the root type and the object
+  // id. A pair of root type and object id is packed into a uint64_t, with
+  // the root type in the upper 32 bits and the object id in the lower 32
+  // bits.
+  std::unordered_set<uint64_t> simple_roots_;
+
   friend class GcRootVisitor;
   DISALLOW_COPY_AND_ASSIGN(Hprof);
 };
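
The simple_roots_ set makes the dedup cheap: each (root type, object id) pair packs into one uint64_t key, and unordered_set::insert reports in its .second whether the record is new. The same idiom in isolation (PointerToLowMemUInt32 is the runtime's helper; a plain 32-bit id stands in for it here):

    #include <cstdint>
    #include <unordered_set>

    // Tag in the high 32 bits, object id in the low 32 bits.
    inline uint64_t PackRootKey(uint32_t heap_tag, uint32_t object_id) {
      return (static_cast<uint64_t>(heap_tag) << 32) | object_id;
    }

    // Returns true exactly once per distinct (tag, id) pair.
    inline bool ShouldEmitRoot(std::unordered_set<uint64_t>& seen,
                               uint32_t tag, uint32_t id) {
      return seen.insert(PackRootKey(tag, id)).second;
    }
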
@@ -962,10 +971,14 @@
     case HPROF_ROOT_MONITOR_USED:
     case HPROF_ROOT_INTERNED_STRING:
     case HPROF_ROOT_DEBUGGER:
-    case HPROF_ROOT_VM_INTERNAL:
-      __ AddU1(heap_tag);
-      __ AddObjectId(obj);
+    case HPROF_ROOT_VM_INTERNAL: {
+      uint64_t key = (static_cast<uint64_t>(heap_tag) << 32) | PointerToLowMemUInt32(obj);
+      if (simple_roots_.insert(key).second) {
+        __ AddU1(heap_tag);
+        __ AddObjectId(obj);
+      }
       break;
+    }
 
       // ID: object ID
       // ID: JNI global ref ID
diff --git a/runtime/image-inl.h b/runtime/image-inl.h
index e3307d8..ea75a62 100644
--- a/runtime/image-inl.h
+++ b/runtime/image-inl.h
@@ -19,6 +19,8 @@
 
 #include "image.h"
 
+#include "art_method.h"
+
 namespace art {
 
 template <ReadBarrierOption kReadBarrierOption>
@@ -42,6 +44,20 @@
   return image_roots;
 }
 
+template <typename Visitor>
+inline void ImageHeader::VisitPackedImtConflictTables(const Visitor& visitor,
+                                                      uint8_t* base,
+                                                      size_t pointer_size) const {
+  const ImageSection& section = GetImageSection(kSectionIMTConflictTables);
+  for (size_t pos = 0; pos < section.Size(); ) {
+    auto* table = reinterpret_cast<ImtConflictTable*>(base + section.Offset() + pos);
+    table->Visit([&visitor](const std::pair<ArtMethod*, ArtMethod*>& methods) {
+      return std::make_pair(visitor(methods.first), visitor(methods.second));
+    }, pointer_size);
+    pos += table->ComputeSize(pointer_size);
+  }
+}
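
VisitPackedImtConflictTables() shows the general pattern for ART's packed image sections: elements sit back to back, each reporting its own (pointer-size dependent) footprint, so the cursor advances by ComputeSize() rather than a fixed stride. The pattern in isolation, with a hypothetical variable-size element type:

    #include <cstddef>
    #include <cstdint>

    // Hypothetical element: a count followed by `count` 64-bit entries.
    struct PackedTable {
      uint32_t count;
      size_t ComputeSize() const { return sizeof(PackedTable) + count * sizeof(uint64_t); }
    };

    // Walk a packed section: reinterpret the cursor, visit, advance by the
    // element's own size.
    template <typename Visitor>
    void VisitPackedSection(uint8_t* base, size_t section_offset, size_t section_size,
                            const Visitor& visit) {
      for (size_t pos = 0; pos < section_size; ) {
        auto* table = reinterpret_cast<PackedTable*>(base + section_offset + pos);
        visit(table);
        pos += table->ComputeSize();
      }
    }
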
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_IMAGE_INL_H_
diff --git a/runtime/image.cc b/runtime/image.cc
index 1f54e3e..a9552c2 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '7', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '9', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
@@ -147,9 +147,10 @@
   return os << "size=" << section.Size() << " range=" << section.Offset() << "-" << section.End();
 }
 
-void ImageSection::VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const {
-  for (size_t pos = 0; pos < Size(); ) {
-    auto* array = reinterpret_cast<LengthPrefixedArray<ArtField>*>(base + Offset() + pos);
+void ImageHeader::VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const {
+  const ImageSection& fields = GetFieldsSection();
+  for (size_t pos = 0; pos < fields.Size(); ) {
+    auto* array = reinterpret_cast<LengthPrefixedArray<ArtField>*>(base + fields.Offset() + pos);
     for (size_t i = 0; i < array->size(); ++i) {
       visitor->Visit(&array->At(i, sizeof(ArtField)));
     }
@@ -157,18 +158,25 @@
   }
 }
 
-void ImageSection::VisitPackedArtMethods(ArtMethodVisitor* visitor,
-                                         uint8_t* base,
-                                         size_t pointer_size) const {
+void ImageHeader::VisitPackedArtMethods(ArtMethodVisitor* visitor,
+                                        uint8_t* base,
+                                        size_t pointer_size) const {
   const size_t method_alignment = ArtMethod::Alignment(pointer_size);
   const size_t method_size = ArtMethod::Size(pointer_size);
-  for (size_t pos = 0; pos < Size(); ) {
-    auto* array = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(base + Offset() + pos);
+  const ImageSection& methods = GetMethodsSection();
+  for (size_t pos = 0; pos < methods.Size(); ) {
+    auto* array = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(base + methods.Offset() + pos);
     for (size_t i = 0; i < array->size(); ++i) {
       visitor->Visit(&array->At(i, method_size, method_alignment));
     }
     pos += array->ComputeSize(array->size(), method_size, method_alignment);
   }
+  const ImageSection& runtime_methods = GetRuntimeMethodsSection();
+  for (size_t pos = 0; pos < runtime_methods.Size(); ) {
+    auto* method = reinterpret_cast<ArtMethod*>(base + runtime_methods.Offset() + pos);
+    visitor->Visit(method);
+    pos += method_size;
+  }
 }
 
 }  // namespace art
diff --git a/runtime/image.h b/runtime/image.h
index 8e5dbad..2ea9af7 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -64,12 +64,6 @@
     return offset - offset_ < size_;
   }
 
-  // Visit ArtMethods in the section starting at base.
-  void VisitPackedArtMethods(ArtMethodVisitor* visitor, uint8_t* base, size_t pointer_size) const;
-
-  // Visit ArtMethods in the section starting at base.
-  void VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const;
-
  private:
   uint32_t offset_;
   uint32_t size_;
@@ -200,6 +194,8 @@
     kSectionObjects,
     kSectionArtFields,
     kSectionArtMethods,
+    kSectionRuntimeMethods,
+    kSectionIMTConflictTables,
     kSectionDexCacheArrays,
     kSectionInternedStrings,
     kSectionClassTable,
@@ -211,10 +207,19 @@
   void SetImageMethod(ImageMethod index, ArtMethod* method);
 
   const ImageSection& GetImageSection(ImageSections index) const;
+
   const ImageSection& GetMethodsSection() const {
     return GetImageSection(kSectionArtMethods);
   }
 
+  const ImageSection& GetRuntimeMethodsSection() const {
+    return GetImageSection(kSectionRuntimeMethods);
+  }
+
+  const ImageSection& GetFieldsSection() const {
+    return GetImageSection(ImageHeader::kSectionArtFields);
+  }
+
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::Object* GetImageRoot(ImageRoot image_root) const
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -265,6 +270,19 @@
     return boot_image_size_ != 0u;
   }
 
+  // Visit ArtMethods in the section starting at base. Includes runtime methods.
+  // TODO: Delete base parameter if it is always equal to GetImageBegin.
+  void VisitPackedArtMethods(ArtMethodVisitor* visitor, uint8_t* base, size_t pointer_size) const;
+
+  // Visit ArtFields in the section starting at base.
+  // TODO: Delete base parameter if it is always equal to GetImageBegin.
+  void VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const;
+
+  template <typename Visitor>
+  void VisitPackedImtConflictTables(const Visitor& visitor,
+                                    uint8_t* base,
+                                    size_t pointer_size) const;
+
  private:
   static const uint8_t kImageMagic[4];
   static const uint8_t kImageVersion[4];
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index a0c6bfb..34bc458 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -80,7 +80,7 @@
       have_exception_caught_listeners_(false),
       have_branch_listeners_(false),
       have_invoke_virtual_or_interface_listeners_(false),
-      deoptimized_methods_lock_("deoptimized methods lock"),
+      deoptimized_methods_lock_("deoptimized methods lock", kDeoptimizedMethodsLock),
       deoptimization_enabled_(false),
       interpreter_handler_table_(kMainHandlerTable),
       quick_alloc_entry_points_instrumentation_counter_(0),
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index d07f47b..a4c3d41 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -303,7 +303,8 @@
   bool NonJitProfilingActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return have_dex_pc_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
-        have_exception_caught_listeners_ || have_method_unwind_listeners_;
+        have_exception_caught_listeners_ || have_method_unwind_listeners_ ||
+        have_branch_listeners_;
   }
 
   // Inform listeners that a method has been entered. A dex PC is provided as we may install
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 74a2532..eceb593 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -97,6 +97,17 @@
   return LookupStrongLocked(s);
 }
 
+mirror::String* InternTable::LookupStrong(Thread* self,
+                                          uint32_t utf16_length,
+                                          const char* utf8_data) {
+  DCHECK_EQ(utf16_length, CountModifiedUtf8Chars(utf8_data));
+  Utf8String string(utf16_length,
+                    utf8_data,
+                    ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length));
+  MutexLock mu(self, *Locks::intern_table_lock_);
+  return strong_interns_.Find(string);
+}
+
 mirror::String* InternTable::LookupWeakLocked(mirror::String* s) {
   return weak_interns_.Find(s);
 }
@@ -365,9 +376,27 @@
   return a.Read()->Equals(b.Read());
 }
 
+bool InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& a,
+                                               const Utf8String& b) const {
+  if (kIsDebugBuild) {
+    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  }
+  mirror::String* a_string = a.Read();
+  uint32_t a_length = static_cast<uint32_t>(a_string->GetLength());
+  if (a_length != b.GetUtf16Length()) {
+    return false;
+  }
+  const uint16_t* a_value = a_string->GetValue();
+  return CompareModifiedUtf8ToUtf16AsCodePointValues(b.GetUtf8Data(), a_value, a_length) == 0;
+}
+
 size_t InternTable::Table::AddTableFromMemory(const uint8_t* ptr) {
   size_t read_count = 0;
   UnorderedSet set(ptr, /*make copy*/false, &read_count);
+  if (set.Empty()) {
+    // Avoid inserting empty sets.
+    return read_count;
+  }
   // TODO: Disable this for app images if app images have intern tables.
   static constexpr bool kCheckDuplicates = true;
   if (kCheckDuplicates) {
@@ -375,7 +404,7 @@
       CHECK(Find(string.Read()) == nullptr) << "Already found " << string.Read()->ToModifiedUtf8();
     }
   }
-  // Insert at the front since we insert into the back.
+  // Insert at the front since we add new interns into the back.
   tables_.insert(tables_.begin(), std::move(set));
   return read_count;
 }
@@ -421,6 +450,17 @@
   return nullptr;
 }
 
+mirror::String* InternTable::Table::Find(const Utf8String& string) {
+  Locks::intern_table_lock_->AssertHeld(Thread::Current());
+  for (UnorderedSet& table : tables_) {
+    auto it = table.Find(string);
+    if (it != table.end()) {
+      return it->Read();
+    }
+  }
+  return nullptr;
+}
+
 void InternTable::Table::AddNewTable() {
   tables_.push_back(UnorderedSet());
 }
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 274f5ad..f845de5 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -88,6 +88,9 @@
   mirror::String* LookupStrong(Thread* self, mirror::String* s)
       REQUIRES(!Locks::intern_table_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::String* LookupStrong(Thread* self, uint32_t utf16_length, const char* utf8_data)
+      REQUIRES(!Locks::intern_table_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Lookup a weak intern, returns null if not found.
   mirror::String* LookupWeak(Thread* self, mirror::String* s)
@@ -136,11 +139,32 @@
       REQUIRES(!Locks::intern_table_lock_);
 
  private:
+  // Modified UTF-8-encoded string treated as UTF16.
+  class Utf8String {
+   public:
+    Utf8String(uint32_t utf16_length, const char* utf8_data, int32_t hash)
+        : hash_(hash), utf16_length_(utf16_length), utf8_data_(utf8_data) { }
+
+    int32_t GetHash() const { return hash_; }
+    uint32_t GetUtf16Length() const { return utf16_length_; }
+    const char* GetUtf8Data() const { return utf8_data_; }
+
+   private:
+    int32_t hash_;
+    uint32_t utf16_length_;
+    const char* utf8_data_;
+  };
+
   class StringHashEquals {
    public:
     std::size_t operator()(const GcRoot<mirror::String>& root) const NO_THREAD_SAFETY_ANALYSIS;
     bool operator()(const GcRoot<mirror::String>& a, const GcRoot<mirror::String>& b) const
         NO_THREAD_SAFETY_ANALYSIS;
+
+    // Utf8String can be used for lookup.
+    std::size_t operator()(const Utf8String& key) const { return key.GetHash(); }
+    bool operator()(const GcRoot<mirror::String>& a, const Utf8String& b) const
+        NO_THREAD_SAFETY_ANALYSIS;
   };
   class GcRootEmptyFn {
    public:
@@ -159,6 +183,8 @@
     Table();
     mirror::String* Find(mirror::String* s) SHARED_REQUIRES(Locks::mutator_lock_)
         REQUIRES(Locks::intern_table_lock_);
+    mirror::String* Find(const Utf8String& string) SHARED_REQUIRES(Locks::mutator_lock_)
+        REQUIRES(Locks::intern_table_lock_);
     void Insert(mirror::String* s) SHARED_REQUIRES(Locks::mutator_lock_)
         REQUIRES(Locks::intern_table_lock_);
     void Remove(mirror::String* s)
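
StringHashEquals above is one functor doing double duty: it hashes and compares both the stored key type (GcRoot<mirror::String>) and the probe key type (Utf8String), which is what lets the set be searched without constructing a managed string. The shape of that pattern, reduced to plain C++ with hypothetical toy types (ASCII-only compare, not ART's):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <string>

    struct Stored { int32_t hash; std::string utf8; };                // stands in for GcRoot<String>
    struct Probe  { int32_t hash; uint32_t len; const char* utf8; };  // stands in for Utf8String

    struct HashEquals {
      // Hash overloads: both key kinds must hash identically for equal contents.
      std::size_t operator()(const Stored& s) const { return static_cast<std::size_t>(s.hash); }
      std::size_t operator()(const Probe& p) const { return static_cast<std::size_t>(p.hash); }
      // Equality overloads: stored/stored and stored/probe.
      bool operator()(const Stored& a, const Stored& b) const { return a.utf8 == b.utf8; }
      bool operator()(const Stored& a, const Probe& b) const {
        return a.utf8.size() == b.len && std::memcmp(a.utf8.data(), b.utf8, b.len) == 0;
      }
    };
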
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index b60b32d..fe78bf2 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -35,12 +35,14 @@
   Handle<mirror::String> foo_3(
       hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "foo")));
   Handle<mirror::String> bar(hs.NewHandle(intern_table.InternStrong(3, "bar")));
+  ASSERT_TRUE(foo_1.Get() != nullptr);
+  ASSERT_TRUE(foo_2.Get() != nullptr);
+  ASSERT_TRUE(foo_3.Get() != nullptr);
+  ASSERT_TRUE(bar.Get() != nullptr);
+  EXPECT_EQ(foo_1.Get(), foo_2.Get());
   EXPECT_TRUE(foo_1->Equals("foo"));
   EXPECT_TRUE(foo_2->Equals("foo"));
   EXPECT_TRUE(foo_3->Equals("foo"));
-  EXPECT_TRUE(foo_1.Get() != nullptr);
-  EXPECT_TRUE(foo_2.Get() != nullptr);
-  EXPECT_EQ(foo_1.Get(), foo_2.Get());
   EXPECT_NE(foo_1.Get(), bar.Get());
   EXPECT_NE(foo_2.Get(), bar.Get());
   EXPECT_NE(foo_3.Get(), bar.Get());
@@ -175,4 +177,39 @@
   }
 }
 
+TEST_F(InternTableTest, LookupStrong) {
+  ScopedObjectAccess soa(Thread::Current());
+  InternTable intern_table;
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::String> foo(hs.NewHandle(intern_table.InternStrong(3, "foo")));
+  Handle<mirror::String> bar(hs.NewHandle(intern_table.InternStrong(3, "bar")));
+  Handle<mirror::String> foobar(hs.NewHandle(intern_table.InternStrong(6, "foobar")));
+  ASSERT_TRUE(foo.Get() != nullptr);
+  ASSERT_TRUE(bar.Get() != nullptr);
+  ASSERT_TRUE(foobar.Get() != nullptr);
+  ASSERT_TRUE(foo->Equals("foo"));
+  ASSERT_TRUE(bar->Equals("bar"));
+  ASSERT_TRUE(foobar->Equals("foobar"));
+  ASSERT_NE(foo.Get(), bar.Get());
+  ASSERT_NE(foo.Get(), foobar.Get());
+  ASSERT_NE(bar.Get(), foobar.Get());
+  mirror::String* lookup_foo = intern_table.LookupStrong(soa.Self(), 3, "foo");
+  EXPECT_EQ(lookup_foo, foo.Get());
+  mirror::String* lookup_bar = intern_table.LookupStrong(soa.Self(), 3, "bar");
+  EXPECT_EQ(lookup_bar, bar.Get());
+  mirror::String* lookup_foobar = intern_table.LookupStrong(soa.Self(), 6, "foobar");
+  EXPECT_EQ(lookup_foobar, foobar.Get());
+  mirror::String* lookup_foox = intern_table.LookupStrong(soa.Self(), 4, "foox");
+  EXPECT_TRUE(lookup_foox == nullptr);
+  mirror::String* lookup_fooba = intern_table.LookupStrong(soa.Self(), 5, "fooba");
+  EXPECT_TRUE(lookup_fooba == nullptr);
+  mirror::String* lookup_foobaR = intern_table.LookupStrong(soa.Self(), 6, "foobaR");
+  EXPECT_TRUE(lookup_foobaR == nullptr);
+  // Try a hash conflict.
+  ASSERT_EQ(ComputeUtf16HashFromModifiedUtf8("foobar", 6),
+            ComputeUtf16HashFromModifiedUtf8("foobbS", 6));
+  mirror::String* lookup_foobbS = intern_table.LookupStrong(soa.Self(), 6, "foobbS");
+  EXPECT_TRUE(lookup_foobbS == nullptr);
+}
+
 }  // namespace art
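
The collision pair in the LookupStrong test is not arbitrary: under the Java-style rolling hash h = 31*h + c (which ComputeUtf16HashFromModifiedUtf8 reduces to for ASCII input), "foobar" and "foobbS" differ by +1 at weight 31 ('a' to 'b') and by -31 at weight 1 ('r' to 'S'), so the deltas cancel. A quick standalone check:

    #include <cassert>
    #include <cstdint>

    int32_t Hash(const char* s) {
      int32_t h = 0;
      for (; *s != '\0'; ++s) h = 31 * h + *s;  // Java-style rolling hash, ASCII-only
      return h;
    }

    int main() {
      // ('b' - 'a') * 31 + ('S' - 'r') * 1 == 31 - 31 == 0, so the hashes match.
      assert(Hash("foobar") == Hash("foobbS"));
      return 0;
    }
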
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index baf4afe..1d0e600 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -264,12 +264,12 @@
                                     ShadowFrame& shadow_frame, JValue result_register);
 #endif
 
-static JValue Execute(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame,
-                      JValue result_register)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
-static inline JValue Execute(Thread* self, const DexFile::CodeItem* code_item,
-                             ShadowFrame& shadow_frame, JValue result_register) {
+static inline JValue Execute(
+    Thread* self,
+    const DexFile::CodeItem* code_item,
+    ShadowFrame& shadow_frame,
+    JValue result_register,
+    bool stay_in_interpreter = false) SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(!shadow_frame.GetMethod()->IsAbstract());
   DCHECK(!shadow_frame.GetMethod()->IsNative());
   if (LIKELY(shadow_frame.GetDexPC() == 0)) {  // Entering the method, but not via deoptimization.
@@ -284,24 +284,34 @@
                                         method, 0);
     }
 
-    jit::Jit* jit = Runtime::Current()->GetJit();
-    if (jit != nullptr && jit->CanInvokeCompiledCode(method)) {
-      JValue result;
+    if (!stay_in_interpreter) {
+      jit::Jit* jit = Runtime::Current()->GetJit();
+      if (jit != nullptr) {
+        jit->MethodEntered(self, shadow_frame.GetMethod());
+        if (jit->CanInvokeCompiledCode(method)) {
+          JValue result;
 
-      // Pop the shadow frame before calling into compiled code.
-      self->PopShadowFrame();
-      ArtInterpreterToCompiledCodeBridge(self, code_item, &shadow_frame, &result);
-      // Push the shadow frame back as the caller will expect it.
-      self->PushShadowFrame(&shadow_frame);
+          // Pop the shadow frame before calling into compiled code.
+          self->PopShadowFrame();
+          ArtInterpreterToCompiledCodeBridge(self, nullptr, code_item, &shadow_frame, &result);
+          // Push the shadow frame back as the caller will expect it.
+          self->PushShadowFrame(&shadow_frame);
 
-      return result;
+          return result;
+        }
+      }
     }
   }
 
   shadow_frame.GetMethod()->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
 
+  // Lock counting is a special version of accessibility checks, and for simplicity and
+  // reduction of template parameters, we gate it behind access-checks mode.
+  ArtMethod* method = shadow_frame.GetMethod();
+  DCHECK(!method->SkipAccessChecks() || !method->MustCountLocks());
+
   bool transaction_active = Runtime::Current()->IsActiveTransaction();
-  if (LIKELY(shadow_frame.GetMethod()->SkipAccessChecks())) {
+  if (LIKELY(method->SkipAccessChecks())) {
     // Enter the "without access check" interpreter.
     if (kInterpreterImplKind == kMterpImplKind) {
       if (transaction_active) {
@@ -379,7 +389,8 @@
 }
 
 void EnterInterpreterFromInvoke(Thread* self, ArtMethod* method, Object* receiver,
-                                uint32_t* args, JValue* result) {
+                                uint32_t* args, JValue* result,
+                                bool stay_in_interpreter) {
   DCHECK_EQ(self, Thread::Current());
   bool implicit_check = !Runtime::Current()->ExplicitStackOverflowChecks();
   if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) {
@@ -454,7 +465,7 @@
     }
   }
   if (LIKELY(!method->IsNative())) {
-    JValue r = Execute(self, code_item, *shadow_frame, JValue());
+    JValue r = Execute(self, code_item, *shadow_frame, JValue(), stay_in_interpreter);
     if (result != nullptr) {
       *result = r;
     }
@@ -484,6 +495,10 @@
   // Are we executing the first shadow frame?
   bool first = true;
   while (shadow_frame != nullptr) {
+    // We do not want to recover lock state for lock counting when deoptimizing. Currently,
+    // the compiler should not have compiled a method that failed structured-locking checks.
+    DCHECK(!shadow_frame->GetMethod()->MustCountLocks());
+
     self->SetTopOfShadowStack(shadow_frame);
     const DexFile::CodeItem* code_item = shadow_frame->GetMethod()->GetCodeItem();
     const uint32_t dex_pc = shadow_frame->GetDexPC();
@@ -503,8 +518,24 @@
       // instruction, as it already executed.
       // TODO: should be tested more once b/17586779 is fixed.
       const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
-      DCHECK(instr->IsInvoke());
-      new_dex_pc = dex_pc + instr->SizeInCodeUnits();
+      if (instr->IsInvoke()) {
+        new_dex_pc = dex_pc + instr->SizeInCodeUnits();
+      } else if (instr->Opcode() == Instruction::NEW_INSTANCE) {
+        // It's possible to deoptimize at a NEW_INSTANCE dex instruction that allocates a
+        // java.lang.String, which is turned into a call to StringFactory.newEmptyString().
+        if (kIsDebugBuild) {
+          ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+          mirror::Class* klass = class_linker->ResolveType(
+              instr->VRegB_21c(), shadow_frame->GetMethod());
+          DCHECK(klass->IsStringClass());
+        }
+        // Skip the dex instruction since we essentially come back from an invocation.
+        new_dex_pc = dex_pc + instr->SizeInCodeUnits();
+      } else {
+        DCHECK(false) << "Unexpected instruction opcode " << instr->Opcode()
+                      << " at dex_pc " << dex_pc
+                      << " of method: " << PrettyMethod(shadow_frame->GetMethod(), false);
+      }
     } else {
       // Nothing to do, the dex_pc is the one at which the code requested
       // the deoptimization.
@@ -532,6 +563,10 @@
     return JValue();
   }
 
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    jit->NotifyCompiledCodeToInterpreterTransition(self, shadow_frame->GetMethod());
+  }
   return Execute(self, code_item, *shadow_frame, JValue());
 }
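
The dex-pc fix-up above reduces to one small rule: if deoptimization landed on a call-like instruction (an invoke, or the NEW_INSTANCE of a java.lang.String that compiled code turned into a StringFactory call), that instruction already ran in compiled code, so resume after it; otherwise resume at the requested pc. A condensed sketch of just that rule, with a hypothetical from_code flag standing in for the surrounding loop's state (illustrative, not the full function):

    // Sketch: compute where interpretation resumes after deoptimization.
    uint32_t NextDexPcAfterDeopt(const Instruction* instr, uint32_t dex_pc, bool from_code) {
      if (!from_code) {
        return dex_pc;  // deopt requested at this pc; execute it in the interpreter
      }
      // Call-like site: it already executed in compiled code, so skip over it.
      DCHECK(instr->IsInvoke() || instr->Opcode() == Instruction::NEW_INSTANCE);
      return dex_pc + instr->SizeInCodeUnits();
    }
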
 
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index 6353a9b..bf4bcff 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -33,8 +33,11 @@
 namespace interpreter {
 
 // Called by ArtMethod::Invoke, shadow frames arguments are taken from the args array.
+// The optional stay_in_interpreter parameter (false by default) can be used by clients to
+// explicitly force the invocation to begin in the interpreter rather than in compiled code.
 extern void EnterInterpreterFromInvoke(Thread* self, ArtMethod* method,
-                                       mirror::Object* receiver, uint32_t* args, JValue* result)
+                                       mirror::Object* receiver, uint32_t* args, JValue* result,
+                                       bool stay_in_interpreter = false)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
 // 'from_code' denotes whether the deoptimization was explicitly triggered by compiled code.
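
Concretely, a caller that needs the method body to run under the interpreter (for example a debugger-driven invoke) would pass the new flag; a hypothetical sketch:

    // Hypothetical caller: force the invocation to start in the interpreter,
    // bypassing any JIT-compiled entry point for this method.
    void DebugStyleInvoke(Thread* self, ArtMethod* method, mirror::Object* receiver,
                          uint32_t* args, JValue* result)
        SHARED_REQUIRES(Locks::mutator_lock_) {
      interpreter::EnterInterpreterFromInvoke(self, method, receiver, args, result,
                                              /* stay_in_interpreter */ true);
    }
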
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 3453abc..12d70c5 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -503,6 +503,7 @@
                                 uint32_t vregC) ALWAYS_INLINE;
 
 void ArtInterpreterToCompiledCodeBridge(Thread* self,
+                                        ArtMethod* caller,
                                         const DexFile::CodeItem* code_item,
                                         ShadowFrame* shadow_frame,
                                         JValue* result)
@@ -530,6 +531,10 @@
   uint16_t arg_offset = (code_item == nullptr)
                             ? 0
                             : code_item->registers_size_ - code_item->ins_size_;
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr && caller != nullptr) {
+    jit->NotifyInterpreterToCompiledCodeTransition(self, caller);
+  }
   method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
                  (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
                  result, method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty());
@@ -726,7 +731,8 @@
         target->GetEntryPointFromQuickCompiledCode())) {
       ArtInterpreterToInterpreterBridge(self, code_item, new_shadow_frame, result);
     } else {
-      ArtInterpreterToCompiledCodeBridge(self, code_item, new_shadow_frame, result);
+      ArtInterpreterToCompiledCodeBridge(
+          self, shadow_frame.GetMethod(), code_item, new_shadow_frame, result);
     }
   } else {
     UnstartedRuntime::Invoke(self, code_item, new_shadow_frame, result, first_dest_reg);
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 19d971e..69376fd 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -34,6 +34,7 @@
 #include "dex_instruction-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "handle_scope-inl.h"
+#include "jit/jit.h"
 #include "lambda/art_lambda_method.h"
 #include "lambda/box_table.h"
 #include "lambda/closure.h"
@@ -94,7 +95,9 @@
   StackHandleScope<1> hs(self);
   Handle<Object> h_ref(hs.NewHandle(ref));
   h_ref->MonitorEnter(self);
-  frame->GetLockCountData().AddMonitor<kMonitorCounting>(self, h_ref.Get());
+  if (kMonitorCounting && frame->GetMethod()->MustCountLocks()) {
+    frame->GetLockCountData().AddMonitor(self, h_ref.Get());
+  }
 }
 
 template <bool kMonitorCounting>
@@ -106,7 +109,19 @@
   StackHandleScope<1> hs(self);
   Handle<Object> h_ref(hs.NewHandle(ref));
   h_ref->MonitorExit(self);
-  frame->GetLockCountData().RemoveMonitorOrThrow<kMonitorCounting>(self, h_ref.Get());
+  if (kMonitorCounting && frame->GetMethod()->MustCountLocks()) {
+    frame->GetLockCountData().RemoveMonitorOrThrow(self, h_ref.Get());
+  }
+}
+
+template <bool kMonitorCounting>
+static inline bool DoMonitorCheckOnExit(Thread* self, ShadowFrame* frame)
+    NO_THREAD_SAFETY_ANALYSIS
+    REQUIRES(!Roles::uninterruptible_) {
+  if (kMonitorCounting && frame->GetMethod()->MustCountLocks()) {
+    return frame->GetLockCountData().CheckAllMonitorsReleasedOrThrow(self);
+  }
+  return true;
 }
 
 void AbortTransactionF(Thread* self, const char* fmt, ...)
@@ -628,6 +643,15 @@
     result->SetJ(0);
     return false;
   } else {
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    if (jit != nullptr) {
+      if (type == kVirtual || type == kInterface) {
+        jit->InvokeVirtualOrInterface(
+            self, receiver, sf_method, shadow_frame.GetDexPC(), called_method);
+      }
+      jit->AddSamples(self, sf_method, 1, /*with_backedges*/false);
+    }
+    // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT.
     if (type == kVirtual || type == kInterface) {
       instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
       if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) {
@@ -667,7 +691,14 @@
     result->SetJ(0);
     return false;
   } else {
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    if (jit != nullptr) {
+      jit->InvokeVirtualOrInterface(
+          self, receiver, shadow_frame.GetMethod(), shadow_frame.GetDexPC(), called_method);
+      jit->AddSamples(self, shadow_frame.GetMethod(), 1, /*with_backedges*/false);
+    }
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+    // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT.
     if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) {
       instrumentation->InvokeVirtualOrInterface(
           self, receiver, shadow_frame.GetMethod(), shadow_frame.GetDexPC(), called_method);
@@ -984,8 +1015,11 @@
   return branch_offset <= 0;
 }
 
-void ArtInterpreterToCompiledCodeBridge(Thread* self, const DexFile::CodeItem* code_item,
-                                        ShadowFrame* shadow_frame, JValue* result);
+void ArtInterpreterToCompiledCodeBridge(Thread* self,
+                                        ArtMethod* caller,
+                                        const DexFile::CodeItem* code_item,
+                                        ShadowFrame* shadow_frame,
+                                        JValue* result);
 
 // Explicitly instantiate all DoInvoke functions.
 #define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                      \
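
The monitor helpers above all share one gate: lock-count bookkeeping is maintained only when the method is explicitly marked MustCountLocks(), so the common interpreter path pays a single cheap predicate instead of per-monitor-op accounting. The predicate, pulled out for clarity (sketch, not an actual helper in this change):

    // Sketch of the shared gate used by DoMonitorEnter, DoMonitorExit, and
    // DoMonitorCheckOnExit: count locks only when both the template switch
    // and the per-method flag ask for it.
    template <bool kMonitorCounting>
    static inline bool NeedsMonitorCounting(ShadowFrame* frame)
        SHARED_REQUIRES(Locks::mutator_lock_) {
      return kMonitorCounting && frame->GetMethod()->MustCountLocks();
    }
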
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 12d6fdc..f03036b 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -64,15 +64,22 @@
   currentHandlersTable = handlersTable[ \
       Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()]
 
-#define BRANCH_INSTRUMENTATION(offset)                                                            \
-  do {                                                                                            \
-    ArtMethod* method = shadow_frame.GetMethod();                                                 \
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
-    instrumentation->Branch(self, method, dex_pc, offset);                                        \
-    JValue result;                                                                                \
-    if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {             \
-      return result;                                                                              \
-    }                                                                                             \
+#define BRANCH_INSTRUMENTATION(offset)                                                          \
+  do {                                                                                          \
+    if (UNLIKELY(instrumentation->HasBranchListeners())) {                                      \
+      instrumentation->Branch(self, method, dex_pc, offset);                                    \
+    }                                                                                           \
+    JValue result;                                                                              \
+    if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {           \
+      return result;                                                                            \
+    }                                                                                           \
+  } while (false)
+
+#define HOTNESS_UPDATE()                                                                       \
+  do {                                                                                         \
+    if (jit != nullptr) {                                                                      \
+      jit->AddSamples(self, method, 1, /*with_backedges*/ true);                               \
+    }                                                                                          \
   } while (false)
 
 #define UNREACHABLE_CODE_CHECK()                \
@@ -97,8 +104,7 @@
   } HANDLE_INSTRUCTION_END();
 
 #define HANDLE_MONITOR_CHECKS()                                                                   \
-  if (!shadow_frame.GetLockCountData().                                                           \
-          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self)) {                        \
+  if (!DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame)) {                       \
     HANDLE_PENDING_EXCEPTION();                                                                   \
   }
 
@@ -186,6 +192,9 @@
   UPDATE_HANDLER_TABLE();
   std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
   size_t lambda_captured_variable_index = 0;
+  const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
+  ArtMethod* method = shadow_frame.GetMethod();
+  jit::Jit* jit = Runtime::Current()->GetJit();
 
   // Jump to first instruction.
   ADVANCE(0);
@@ -277,7 +286,6 @@
     JValue result;
     self->AllowThreadSuspension();
     HANDLE_MONITOR_CHECKS();
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -292,7 +300,6 @@
     JValue result;
     self->AllowThreadSuspension();
     HANDLE_MONITOR_CHECKS();
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -308,7 +315,6 @@
     result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
     self->AllowThreadSuspension();
     HANDLE_MONITOR_CHECKS();
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -323,7 +329,6 @@
     result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
     self->AllowThreadSuspension();
     HANDLE_MONITOR_CHECKS();
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -359,7 +364,6 @@
       }
     }
     result.SetL(obj_result);
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -630,6 +634,7 @@
     int8_t offset = inst->VRegA_10t(inst_data);
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -643,6 +648,7 @@
     int16_t offset = inst->VRegA_20t();
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -656,6 +662,7 @@
     int32_t offset = inst->VRegA_30t();
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -669,6 +676,7 @@
     int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -682,6 +690,7 @@
     int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -785,6 +794,7 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -804,6 +814,7 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -823,6 +834,7 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -842,6 +854,7 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -861,6 +874,7 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -880,6 +894,7 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -898,6 +913,7 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -916,6 +932,7 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -934,6 +951,7 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -952,6 +970,7 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -970,6 +989,7 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -988,6 +1008,7 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -2558,12 +2579,11 @@
       self->CheckSuspend();
       UPDATE_HANDLER_TABLE();
     }
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame, dex_pc,
                                                                   instrumentation);
     if (found_dex_pc == DexFile::kDexNoIndex) {
       // Structured locking is to be enforced for abnormal termination, too.
-      shadow_frame.GetLockCountData().CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);
+      DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame);
       return JValue(); /* Handled in caller. */
     } else {
       int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc);
@@ -2579,8 +2599,6 @@
 // a constant condition that would remove the "if" statement so the test is free.
 #define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                        \
   alt_op_##code: {                                                                            \
-    Runtime* const runtime = Runtime::Current();                                              \
-    const instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();  \
     if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                     \
       Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                 \
       instrumentation->DexPcMovedEvent(self, this_object, shadow_frame.GetMethod(), dex_pc);  \
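
Two things happen in the goto-table changes above: instrumentation, method, and jit are hoisted into locals computed once per method entry (replacing the per-handler Runtime::Current() lookups on the removed lines), and HOTNESS_UPDATE() charges one JIT sample per taken backward branch. The macro body as a plain function (sketch):

    // Sketch of HOTNESS_UPDATE(): one profiling sample per taken backward
    // branch, marked with_backedges so the JIT can treat loop hotness
    // differently from invoke hotness.
    static inline void UpdateBranchHotness(Thread* self, ArtMethod* method, jit::Jit* jit)
        SHARED_REQUIRES(Locks::mutator_lock_) {
      if (jit != nullptr) {
        jit->AddSamples(self, method, 1, /* with_backedges */ true);
      }
    }
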
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 0488dbf..18330ba 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -34,9 +34,9 @@
                                                                   instrumentation);             \
     if (found_dex_pc == DexFile::kDexNoIndex) {                                                 \
       /* Structured locking is to be enforced for abnormal termination, too. */                 \
-      shadow_frame.GetLockCountData().                                                          \
-          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);                        \
+      DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame);                        \
       if (interpret_one_instruction) {                                                          \
+        /* Signal mterp to return to caller */                                                  \
         shadow_frame.SetDexPC(DexFile::kDexNoIndex);                                            \
       }                                                                                         \
       return JValue(); /* Handled in caller. */                                                 \
@@ -56,8 +56,7 @@
   } while (false)
 
 #define HANDLE_MONITOR_CHECKS()                                                                   \
-  if (!shadow_frame.GetLockCountData().                                                           \
-          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self)) {                        \
+  if (!DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame)) {                       \
     HANDLE_PENDING_EXCEPTION();                                                                   \
   }
 
@@ -72,14 +71,26 @@
 
 #define BRANCH_INSTRUMENTATION(offset)                                                         \
   do {                                                                                         \
-    ArtMethod* method = shadow_frame.GetMethod();                                              \
-    instrumentation->Branch(self, method, dex_pc, offset);                                     \
+    if (UNLIKELY(instrumentation->HasBranchListeners())) {                                     \
+      instrumentation->Branch(self, method, dex_pc, offset);                                   \
+    }                                                                                          \
     JValue result;                                                                             \
     if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {          \
+      if (interpret_one_instruction) {                                                         \
+        /* OSR has completed execution of the method.  Signal mterp to return to caller */     \
+        shadow_frame.SetDexPC(DexFile::kDexNoIndex);                                           \
+      }                                                                                        \
       return result;                                                                           \
     }                                                                                          \
   } while (false)
 
+#define HOTNESS_UPDATE()                                                                       \
+  do {                                                                                         \
+    if (jit != nullptr) {                                                                      \
+      jit->AddSamples(self, method, 1, /*with_backedges*/ true);                               \
+    }                                                                                          \
+  } while (false)
+
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
   DCHECK(inst->IsExperimental());
   return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
@@ -101,6 +112,8 @@
   const uint16_t* const insns = code_item->insns_;
   const Instruction* inst = Instruction::At(insns + dex_pc);
   uint16_t inst_data;
+  ArtMethod* method = shadow_frame.GetMethod();
+  jit::Jit* jit = Runtime::Current()->GetJit();
 
   // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need
   // to keep this live for the scope of the entire function call.
@@ -205,6 +218,7 @@
                                            result);
         }
         if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
           shadow_frame.SetDexPC(DexFile::kDexNoIndex);
         }
         return result;
@@ -221,6 +235,7 @@
                                            result);
         }
         if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
           shadow_frame.SetDexPC(DexFile::kDexNoIndex);
         }
         return result;
@@ -238,6 +253,7 @@
                                            result);
         }
         if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
           shadow_frame.SetDexPC(DexFile::kDexNoIndex);
         }
         return result;
@@ -254,6 +270,7 @@
                                            result);
         }
         if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
           shadow_frame.SetDexPC(DexFile::kDexNoIndex);
         }
         return result;
@@ -292,6 +309,7 @@
                                            result);
         }
         if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
           shadow_frame.SetDexPC(DexFile::kDexNoIndex);
         }
         return result;
@@ -564,6 +582,7 @@
         int8_t offset = inst->VRegA_10t(inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -574,6 +593,7 @@
         int16_t offset = inst->VRegA_20t();
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -584,6 +604,7 @@
         int32_t offset = inst->VRegA_30t();
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -594,6 +615,7 @@
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -604,6 +626,7 @@
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -708,6 +731,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -724,6 +748,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -740,6 +765,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -756,6 +782,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -772,6 +799,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -788,6 +816,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -803,6 +832,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -818,6 +848,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -833,6 +864,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -848,6 +880,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -863,6 +896,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -878,6 +912,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
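
The repeated "Signal mterp to return to caller" comments in the switch interpreter mark a simple handshake: when the switch interpreter runs a single instruction on mterp's behalf, writing kDexNoIndex into the shadow frame's dex pc tells mterp that the method has finished, whether by normal return or by OSR taking over. As a helper (sketch):

    // Sketch of the mterp handshake: a dex pc of DexFile::kDexNoIndex in the
    // shadow frame means "done; return to the mterp caller".
    static inline void SignalMterpReturn(ShadowFrame& shadow_frame,
                                         bool interpret_one_instruction)
        SHARED_REQUIRES(Locks::mutator_lock_) {
      if (interpret_one_instruction) {
        shadow_frame.SetDexPC(DexFile::kDexNoIndex);
      }
    }
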
diff --git a/runtime/interpreter/mterp/arm/bincmp.S b/runtime/interpreter/mterp/arm/bincmp.S
index cfad714..8fad42f 100644
--- a/runtime/interpreter/mterp/arm/bincmp.S
+++ b/runtime/interpreter/mterp/arm/bincmp.S
@@ -1,7 +1,6 @@
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -9,23 +8,12 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    mov${revcmp} rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    b${condition} MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/entry.S b/runtime/interpreter/mterp/arm/entry.S
index 981c036..a6b131d 100644
--- a/runtime/interpreter/mterp/arm/entry.S
+++ b/runtime/interpreter/mterp/arm/entry.S
@@ -33,10 +33,8 @@
 
 ExecuteMterpImpl:
     .fnstart
-    .save {r4-r10,fp,lr}
-    stmfd   sp!, {r4-r10,fp,lr}         @ save 9 regs
-    .pad    #4
-    sub     sp, sp, #4                  @ align 64
+    .save {r3-r10,fp,lr}
+    stmfd   sp!, {r3-r10,fp,lr}         @ save 10 regs, (r3 just to align 64)
 
     /* Remember the return register */
     str     r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
@@ -57,6 +55,12 @@
     /* Starting ibase */
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
 
+    /* Set up for backwards branches & osr profiling */
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    bl      MterpSetUpHotnessCountdown
+    mov     rPROFILE, r0                @ Starting hotness countdown to rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST                          @ load rINST from rPC
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
index 3456a75..62e573a 100644
--- a/runtime/interpreter/mterp/arm/footer.S
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -114,21 +114,117 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * The next most common case is a not-taken branch with no full OSR check.
+ *
  */
-MterpCheckSuspendAndContinue:
-    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+MterpCommonTakenBranchNoFlags:
+    cmp     rINST, #0
+MterpCommonTakenBranch:
+    bgt     .L_forward_branch           @ don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmp     rPROFILE, #JIT_CHECK_OSR
+    beq     .L_osr_check
+    subgts  rPROFILE, #1
+    beq     .L_add_batch                @ counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    REFRESH_IBASE
+    add     r2, rINST, rINST            @ r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bne     1f
+    bne     .L_suspend_request_pending
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
-1:
+
+.L_suspend_request_pending:
     EXPORT_PC
     mov     r0, rSELF
     bl      MterpSuspendCheck           @ (self)
     cmp     r0, #0
     bne     MterpFallback
+    REFRESH_IBASE                       @ might have changed during suspend
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_no_count_backwards:
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    bne     .L_resume_backward_branch
+.L_osr_check:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry?
+    beq     .L_check_osr_forward
+.L_resume_forward_branch:
+    add     r2, rINST, rINST            @ r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_check_osr_forward:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    mov     r2, rSELF
+    bl      MterpAddHotnessBatch        @ (method, shadow_frame, self)
+    mov     rPROFILE, r0                @ restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when an OSR check request is active on the
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -176,9 +272,27 @@
     str     r1, [r2, #4]
     mov     r0, #1                                  @ signal return to caller.
 MterpDone:
-    add     sp, sp, #4                              @ un-align 64
-    ldmfd   sp!, {r4-r10,fp,pc}                     @ restore 9 regs and return
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     rPROFILE, #0
+    bgt     MterpProfileActive                      @ if > 0, we may have some counts to report.
+    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
 
+MterpProfileActive:
+    mov     rINST, r0                               @ stash return value
+    /* Report cached hotness counts */
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rSELF
+    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    @ (method, shadow_frame, self)
+    mov     r0, rINST                               @ restore return value
+    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
 
     .fnend
     .size   ExecuteMterpImpl, .-ExecuteMterpImpl
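
The footer's branch machinery keeps the hotness countdown in a register (rPROFILE) and only touches memory when the count reaches zero or the state is the JIT_CHECK_OSR sentinel. Its decision tree, restated in C++ (illustrative only; the real state lives in rPROFILE and the shadow frame's hotness-countdown field):

    #include <cstdint>

    constexpr int16_t kJitCheckOsr = -1;  // mirrors the JIT_CHECK_OSR sentinel

    // Sketch of MterpCommonTakenBranch: forward branches are never counted;
    // backward branches decrement the countdown; zero flushes a batch; the
    // -1 sentinel means "check for an OSR entry instead of counting".
    int16_t OnTakenBranch(int16_t countdown, int32_t offset) {
      if (offset > 0) {
        return countdown;            // .L_forward_branch: not counted, but may still OSR-check
      }
      if (countdown == kJitCheckOsr) {
        // .L_osr_check: call MterpMaybeDoOnStackReplacement(self, frame, offset).
        return countdown;
      }
      if (countdown > 0 && --countdown == 0) {
        // .L_add_batch: MterpAddHotnessBatch flushes the cached count and
        // returns the fresh countdown (or a negative disabled/OSR state).
      }
      return countdown;
    }
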
diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S
index 298af8a..039bcbe 100644
--- a/runtime/interpreter/mterp/arm/header.S
+++ b/runtime/interpreter/mterp/arm/header.S
@@ -72,7 +72,8 @@
   r6  rSELF     self (Thread) pointer
   r7  rINST     first 16-bit code unit of current instruction
   r8  rIBASE    interpreted instruction base pointer, used for computed goto
-  r11 rREFS	base of object references in shadow frame  (ideally, we'll get rid of this later).
+  r10 rPROFILE  branch profiling countdown
+  r11 rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
 
 Macros are provided for common operations.  Each macro MUST emit only
 one instruction to make instruction-counting easier.  They MUST NOT alter
@@ -90,12 +91,13 @@
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rPC     r4
-#define rFP     r5
-#define rSELF   r6
-#define rINST   r7
-#define rIBASE  r8
-#define rREFS   r11
+#define rPC      r4
+#define rFP      r5
+#define rSELF    r6
+#define rINST    r7
+#define rIBASE   r8
+#define rPROFILE r10
+#define rREFS    r11
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
@@ -109,7 +111,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 /*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
diff --git a/runtime/interpreter/mterp/arm/op_cmp_long.S b/runtime/interpreter/mterp/arm/op_cmp_long.S
index e57b19c..6626ff0 100644
--- a/runtime/interpreter/mterp/arm/op_cmp_long.S
+++ b/runtime/interpreter/mterp/arm/op_cmp_long.S
@@ -1,22 +1,6 @@
     /*
      * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
      * register based on the results of the comparison.
-     *
-     * We load the full values with LDM, but in practice many values could
-     * be resolved by only looking at the high word.  This could be made
-     * faster or slower by splitting the LDM into a pair of LDRs.
-     *
-     * If we just wanted to set condition flags, we could do this:
-     *  subs    ip, r0, r2
-     *  sbcs    ip, r1, r3
-     *  subeqs  ip, r0, r2
-     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
-     * integer value, which we can do with 2 conditional mov/mvn instructions
-     * (set 1, set -1; if they're equal we already have 0 in ip), giving
-     * us a constant 5-cycle path plus a branch at the end to the
-     * instruction epilogue code.  The multi-compare approach below needs
-     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
-     * in the worst case (the 64-bit values are equal).
      */
     /* cmp-long vAA, vBB, vCC */
     FETCH r0, 1                         @ r0<- CCBB
@@ -27,30 +11,13 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
-    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
-    blt     .L${opcode}_less            @ signed compare on high part
-    bgt     .L${opcode}_greater
-    subs    r1, r0, r2                  @ r1<- r0 - r2
-    bhi     .L${opcode}_greater         @ unsigned compare on low part
-    bne     .L${opcode}_less
-    b       .L${opcode}_finish          @ equal; r1 already holds 0
-%break
-
-.L${opcode}_less:
-    mvn     r1, #0                      @ r1<- -1
-    @ Want to cond code the next mov so we can avoid branch, but don't see it;
-    @ instead, we just replicate the tail end.
+    cmp     r0, r2
+    sbcs    ip, r1, r3                  @ Sets correct CCs for checking LT (but not EQ/NE)
+    mov     ip, #0
+    mvnlt   ip, #0                      @ -1
+    cmpeq   r0, r2                      @ For correct EQ/NE, we may need to repeat the first CMP
+    orrne   ip, #1
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    SET_VREG r1, r9                     @ vAA<- r1
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-
-.L${opcode}_greater:
-    mov     r1, #1                      @ r1<- 1
-    @ fall through to _finish
-
-.L${opcode}_finish:
-    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    SET_VREG r1, r9                     @ vAA<- r1
+    SET_VREG ip, r9                     @ vAA<- ip
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
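
The rewritten cmp-long drops the old multi-branch compare for a straight-line conditional-execution sequence: SBCS on the high words sets the flags needed for the signed less-than test, and a repeated low-word CMP recovers EQ/NE before ORR builds the +1 case. The dex semantics it implements are simply (sketch):

    #include <cstdint>

    // cmp-long vAA, vBB, vCC: signed 64-bit three-way compare into {-1, 0, 1}.
    int32_t CmpLong(int64_t vbb, int64_t vcc) {
      if (vbb == vcc) return 0;
      return vbb < vcc ? -1 : 1;
    }
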
diff --git a/runtime/interpreter/mterp/arm/op_goto.S b/runtime/interpreter/mterp/arm/op_goto.S
index 6861950..aa42dfd 100644
--- a/runtime/interpreter/mterp/arm/op_goto.S
+++ b/runtime/interpreter/mterp/arm/op_goto.S
@@ -5,32 +5,5 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_PROFILE_BRANCHES
-    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-       @ If backwards branch refresh rIBASE
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-       @ If backwards branch refresh rIBASE
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    sbfx    rINST, rINST, #8, #8           @ rINST<- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm/op_goto_16.S b/runtime/interpreter/mterp/arm/op_goto_16.S
index 91639ca..12a6bc0 100644
--- a/runtime/interpreter/mterp/arm/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm/op_goto_16.S
@@ -5,27 +5,5 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm/op_goto_32.S b/runtime/interpreter/mterp/arm/op_goto_32.S
index e730b52..7325a1c 100644
--- a/runtime/interpreter/mterp/arm/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm/op_goto_32.S
@@ -10,31 +10,7 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- aaaa (lo)
-    FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH r0, 1                         @ r0<- aaaa (lo)
-    FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    FETCH r3, 2                         @ r3<- AAAA (hi)
+    orrs    rINST, r0, r3, lsl #16      @ rINST<- AAAAaaaa
+    b       MterpCommonTakenBranch
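
Note the flag-setting orrs and the entry through MterpCommonTakenBranch rather than the NoFlags variant used by the other gotos: the instruction that assembles the 32-bit offset already establishes its sign in the condition codes, so the common code can consume the flags directly instead of re-testing the offset against zero, matching the branch-handler comment in the new footer ("condition bits <= set to establish sign of offset, use 'NoFlags' entry if not"). The switch handlers below do the same with a flag-setting movs, while the arm64 goto/32 keeps a plain orr and takes the NoFlags entry.
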
diff --git a/runtime/interpreter/mterp/arm/op_if_eq.S b/runtime/interpreter/mterp/arm/op_if_eq.S
index 5685686..b8b6a6e 100644
--- a/runtime/interpreter/mterp/arm/op_if_eq.S
+++ b/runtime/interpreter/mterp/arm/op_if_eq.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "revcmp":"ne" }
+%include "arm/bincmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/arm/op_if_eqz.S b/runtime/interpreter/mterp/arm/op_if_eqz.S
index 2a9c0f9..7012f61 100644
--- a/runtime/interpreter/mterp/arm/op_if_eqz.S
+++ b/runtime/interpreter/mterp/arm/op_if_eqz.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "revcmp":"ne" }
+%include "arm/zcmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ge.S b/runtime/interpreter/mterp/arm/op_if_ge.S
index 60a0307..eb29e63 100644
--- a/runtime/interpreter/mterp/arm/op_if_ge.S
+++ b/runtime/interpreter/mterp/arm/op_if_ge.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "revcmp":"lt" }
+%include "arm/bincmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gez.S b/runtime/interpreter/mterp/arm/op_if_gez.S
index 981cdec..d9da374 100644
--- a/runtime/interpreter/mterp/arm/op_if_gez.S
+++ b/runtime/interpreter/mterp/arm/op_if_gez.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "revcmp":"lt" }
+%include "arm/zcmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gt.S b/runtime/interpreter/mterp/arm/op_if_gt.S
index ca50cd7..a35eab8 100644
--- a/runtime/interpreter/mterp/arm/op_if_gt.S
+++ b/runtime/interpreter/mterp/arm/op_if_gt.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "revcmp":"le" }
+%include "arm/bincmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gtz.S b/runtime/interpreter/mterp/arm/op_if_gtz.S
index c621812..4ef4d8e 100644
--- a/runtime/interpreter/mterp/arm/op_if_gtz.S
+++ b/runtime/interpreter/mterp/arm/op_if_gtz.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "revcmp":"le" }
+%include "arm/zcmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_le.S b/runtime/interpreter/mterp/arm/op_if_le.S
index 7e060f2..c7c31bc 100644
--- a/runtime/interpreter/mterp/arm/op_if_le.S
+++ b/runtime/interpreter/mterp/arm/op_if_le.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "revcmp":"gt" }
+%include "arm/bincmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/arm/op_if_lez.S b/runtime/interpreter/mterp/arm/op_if_lez.S
index f92be23..9fbf6c9 100644
--- a/runtime/interpreter/mterp/arm/op_if_lez.S
+++ b/runtime/interpreter/mterp/arm/op_if_lez.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "revcmp":"gt" }
+%include "arm/zcmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/arm/op_if_lt.S b/runtime/interpreter/mterp/arm/op_if_lt.S
index 213344d..9469fbb 100644
--- a/runtime/interpreter/mterp/arm/op_if_lt.S
+++ b/runtime/interpreter/mterp/arm/op_if_lt.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "revcmp":"ge" }
+%include "arm/bincmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ltz.S b/runtime/interpreter/mterp/arm/op_if_ltz.S
index dfd4e44..a4fc1b8 100644
--- a/runtime/interpreter/mterp/arm/op_if_ltz.S
+++ b/runtime/interpreter/mterp/arm/op_if_ltz.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "revcmp":"ge" }
+%include "arm/zcmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ne.S b/runtime/interpreter/mterp/arm/op_if_ne.S
index 4a58b4a..c945331 100644
--- a/runtime/interpreter/mterp/arm/op_if_ne.S
+++ b/runtime/interpreter/mterp/arm/op_if_ne.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "revcmp":"eq" }
+%include "arm/bincmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/arm/op_if_nez.S b/runtime/interpreter/mterp/arm/op_if_nez.S
index d864ef4..2d81fda 100644
--- a/runtime/interpreter/mterp/arm/op_if_nez.S
+++ b/runtime/interpreter/mterp/arm/op_if_nez.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "revcmp":"eq" }
+%include "arm/zcmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/arm/op_mul_long.S b/runtime/interpreter/mterp/arm/op_mul_long.S
index 8f40f19..a13c803 100644
--- a/runtime/interpreter/mterp/arm/op_mul_long.S
+++ b/runtime/interpreter/mterp/arm/op_mul_long.S
@@ -24,13 +24,13 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
-    mul     ip, r2, r1                  @  ip<- ZxW
-    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
-    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mul     ip, r2, r1                  @ ip<- ZxW
+    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
+    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
     mov     r0, rINST, lsr #8           @ r0<- AA
-    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    add     r2, r2, lr                  @ r2<- lr + low(ZxW + (YxX))
     VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
     GOTO_OPCODE ip                      @ jump to next instruction
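
The new register assignment builds the product in r1/r2 with lr as scratch, avoiding r9/r10 (r10 is now reserved as rPROFILE; see the header changes further down). The arithmetic itself is the standard four-half decomposition with the hi*hi term dropped, since only 64 result bits are kept. A self-contained C++ sketch of the same computation, using the X/W/Z/Y naming from the comments:

#include <cstdint>

// mul/umull/mla/add above compute: ZxX + ((ZxW + YxX) << 32), where
// X/W are the low/high halves of vBB and Z/Y the low/high halves of vCC.
uint64_t MulLong(uint64_t vBB, uint64_t vCC) {
  uint32_t x = static_cast<uint32_t>(vBB);
  uint32_t w = static_cast<uint32_t>(vBB >> 32);
  uint32_t z = static_cast<uint32_t>(vCC);
  uint32_t y = static_cast<uint32_t>(vCC >> 32);
  uint64_t zx = static_cast<uint64_t>(z) * x;         // umull: r1/lr <- ZxX
  uint32_t cross = z * w + y * x;                     // mul + mla: ZxW + YxX (low 32 bits)
  return zx + (static_cast<uint64_t>(cross) << 32);   // add: fold cross terms into the high word
}
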
diff --git a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
index 7ef24c5..4c1f058 100644
--- a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
@@ -13,12 +13,12 @@
     VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
-    mul     ip, r2, r1                  @  ip<- ZxW
-    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
-    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mul     ip, r2, r1                  @ ip<- ZxW
+    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
+    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
     mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    add     r2, r2, lr                  @ r2<- lr + low(ZxW + (YxX))
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_packed_switch.S b/runtime/interpreter/mterp/arm/op_packed_switch.S
index 4c369cb..412c58f 100644
--- a/runtime/interpreter/mterp/arm/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm/op_packed_switch.S
@@ -9,7 +9,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -17,33 +16,5 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      $func                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH r0, 1                         @ r0<- bbbb (lo)
-    FETCH r1, 2                         @ r1<- BBBB (hi)
-    mov     r3, rINST, lsr #8           @ r3<- AA
-    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
-    GET_VREG r1, r3                     @ r1<- vAA
-    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
-    bl      $func                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    movs    rINST, r0                   @ rINST<- offset, set flags
+    b       MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/arm/zcmp.S b/runtime/interpreter/mterp/arm/zcmp.S
index 3d7dec0..5db8b6c 100644
--- a/runtime/interpreter/mterp/arm/zcmp.S
+++ b/runtime/interpreter/mterp/arm/zcmp.S
@@ -1,29 +1,17 @@
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    mov${revcmp} rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    b${condition} MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/bincmp.S b/runtime/interpreter/mterp/arm64/bincmp.S
index 2356ecb..8dd4fed 100644
--- a/runtime/interpreter/mterp/arm64/bincmp.S
+++ b/runtime/interpreter/mterp/arm64/bincmp.S
@@ -1,7 +1,6 @@
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -10,22 +9,11 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.${condition} MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S
index 23e656e..9fbbbd3 100644
--- a/runtime/interpreter/mterp/arm64/entry.S
+++ b/runtime/interpreter/mterp/arm64/entry.S
@@ -31,11 +31,12 @@
 
 ExecuteMterpImpl:
     .cfi_startproc
-    stp     xIBASE, xREFS, [sp, #-64]!
-    stp     xSELF, xINST, [sp, #16]
-    stp     xPC, xFP, [sp, #32]
-    stp     fp, lr, [sp, #48]
-    add     fp, sp, #48
+    stp     xPROFILE, x27, [sp, #-80]!
+    stp     xIBASE, xREFS, [sp, #16]
+    stp     xSELF, xINST, [sp, #32]
+    stp     xPC, xFP, [sp, #48]
+    stp     fp, lr, [sp, #64]
+    add     fp, sp, #64
 
     /* Remember the return register */
     str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
@@ -56,6 +57,12 @@
     /* Starting ibase */
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
 
+    /* Set up for backwards branches & osr profiling */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpSetUpHotnessCountdown
+    mov     wPROFILE, w0                // Starting hotness countdown to xPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST                          // load wINST from rPC
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index aae78de..2d3a11e 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -107,6 +107,107 @@
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
     /* NOTE: no fallthrough */
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    wINST          <= signed offset
+ *    wPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranchNoFlags:
+    cmp     wINST, #0
+    b.gt    .L_forward_branch           // don't add forward branches to hotness
+    tbnz    wPROFILE, #31, .L_no_count_backwards  // go if negative
+    subs    wPROFILE, wPROFILE, #1      // countdown
+    b.eq    .L_add_batch                // counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    REFRESH_IBASE
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L_suspend_request_pending
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback
+    REFRESH_IBASE                       // might have changed during suspend
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_no_count_backwards:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.ne    .L_resume_backward_branch
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_osr_forward
+.L_resume_forward_branch:
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_check_osr_forward:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    mov     x2, xSELF
+    bl      MterpAddHotnessBatch        // (method, shadow_frame, self)
+    mov     wPROFILE, w0                // restore new hotness countdown to wPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
 
 /*
  * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
@@ -175,10 +276,36 @@
 check2:
     mov     x0, #1                                  // signal return to caller.
 MterpDone:
-    ldp     fp, lr, [sp, #48]
-    ldp     xPC, xFP, [sp, #32]
-    ldp     xSELF, xINST, [sp, #16]
-    ldp     xIBASE, xREFS, [sp], #64
+/*
+ * At this point, we expect wPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending wPROFILE and the cached hotness counter).  wPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     wPROFILE, #0
+    bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
+    ret
+
+MterpProfileActive:
+    mov     xINST, x0                               // stash return value
+    /* Report cached hotness counts */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xSELF
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
+    mov     x0, xINST                               // restore return value
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
     ret
 
     .cfi_endproc
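
For orientation, here is the decision tree the new footer implements, rendered as a compact C++ sketch. Everything below is an illustrative stand-in for the assembly labels and helpers above, including the assumed JIT_CHECK_OSR value; the assembly is authoritative.

#include <cstdint>

// Illustrative rendering of the new branch handling; "profile" is wPROFILE
// and "offset" is wINST.
struct BranchProfiler {
  static constexpr int32_t kJitCheckForOsr = -1;  // assumed value of JIT_CHECK_OSR

  int32_t profile;

  int32_t AddHotnessBatch() { return 100; }       // stand-in for MterpAddHotnessBatch
  void MaybeDoOsr(int32_t /*offset*/) {}          // stand-in for MterpMaybeDoOnStackReplacement
  void CheckSuspend() {}                          // stand-in for MterpSuspendCheck

  void OnTakenBranch(int32_t offset) {
    if (offset > 0) {                             // .L_forward_branch: never counted
      if (profile == kJitCheckForOsr) MaybeDoOsr(offset);
      return;                                     // .L_resume_forward_branch
    }
    if (profile >= 0 && --profile == 0) {         // active countdown hit zero:
      profile = AddHotnessBatch();                // .L_add_batch, then re-check OSR below
    }
    if (profile == kJitCheckForOsr) {             // .L_no_count_backwards
      MaybeDoOsr(offset);
    }
    CheckSuspend();                               // .L_resume_backward_branch
  }

  // .L_check_not_taken_osr: conditional branches land here on the not-taken
  // path only when the countdown is parked in the OSR-check state; the fixed
  // offset 2 is the code-unit size of every Dalvik conditional branch.
  void OnNotTakenBranch() { MaybeDoOsr(/*offset=*/2); }
};
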
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 7101ba9..4257200 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -74,6 +74,7 @@
   x23  xINST     first 16-bit code unit of current instruction
   x24  xIBASE    interpreted instruction base pointer, used for computed goto
   x25  xREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  x26  wPROFILE  jit profile hotness countdown
   x16  ip        scratch reg
   x17  ip2       scratch reg (used by macros)
 
@@ -92,15 +93,17 @@
 
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
-#define xPC     x20
-#define xFP     x21
-#define xSELF   x22
-#define xINST   x23
-#define wINST   w23
-#define xIBASE  x24
-#define xREFS   x25
-#define ip      x16
-#define ip2     x17
+#define xPC      x20
+#define xFP      x21
+#define xSELF    x22
+#define xINST    x23
+#define wINST    w23
+#define xIBASE   x24
+#define xREFS    x25
+#define wPROFILE w26
+#define xPROFILE x26
+#define ip       x16
+#define ip2      x17
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs.  So,
@@ -114,7 +117,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 /*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
diff --git a/runtime/interpreter/mterp/arm64/op_goto.S b/runtime/interpreter/mterp/arm64/op_goto.S
index 7e2f6a9..6381e94 100644
--- a/runtime/interpreter/mterp/arm64/op_goto.S
+++ b/runtime/interpreter/mterp/arm64/op_goto.S
@@ -5,21 +5,5 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    /* tuning: use sbfx for 6t2+ targets */
-    lsl     w0, wINST, #16              // w0<- AAxx0000
-    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
-    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
-    FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
-       // If backwards branch refresh rIBASE
-    b.mi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    sbfx    wINST, wINST, #8, #8           // wINST<- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/op_goto_16.S b/runtime/interpreter/mterp/arm64/op_goto_16.S
index b2b9924..fb9a80a 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_16.S
@@ -6,17 +6,4 @@
      */
     /* goto/16 +AAAA */
     FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
-    b.mi    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from rINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/op_goto_32.S b/runtime/interpreter/mterp/arm64/op_goto_32.S
index b785857..b13cb41 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_32.S
@@ -13,17 +13,4 @@
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
     orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
-    b.le    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from xINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
index e8b4f04..1456f1a 100644
--- a/runtime/interpreter/mterp/arm64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -17,17 +17,4 @@
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      $func                       // w0<- code-unit branch offset
     sbfm    xINST, x0, 0, 31
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    b.le    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
index 3f1e1b1..b303e6a 100644
--- a/runtime/interpreter/mterp/arm64/zcmp.S
+++ b/runtime/interpreter/mterp/arm64/zcmp.S
@@ -1,29 +1,17 @@
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.${condition} MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 10b19c5..bd1af04 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -20,7 +20,6 @@
 #include "interpreter/interpreter_common.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mterp.h"
-#include "jit/jit.h"
 #include "debugger.h"
 
 namespace art {
@@ -432,7 +431,7 @@
 }
 
 extern "C" void MterpCheckBefore(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
   if (inst->Opcode(inst_data) == Instruction::MOVE_EXCEPTION) {
@@ -444,7 +443,7 @@
 }
 
 extern "C" void MterpLogDivideByZeroException(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -452,7 +451,7 @@
 }
 
 extern "C" void MterpLogArrayIndexException(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -460,7 +459,7 @@
 }
 
 extern "C" void MterpLogNegativeArraySizeException(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -468,7 +467,7 @@
 }
 
 extern "C" void MterpLogNoSuchMethodException(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -476,7 +475,7 @@
 }
 
 extern "C" void MterpLogExceptionThrownException(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -484,7 +483,7 @@
 }
 
 extern "C" void MterpLogNullObjectException(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -492,7 +491,7 @@
 }
 
 extern "C" void MterpLogFallback(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -501,7 +500,7 @@
 }
 
 extern "C" void MterpLogOSR(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -509,7 +508,7 @@
 }
 
 extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, uint32_t flags)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -521,7 +520,7 @@
 }
 
 extern "C" bool MterpSuspendCheck(Thread* self)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   self->AllowThreadSuspension();
   return MterpShouldSwitchInterpreters();
 }
@@ -617,7 +616,7 @@
 }
 
 extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr, int32_t index)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   if (UNLIKELY(arr == nullptr)) {
     ThrowNullPointerExceptionFromInterpreter();
     return nullptr;
@@ -631,7 +630,7 @@
 }
 
 extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t field_offset)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   if (UNLIKELY(obj == nullptr)) {
     ThrowNullPointerExceptionFromInterpreter();
     return nullptr;
@@ -639,13 +638,93 @@
   return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset));
 }
 
+/*
+ * Create a hotness_countdown based on the current method hotness_count and profiling
+ * mode.  In short, determine how many hotness events we hit before reporting back
+ * to the full instrumentation via MterpAddHotnessBatch.  Called once on entry to the method,
+ * and regenerated following batch updates.
+ */
+extern "C" int MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint16_t hotness_count = method->GetCounter();
+  int32_t countdown_value = jit::kJitHotnessDisabled;
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    int32_t warm_threshold = jit->WarmMethodThreshold();
+    int32_t hot_threshold = jit->HotMethodThreshold();
+    int32_t osr_threshold = jit->OSRMethodThreshold();
+    if (hotness_count < warm_threshold) {
+      countdown_value = warm_threshold - hotness_count;
+    } else if (hotness_count < hot_threshold) {
+      countdown_value = hot_threshold - hotness_count;
+    } else if (hotness_count < osr_threshold) {
+      countdown_value = osr_threshold - hotness_count;
+    } else {
+      countdown_value = jit::kJitCheckForOSR;
+    }
+    if (jit::Jit::ShouldUsePriorityThreadWeight()) {
+      int32_t priority_thread_weight = jit->PriorityThreadWeight();
+      countdown_value = std::min(countdown_value, countdown_value / priority_thread_weight);
+    }
+  }
+  /*
+   * The actual hotness threshold may exceed the range of our int16_t countdown value.  This is
+   * not a problem, though.  We can just break it down into smaller chunks.
+   */
+  countdown_value = std::min(countdown_value,
+                             static_cast<int32_t>(std::numeric_limits<int16_t>::max()));
+  shadow_frame->SetCachedHotnessCountdown(countdown_value);
+  shadow_frame->SetHotnessCountdown(countdown_value);
+  return countdown_value;
+}
+
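
A worked example of the laddering above, with hypothetical thresholds warm=500, hot=1000, osr=2000: a method whose counter sits at 650 gets a countdown of 1000 - 650 = 350, i.e. the interpreter may take 350 backward branches before the next report. The same logic as a self-contained sketch (the threshold values and the kJitCheckForOsr stand-in are assumptions):

#include <algorithm>
#include <cstdint>
#include <limits>

// Mirrors the threshold laddering in MterpSetUpHotnessCountdown.
constexpr int32_t kJitCheckForOsr = -1;  // stand-in for jit::kJitCheckForOSR

int32_t CountdownFor(uint16_t hotness, int32_t warm, int32_t hot, int32_t osr) {
  int32_t countdown = kJitCheckForOsr;            // past every threshold: OSR-check mode
  if (hotness < warm)     countdown = warm - hotness;
  else if (hotness < hot) countdown = hot - hotness;
  else if (hotness < osr) countdown = osr - hotness;
  // Large thresholds are delivered in int16_t-sized chunks, as the comment notes.
  return std::min(countdown,
                  static_cast<int32_t>(std::numeric_limits<int16_t>::max()));
}
// CountdownFor(650, 500, 1000, 2000) == 350.
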
+/*
+ * Report a batch of hotness events to the instrumentation and then return the new
+ * countdown value to the next time we should report.
+ */
+extern "C" int16_t MterpAddHotnessBatch(ArtMethod* method,
+                                        ShadowFrame* shadow_frame,
+                                        Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    int16_t count = shadow_frame->GetCachedHotnessCountdown() - shadow_frame->GetHotnessCountdown();
+    jit->AddSamples(self, method, count, /*with_backedges*/ true);
+  }
+  return MterpSetUpHotnessCountdown(method, shadow_frame);
+}
+
+// TUNING: Unused by arm/arm64/x86/x86_64.  Remove when mips/mips64 mterps support batch updates.
 extern "C" bool  MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = shadow_frame->GetMethod();
   JValue* result = shadow_frame->GetResultRegister();
   uint32_t dex_pc = shadow_frame->GetDexPC();
-  const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
-  instrumentation->Branch(self, method, dex_pc, offset);
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if ((jit != nullptr) && (offset <= 0)) {
+    jit->AddSamples(self, method, 1, /*with_backedges*/ true);
+  }
+  int16_t countdown_value = MterpSetUpHotnessCountdown(method, shadow_frame);
+  if (countdown_value == jit::kJitCheckForOSR) {
+    return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
+  } else {
+    return false;
+  }
+}
+
+extern "C" bool MterpMaybeDoOnStackReplacement(Thread* self,
+                                               ShadowFrame* shadow_frame,
+                                               int32_t offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtMethod* method = shadow_frame->GetMethod();
+  JValue* result = shadow_frame->GetResultRegister();
+  uint32_t dex_pc = shadow_frame->GetDexPC();
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (offset <= 0) {
+    // Keep updating hotness in case a compilation request was dropped.  Eventually it will retry.
+    jit->AddSamples(self, method, 1, /*with_backedges*/ true);
+  }
+  // Assumes caller has already determined that an OSR check is appropriate.
   return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
 }
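
Taken together, the three new entry points split the profiling work between assembly and C++: MterpSetUpHotnessCountdown runs once per method entry (see the entry.S changes) and converts the method's hotness counter plus the JIT thresholds into a small countdown the assembly can decrement locally; MterpAddHotnessBatch is called only when that countdown reaches zero, folding the consumed events back into the method's counter and handing out a fresh countdown; and MterpMaybeDoOnStackReplacement is reached only while the countdown is parked at the kJitCheckForOSR sentinel, so the common branch path stays call-free. MterpProfileBranch survives solely for the mips/mips64 mterps, per the TUNING note above.
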
 
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index 092474d..a38a87b 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -79,7 +79,8 @@
   r6  rSELF     self (Thread) pointer
   r7  rINST     first 16-bit code unit of current instruction
   r8  rIBASE    interpreted instruction base pointer, used for computed goto
-  r11 rREFS	base of object references in shadow frame  (ideally, we'll get rid of this later).
+  r10 rPROFILE  branch profiling countdown
+  r11 rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
 
 Macros are provided for common operations.  Each macro MUST emit only
 one instruction to make instruction-counting easier.  They MUST NOT alter
@@ -97,12 +98,13 @@
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rPC     r4
-#define rFP     r5
-#define rSELF   r6
-#define rINST   r7
-#define rIBASE  r8
-#define rREFS   r11
+#define rPC      r4
+#define rFP      r5
+#define rSELF    r6
+#define rINST    r7
+#define rIBASE   r8
+#define rPROFILE r10
+#define rREFS    r11
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
@@ -116,7 +118,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 /*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
@@ -329,10 +331,8 @@
 
 ExecuteMterpImpl:
     .fnstart
-    .save {r4-r10,fp,lr}
-    stmfd   sp!, {r4-r10,fp,lr}         @ save 9 regs
-    .pad    #4
-    sub     sp, sp, #4                  @ align 64
+    .save {r3-r10,fp,lr}
+    stmfd   sp!, {r3-r10,fp,lr}         @ save 10 regs, (r3 just to align 64)
 
     /* Remember the return register */
     str     r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
@@ -353,6 +353,12 @@
     /* Starting ibase */
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
 
+    /* Set up for backwards branches & osr profiling */
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    bl      MterpSetUpHotnessCountdown
+    mov     rPROFILE, r0                @ Starting hotness countdown to rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST                          @ load rINST from rPC
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1103,35 +1109,8 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_PROFILE_BRANCHES
-    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-       @ If backwards branch refresh rIBASE
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-       @ If backwards branch refresh rIBASE
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    sbfx    rINST, rINST, #8, #8           @ rINST<- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1144,30 +1123,8 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1185,34 +1142,10 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- aaaa (lo)
-    FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH r0, 1                         @ r0<- aaaa (lo)
-    FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    FETCH r3, 2                         @ r3<- AAAA (hi)
+    orrs    rINST, r0, r3, lsl #16      @ rINST<- AAAAaaaa
+    b       MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1228,7 +1161,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1236,36 +1168,8 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH r0, 1                         @ r0<- bbbb (lo)
-    FETCH r1, 2                         @ r1<- BBBB (hi)
-    mov     r3, rINST, lsr #8           @ r3<- AA
-    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
-    GET_VREG r1, r3                     @ r1<- vAA
-    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
-    bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    movs    rINST, r0                   @ rINST<- offset, set flags
+    b       MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1282,7 +1186,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1290,36 +1193,8 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH r0, 1                         @ r0<- bbbb (lo)
-    FETCH r1, 2                         @ r1<- BBBB (hi)
-    mov     r3, rINST, lsr #8           @ r3<- AA
-    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
-    GET_VREG r1, r3                     @ r1<- vAA
-    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
-    bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    movs    rINST, r0                   @ rINST<- offset, set flags
+    b       MterpCommonTakenBranch
 
 
 /* ------------------------------ */
@@ -1485,22 +1360,6 @@
     /*
      * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
      * register based on the results of the comparison.
-     *
-     * We load the full values with LDM, but in practice many values could
-     * be resolved by only looking at the high word.  This could be made
-     * faster or slower by splitting the LDM into a pair of LDRs.
-     *
-     * If we just wanted to set condition flags, we could do this:
-     *  subs    ip, r0, r2
-     *  sbcs    ip, r1, r3
-     *  subeqs  ip, r0, r2
-     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
-     * integer value, which we can do with 2 conditional mov/mvn instructions
-     * (set 1, set -1; if they're equal we already have 0 in ip), giving
-     * us a constant 5-cycle path plus a branch at the end to the
-     * instruction epilogue code.  The multi-compare approach below needs
-     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
-     * in the worst case (the 64-bit values are equal).
      */
     /* cmp-long vAA, vBB, vCC */
     FETCH r0, 1                         @ r0<- CCBB
@@ -1511,13 +1370,16 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
-    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
-    blt     .Lop_cmp_long_less            @ signed compare on high part
-    bgt     .Lop_cmp_long_greater
-    subs    r1, r0, r2                  @ r1<- r0 - r2
-    bhi     .Lop_cmp_long_greater         @ unsigned compare on low part
-    bne     .Lop_cmp_long_less
-    b       .Lop_cmp_long_finish          @ equal; r1 already holds 0
+    cmp     r0, r2
+    sbcs    ip, r1, r3                  @ Sets correct CCs for checking LT (but not EQ/NE)
+    mov     ip, #0
+    mvnlt   ip, #0                      @ -1
+    cmpeq   r0, r2                      @ For correct EQ/NE, we may need to repeat the first CMP
+    orrne   ip, #1
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG ip, r9                     @ vAA<- ip
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -1525,9 +1387,8 @@
 /* File: arm/op_if_eq.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1535,24 +1396,13 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    movne rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    beq MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1563,9 +1413,8 @@
 /* File: arm/op_if_ne.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1573,24 +1422,13 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    moveq rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    bne MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1601,9 +1439,8 @@
 /* File: arm/op_if_lt.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1611,24 +1448,13 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    movge rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    blt MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1639,9 +1465,8 @@
 /* File: arm/op_if_ge.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1649,24 +1474,13 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    movlt rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    bge MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1677,9 +1491,8 @@
 /* File: arm/op_if_gt.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1687,24 +1500,13 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    movle rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    bgt MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1715,9 +1517,8 @@
 /* File: arm/op_if_le.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1725,24 +1526,13 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    movgt rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    ble MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1753,32 +1543,20 @@
 /* File: arm/op_if_eqz.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    movne rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    beq MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
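
The zero-compare family below is the same shape with vAA tested against an immediate 0; each handler differs only in the condition selecting the taken branch. A sketch of that mapping, matching the beq/bne/blt/bge/bgt/ble choices in the handlers:

    #include <cstdint>

    enum class ZeroCmp { kEqz, kNez, kLtz, kGez, kGtz, kLez };

    // Condition evaluated against vAA for each if-*z opcode (sketch).
    bool Taken(ZeroCmp op, int32_t vAA) {
      switch (op) {
        case ZeroCmp::kEqz: return vAA == 0;  // beq
        case ZeroCmp::kNez: return vAA != 0;  // bne
        case ZeroCmp::kLtz: return vAA <  0;  // blt
        case ZeroCmp::kGez: return vAA >= 0;  // bge
        case ZeroCmp::kGtz: return vAA >  0;  // bgt
        case ZeroCmp::kLez: return vAA <= 0;  // ble
      }
      return false;  // unreachable
    }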
 
@@ -1789,32 +1567,20 @@
 /* File: arm/op_if_nez.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    moveq rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    bne MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1825,32 +1591,20 @@
 /* File: arm/op_if_ltz.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    movge rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    blt MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1861,32 +1615,20 @@
 /* File: arm/op_if_gez.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    movlt rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    bge MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1897,32 +1639,20 @@
 /* File: arm/op_if_gtz.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    movle rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    bgt MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1933,32 +1663,20 @@
 /* File: arm/op_if_lez.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    movgt rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    ble MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -4711,15 +4429,15 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
-    mul     ip, r2, r1                  @  ip<- ZxW
-    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
-    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mul     ip, r2, r1                  @ ip<- ZxW
+    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
+    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
     mov     r0, rINST, lsr #8           @ r0<- AA
-    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    add     r2, r2, lr                  @ r2<- lr + low(ZxW + (YxX))
     VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
     GOTO_OPCODE ip                      @ jump to next instruction
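
The mul-long change keeps the same schoolbook decomposition but moves the running product from r9/r10 into r1/lr, presumably (an assumption, not stated in the hunk) because the callee-save pair is now reserved for interpreter state such as the hotness countdown. What the mul/umull/mla/add sequence computes, as a C++ sketch:

    #include <cstdint>

    // Low 64 bits of a 64x64-bit multiply built from 32-bit halves,
    // mirroring the sequence above (illustrative, not ART code).
    uint64_t MulLongLow(uint64_t a, uint64_t b) {
      uint32_t a_lo = static_cast<uint32_t>(a), a_hi = static_cast<uint32_t>(a >> 32);
      uint32_t b_lo = static_cast<uint32_t>(b), b_hi = static_cast<uint32_t>(b >> 32);
      uint64_t low_prod = static_cast<uint64_t>(a_lo) * b_lo;  // umull -> r1 (lo) / lr (hi)
      uint32_t cross = a_lo * b_hi + a_hi * b_lo;              // mul + mla -> r2, mod 2^32
      uint32_t out_hi = static_cast<uint32_t>(low_prod >> 32) + cross;  // add r2, r2, lr
      return (static_cast<uint64_t>(out_hi) << 32) | static_cast<uint32_t>(low_prod);
    }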
 
 /* ------------------------------ */
@@ -5877,14 +5595,14 @@
     VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
-    mul     ip, r2, r1                  @  ip<- ZxW
-    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
-    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mul     ip, r2, r1                  @ ip<- ZxW
+    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
+    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
     mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    add     r2, r2, lr                  @ r2<- lr + low(ZxW + (YxX))
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
     GOTO_OPCODE ip                      @ jump to next instruction
 
 /* ------------------------------ */
@@ -7616,27 +7334,6 @@
     .balign 4
 artMterpAsmSisterStart:
 
-/* continuation for op_cmp_long */
-
-.Lop_cmp_long_less:
-    mvn     r1, #0                      @ r1<- -1
-    @ Want to cond code the next mov so we can avoid branch, but don't see it;
-    @ instead, we just replicate the tail end.
-    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    SET_VREG r1, r9                     @ vAA<- r1
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-
-.Lop_cmp_long_greater:
-    mov     r1, #1                      @ r1<- 1
-    @ fall through to _finish
-
-.Lop_cmp_long_finish:
-    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    SET_VREG r1, r9                     @ vAA<- r1
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-
 /* continuation for op_float_to_long */
 /*
  * Convert the float in r0 to a long in r0/r1.
@@ -12207,21 +11904,117 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
  */
-MterpCheckSuspendAndContinue:
-    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+MterpCommonTakenBranchNoFlags:
+    cmp     rINST, #0
+MterpCommonTakenBranch:
+    bgt     .L_forward_branch           @ don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmp     rPROFILE, #JIT_CHECK_OSR
+    beq     .L_osr_check
+    subgts  rPROFILE, #1
+    beq     .L_add_batch                @ counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    REFRESH_IBASE
+    add     r2, rINST, rINST            @ r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bne     1f
+    bne     .L_suspend_request_pending
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
-1:
+
+.L_suspend_request_pending:
     EXPORT_PC
     mov     r0, rSELF
     bl      MterpSuspendCheck           @ (self)
     cmp     r0, #0
     bne     MterpFallback
+    REFRESH_IBASE                       @ might have changed during suspend
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_no_count_backwards:
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    bne     .L_resume_backward_branch
+.L_osr_check:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry?
+    beq     .L_check_osr_forward
+.L_resume_forward_branch:
+    add     r2, rINST, rINST            @ r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_check_osr_forward:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    mov     r2, rSELF
+    bl      MterpAddHotnessBatch        @ (method, shadow_frame, self)
+    mov     rPROFILE, r0                @ restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
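
Putting the new handler together: taken forward branches skip hotness counting entirely, taken backward branches count down and may report a batch or attempt OSR, and the not-taken entry only runs when rPROFILE is already in OSR-check mode. A hedged C++ sketch of that flow; the stubs stand in for MterpMaybeDoOnStackReplacement, MterpAddHotnessBatch and MterpSuspendCheck, whose behavior is inferred from the call sites above rather than their definitions:

    #include <cstdint>

    constexpr int16_t kJitCheckOsr = -1;  // JIT_CHECK_OSR (asserted above)

    struct Mterp {
      int16_t profile = 100;                      // rPROFILE (sketch value)
      bool MaybeDoOsr(int32_t) { return false; }  // MterpMaybeDoOnStackReplacement stub
      int16_t AddHotnessBatch() { return 100; }   // MterpAddHotnessBatch stub
      bool SuspendPending() { return false; }     // THREAD_*_REQUEST flags
      void TakePcOffset(int32_t) {}               // FETCH_ADVANCE_INST_RB
    };

    // Mirrors MterpCommonTakenBranchNoFlags for a taken branch of `off` code units.
    void TakenBranch(Mterp& m, int32_t off) {
      if (off > 0) {                                                 // .L_forward_branch
        if (m.profile == kJitCheckOsr && m.MaybeDoOsr(off)) return;  // -> MterpOnStackReplacement
        m.TakePcOffset(off);
        return;
      }
      // Backward branch: count down unless hotness is disabled or in OSR mode.
      if (m.profile != kJitCheckOsr && m.profile > 0 && --m.profile == 0) {
        m.profile = m.AddHotnessBatch();                             // .L_add_batch
      }
      if (m.profile == kJitCheckOsr && m.MaybeDoOsr(off)) return;    // .L_osr_check
      m.TakePcOffset(off);                                           // .L_resume_backward_branch
      if (m.SuspendPending()) { /* MterpSuspendCheck(self) */ }
    }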
 
@@ -12269,9 +12062,27 @@
     str     r1, [r2, #4]
     mov     r0, #1                                  @ signal return to caller.
 MterpDone:
-    add     sp, sp, #4                              @ un-align 64
-    ldmfd   sp!, {r4-r10,fp,pc}                     @ restore 9 regs and return
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     rPROFILE, #0
+    bgt     MterpProfileActive                      @ if > 0, we may have some counts to report.
+    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
 
+MterpProfileActive:
+    mov     rINST, r0                               @ stash return value
+    /* Report cached hotness counts */
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rSELF
+    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    @ (method, shadow_frame, self)
+    mov     r0, rINST                               @ restore return value
+    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
 
     .fnend
     .size   ExecuteMterpImpl, .-ExecuteMterpImpl
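
The reworked epilogue adds one more duty: a still-positive rPROFILE means cached hotness decrements that were never reported, so they are written back to the shadow frame and flushed through MterpAddHotnessBatch before the frame is torn down, per the comment above. A small C++ sketch of that exit decision (the stub name mirrors the assembly call):

    #include <cstdint>

    int16_t MterpAddHotnessBatchStub() { return 0; }  // stand-in for MterpAddHotnessBatch

    // Sketch of MterpDone/MterpProfileActive: only positive countdowns are reported.
    int Done(int16_t rPROFILE, int result) {
      if (rPROFILE > 0) {
        MterpAddHotnessBatchStub();  // MterpProfileActive: flush pending counts
      }
      return result;  // rPROFILE <= 0: hotness disabled or OSR mode, nothing to do
    }
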
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index 6ae59d8..55797e6 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -81,6 +81,7 @@
   x23  xINST     first 16-bit code unit of current instruction
   x24  xIBASE    interpreted instruction base pointer, used for computed goto
   x25  xREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  x26  wPROFILE  jit profile hotness countdown
   x16  ip        scratch reg
   x17  ip2       scratch reg (used by macros)
 
@@ -99,15 +100,17 @@
 
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
-#define xPC     x20
-#define xFP     x21
-#define xSELF   x22
-#define xINST   x23
-#define wINST   w23
-#define xIBASE  x24
-#define xREFS   x25
-#define ip      x16
-#define ip2     x17
+#define xPC      x20
+#define xFP      x21
+#define xSELF    x22
+#define xINST    x23
+#define wINST    w23
+#define xIBASE   x24
+#define xREFS    x25
+#define wPROFILE w26
+#define xPROFILE x26
+#define ip       x16
+#define ip2      x17
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs.  So,
@@ -121,7 +124,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 /*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
@@ -323,11 +326,12 @@
 
 ExecuteMterpImpl:
     .cfi_startproc
-    stp     xIBASE, xREFS, [sp, #-64]!
-    stp     xSELF, xINST, [sp, #16]
-    stp     xPC, xFP, [sp, #32]
-    stp     fp, lr, [sp, #48]
-    add     fp, sp, #48
+    stp     xPROFILE, x27, [sp, #-80]!
+    stp     xIBASE, xREFS, [sp, #16]
+    stp     xSELF, xINST, [sp, #32]
+    stp     xPC, xFP, [sp, #48]
+    stp     fp, lr, [sp, #64]
+    add     fp, sp, #64
 
     /* Remember the return register */
     str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
@@ -348,6 +352,12 @@
     /* Starting ibase */
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
 
+    /* Set up for backwards branches & osr profiling */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpSetUpHotnessCountdown
+    mov     wPROFILE, w0                // Starting hotness countdown to xPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST                          // load wINST from rPC
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1081,24 +1091,8 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    /* tuning: use sbfx for 6t2+ targets */
-    lsl     w0, wINST, #16              // w0<- AAxx0000
-    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
-    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
-    FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
-       // If backwards branch refresh rIBASE
-    b.mi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    sbfx    wINST, wINST, #8, #8           // wINST<- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
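
The goto rewrite also collapses the lsl/asr pair into a single sbfx, which extracts bits 8..15 of the code unit and sign-extends them in one step. Equivalent C++ (sketch):

    #include <cstdint>

    // ssssssAA: signed branch offset in code units, from the high byte of wINST.
    int32_t GotoOffset(uint16_t inst) {
      return static_cast<int8_t>(inst >> 8);  // sbfx wINST, wINST, #8, #8
    }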
 
 /* ------------------------------ */
     .balign 128
@@ -1112,20 +1106,7 @@
      */
     /* goto/16 +AAAA */
     FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
-    b.mi    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from rINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1146,20 +1127,7 @@
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
     orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
-    b.le    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from xINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1183,20 +1151,7 @@
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
     sbfm    xINST, x0, 0, 31
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    b.le    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1221,20 +1176,7 @@
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
     sbfm    xINST, x0, 0, 31
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    b.le    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 
 /* ------------------------------ */
@@ -1365,9 +1307,8 @@
 /* File: arm64/op_if_eq.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1376,23 +1317,12 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, eq // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.eq MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1403,9 +1333,8 @@
 /* File: arm64/op_if_ne.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1414,23 +1343,12 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, ne // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.ne MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1441,9 +1359,8 @@
 /* File: arm64/op_if_lt.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1452,23 +1369,12 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, lt // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.lt MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1479,9 +1385,8 @@
 /* File: arm64/op_if_ge.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1490,23 +1395,12 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, ge // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.ge MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1517,9 +1411,8 @@
 /* File: arm64/op_if_gt.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1528,23 +1421,12 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, gt // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.gt MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1555,9 +1437,8 @@
 /* File: arm64/op_if_le.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1566,23 +1447,12 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, le // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.le MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1593,32 +1463,20 @@
 /* File: arm64/op_if_eqz.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, eq // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.eq MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1629,32 +1487,20 @@
 /* File: arm64/op_if_nez.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, ne // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.ne MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1665,32 +1511,20 @@
 /* File: arm64/op_if_ltz.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, lt // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.lt MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1701,32 +1535,20 @@
 /* File: arm64/op_if_gez.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, ge // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.ge    MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1737,32 +1559,20 @@
 /* File: arm64/op_if_gtz.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, gt // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.gt    MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1773,32 +1583,20 @@
 /* File: arm64/op_if_lez.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, le // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.le    MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -11596,6 +11394,107 @@
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
     /* NOTE: no fallthrough */
+/*
+ * Common handling for branches with support for JIT profiling.
+ * On entry:
+ *    wINST          <= signed offset
+ *    wPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling is active, do the hotness countdown and report if we hit zero.
+ *    If in OSR check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in OSR check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active JIT profiling,
+ * but no full OSR check and no pending suspend request.
+ * The next most common case is a not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranchNoFlags:
+    cmp     wINST, #0
+    b.gt    .L_forward_branch           // don't add forward branches to hotness
+    tbnz    wPROFILE, #31, .L_no_count_backwards  // go if negative
+    subs    wPROFILE, wPROFILE, #1      // countdown
+    b.eq    .L_add_batch                // counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    REFRESH_IBASE
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L_suspend_request_pending
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback
+    REFRESH_IBASE                       // might have changed during suspend
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_no_count_backwards:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.ne    .L_resume_backward_branch
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_osr_forward
+.L_resume_forward_branch:
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_check_osr_forward:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    mov     x2, xSELF
+    bl      MterpAddHotnessBatch        // (method, shadow_frame, self)
+    mov     wPROFILE, w0                // restore new hotness countdown to wPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when an OSR check request is
+ * active on the not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
 
 /*
  * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
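Reviewer note: the taken-branch logic added above is easier to audit in C. The sketch below is a minimal model of MterpCommonTakenBranchNoFlags under simplified, assumed helper signatures (the real MterpAddHotnessBatch and MterpMaybeDoOnStackReplacement take (method, shadow_frame, self)-style arguments, elided here); it is illustrative, not the runtime's actual code.

    #include <stdbool.h>
    #include <stdint.h>

    #define JIT_CHECK_OSR (-1)  /* sentinel: no counting, but OSR checks are requested */

    /* Simplified stand-ins for the runtime calls above (real signatures differ). */
    extern int32_t AddHotnessBatch(void);          /* reports counts, returns a fresh countdown */
    extern bool MaybeDoOnStackReplacement(int32_t offset);  /* true => enter compiled code */
    extern bool SuspendRequestPending(void);
    extern void SuspendCheck(void);

    /* One taken branch of `offset` code units; `profile` mirrors wPROFILE. */
    static void TakenBranch(int32_t offset, int32_t *profile) {
      if (offset > 0) {                        /* forward branches are never counted */
        if (*profile == JIT_CHECK_OSR) {
          MaybeDoOnStackReplacement(offset);
        }
        return;
      }
      if (*profile >= 0 && --*profile == 0) {  /* hotness countdown hit zero */
        *profile = AddHotnessBatch();          /* report the batch, reload the countdown */
      }
      if (*profile == JIT_CHECK_OSR) {         /* negative special state: OSR-check mode */
        if (MaybeDoOnStackReplacement(offset)) return;
      }
      if (SuspendRequestPending()) {           /* backward branches are suspend points */
        SuspendCheck();
      }
    }

The common fast path (profiling active, countdown not yet zero, no OSR mode, no suspend request) touches only the decrement and one flag test, which is what the hand-scheduled assembly optimizes for.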
@@ -11664,10 +11563,36 @@
 check2:
     mov     x0, #1                                  // signal return to caller.
 MterpDone:
-    ldp     fp, lr, [sp, #48]
-    ldp     xPC, xFP, [sp, #32]
-    ldp     xSELF, xINST, [sp, #16]
-    ldp     xIBASE, xREFS, [sp], #64
+/*
+ * At this point, we expect wPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness counts to register
+ * (the difference between the ending wPROFILE and the cached hotness counter).  wPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     wPROFILE, #0
+    b.gt    MterpProfileActive                      // if > 0, we may have some counts to report.
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
+    ret
+
+MterpProfileActive:
+    mov     xINST, x0                               // stash return value
+    /* Report cached hotness counts */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xSELF
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
+    mov     x0, xINST                               // restore return value
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
     ret
 
     .cfi_endproc
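Note on the epilogue above: the ldp offsets shift because the spill area grows from 64 to 80 bytes to hold the new xPROFILE/x27 callee-save pair. An illustrative C view of the layout inferred from that ldp sequence (names are the assembly nicknames, not real runtime fields):

    #include <stdint.h>

    /* Inferred arm64 spill-area layout after this change (offsets from sp).
     * The old frame was 64 bytes; the extra 16 bytes hold xPROFILE and x27. */
    struct Arm64MterpSpills {
      uint64_t xPROFILE, x27;    /* [sp, #0]  - new pair */
      uint64_t xIBASE,   xREFS;  /* [sp, #16] */
      uint64_t xSELF,    xINST;  /* [sp, #32] */
      uint64_t xPC,      xFP;    /* [sp, #48] */
      uint64_t fp,       lr;     /* [sp, #64] */
    };                           /* sizeof == 80, the post-increment in the final ldp */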
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index ebac5fc..f78e1bc 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -124,6 +124,21 @@
     .cfi_restore \_reg
 .endm
 
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
+
 /* Frame size must be 16-byte aligned.
  * Remember about 4 bytes for return address + 4 * 4 for spills
  */
@@ -155,43 +170,11 @@
 #define rINSTbl  %bl
 #define rIBASE   %edx
 #define rREFS    %ebp
+#define rPROFILE OFF_FP_COUNTDOWN_OFFSET(rFP)
 
-/*
- * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
- * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
- */
-#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
-#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
-#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
-#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
-#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
-#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
-#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
-#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
-
-#define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
 
 /*
- * Profile branch. rINST should contain the offset. %eax is scratch.
- */
-.macro MTERP_PROFILE_BRANCH
-#ifdef MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    movl    rSELF, %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %eax
-    movl    %eax, OUT_ARG1(%esp)
-    movl    rINST, OUT_ARG2(%esp)
-    call    SYMBOL(MterpProfileBranch)
-    testb   %al, %al
-    jnz     MterpOnStackReplacement
-    RESTORE_IBASE
-#endif
-.endm
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -399,6 +382,13 @@
     lea     (rPC, %eax, 2), rPC
     EXPORT_PC
 
+    /* Set up for backwards branches & OSR profiling */
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpSetUpHotnessCountdown)
+
     /* Starting ibase */
     REFRESH_IBASE
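The hunk above primes profiling once per interpreter activation. A hedged sketch of what MterpSetUpHotnessCountdown is expected to hand back, inferred only from its call sites (the x86-64 site later in this diff sign-extends %ax into rPROFILE); the body is an assumption, not the helper's real implementation:

    #include <stdint.h>

    /* Stand-in for the shadow frame's cached 16-bit hotness countdown. */
    struct ShadowFrame { int16_t hotness_countdown; };

    /* Assumed behavior: return the countdown the branch handlers should run
     * with, widened to 32 bits.  Negative values are special states (counting
     * disabled, or JIT_CHECK_OSR == -1 to request OSR checks).  On x86 there
     * is no free register, so rPROFILE stays a shadow-frame slot instead. */
    static int32_t SetUpHotnessCountdown(struct ShadowFrame *sf) {
      return (int32_t)sf->hotness_countdown;
    }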
 
@@ -1099,12 +1089,8 @@
  */
     /* goto +AA */
     movsbl  rINSTbl, rINST                  # rINST <- ssssssAA
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # rINST <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle      MterpCheckSuspendAndContinue   # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1118,12 +1104,8 @@
  */
     /* goto/16 +AAAA */
     movswl  2(rPC), rINST                   # rINST <- ssssAAAA
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # rINST <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1142,12 +1124,8 @@
  */
     /* goto/32 +AAAAAAAA */
     movl    2(rPC), rINST                   # rINST <- AAAAAAAA
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # rINST <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1169,14 +1147,10 @@
     movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
     movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
     call    SYMBOL(MterpDoPackedSwitch)
-    movl    %eax, rINST
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST
-    leal    (rPC, rINST), rPC
-    FETCH_INST
     REFRESH_IBASE
-    jle     MterpCheckSuspendAndContinue
-    GOTO_NEXT
+    testl   %eax, %eax
+    movl    %eax, rINST
+    jmp     MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1199,14 +1173,10 @@
     movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
     movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
     call    SYMBOL(MterpDoSparseSwitch)
-    movl    %eax, rINST
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST
-    leal    (rPC, rINST), rPC
-    FETCH_INST
     REFRESH_IBASE
-    jle     MterpCheckSuspendAndContinue
-    GOTO_NEXT
+    testl   %eax, %eax
+    movl    %eax, rINST
+    jmp     MterpCommonTakenBranch
 
 
 /* ------------------------------ */
@@ -1423,16 +1393,14 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
-    movl    $2, rINST
     jne   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1453,16 +1421,14 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
-    movl    $2, rINST
     je   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1483,16 +1449,14 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
-    movl    $2, rINST
     jge   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1513,16 +1477,14 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
-    movl    $2, rINST
     jl   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1543,16 +1505,14 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
-    movl    $2, rINST
     jle   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1573,16 +1533,14 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
-    movl    $2, rINST
     jg   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1599,16 +1557,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
-    movl    $2, rINST
     jne   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1625,16 +1581,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
-    movl    $2, rINST
     je   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1651,16 +1605,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
-    movl    $2, rINST
     jge   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1677,16 +1629,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
-    movl    $2, rINST
     jl   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1703,16 +1653,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
-    movl    $2, rINST
     jle   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1729,16 +1677,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
-    movl    $2, rINST
     jg   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -12936,20 +12882,121 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for JIT profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling is active, do the hotness countdown and report if we hit zero.
+ *    If in OSR check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in OSR check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active JIT profiling,
+ * but no full OSR check and no pending suspend request.
+ * The next most common case is a not-taken branch with no full OSR check.
+ *
  */
-MterpCheckSuspendAndContinue:
+MterpCommonTakenBranch:
+    jg      .L_forward_branch               # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmpw    $JIT_CHECK_OSR, rPROFILE
+    je      .L_osr_check
+    decw    rPROFILE
+    je      .L_add_batch                    # counted down to zero - report
+.L_resume_backward_branch:
     movl    rSELF, %eax
-    EXPORT_PC
     testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
-    jz      1f
-    movl    %eax, OUT_ARG0(%esp)
-    call    SYMBOL(MterpSuspendCheck)
+    leal    (rPC, rINST, 2), rPC
+    FETCH_INST
+    jnz     .L_suspend_request_pending
     REFRESH_IBASE
-1:
     GOTO_NEXT
 
+.L_suspend_request_pending:
+    EXPORT_PC
+    movl    %eax, OUT_ARG0(%esp)            # rSELF in eax
+    call    SYMBOL(MterpSuspendCheck)       # (self)
+    testb   %al, %al
+    jnz     MterpFallback
+    REFRESH_IBASE                           # might have changed during suspend
+    GOTO_NEXT
+
+.L_no_count_backwards:
+    cmpw    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    jne     .L_resume_backward_branch
+.L_osr_check:
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_backward_branch
+    jmp     MterpOnStackReplacement
+
+.L_forward_branch:
+    cmpw    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    je      .L_check_osr_forward
+.L_resume_forward_branch:
+    leal    (rPC, rINST, 2), rPC
+    FETCH_INST
+    GOTO_NEXT
+
+.L_check_osr_forward:
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    REFRESH_IBASE
+    jz      .L_resume_forward_branch
+    jmp     MterpOnStackReplacement
+
+.L_add_batch:
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    jmp     .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when an OSR check request is
+ * active on the not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    $2, OUT_ARG2(%esp)
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    REFRESH_IBASE
+    jnz     MterpOnStackReplacement
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
  */
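Worth spelling out: OSR can also trigger on the fall-through edge, whose Dalvik offset is always 2 code units. A rough C equivalent of .L_check_not_taken_osr, with the same simplified helper signature as before (an assumption, not the real API):

    #include <stdbool.h>
    #include <stdint.h>

    #define JIT_CHECK_OSR (-1)

    extern bool MaybeDoOnStackReplacement(int32_t offset);  /* simplified signature */

    /* Fall-through edge of an if-* handler; `profile` mirrors rPROFILE. */
    static void NotTakenBranch(int32_t profile) {
      if (profile == JIT_CHECK_OSR && MaybeDoOnStackReplacement(2)) {
        return;  /* transferred to compiled code */
      }
      /* Otherwise the handler does ADVANCE_PC_FETCH_AND_GOTO_NEXT 2. */
    }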
@@ -12994,7 +13041,29 @@
     movl    %ecx, 4(%edx)
     mov     $1, %eax
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness counts to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmpw    $0, rPROFILE
+    jle     MRestoreFrame                   # if <= 0, nothing to report; just restore and return.
+
+    movl    %eax, rINST                     # stash return value
+    /* Report cached hotness counts */
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movl    rINST, %eax                     # restore return value
+
     /* pop up frame */
+MRestoreFrame:
     addl    $FRAME_SIZE, %esp
     .cfi_adjust_cfa_offset -FRAME_SIZE
 
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index a1360e0..031cec8 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -67,7 +67,7 @@
 Some key interpreter variables will be assigned to registers.
 
   nick     reg   purpose
-  rSELF    rbp   pointer to ThreadSelf.
+  rPROFILE rbp   countdown register for JIT profiling
   rPC      r12   interpreted program counter, used for fetching instructions
   rFP      r13   interpreted frame pointer, used for accessing locals and args
   rINSTw   bx    first 16-bit code of current instruction
@@ -120,6 +120,21 @@
     .cfi_restore \_reg
 .endm
 
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
 /* Frame size must be 16-byte aligned.
  * Remember about 8 bytes for return address + 6 * 8 for spills.
  */
@@ -130,6 +145,8 @@
 #define IN_ARG2        %rdx
 #define IN_ARG1        %rsi
 #define IN_ARG0        %rdi
+/* Spill offsets relative to %rsp */
+#define SELF_SPILL     (FRAME_SIZE -  8)
 /* Out Args  */
 #define OUT_ARG3       %rcx
 #define OUT_ARG2       %rdx
@@ -144,7 +161,7 @@
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rSELF    %rbp
+#define rSELF    SELF_SPILL(%rsp)
 #define rPC      %r12
 #define rFP      %r13
 #define rINST    %ebx
@@ -154,40 +171,11 @@
 #define rINSTbl  %bl
 #define rIBASE   %r14
 #define rREFS    %r15
+#define rPROFILE %ebp
 
-/*
- * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
- * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
- */
-#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
-#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
-#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
-#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
-#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
-#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
-#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
-#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
-
-#define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
 
 /*
- * Profile branch. rINST should contain the offset. %eax is scratch.
- */
-.macro MTERP_PROFILE_BRANCH
-#ifdef MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    movq    rSELF, OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movl    rINST, OUT_32_ARG2
-    call    SYMBOL(MterpProfileBranch)
-    testb   %al, %al
-    jnz     MterpOnStackReplacement
-#endif
-.endm
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -211,7 +199,8 @@
  *
  */
 .macro REFRESH_IBASE
-    movq    THREAD_CURRENT_IBASE_OFFSET(rSELF), rIBASE
+    movq    rSELF, rIBASE
+    movq    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
 .endm
 
 /*
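This REFRESH_IBASE change is the visible cost of repurposing rbp: rSELF is now a stack spill slot, so each THREAD_* access first reloads the Thread pointer into a scratch register, as the movq rSELF, %rcx edits throughout the rest of this file show. In illustrative C (names assumed, not runtime types):

    #include <stdint.h>

    struct Thread { uint32_t flags; };          /* stand-in for the runtime Thread */

    /* Before: Thread* pinned in a callee-save register for the whole loop.
     * After:  that register holds the hotness countdown, and Thread* is
     *         reloaded from a fixed spill slot whenever a field is needed. */
    struct Spills { struct Thread *self; };     /* SELF_SPILL(%rsp) */

    static uint32_t ThreadFlags(const struct Spills *sp) {
      struct Thread *self = sp->self;           /* movq rSELF, %rcx */
      return self->flags;                       /* THREAD_FLAGS_OFFSET(%rcx) */
    }

The trade is one extra load on the (rare) thread-state accesses in exchange for keeping the per-branch hotness countdown in a register on the hot path.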
@@ -377,6 +366,12 @@
     movq    IN_ARG0, rSELF
     REFRESH_IBASE
 
+    /* Set up for backwards branches & OSR profiling */
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpSetUpHotnessCountdown)
+    movswl  %ax, rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST
     GOTO_NEXT
@@ -579,9 +574,10 @@
 .L_op_move_exception: /* 0x0d */
 /* File: x86_64/op_move_exception.S */
     /* move-exception vAA */
-    movl    THREAD_EXCEPTION_OFFSET(rSELF), %eax
+    movq    rSELF, %rcx
+    movl    THREAD_EXCEPTION_OFFSET(%rcx), %eax
     SET_VREG_OBJECT %eax, rINSTq            # fp[AA] <- exception object
-    movl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 /* ------------------------------ */
@@ -590,9 +586,9 @@
 /* File: x86_64/op_return_void.S */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     xorq    %rax, %rax
@@ -610,9 +606,9 @@
     /* op vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_VREG %eax, rINSTq                   # eax <- vAA
@@ -628,9 +624,9 @@
     /* return-wide vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_WIDE_VREG %rax, rINSTq              # eax <- v[AA]
@@ -649,9 +645,9 @@
     /* op vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_VREG %eax, rINSTq                   # eax <- vAA
@@ -854,7 +850,8 @@
     movq    rSELF, OUT_ARG3
     call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
     movsbl  %al, %eax
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     andb    $0xf, rINSTbl                  # rINSTbl <- A
     SET_VREG %eax, rINSTq
@@ -988,7 +985,8 @@
     GET_VREG %eax, rINSTq                   # eax<- vAA (exception object)
     testb   %al, %al
     jz      common_errNullObject
-    movq    %rax, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    movq    %rax, THREAD_EXCEPTION_OFFSET(%rcx)
     jmp     MterpException
 
 /* ------------------------------ */
@@ -1003,12 +1001,8 @@
  */
     /* goto +AA */
     movsbq  rINSTbl, rINSTq                 # rINSTq <- ssssssAA
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1022,12 +1016,8 @@
  */
     /* goto/16 +AAAA */
     movswq  2(rPC), rINSTq                  # rINSTq <- ssssAAAA
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1044,12 +1034,8 @@
  */
     /* goto/32 +AAAAAAAA */
     movslq  2(rPC), rINSTq                  # rINSTq <- AAAAAAAA
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1069,13 +1055,9 @@
     leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # OUT_ARG0 <- PC + BBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # OUT_32_ARG1 <- vAA
     call    SYMBOL(MterpDoPackedSwitch)
+    testl   %eax, %eax
     movslq  %eax, rINSTq
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue
-    GOTO_NEXT
+    jmp     MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1096,13 +1078,9 @@
     leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # OUT_ARG0 <- PC + BBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # OUT_32_ARG1 <- vAA
     call    SYMBOL(MterpDoSparseSwitch)
+    testl   %eax, %eax
     movslq  %eax, rINSTq
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue
-    GOTO_NEXT
+    jmp     MterpCommonTakenBranch
 
 
 /* ------------------------------ */
@@ -1309,16 +1287,14 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $2, rINST                      # assume not taken
     jne   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1339,16 +1315,14 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $2, rINST                      # assume not taken
     je   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1369,16 +1343,14 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $2, rINST                      # assume not taken
     jge   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1399,16 +1371,14 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $2, rINST                      # assume not taken
     jl   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1429,16 +1399,14 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $2, rINST                      # assume not taken
     jle   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1459,16 +1427,14 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $2, rINST                      # assume not taken
     jg   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1485,16 +1451,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $2, rINST                      # assume branch not taken
     jne   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1511,16 +1475,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $2, rINST                      # assume branch not taken
     je   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1537,16 +1499,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $2, rINST                      # assume branch not taken
     jge   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1563,16 +1523,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $2, rINST                      # assume branch not taken
     jl   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1589,16 +1547,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $2, rINST                      # assume branch not taken
     jle   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1615,16 +1571,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $2, rINST                      # assume branch not taken
     jg   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1767,7 +1721,8 @@
     GET_VREG OUT_32_ARG1, %rcx              # ecx <- vCC (requested index)
     EXPORT_PC
     call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     SET_VREG_OBJECT %eax, rINSTq
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2099,7 +2054,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGet32InstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2131,7 +2087,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGet64InstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2164,7 +2121,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetObjInstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 1
@@ -2197,7 +2155,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetBooleanInstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2230,7 +2189,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetByteInstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2263,7 +2223,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetCharInstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2296,7 +2257,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetShortInstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2489,7 +2451,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGet32StaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2519,7 +2482,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGet64StaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2550,7 +2514,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetObjStaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 1
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2581,7 +2546,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetBooleanStaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2612,7 +2578,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetByteStaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2643,7 +2610,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetCharStaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2674,7 +2642,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetShortStaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -3002,9 +2971,9 @@
     .balign 128
 .L_op_return_void_no_barrier: /* 0x73 */
 /* File: x86_64/op_return_void_no_barrier.S */
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     xorq    %rax, %rax
@@ -5712,7 +5681,8 @@
     movzwl  2(rPC), OUT_32_ARG1             # eax <- field byte offset
     EXPORT_PC
     callq   SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -11849,7 +11819,7 @@
 #if MTERP_LOGGING
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movl    THREAD_FLAGS_OFFSET(rSELF), OUT_32_ARG2
+    movl    THREAD_FLAGS_OFFSET(OUT_ARG0), OUT_32_ARG2
     call    SYMBOL(MterpLogSuspendFallback)
 #endif
     jmp     MterpCommonFallback
@@ -11860,7 +11830,8 @@
  * interpreter.
  */
 MterpPossibleException:
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jz      MterpFallback
     /* intentional fallthrough - handle pending exception. */
 
@@ -11891,19 +11862,114 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for JIT profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in OSR check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in OSR check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active JIT profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
  */
-MterpCheckSuspendAndContinue:
+MterpCommonTakenBranch:
+    jg      .L_forward_branch               # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_osr_check
+    decl    rPROFILE
+    je      .L_add_batch                    # counted down to zero - report
+.L_resume_backward_branch:
+    movq    rSELF, %rax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
     REFRESH_IBASE
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
+    leaq    (rPC, rINSTq, 2), rPC
+    FETCH_INST
+    jnz     .L_suspend_request_pending
+    GOTO_NEXT
+
+.L_suspend_request_pending:
     EXPORT_PC
     movq    rSELF, OUT_ARG0
-    call    SYMBOL(MterpSuspendCheck)
-1:
+    call    SYMBOL(MterpSuspendCheck)       # (self)
+    testb   %al, %al
+    jnz     MterpFallback
+    REFRESH_IBASE                           # might have changed during suspend
     GOTO_NEXT
 
+.L_no_count_backwards:
+    cmpl    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    jne     .L_resume_backward_branch
+.L_osr_check:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_backward_branch
+    jmp     MterpOnStackReplacement
+
+.L_forward_branch:
+    cmpl    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    je      .L_check_osr_forward
+.L_resume_forward_branch:
+    leaq    (rPC, rINSTq, 2), rPC
+    FETCH_INST
+    GOTO_NEXT
+
+.L_check_osr_forward:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_forward_branch
+    jmp     MterpOnStackReplacement
+
+.L_add_batch:
+    movl    rPROFILE, %eax
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movswl  %ax, rPROFILE
+    jmp     .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movl    $2, OUT_32_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jnz     MterpOnStackReplacement
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
  */
@@ -11943,7 +12009,28 @@
     movq    %rax, (%rdx)
     movl    $1, %eax
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    testl   rPROFILE, rPROFILE
+    jle     MRestoreFrame                   # if > 0, we may have some counts to report.
+
+    movl    %eax, rINST                     # stash return value
+    /* Report cached hotness counts */
+    movl    rPROFILE, %eax
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movl    rINST, %eax                     # restore return value
+
     /* pop up frame */
+MRestoreFrame:
     addq    $FRAME_SIZE, %rsp
     .cfi_adjust_cfa_offset -FRAME_SIZE
 
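The taken-branch path above packs the profiling decisions into a few flag-dependent jumps. As a reading aid, here is a hedged C++ sketch of the same control flow; the stub functions are placeholders for the MterpAddHotnessBatch, MterpMaybeDoOnStackReplacement and MterpSuspendCheck entry points named in the assembly, and kJitCheckOsr mirrors JIT_CHECK_OSR. This is an illustration, not the generated code.

    #include <cstdint>

    constexpr int32_t kJitCheckOsr = -1;  // mirrors JIT_CHECK_OSR, which must be -1

    // Placeholder hooks standing in for the runtime entry points.
    static int16_t AddHotnessBatch() { return 100; }              // MterpAddHotnessBatch
    static bool MaybeDoOsr(int32_t /*offset*/) { return false; }  // MterpMaybeDoOnStackReplacement
    static bool SuspendCheck() { return false; }                  // MterpSuspendCheck
    static bool SuspendOrCheckpointRequested() { return false; }  // THREAD_FLAGS_OFFSET test

    // offset: signed branch displacement; profile: cached hotness countdown (rPROFILE).
    // Returns true when the interpreter leaves the fast path (OSR entry or fallback).
    bool OnTakenBranch(int32_t offset, int32_t& profile) {
      if (offset > 0) {                                  // .L_forward_branch
        return profile == kJitCheckOsr && MaybeDoOsr(offset);
      }
      if (profile == kJitCheckOsr) {                     // .L_osr_check
        if (MaybeDoOsr(offset)) return true;
      } else if (--profile == 0) {                       // .L_add_batch
        profile = AddHotnessBatch();                     // report batch, reload countdown
        if (profile == kJitCheckOsr && MaybeDoOsr(offset)) return true;
      }
      if (SuspendOrCheckpointRequested() && SuspendCheck()) {
        return true;                                     // MterpFallback
      }
      return false;                                      // resume at the branch target
    }

Keeping the countdown cached (in a register on x86-64, in the shadow-frame slot on x86) and reporting it in batches is what removes the per-branch MterpProfileBranch call the old code paid.
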
diff --git a/runtime/interpreter/mterp/x86/bincmp.S b/runtime/interpreter/mterp/x86/bincmp.S
index c72a5cf..ee32278 100644
--- a/runtime/interpreter/mterp/x86/bincmp.S
+++ b/runtime/interpreter/mterp/x86/bincmp.S
@@ -11,13 +11,11 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $$4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
-    movl    $$2, rINST
     j${revcmp}   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
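Every conditional-branch template now has the same split: the taken side establishes the sign flags and tails into MterpCommonTakenBranch, while the not-taken side only pays a compare against JIT_CHECK_OSR. A hedged C++ rendering of that shape (IfLt is an illustrative name, not an ART symbol; the stubs stand in for the common path and the OSR hook):

    #include <cstdint>

    constexpr int32_t kJitCheckOsr = -1;                          // JIT_CHECK_OSR
    static bool MaybeDoOsr(int32_t /*offset*/) { return false; }  // placeholder hook
    static void OnTakenBranch(int32_t /*offset*/, int32_t& /*profile*/) {
      // Stands in for MterpCommonTakenBranch, sketched after the footer diff above.
    }

    void IfLt(int32_t va, int32_t vb, int16_t offset, int32_t& profile) {
      if (va < vb) {                      // j${revcmp} falls through: branch taken
        OnTakenBranch(offset, profile);   // testl set the flags; jmp MterpCommonTakenBranch
        return;
      }
      if (profile == kJitCheckOsr) {      // not taken: OSR-mode check only
        MaybeDoOsr(2);                    // .L_check_not_taken_osr: offset is always 2
      }
      // ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
    }
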
diff --git a/runtime/interpreter/mterp/x86/entry.S b/runtime/interpreter/mterp/x86/entry.S
index 785efdc..384dd9a 100644
--- a/runtime/interpreter/mterp/x86/entry.S
+++ b/runtime/interpreter/mterp/x86/entry.S
@@ -64,6 +64,13 @@
     lea     (rPC, %eax, 2), rPC
     EXPORT_PC
 
+    /* Set up for backwards branches & osr profiling */
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    call    SYMBOL(MterpSetUpHotnessCountdown)
+
     /* Starting ibase */
     REFRESH_IBASE
 
diff --git a/runtime/interpreter/mterp/x86/footer.S b/runtime/interpreter/mterp/x86/footer.S
index 3965ecd..e8c8ca8 100644
--- a/runtime/interpreter/mterp/x86/footer.S
+++ b/runtime/interpreter/mterp/x86/footer.S
@@ -131,20 +131,121 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for JIT profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    condition bits <= set to establish sign of offset
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in OSR check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in OSR check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active JIT profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
  */
-MterpCheckSuspendAndContinue:
+MterpCommonTakenBranch:
+    jg      .L_forward_branch               # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmpw    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_osr_check
+    decw    rPROFILE
+    je      .L_add_batch                    # counted down to zero - report
+.L_resume_backward_branch:
     movl    rSELF, %eax
-    EXPORT_PC
     testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
-    jz      1f
-    movl    %eax, OUT_ARG0(%esp)
-    call    SYMBOL(MterpSuspendCheck)
+    leal    (rPC, rINST, 2), rPC
+    FETCH_INST
+    jnz     .L_suspend_request_pending
     REFRESH_IBASE
-1:
     GOTO_NEXT
 
+.L_suspend_request_pending:
+    EXPORT_PC
+    movl    %eax, OUT_ARG0(%esp)            # rSELF in eax
+    call    SYMBOL(MterpSuspendCheck)       # (self)
+    testb   %al, %al
+    jnz     MterpFallback
+    REFRESH_IBASE                           # might have changed during suspend
+    GOTO_NEXT
+
+.L_no_count_backwards:
+    cmpw    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    jne     .L_resume_backward_branch
+.L_osr_check:
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_backward_branch
+    jmp     MterpOnStackReplacement
+
+.L_forward_branch:
+    cmpw    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    je      .L_check_osr_forward
+.L_resume_forward_branch:
+    leal    (rPC, rINST, 2), rPC
+    FETCH_INST
+    GOTO_NEXT
+
+.L_check_osr_forward:
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    REFRESH_IBASE
+    jz      .L_resume_forward_branch
+    jmp     MterpOnStackReplacement
+
+.L_add_batch:
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    jmp     .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when an OSR check request is
+ * active on the not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    $$2, OUT_ARG2(%esp)
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    REFRESH_IBASE
+    jnz     MterpOnStackReplacement
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
  */
@@ -189,7 +290,29 @@
     movl    %ecx, 4(%edx)
     mov     $$1, %eax
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmpw    $$0, rPROFILE
+    jle     MRestoreFrame                   # if > 0, we may have some counts to report.
+
+    movl    %eax, rINST                     # stash return value
+    /* Report cached hotness counts */
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
+    movl    %ecx, OUT_ARG1(%esp)
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG2(%esp)
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movl    rINST, %eax                     # restore return value
+
     /* pop up frame */
+MRestoreFrame:
     addl    $$FRAME_SIZE, %esp
     .cfi_adjust_cfa_offset -FRAME_SIZE
 
diff --git a/runtime/interpreter/mterp/x86/header.S b/runtime/interpreter/mterp/x86/header.S
index 5729b90..3a2dcb7 100644
--- a/runtime/interpreter/mterp/x86/header.S
+++ b/runtime/interpreter/mterp/x86/header.S
@@ -117,6 +117,21 @@
     .cfi_restore \_reg
 .endm
 
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
+
 /* Frame size must be 16-byte aligned.
  * Remember about 4 bytes for return address + 4 * 4 for spills
  */
@@ -148,43 +163,11 @@
 #define rINSTbl  %bl
 #define rIBASE   %edx
 #define rREFS    %ebp
+#define rPROFILE OFF_FP_COUNTDOWN_OFFSET(rFP)
 
-/*
- * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
- * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
- */
-#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
-#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
-#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
-#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
-#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
-#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
-#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
-#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
-
-#define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
 
 /*
- * Profile branch. rINST should contain the offset. %eax is scratch.
- */
-.macro MTERP_PROFILE_BRANCH
-#ifdef MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    movl    rSELF, %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %eax
-    movl    %eax, OUT_ARG1(%esp)
-    movl    rINST, OUT_ARG2(%esp)
-    call    SYMBOL(MterpProfileBranch)
-    testb   %al, %al
-    jnz     MterpOnStackReplacement
-    RESTORE_IBASE
-#endif
-.endm
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
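The OFF_FP block moves above the register definitions so that rPROFILE can be defined in terms of OFF_FP_COUNTDOWN_OFFSET. The rebasing trick itself: rFP points at the vregs array inside the ShadowFrame, so every other field sits at a negative offset from it. A hedged C++ model of that arithmetic (the struct layout is hypothetical; the real offsets come from the generated SHADOWFRAME_*_OFFSET constants):

    #include <cstddef>
    #include <cstdint>

    struct ShadowFrame {       // simplified, illustrative layout
      ShadowFrame* link_;
      void* method_;
      uint32_t dex_pc_;
      int16_t hotness_countdown_;
      uint32_t vregs_[1];      // rFP points here, not at the frame base
    };

    // OFF_FP(a) = a - SHADOWFRAME_VREGS_OFFSET: rebase a frame-relative byte
    // offset so it can be applied to the vregs pointer instead.
    constexpr ptrdiff_t OffFp(size_t field_offset) {
      return static_cast<ptrdiff_t>(field_offset) -
             static_cast<ptrdiff_t>(offsetof(ShadowFrame, vregs_));
    }

    int16_t* CountdownSlot(uint32_t* fp) {  // fp plays the role of rFP
      char* base = reinterpret_cast<char*>(fp);
      return reinterpret_cast<int16_t*>(
          base + OffFp(offsetof(ShadowFrame, hotness_countdown_)));
    }
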
diff --git a/runtime/interpreter/mterp/x86/op_goto.S b/runtime/interpreter/mterp/x86/op_goto.S
index 9a87361..1827d68 100644
--- a/runtime/interpreter/mterp/x86/op_goto.S
+++ b/runtime/interpreter/mterp/x86/op_goto.S
@@ -6,9 +6,5 @@
  */
     /* goto +AA */
     movsbl  rINSTbl, rINST                  # rINST <- ssssssAA
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # rINST <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle      MterpCheckSuspendAndContinue   # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86/op_goto_16.S b/runtime/interpreter/mterp/x86/op_goto_16.S
index a25c31b..ea5ea90 100644
--- a/runtime/interpreter/mterp/x86/op_goto_16.S
+++ b/runtime/interpreter/mterp/x86/op_goto_16.S
@@ -6,9 +6,5 @@
  */
     /* goto/16 +AAAA */
     movswl  2(rPC), rINST                   # rINST <- ssssAAAA
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # rINST <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86/op_goto_32.S b/runtime/interpreter/mterp/x86/op_goto_32.S
index 159128b..4becaf3 100644
--- a/runtime/interpreter/mterp/x86/op_goto_32.S
+++ b/runtime/interpreter/mterp/x86/op_goto_32.S
@@ -11,9 +11,5 @@
  */
     /* goto/32 +AAAAAAAA */
     movl    2(rPC), rINST                   # rINST <- AAAAAAAA
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # rINST <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86/op_packed_switch.S b/runtime/interpreter/mterp/x86/op_packed_switch.S
index e33cf75..fcb7509 100644
--- a/runtime/interpreter/mterp/x86/op_packed_switch.S
+++ b/runtime/interpreter/mterp/x86/op_packed_switch.S
@@ -15,11 +15,7 @@
     movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
     movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
     call    SYMBOL($func)
-    movl    %eax, rINST
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST
-    leal    (rPC, rINST), rPC
-    FETCH_INST
     REFRESH_IBASE
-    jle     MterpCheckSuspendAndContinue
-    GOTO_NEXT
+    testl   %eax, %eax                      # condition flags <- sign of branch offset
+    movl    %eax, rINST                     # movl leaves those flags intact
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86/zcmp.S b/runtime/interpreter/mterp/x86/zcmp.S
index 0f28d1a..c116159 100644
--- a/runtime/interpreter/mterp/x86/zcmp.S
+++ b/runtime/interpreter/mterp/x86/zcmp.S
@@ -7,13 +7,11 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $$0, VREG_ADDRESS(rINST)        # compare (vA, 0)
-    movl    $$2, rINST
     j${revcmp}   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
+    testl   rINST, rINST
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addl    rINST, rINST                    # eax <- AA * 2
-    leal    (rPC, rINST), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpw    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/bincmp.S b/runtime/interpreter/mterp/x86_64/bincmp.S
index a16050b..6601483 100644
--- a/runtime/interpreter/mterp/x86_64/bincmp.S
+++ b/runtime/interpreter/mterp/x86_64/bincmp.S
@@ -11,13 +11,11 @@
     andb    $$0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $$2, rINST                      # assume not taken
     j${revcmp}   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/entry.S b/runtime/interpreter/mterp/x86_64/entry.S
index 69b2371..d992956 100644
--- a/runtime/interpreter/mterp/x86_64/entry.S
+++ b/runtime/interpreter/mterp/x86_64/entry.S
@@ -65,6 +65,12 @@
     movq    IN_ARG0, rSELF
     REFRESH_IBASE
 
+    /* Set up for backwards branches & osr profiling */
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpSetUpHotnessCountdown)
+    movswl  %ax, rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST
     GOTO_NEXT
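The entry stub now primes the cached countdown once per interpreter activation. The semantics below are assumed from the call site alone (a 16-bit result, movswl-extended into rPROFILE); the stub body is a placeholder, not the real runtime function:

    #include <cstdint>

    struct ArtMethod;    // opaque for this sketch
    struct ShadowFrame;  // opaque for this sketch

    // Placeholder with the signature implied by the call: returns the method's
    // initial 16-bit hotness countdown.
    static int16_t MterpSetUpHotnessCountdown(ArtMethod*, ShadowFrame*) {
      return 0x100;  // arbitrary stand-in value
    }

    // movswl %ax, rPROFILE: sign-extend the 16-bit countdown and cache it in a
    // register for the rest of the activation.
    int32_t PrimeHotnessCountdown(ArtMethod* method, ShadowFrame* frame) {
      return static_cast<int32_t>(MterpSetUpHotnessCountdown(method, frame));
    }
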
diff --git a/runtime/interpreter/mterp/x86_64/footer.S b/runtime/interpreter/mterp/x86_64/footer.S
index 573256b..f78f163 100644
--- a/runtime/interpreter/mterp/x86_64/footer.S
+++ b/runtime/interpreter/mterp/x86_64/footer.S
@@ -71,7 +71,7 @@
 #if MTERP_LOGGING
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movl    THREAD_FLAGS_OFFSET(rSELF), OUT_32_ARG2
+    movl    THREAD_FLAGS_OFFSET(OUT_ARG0), OUT_32_ARG2
     call    SYMBOL(MterpLogSuspendFallback)
 #endif
     jmp     MterpCommonFallback
@@ -82,7 +82,8 @@
  * interpreter.
  */
 MterpPossibleException:
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     jz      MterpFallback
     /* intentional fallthrough - handle pending exception. */
 
@@ -113,19 +114,114 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for JIT profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in OSR check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in OSR check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active JIT profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
  */
-MterpCheckSuspendAndContinue:
+MterpCommonTakenBranch:
+    jg      .L_forward_branch               # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmpl    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_osr_check
+    decl    rPROFILE
+    je      .L_add_batch                    # counted down to zero - report
+.L_resume_backward_branch:
+    movq    rSELF, %rax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
     REFRESH_IBASE
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
+    leaq    (rPC, rINSTq, 2), rPC
+    FETCH_INST
+    jnz     .L_suspend_request_pending
+    GOTO_NEXT
+
+.L_suspend_request_pending:
     EXPORT_PC
     movq    rSELF, OUT_ARG0
-    call    SYMBOL(MterpSuspendCheck)
-1:
+    call    SYMBOL(MterpSuspendCheck)       # (self)
+    testb   %al, %al
+    jnz     MterpFallback
+    REFRESH_IBASE                           # might have changed during suspend
     GOTO_NEXT
 
+.L_no_count_backwards:
+    cmpl    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    jne     .L_resume_backward_branch
+.L_osr_check:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_backward_branch
+    jmp     MterpOnStackReplacement
+
+.L_forward_branch:
+    cmpl    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    je      .L_check_osr_forward
+.L_resume_forward_branch:
+    leaq    (rPC, rINSTq, 2), rPC
+    FETCH_INST
+    GOTO_NEXT
+
+.L_check_osr_forward:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_forward_branch
+    jmp     MterpOnStackReplacement
+
+.L_add_batch:
+    movl    rPROFILE, %eax
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movswl  %ax, rPROFILE
+    jmp     .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when an OSR check request is
+ * active on the not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movl    $$2, OUT_32_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jnz     MterpOnStackReplacement
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
  */
@@ -165,7 +261,28 @@
     movq    %rax, (%rdx)
     movl    $$1, %eax
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    testl   rPROFILE, rPROFILE
+    jle     MRestoreFrame                   # if > 0, we may have some counts to report.
+
+    movl    %eax, rINST                     # stash return value
+    /* Report cached hotness counts */
+    movl    rPROFILE, %eax
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movl    rINST, %eax                     # restore return value
+
     /* pop up frame */
+MRestoreFrame:
     addq    $$FRAME_SIZE, %rsp
     .cfi_adjust_cfa_offset -FRAME_SIZE
 
diff --git a/runtime/interpreter/mterp/x86_64/header.S b/runtime/interpreter/mterp/x86_64/header.S
index eb84ea1..7699fc4 100644
--- a/runtime/interpreter/mterp/x86_64/header.S
+++ b/runtime/interpreter/mterp/x86_64/header.S
@@ -60,7 +60,7 @@
 Some key interpreter variables will be assigned to registers.
 
   nick     reg   purpose
-  rSELF    rbp   pointer to ThreadSelf.
+  rPROFILE rbp   countdown register for jit profiling
   rPC      r12   interpreted program counter, used for fetching instructions
   rFP      r13   interpreted frame pointer, used for accessing locals and args
   rINSTw   bx    first 16-bit code of current instruction
@@ -113,6 +113,21 @@
     .cfi_restore \_reg
 .endm
 
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
 /* Frame size must be 16-byte aligned.
  * Remember about 8 bytes for return address + 6 * 8 for spills.
  */
@@ -123,6 +138,8 @@
 #define IN_ARG2        %rdx
 #define IN_ARG1        %rsi
 #define IN_ARG0        %rdi
+/* Spill offsets relative to %rsp */
+#define SELF_SPILL     (FRAME_SIZE -  8)
 /* Out Args  */
 #define OUT_ARG3       %rcx
 #define OUT_ARG2       %rdx
@@ -137,7 +154,7 @@
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rSELF    %rbp
+#define rSELF    SELF_SPILL(%rsp)
 #define rPC      %r12
 #define rFP      %r13
 #define rINST    %ebx
@@ -147,40 +164,11 @@
 #define rINSTbl  %bl
 #define rIBASE   %r14
 #define rREFS    %r15
+#define rPROFILE %ebp
 
-/*
- * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
- * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
- */
-#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
-#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
-#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
-#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
-#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
-#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
-#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
-#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
-
-#define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
 
 /*
- * Profile branch. rINST should contain the offset. %eax is scratch.
- */
-.macro MTERP_PROFILE_BRANCH
-#ifdef MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    movq    rSELF, OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movl    rINST, OUT_32_ARG2
-    call    SYMBOL(MterpProfileBranch)
-    testb   %al, %al
-    jnz     MterpOnStackReplacement
-#endif
-.endm
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -204,7 +192,8 @@
  *
  */
 .macro REFRESH_IBASE
-    movq    THREAD_CURRENT_IBASE_OFFSET(rSELF), rIBASE
+    movq    rSELF, rIBASE
+    movq    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
 .endm
 
 /*
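Repurposing %rbp as rPROFILE is what forces the rSELF rewrite running through this file: rSELF becomes a stack slot, and an x86 memory operand cannot indirect through another memory location, so every former THREAD_*_OFFSET(rSELF) access now reloads the pointer into a scratch register first (movq rSELF, %rcx; cmpq $0, THREAD_EXCEPTION_OFFSET(%rcx)). The same change at source level, as a small sketch (Thread is a stand-in, not art::Thread):

    struct Thread { void* exception; };

    bool PendingException(Thread* const* self_spill_slot) {
      Thread* self = *self_spill_slot;    // movq rSELF, %rcx (load from SELF_SPILL(%rsp))
      return self->exception != nullptr;  // cmpq $0, THREAD_EXCEPTION_OFFSET(%rcx)
    }
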
diff --git a/runtime/interpreter/mterp/x86_64/op_aget_object.S b/runtime/interpreter/mterp/x86_64/op_aget_object.S
index 8baedea..5f77a97 100644
--- a/runtime/interpreter/mterp/x86_64/op_aget_object.S
+++ b/runtime/interpreter/mterp/x86_64/op_aget_object.S
@@ -10,7 +10,8 @@
     GET_VREG OUT_32_ARG1, %rcx              # ecx <- vCC (requested index)
     EXPORT_PC
     call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     SET_VREG_OBJECT %eax, rINSTq
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_goto.S b/runtime/interpreter/mterp/x86_64/op_goto.S
index c4fc976..9749901 100644
--- a/runtime/interpreter/mterp/x86_64/op_goto.S
+++ b/runtime/interpreter/mterp/x86_64/op_goto.S
@@ -6,9 +6,5 @@
  */
     /* goto +AA */
     movsbq  rINSTbl, rINSTq                 # rINSTq <- ssssssAA
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86_64/op_goto_16.S b/runtime/interpreter/mterp/x86_64/op_goto_16.S
index 8cb9a5c..77688e0 100644
--- a/runtime/interpreter/mterp/x86_64/op_goto_16.S
+++ b/runtime/interpreter/mterp/x86_64/op_goto_16.S
@@ -6,9 +6,5 @@
  */
     /* goto/16 +AAAA */
     movswq  2(rPC), rINSTq                  # rINSTq <- ssssAAAA
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86_64/op_goto_32.S b/runtime/interpreter/mterp/x86_64/op_goto_32.S
index 4ecdacd..29d777b 100644
--- a/runtime/interpreter/mterp/x86_64/op_goto_32.S
+++ b/runtime/interpreter/mterp/x86_64/op_goto_32.S
@@ -9,9 +9,5 @@
  */
     /* goto/32 +AAAAAAAA */
     movslq  2(rPC), rINSTq                  # rINSTq <- AAAAAAAA
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86_64/op_iget.S b/runtime/interpreter/mterp/x86_64/op_iget.S
index a0d0faf..df43efe 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget.S
@@ -12,7 +12,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL($helper)
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $$0xf, rINSTbl                  # rINST <- A
     .if $is_object
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S b/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S
index 964d20a..176c954 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S
@@ -7,7 +7,8 @@
     movzwl  2(rPC), OUT_32_ARG1             # eax <- field byte offset
     EXPORT_PC
     callq   SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $$0xf, rINSTbl                  # rINST <- A
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
diff --git a/runtime/interpreter/mterp/x86_64/op_instance_of.S b/runtime/interpreter/mterp/x86_64/op_instance_of.S
index 6be37f9..4819833 100644
--- a/runtime/interpreter/mterp/x86_64/op_instance_of.S
+++ b/runtime/interpreter/mterp/x86_64/op_instance_of.S
@@ -14,7 +14,8 @@
     movq    rSELF, OUT_ARG3
     call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
     movsbl  %al, %eax
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     andb    $$0xf, rINSTbl                  # rINSTbl <- A
     SET_VREG %eax, rINSTq
diff --git a/runtime/interpreter/mterp/x86_64/op_move_exception.S b/runtime/interpreter/mterp/x86_64/op_move_exception.S
index d0a14fd..33db878 100644
--- a/runtime/interpreter/mterp/x86_64/op_move_exception.S
+++ b/runtime/interpreter/mterp/x86_64/op_move_exception.S
@@ -1,5 +1,6 @@
     /* move-exception vAA */
-    movl    THREAD_EXCEPTION_OFFSET(rSELF), %eax
+    movq    rSELF, %rcx
+    movl    THREAD_EXCEPTION_OFFSET(%rcx), %eax
     SET_VREG_OBJECT %eax, rINSTq            # fp[AA] <- exception object
-    movl    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movl    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_packed_switch.S b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
index cb0acb7..fdf5a50 100644
--- a/runtime/interpreter/mterp/x86_64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
@@ -13,10 +13,6 @@
     leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL($func)
+    testl   %eax, %eax                      # condition flags <- sign of offset; movslq preserves them
     movslq  %eax, rINSTq
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue
-    GOTO_NEXT
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86_64/op_return.S b/runtime/interpreter/mterp/x86_64/op_return.S
index 14f4f8a..07e0e53 100644
--- a/runtime/interpreter/mterp/x86_64/op_return.S
+++ b/runtime/interpreter/mterp/x86_64/op_return.S
@@ -6,9 +6,9 @@
     /* op vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_VREG %eax, rINSTq                   # eax <- vAA
diff --git a/runtime/interpreter/mterp/x86_64/op_return_void.S b/runtime/interpreter/mterp/x86_64/op_return_void.S
index 46a5753..6a12df3 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_void.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_void.S
@@ -1,8 +1,8 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     xorq    %rax, %rax
diff --git a/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
index 92e3506..822b2e8 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
@@ -1,6 +1,6 @@
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     xorq    %rax, %rax
diff --git a/runtime/interpreter/mterp/x86_64/op_return_wide.S b/runtime/interpreter/mterp/x86_64/op_return_wide.S
index f2d6e04..288eb96 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_wide.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_wide.S
@@ -4,9 +4,9 @@
     /* return-wide vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_WIDE_VREG %rax, rINSTq              # eax <- v[AA]
diff --git a/runtime/interpreter/mterp/x86_64/op_sget.S b/runtime/interpreter/mterp/x86_64/op_sget.S
index 38d9a5e..d39e6c4 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget.S
@@ -11,7 +11,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL($helper)
-    cmpl    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if $is_object
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
diff --git a/runtime/interpreter/mterp/x86_64/op_throw.S b/runtime/interpreter/mterp/x86_64/op_throw.S
index 22ed990..8095c25 100644
--- a/runtime/interpreter/mterp/x86_64/op_throw.S
+++ b/runtime/interpreter/mterp/x86_64/op_throw.S
@@ -6,5 +6,6 @@
     GET_VREG %eax, rINSTq                   # eax<- vAA (exception object)
     testb   %al, %al
     jz      common_errNullObject
-    movq    %rax, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    movq    %rax, THREAD_EXCEPTION_OFFSET(%rcx)
     jmp     MterpException
diff --git a/runtime/interpreter/mterp/x86_64/zcmp.S b/runtime/interpreter/mterp/x86_64/zcmp.S
index 0051407..fb8ae6a 100644
--- a/runtime/interpreter/mterp/x86_64/zcmp.S
+++ b/runtime/interpreter/mterp/x86_64/zcmp.S
@@ -7,13 +7,11 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $$0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $$2, rINST                      # assume branch not taken
     j${revcmp}   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index b21f1ec..1f473e4 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -16,7 +16,13 @@
 
 #include "unstarted_runtime.h"
 
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+
 #include <cmath>
+#include <limits>
+#include <locale>
 #include <unordered_map>
 
 #include "ScopedLocalRef.h"
@@ -38,6 +44,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
 #include "nth_caller_visitor.h"
+#include "reflection.h"
 #include "thread.h"
 #include "transaction.h"
 #include "well_known_classes.h"
@@ -66,6 +73,43 @@
   }
 }
 
+// Restricted support for character upper case / lower case. Only ASCII is supported, where
+// it's easy; the transaction is aborted otherwise.
+static void CharacterLowerUpper(Thread* self,
+                                ShadowFrame* shadow_frame,
+                                JValue* result,
+                                size_t arg_offset,
+                                bool to_lower_case) SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint32_t int_value = static_cast<uint32_t>(shadow_frame->GetVReg(arg_offset));
+
+  // Only ASCII (7-bit).
+  if (!isascii(int_value)) {
+    AbortTransactionOrFail(self,
+                           "Only support ASCII characters for toLowerCase/toUpperCase: %u",
+                           int_value);
+    return;
+  }
+
+  std::locale c_locale("C");
+  char char_value = static_cast<char>(int_value);
+
+  if (to_lower_case) {
+    result->SetI(std::tolower(char_value, c_locale));
+  } else {
+    result->SetI(std::toupper(char_value, c_locale));
+  }
+}
+
+void UnstartedRuntime::UnstartedCharacterToLowerCase(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  CharacterLowerUpper(self, shadow_frame, result, arg_offset, true);
+}
+
+void UnstartedRuntime::UnstartedCharacterToUpperCase(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  CharacterLowerUpper(self, shadow_frame, result, arg_offset, false);
+}
+
 // Helper function to deal with class loading in an unstarted runtime.
 static void UnstartedRuntimeFindClass(Thread* self, Handle<mirror::String> className,
                                       Handle<mirror::ClassLoader> class_loader, JValue* result,
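A stand-alone rendering of the CharacterLowerUpper policy above, for reference; AsciiCaseMap is a hypothetical helper, and it returns -1 where the runtime would instead abort the transaction:

    #include <cstdint>
    #include <locale>

    int AsciiCaseMap(uint32_t code_point, bool to_lower) {
      if (code_point > 0x7f) {   // only 7-bit ASCII, matching the isascii() guard
        return -1;               // caller must treat this as "unsupported"
      }
      std::locale c_locale("C");
      char c = static_cast<char>(code_point);
      return to_lower ? std::tolower(c, c_locale) : std::toupper(c, c_locale);
    }
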
@@ -282,6 +326,23 @@
   }
 }
 
+// Special managed code cut-out to allow constructor lookup in an unstarted runtime.
+void UnstartedRuntime::UnstartedClassGetDeclaredConstructor(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  mirror::Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
+  if (klass == nullptr) {
+    ThrowNullPointerExceptionForMethodAccess(shadow_frame->GetMethod(), InvokeType::kVirtual);
+    return;
+  }
+  mirror::ObjectArray<mirror::Class>* args =
+      shadow_frame->GetVRegReference(arg_offset + 1)->AsObjectArray<mirror::Class>();
+  if (Runtime::Current()->IsActiveTransaction()) {
+    result->SetL(mirror::Class::GetDeclaredConstructorInternal<true>(self, klass, args));
+  } else {
+    result->SetL(mirror::Class::GetDeclaredConstructorInternal<false>(self, klass, args));
+  }
+}
+
 void UnstartedRuntime::UnstartedClassGetEnclosingClass(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   StackHandleScope<1> hs(self);
@@ -292,6 +353,171 @@
   result->SetL(klass->GetDexFile().GetEnclosingClass(klass));
 }
 
+void UnstartedRuntime::UnstartedClassGetInnerClassFlags(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Class> klass(hs.NewHandle(
+      reinterpret_cast<mirror::Class*>(shadow_frame->GetVRegReference(arg_offset))));
+  const int32_t default_value = shadow_frame->GetVReg(arg_offset + 1);
+  result->SetI(mirror::Class::GetInnerClassFlags(klass, default_value));
+}
+
+static std::unique_ptr<MemMap> FindAndExtractEntry(const std::string& jar_file,
+                                                   const char* entry_name,
+                                                   size_t* size,
+                                                   std::string* error_msg) {
+  CHECK(size != nullptr);
+
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(jar_file.c_str(), error_msg));
+  if (zip_archive == nullptr) {
+    return nullptr;
+  }
+  std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(entry_name, error_msg));
+  if (zip_entry == nullptr) {
+    return nullptr;
+  }
+  std::unique_ptr<MemMap> tmp_map(
+      zip_entry->ExtractToMemMap(jar_file.c_str(), entry_name, error_msg));
+  if (tmp_map == nullptr) {
+    return nullptr;
+  }
+
+  // OK, from here everything seems fine.
+  *size = zip_entry->GetUncompressedLength();
+  return tmp_map;
+}
+
+static void GetResourceAsStream(Thread* self,
+                                ShadowFrame* shadow_frame,
+                                JValue* result,
+                                size_t arg_offset) SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* resource_obj = shadow_frame->GetVRegReference(arg_offset + 1);
+  if (resource_obj == nullptr) {
+    AbortTransactionOrFail(self, "null name for getResourceAsStream");
+    return;
+  }
+  CHECK(resource_obj->IsString());
+  mirror::String* resource_name = resource_obj->AsString();
+
+  std::string resource_name_str = resource_name->ToModifiedUtf8();
+  if (resource_name_str.empty() || resource_name_str == "/") {
+    AbortTransactionOrFail(self,
+                           "Unsupported name %s for getResourceAsStream",
+                           resource_name_str.c_str());
+    return;
+  }
+  const char* resource_cstr = resource_name_str.c_str();
+  if (resource_cstr[0] == '/') {
+    resource_cstr++;
+  }
+
+  Runtime* runtime = Runtime::Current();
+
+  std::vector<std::string> split;
+  Split(runtime->GetBootClassPathString(), ':', &split);
+  if (split.empty()) {
+    AbortTransactionOrFail(self,
+                           "Boot classpath not set or split error: %s",
+                           runtime->GetBootClassPathString().c_str());
+    return;
+  }
+
+  std::unique_ptr<MemMap> mem_map;
+  size_t map_size;
+  std::string last_error_msg;  // Only store the last message (we could concatenate).
+
+  for (const std::string& jar_file : split) {
+    mem_map = FindAndExtractEntry(jar_file, resource_cstr, &map_size, &last_error_msg);
+    if (mem_map != nullptr) {
+      break;
+    }
+  }
+
+  if (mem_map == nullptr) {
+    // Didn't find it. There's a good chance this will be the same at runtime, but still
+    // conservatively abort the transaction here.
+    AbortTransactionOrFail(self,
+                           "Could not find resource %s. Last error was %s.",
+                           resource_name_str.c_str(),
+                           last_error_msg.c_str());
+    return;
+  }
+
+  StackHandleScope<3> hs(self);
+
+  // Create byte array for content.
+  Handle<mirror::ByteArray> h_array(hs.NewHandle(mirror::ByteArray::Alloc(self, map_size)));
+  if (h_array.Get() == nullptr) {
+    AbortTransactionOrFail(self, "Could not find/create byte array class");
+    return;
+  }
+  // Copy in content.
+  memcpy(h_array->GetData(), mem_map->Begin(), map_size);
+  // Be proactive about releasing the memory: reset() unmaps it here, whereas
+  // release() would leak the mapping.
+  mem_map.reset();
+
+  // Create a ByteArrayInputStream.
+  Handle<mirror::Class> h_class(hs.NewHandle(
+      runtime->GetClassLinker()->FindClass(self,
+                                           "Ljava/io/ByteArrayInputStream;",
+                                           ScopedNullHandle<mirror::ClassLoader>())));
+  if (h_class.Get() == nullptr) {
+    AbortTransactionOrFail(self, "Could not find ByteArrayInputStream class");
+    return;
+  }
+  if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
+    AbortTransactionOrFail(self, "Could not initialize ByteArrayInputStream class");
+    return;
+  }
+
+  Handle<mirror::Object> h_obj(hs.NewHandle(h_class->AllocObject(self)));
+  if (h_obj.Get() == nullptr) {
+    AbortTransactionOrFail(self, "Could not allocate ByteArrayInputStream object");
+    return;
+  }
+
+  auto* cl = Runtime::Current()->GetClassLinker();
+  ArtMethod* constructor = h_class->FindDeclaredDirectMethod(
+      "<init>", "([B)V", cl->GetImagePointerSize());
+  if (constructor == nullptr) {
+    AbortTransactionOrFail(self, "Could not find ByteArrayInputStream constructor");
+    return;
+  }
+
+  uint32_t args[1];
+  args[0] = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(h_array.Get()));
+  EnterInterpreterFromInvoke(self, constructor, h_obj.Get(), args, nullptr);
+
+  if (self->IsExceptionPending()) {
+    AbortTransactionOrFail(self, "Could not run ByteArrayInputStream constructor");
+    return;
+  }
+
+  result->SetL(h_obj.Get());
+}
+
+void UnstartedRuntime::UnstartedClassLoaderGetResourceAsStream(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  {
+    mirror::Object* this_obj = shadow_frame->GetVRegReference(arg_offset);
+    CHECK(this_obj != nullptr);
+    CHECK(this_obj->IsClassLoader());
+
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> this_classloader_class(hs.NewHandle(this_obj->GetClass()));
+
+    if (self->DecodeJObject(WellKnownClasses::java_lang_BootClassLoader) !=
+            this_classloader_class.Get()) {
+      AbortTransactionOrFail(self,
+                             "Unsupported classloader type %s for getResourceAsStream",
+                             PrettyClass(this_classloader_class.Get()).c_str());
+      return;
+    }
+  }
+
+  GetResourceAsStream(self, shadow_frame, result, arg_offset);
+}
+
 void UnstartedRuntime::UnstartedVmClassLoaderFindLoadedClass(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   mirror::String* class_name = shadow_frame->GetVRegReference(arg_offset + 1)->AsString();
@@ -353,28 +579,35 @@
   jint src_pos = shadow_frame->GetVReg(arg_offset + 1);
   jint dst_pos = shadow_frame->GetVReg(arg_offset + 3);
   jint length = shadow_frame->GetVReg(arg_offset + 4);
-  mirror::Array* src_array = shadow_frame->GetVRegReference(arg_offset)->AsArray();
-  mirror::Array* dst_array = shadow_frame->GetVRegReference(arg_offset + 2)->AsArray();
 
-  // Null checking.
-  if (src_array == nullptr) {
+  mirror::Object* src_obj = shadow_frame->GetVRegReference(arg_offset);
+  mirror::Object* dst_obj = shadow_frame->GetVRegReference(arg_offset + 2);
+  // Null checking. For simplicity, abort transaction.
+  if (src_obj == nullptr) {
     AbortTransactionOrFail(self, "src is null in arraycopy.");
     return;
   }
-  if (dst_array == nullptr) {
+  if (dst_obj == nullptr) {
     AbortTransactionOrFail(self, "dst is null in arraycopy.");
     return;
   }
+  // Test for arrayness. Throw ArrayStoreException.
+  if (!src_obj->IsArrayInstance() || !dst_obj->IsArrayInstance()) {
+    self->ThrowNewException("Ljava/lang/ArrayStoreException;", "src or dst is not an array");
+    return;
+  }
 
-  // Bounds checking.
+  mirror::Array* src_array = src_obj->AsArray();
+  mirror::Array* dst_array = dst_obj->AsArray();
+
+  // Bounds checking. Throw IndexOutOfBoundsException.
   if (UNLIKELY(src_pos < 0) || UNLIKELY(dst_pos < 0) || UNLIKELY(length < 0) ||
       UNLIKELY(src_pos > src_array->GetLength() - length) ||
       UNLIKELY(dst_pos > dst_array->GetLength() - length)) {
-    self->ThrowNewExceptionF("Ljava/lang/ArrayIndexOutOfBoundsException;",
+    self->ThrowNewExceptionF("Ljava/lang/IndexOutOfBoundsException;",
                              "src.length=%d srcPos=%d dst.length=%d dstPos=%d length=%d",
                              src_array->GetLength(), src_pos, dst_array->GetLength(), dst_pos,
                              length);
-    AbortTransactionOrFail(self, "Index out of bounds.");
     return;
   }
 
@@ -393,19 +626,11 @@
       return;
     }
 
-    // For simplicity only do this if the component types are the same. Otherwise we have to copy
-    // even more code from the object-array functions.
-    if (src_type != trg_type) {
-      AbortTransactionOrFail(self, "Types not the same in arraycopy: %s vs %s",
-                             PrettyDescriptor(src_array->GetClass()->GetComponentType()).c_str(),
-                             PrettyDescriptor(dst_array->GetClass()->GetComponentType()).c_str());
-      return;
-    }
-
     mirror::ObjectArray<mirror::Object>* src = src_array->AsObjectArray<mirror::Object>();
     mirror::ObjectArray<mirror::Object>* dst = dst_array->AsObjectArray<mirror::Object>();
     if (src == dst) {
       // Can overlap, but not have type mismatches.
+      // We cannot use ObjectArray::MemMove here, as it doesn't support transactions.
       const bool copy_forward = (dst_pos < src_pos) || (dst_pos - src_pos >= length);
       if (copy_forward) {
         for (int32_t i = 0; i < length; ++i) {
@@ -417,11 +642,19 @@
         }
       }
     } else {
-      // Can't overlap. Would need type checks, but we abort above.
-      for (int32_t i = 0; i < length; ++i) {
-        dst->Set(dst_pos + i, src->Get(src_pos + i));
+      // We're being lazy here. Optimally this could be a memcpy (if component types are
+      // assignable), but the ObjectArray implementation doesn't support transactions. The
+      // checking version, however, does.
+      if (Runtime::Current()->IsActiveTransaction()) {
+        dst->AssignableCheckingMemcpy<true>(
+            dst_pos, src, src_pos, length, true /* throw_exception */);
+      } else {
+        dst->AssignableCheckingMemcpy<false>(
+            dst_pos, src, src_pos, length, true /* throw_exception */);
       }
     }
+  } else if (src_type->IsPrimitiveByte()) {
+    PrimitiveArrayCopy<uint8_t>(self, src_array, src_pos, dst_array, dst_pos, length);
   } else if (src_type->IsPrimitiveChar()) {
     PrimitiveArrayCopy<uint16_t>(self, src_array, src_pos, dst_array, dst_pos, length);
   } else if (src_type->IsPrimitiveInt()) {
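The copy_forward test above is the standard memmove direction rule: a forward copy is safe unless the destination window starts inside the source window. Stand-alone, over a plain int array rather than an ObjectArray (illustrative only):

    #include <cstdint>

    void CopyWithinArray(int32_t* a, int32_t src_pos, int32_t dst_pos, int32_t length) {
      const bool copy_forward = (dst_pos < src_pos) || (dst_pos - src_pos >= length);
      if (copy_forward) {
        for (int32_t i = 0; i < length; ++i) {        // dst precedes src, or windows are disjoint
          a[dst_pos + i] = a[src_pos + i];
        }
      } else {
        for (int32_t i = length - 1; i >= 0; --i) {   // overlapping with dst after src
          a[dst_pos + i] = a[src_pos + i];
        }
      }
    }
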
@@ -432,6 +665,12 @@
   }
 }
 
+void UnstartedRuntime::UnstartedSystemArraycopyByte(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  // Just forward.
+  UnstartedRuntime::UnstartedSystemArraycopy(self, shadow_frame, result, arg_offset);
+}
+
 void UnstartedRuntime::UnstartedSystemArraycopyChar(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   // Just forward.
@@ -444,46 +683,137 @@
   UnstartedRuntime::UnstartedSystemArraycopy(self, shadow_frame, result, arg_offset);
 }
 
+void UnstartedRuntime::UnstartedSystemGetSecurityManager(
+    Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame ATTRIBUTE_UNUSED,
+    JValue* result, size_t arg_offset ATTRIBUTE_UNUSED) {
+  result->SetL(nullptr);
+}
+
+static constexpr const char* kAndroidHardcodedSystemPropertiesFieldName = "STATIC_PROPERTIES";
+
+static void GetSystemProperty(Thread* self,
+                              ShadowFrame* shadow_frame,
+                              JValue* result,
+                              size_t arg_offset,
+                              bool is_default_version)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  StackHandleScope<4> hs(self);
+  Handle<mirror::String> h_key(
+      hs.NewHandle(reinterpret_cast<mirror::String*>(shadow_frame->GetVRegReference(arg_offset))));
+  if (h_key.Get() == nullptr) {
+    AbortTransactionOrFail(self, "getProperty key was null");
+    return;
+  }
+
+  // This is overall inefficient, but reflecting the values here is not great, either. So
+  // for simplicity, and with the assumption that the number of getProperty calls is not
+  // too great, just iterate each time.
+
+  // Get the storage class.
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  Handle<mirror::Class> h_props_class(hs.NewHandle(
+      class_linker->FindClass(self,
+                              "Ljava/lang/AndroidHardcodedSystemProperties;",
+                              ScopedNullHandle<mirror::ClassLoader>())));
+  if (h_props_class.Get() == nullptr) {
+    AbortTransactionOrFail(self, "Could not find AndroidHardcodedSystemProperties");
+    return;
+  }
+  if (!class_linker->EnsureInitialized(self, h_props_class, true, true)) {
+    AbortTransactionOrFail(self, "Could not initialize AndroidHardcodedSystemProperties");
+    return;
+  }
+
+  // Get the storage array.
+  ArtField* static_properties =
+      h_props_class->FindDeclaredStaticField(kAndroidHardcodedSystemPropertiesFieldName,
+                                             "[[Ljava/lang/String;");
+  if (static_properties == nullptr) {
+    AbortTransactionOrFail(self,
+                           "Could not find %s field",
+                           kAndroidHardcodedSystemPropertiesFieldName);
+    return;
+  }
+  Handle<mirror::ObjectArray<mirror::ObjectArray<mirror::String>>> h_2string_array(
+      hs.NewHandle(reinterpret_cast<mirror::ObjectArray<mirror::ObjectArray<mirror::String>>*>(
+          static_properties->GetObject(h_props_class.Get()))));
+  if (h_2string_array.Get() == nullptr) {
+    AbortTransactionOrFail(self, "Field %s is null", kAndroidHardcodedSystemPropertiesFieldName);
+    return;
+  }
+
+  // Iterate over it.
+  const int32_t prop_count = h_2string_array->GetLength();
+  // Use the third handle as mutable.
+  MutableHandle<mirror::ObjectArray<mirror::String>> h_string_array(
+      hs.NewHandle<mirror::ObjectArray<mirror::String>>(nullptr));
+  for (int32_t i = 0; i < prop_count; ++i) {
+    h_string_array.Assign(h_2string_array->Get(i));
+    if (h_string_array.Get() == nullptr ||
+        h_string_array->GetLength() != 2 ||
+        h_string_array->Get(0) == nullptr) {
+      AbortTransactionOrFail(self,
+                             "Unexpected content of %s",
+                             kAndroidHardcodedSystemPropertiesFieldName);
+      return;
+    }
+    if (h_key->Equals(h_string_array->Get(0))) {
+      // Found a value.
+      if (h_string_array->Get(1) == nullptr && is_default_version) {
+        // A null value delegates to the default map, which would resolve to the
+        // given default value. As there is no default map here, return the given
+        // default directly.
+        result->SetL(shadow_frame->GetVRegReference(arg_offset + 1));
+      } else {
+        result->SetL(h_string_array->Get(1));
+      }
+      return;
+    }
+  }
+
+  // Key is not supported.
+  AbortTransactionOrFail(self, "getProperty key %s not supported", h_key->ToModifiedUtf8().c_str());
+}
+
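
(The lookup above amounts to a linear scan of a key/value table; a minimal
sketch follows, with FindProperty and PropertyTable as hypothetical stand-ins
for AndroidHardcodedSystemProperties.STATIC_PROPERTIES.)

  #include <optional>
  #include <string>
  #include <utility>
  #include <vector>

  using PropertyTable = std::vector<std::pair<std::string, std::optional<std::string>>>;

  // An empty value in the table means "defer to the default map"; as no default
  // map exists in the unstarted runtime, the caller's default wins.
  std::optional<std::string> FindProperty(const PropertyTable& table,
                                          const std::string& key,
                                          const std::optional<std::string>& def,
                                          bool is_default_version) {
    for (const auto& entry : table) {
      if (entry.first == key) {
        if (!entry.second.has_value() && is_default_version) {
          return def;
        }
        return entry.second;
      }
    }
    return std::nullopt;  // Unsupported key; the real code aborts the transaction.
  }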
+void UnstartedRuntime::UnstartedSystemGetProperty(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  GetSystemProperty(self, shadow_frame, result, arg_offset, false);
+}
+
+void UnstartedRuntime::UnstartedSystemGetPropertyWithDefault(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  GetSystemProperty(self, shadow_frame, result, arg_offset, true);
+}
+
 void UnstartedRuntime::UnstartedThreadLocalGet(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset ATTRIBUTE_UNUSED) {
   std::string caller(PrettyMethod(shadow_frame->GetLink()->GetMethod()));
   bool ok = false;
-  if (caller == "java.lang.String java.lang.IntegralToString.convertInt"
-                "(java.lang.AbstractStringBuilder, int)") {
+  if (caller == "void java.lang.FloatingDecimal.developLongDigits(int, long, long)" ||
+      caller == "java.lang.String java.lang.FloatingDecimal.toJavaFormatString()") {
     // Allocate non-threadlocal buffer.
-    result->SetL(mirror::CharArray::Alloc(self, 11));
+    result->SetL(mirror::CharArray::Alloc(self, 26));
     ok = true;
-  } else if (caller == "java.lang.RealToString java.lang.RealToString.getInstance()") {
-    // Note: RealToString is implemented and used in a different fashion than IntegralToString.
-    // Conversion is done over an actual object of RealToString (the conversion method is an
-    // instance method). This means it is not as clear whether it is correct to return a new
-    // object each time. The caller needs to be inspected by hand to see whether it (incorrectly)
-    // stores the object for later use.
-    // See also b/19548084 for a possible rewrite and bringing it in line with IntegralToString.
-    if (shadow_frame->GetLink()->GetLink() != nullptr) {
-      std::string caller2(PrettyMethod(shadow_frame->GetLink()->GetLink()->GetMethod()));
-      if (caller2 == "java.lang.String java.lang.Double.toString(double)") {
-        // Allocate new object.
-        StackHandleScope<2> hs(self);
-        Handle<mirror::Class> h_real_to_string_class(hs.NewHandle(
-            shadow_frame->GetLink()->GetMethod()->GetDeclaringClass()));
-        Handle<mirror::Object> h_real_to_string_obj(hs.NewHandle(
-            h_real_to_string_class->AllocObject(self)));
-        if (h_real_to_string_obj.Get() != nullptr) {
-          auto* cl = Runtime::Current()->GetClassLinker();
-          ArtMethod* init_method = h_real_to_string_class->FindDirectMethod(
-              "<init>", "()V", cl->GetImagePointerSize());
-          if (init_method == nullptr) {
-            h_real_to_string_class->DumpClass(LOG(FATAL), mirror::Class::kDumpClassFullDetail);
-          } else {
-            JValue invoke_result;
-            EnterInterpreterFromInvoke(self, init_method, h_real_to_string_obj.Get(), nullptr,
-                                       nullptr);
-            if (!self->IsExceptionPending()) {
-              result->SetL(h_real_to_string_obj.Get());
-              ok = true;
-            }
-          }
+  } else if (caller ==
+             "java.lang.FloatingDecimal java.lang.FloatingDecimal.getThreadLocalInstance()") {
+    // Allocate new object.
+    StackHandleScope<2> hs(self);
+    Handle<mirror::Class> h_real_to_string_class(hs.NewHandle(
+        shadow_frame->GetLink()->GetMethod()->GetDeclaringClass()));
+    Handle<mirror::Object> h_real_to_string_obj(hs.NewHandle(
+        h_real_to_string_class->AllocObject(self)));
+    if (h_real_to_string_obj.Get() != nullptr) {
+      auto* cl = Runtime::Current()->GetClassLinker();
+      ArtMethod* init_method = h_real_to_string_class->FindDirectMethod(
+          "<init>", "()V", cl->GetImagePointerSize());
+      if (init_method == nullptr) {
+        h_real_to_string_class->DumpClass(LOG(FATAL), mirror::Class::kDumpClassFullDetail);
+      } else {
+        JValue invoke_result;
+        EnterInterpreterFromInvoke(self, init_method, h_real_to_string_obj.Get(), nullptr,
+                                   nullptr);
+        if (!self->IsExceptionPending()) {
+          result->SetL(h_real_to_string_obj.Get());
+          ok = true;
         }
       }
     }
@@ -496,17 +826,28 @@
 
 void UnstartedRuntime::UnstartedMathCeil(
     Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
-  double in = shadow_frame->GetVRegDouble(arg_offset);
-  double out;
-  // Special cases:
-  // 1) NaN, infinity, +0, -0 -> out := in. All are guaranteed by cmath.
-  // -1 < in < 0 -> out := -0.
-  if (-1.0 < in && in < 0) {
-    out = -0.0;
-  } else {
-    out = ceil(in);
-  }
-  result->SetD(out);
+  result->SetD(ceil(shadow_frame->GetVRegDouble(arg_offset)));
+}
+
+void UnstartedRuntime::UnstartedMathFloor(
+    Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  result->SetD(floor(shadow_frame->GetVRegDouble(arg_offset)));
+}
+
+void UnstartedRuntime::UnstartedMathSin(
+    Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  result->SetD(sin(shadow_frame->GetVRegDouble(arg_offset)));
+}
+
+void UnstartedRuntime::UnstartedMathCos(
+    Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  result->SetD(cos(shadow_frame->GetVRegDouble(arg_offset)));
+}
+
+void UnstartedRuntime::UnstartedMathPow(
+    Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  result->SetD(pow(shadow_frame->GetVRegDouble(arg_offset),
+                   shadow_frame->GetVRegDouble(arg_offset + 2)));
 }
 
 void UnstartedRuntime::UnstartedObjectHashCode(
@@ -685,98 +1026,6 @@
   UnstartedMemoryPeekArray(Primitive::kPrimByte, self, shadow_frame, arg_offset);
 }
 
-// This allows reading security.properties in an unstarted runtime and initialize Security.
-void UnstartedRuntime::UnstartedSecurityGetSecurityPropertiesReader(
-    Thread* self, ShadowFrame* shadow_frame ATTRIBUTE_UNUSED, JValue* result,
-    size_t arg_offset ATTRIBUTE_UNUSED) {
-  Runtime* runtime = Runtime::Current();
-  const std::vector<const DexFile*>& path = runtime->GetClassLinker()->GetBootClassPath();
-  std::string canonical(DexFile::GetDexCanonicalLocation(path[0]->GetLocation().c_str()));
-  mirror::String* string_data;
-
-  // Use a block to enclose the I/O and MemMap code so buffers are released early.
-  {
-    std::string error_msg;
-    std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(canonical.c_str(), &error_msg));
-    if (zip_archive.get() == nullptr) {
-      AbortTransactionOrFail(self, "Could not open zip file %s: %s", canonical.c_str(),
-                             error_msg.c_str());
-      return;
-    }
-    std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find("java/security/security.properties",
-                                                          &error_msg));
-    if (zip_entry.get() == nullptr) {
-      AbortTransactionOrFail(self, "Could not find security.properties file in %s: %s",
-                             canonical.c_str(), error_msg.c_str());
-      return;
-    }
-    std::unique_ptr<MemMap> map(zip_entry->ExtractToMemMap(canonical.c_str(),
-                                                           "java/security/security.properties",
-                                                           &error_msg));
-    if (map.get() == nullptr) {
-      AbortTransactionOrFail(self, "Could not unzip security.properties file in %s: %s",
-                             canonical.c_str(), error_msg.c_str());
-      return;
-    }
-
-    uint32_t length = zip_entry->GetUncompressedLength();
-    std::unique_ptr<char[]> tmp(new char[length + 1]);
-    memcpy(tmp.get(), map->Begin(), length);
-    tmp.get()[length] = 0;  // null terminator
-
-    string_data = mirror::String::AllocFromModifiedUtf8(self, tmp.get());
-  }
-
-  if (string_data == nullptr) {
-    AbortTransactionOrFail(self, "Could not create string from file content of %s",
-                           canonical.c_str());
-    return;
-  }
-
-  // Create a StringReader.
-  StackHandleScope<3> hs(self);
-  Handle<mirror::String> h_string(hs.NewHandle(string_data));
-
-  Handle<mirror::Class> h_class(hs.NewHandle(
-      runtime->GetClassLinker()->FindClass(self,
-                                           "Ljava/io/StringReader;",
-                                           ScopedNullHandle<mirror::ClassLoader>())));
-  if (h_class.Get() == nullptr) {
-    AbortTransactionOrFail(self, "Could not find StringReader class");
-    return;
-  }
-
-  if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
-    AbortTransactionOrFail(self, "Could not initialize StringReader class");
-    return;
-  }
-
-  Handle<mirror::Object> h_obj(hs.NewHandle(h_class->AllocObject(self)));
-  if (h_obj.Get() == nullptr) {
-    AbortTransactionOrFail(self, "Could not allocate StringReader object");
-    return;
-  }
-
-  auto* cl = Runtime::Current()->GetClassLinker();
-  ArtMethod* constructor = h_class->FindDeclaredDirectMethod(
-      "<init>", "(Ljava/lang/String;)V", cl->GetImagePointerSize());
-  if (constructor == nullptr) {
-    AbortTransactionOrFail(self, "Could not find StringReader constructor");
-    return;
-  }
-
-  uint32_t args[1];
-  args[0] = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(h_string.Get()));
-  EnterInterpreterFromInvoke(self, constructor, h_obj.Get(), args, nullptr);
-
-  if (self->IsExceptionPending()) {
-    AbortTransactionOrFail(self, "Could not run StringReader constructor");
-    return;
-  }
-
-  result->SetL(h_obj.Get());
-}
-
 // This allows reading the new style of String objects during compilation.
 void UnstartedRuntime::UnstartedStringGetCharsNoCheck(
     Thread* self, ShadowFrame* shadow_frame, JValue* result ATTRIBUTE_UNUSED, size_t arg_offset) {
@@ -1010,6 +1259,24 @@
   result->SetL(value);
 }
 
+void UnstartedRuntime::UnstartedUnsafePutObjectVolatile(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result ATTRIBUTE_UNUSED, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Argument 0 is the Unsafe instance, skip.
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset + 1);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot access null object, retry at runtime.");
+    return;
+  }
+  int64_t offset = shadow_frame->GetVRegLong(arg_offset + 2);
+  mirror::Object* value = shadow_frame->GetVRegReference(arg_offset + 4);
+  if (Runtime::Current()->IsActiveTransaction()) {
+    obj->SetFieldObjectVolatile<true>(MemberOffset(offset), value);
+  } else {
+    obj->SetFieldObjectVolatile<false>(MemberOffset(offset), value);
+  }
+}
+
 void UnstartedRuntime::UnstartedUnsafePutOrderedObject(
     Thread* self, ShadowFrame* shadow_frame, JValue* result ATTRIBUTE_UNUSED, size_t arg_offset)
     SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -1029,6 +1296,123 @@
   }
 }
 
+// A cutout for Integer.parseInt(String). Note: this code is conservative and
+// bails (aborting the transaction) rather than handle all corner cases.
+void UnstartedRuntime::UnstartedIntegerParseInt(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot parse null string, retry at runtime.");
+    return;
+  }
+
+  std::string string_value = obj->AsString()->ToModifiedUtf8();
+  if (string_value.empty()) {
+    AbortTransactionOrFail(self, "Cannot parse empty string, retry at runtime.");
+    return;
+  }
+
+  const char* c_str = string_value.c_str();
+  char* end;
+  // Note: errno is a macro expanding to a modifiable lvalue, so it could be
+  // cleared before the call; we don't, and worst case a stale ERANGE makes us
+  // incorrectly fail a transaction. Seems OK.
+  int64_t l = strtol(c_str, &end, 10);
+
+  if ((errno == ERANGE && l == LONG_MAX) || l > std::numeric_limits<int32_t>::max() ||
+      (errno == ERANGE && l == LONG_MIN) || l < std::numeric_limits<int32_t>::min()) {
+    AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+    return;
+  }
+  if (l == 0) {
+    // strtol returned 0; only accept it if the input was literally "0".
+    if (string_value != "0") {
+      AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+      return;
+    }
+  } else if (*end != '\0') {
+    AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+    return;
+  }
+
+  result->SetI(static_cast<int32_t>(l));
+}
+
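
(On the errno note above: errno is specified as a macro expanding to a
modifiable lvalue, so clearing it first is the conventional strtol protocol.
A minimal sketch, with ParseInt32 as a hypothetical helper.)

  #include <cerrno>
  #include <cstdint>
  #include <cstdlib>
  #include <limits>
  #include <optional>

  std::optional<int32_t> ParseInt32(const char* s) {
    errno = 0;  // Clear any stale value so an ERANGE below is from this call.
    char* end;
    const long l = std::strtol(s, &end, 10);
    if (errno == ERANGE ||
        l > std::numeric_limits<int32_t>::max() ||
        l < std::numeric_limits<int32_t>::min()) {
      return std::nullopt;  // Out of 32-bit range.
    }
    if (end == s || *end != '\0') {
      return std::nullopt;  // No digits consumed, or trailing garbage.
    }
    return static_cast<int32_t>(l);
  }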
+// A cutout for Long.parseLong.
+//
+// Note: for now use code equivalent to Integer.parseInt, as strtol cannot
+//       reliably cover the full 64-bit range on all platforms.
+void UnstartedRuntime::UnstartedLongParseLong(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot parse null string, retry at runtime.");
+    return;
+  }
+
+  std::string string_value = obj->AsString()->ToModifiedUtf8();
+  if (string_value.empty()) {
+    AbortTransactionOrFail(self, "Cannot parse empty string, retry at runtime.");
+    return;
+  }
+
+  const char* c_str = string_value.c_str();
+  char* end;
+  // Note: errno is a macro expanding to a modifiable lvalue, so it could be
+  // cleared before the call; we don't, and worst case a stale ERANGE makes us
+  // incorrectly fail a transaction. Seems OK.
+  int64_t l = strtol(c_str, &end, 10);
+
+  // Note: comparing against int32_t min/max is intentional here.
+  if ((errno == ERANGE && l == LONG_MAX) || l > std::numeric_limits<int32_t>::max() ||
+      (errno == ERANGE && l == LONG_MIN) || l < std::numeric_limits<int32_t>::min()) {
+    AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+    return;
+  }
+  if (l == 0) {
+    // strtol returned 0; only accept it if the input was literally "0".
+    if (string_value != "0") {
+      AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+      return;
+    }
+  } else if (*end != '\0') {
+    AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+    return;
+  }
+
+  result->SetJ(l);
+}
+
+void UnstartedRuntime::UnstartedMethodInvoke(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  JNIEnvExt* env = self->GetJniEnv();
+  ScopedObjectAccessUnchecked soa(self);
+
+  mirror::Object* java_method_obj = shadow_frame->GetVRegReference(arg_offset);
+  ScopedLocalRef<jobject> java_method(env,
+      java_method_obj == nullptr ? nullptr : env->AddLocalReference<jobject>(java_method_obj));
+
+  mirror::Object* java_receiver_obj = shadow_frame->GetVRegReference(arg_offset + 1);
+  ScopedLocalRef<jobject> java_receiver(env,
+      java_receiver_obj == nullptr ? nullptr : env->AddLocalReference<jobject>(java_receiver_obj));
+
+  mirror::Object* java_args_obj = shadow_frame->GetVRegReference(arg_offset + 2);
+  ScopedLocalRef<jobject> java_args(env,
+      java_args_obj == nullptr ? nullptr : env->AddLocalReference<jobject>(java_args_obj));
+
+  ScopedLocalRef<jobject> result_jobj(env,
+      InvokeMethod(soa, java_method.get(), java_receiver.get(), java_args.get()));
+
+  result->SetL(self->DecodeJObject(result_jobj.get()));
+
+  // Conservatively flag all exceptions as transaction aborts. This way we don't need to unwrap
+  // InvocationTargetExceptions.
+  if (self->IsExceptionPending()) {
+    AbortTransactionOrFail(self, "Failed Method.invoke");
+  }
+}
+
 
 void UnstartedRuntime::UnstartedJNIVMRuntimeNewUnpaddedArray(
     Thread* self, ArtMethod* method ATTRIBUTE_UNUSED, mirror::Object* receiver ATTRIBUTE_UNUSED,
@@ -1220,6 +1604,19 @@
   result->SetZ(success ? JNI_TRUE : JNI_FALSE);
 }
 
+void UnstartedRuntime::UnstartedJNIUnsafeGetIntVolatile(
+    Thread* self, ArtMethod* method ATTRIBUTE_UNUSED, mirror::Object* receiver ATTRIBUTE_UNUSED,
+    uint32_t* args, JValue* result) {
+  mirror::Object* obj = reinterpret_cast<mirror::Object*>(args[0]);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot access null object, retry at runtime.");
+    return;
+  }
+
+  jlong offset = (static_cast<uint64_t>(args[2]) << 32) | args[1];
+  result->SetI(obj->GetField32Volatile(MemberOffset(offset)));
+}
+
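
(The offset reconstruction above reflects how 64-bit JNI arguments are split
across two 32-bit slots, low word first; args[1]/args[2] are used because
args[0] holds the object. A self-contained sketch of the round trip, with
SplitToArgs/JoinFromArgs as illustrative names.)

  #include <cstdint>

  void SplitToArgs(int64_t value, uint32_t args[2]) {
    args[0] = static_cast<uint32_t>(value);                               // Low word.
    args[1] = static_cast<uint32_t>(static_cast<uint64_t>(value) >> 32);  // High word.
  }

  int64_t JoinFromArgs(const uint32_t args[2]) {
    // Same shape as the expression above: (high << 32) | low.
    return static_cast<int64_t>((static_cast<uint64_t>(args[1]) << 32) | args[0]);
  }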
 void UnstartedRuntime::UnstartedJNIUnsafePutObject(
     Thread* self ATTRIBUTE_UNUSED, ArtMethod* method ATTRIBUTE_UNUSED,
     mirror::Object* receiver ATTRIBUTE_UNUSED, uint32_t* args, JValue* result ATTRIBUTE_UNUSED) {
@@ -1299,7 +1696,13 @@
   if (iter != invoke_handlers_.end()) {
     // Clear out the result in case it's not zeroed out.
     result->SetL(0);
+
+    // Push the shadow frame so the failing method can be seen in abort dumps.
+    self->PushShadowFrame(shadow_frame);
+
     (*iter->second)(self, shadow_frame, result, arg_offset);
+
+    self->PopShadowFrame();
   } else {
     // Not special, continue with regular interpreter execution.
     ArtInterpreterToInterpreterBridge(self, code_item, shadow_frame, result);
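
(The explicit Push/Pop pair above could also be expressed as an RAII guard so
the pop cannot be skipped. A hedged sketch only: ScopedShadowFramePush is
hypothetical, while PushShadowFrame/PopShadowFrame are the calls used above.)

  class ScopedShadowFramePush {
   public:
    ScopedShadowFramePush(Thread* self, ShadowFrame* frame) : self_(self) {
      self_->PushShadowFrame(frame);
    }
    ~ScopedShadowFramePush() { self_->PopShadowFrame(); }
   private:
    Thread* const self_;
  };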
diff --git a/runtime/interpreter/unstarted_runtime_list.h b/runtime/interpreter/unstarted_runtime_list.h
index 29f2197..b8553b5 100644
--- a/runtime/interpreter/unstarted_runtime_list.h
+++ b/runtime/interpreter/unstarted_runtime_list.h
@@ -19,20 +19,33 @@
 
 // Methods that intercept available libcore implementations.
 #define UNSTARTED_RUNTIME_DIRECT_LIST(V)    \
+  V(CharacterToLowerCase, "int java.lang.Character.toLowerCase(int)") \
+  V(CharacterToUpperCase, "int java.lang.Character.toUpperCase(int)") \
   V(ClassForName, "java.lang.Class java.lang.Class.forName(java.lang.String)") \
   V(ClassForNameLong, "java.lang.Class java.lang.Class.forName(java.lang.String, boolean, java.lang.ClassLoader)") \
   V(ClassClassForName, "java.lang.Class java.lang.Class.classForName(java.lang.String, boolean, java.lang.ClassLoader)") \
   V(ClassNewInstance, "java.lang.Object java.lang.Class.newInstance()") \
   V(ClassGetDeclaredField, "java.lang.reflect.Field java.lang.Class.getDeclaredField(java.lang.String)") \
   V(ClassGetDeclaredMethod, "java.lang.reflect.Method java.lang.Class.getDeclaredMethodInternal(java.lang.String, java.lang.Class[])") \
+  V(ClassGetDeclaredConstructor, "java.lang.reflect.Constructor java.lang.Class.getDeclaredConstructorInternal(java.lang.Class[])") \
   V(ClassGetEnclosingClass, "java.lang.Class java.lang.Class.getEnclosingClass()") \
+  V(ClassGetInnerClassFlags, "int java.lang.Class.getInnerClassFlags(int)") \
+  V(ClassLoaderGetResourceAsStream, "java.io.InputStream java.lang.ClassLoader.getResourceAsStream(java.lang.String)") \
   V(VmClassLoaderFindLoadedClass, "java.lang.Class java.lang.VMClassLoader.findLoadedClass(java.lang.ClassLoader, java.lang.String)") \
   V(VoidLookupType, "java.lang.Class java.lang.Void.lookupType()") \
   V(SystemArraycopy, "void java.lang.System.arraycopy(java.lang.Object, int, java.lang.Object, int, int)") \
+  V(SystemArraycopyByte, "void java.lang.System.arraycopy(byte[], int, byte[], int, int)") \
   V(SystemArraycopyChar, "void java.lang.System.arraycopy(char[], int, char[], int, int)") \
   V(SystemArraycopyInt, "void java.lang.System.arraycopy(int[], int, int[], int, int)") \
+  V(SystemGetSecurityManager, "java.lang.SecurityManager java.lang.System.getSecurityManager()") \
+  V(SystemGetProperty, "java.lang.String java.lang.System.getProperty(java.lang.String)") \
+  V(SystemGetPropertyWithDefault, "java.lang.String java.lang.System.getProperty(java.lang.String, java.lang.String)") \
   V(ThreadLocalGet, "java.lang.Object java.lang.ThreadLocal.get()") \
   V(MathCeil, "double java.lang.Math.ceil(double)") \
+  V(MathFloor, "double java.lang.Math.floor(double)") \
+  V(MathSin, "double java.lang.Math.sin(double)") \
+  V(MathCos, "double java.lang.Math.cos(double)") \
+  V(MathPow, "double java.lang.Math.pow(double, double)") \
   V(ObjectHashCode, "int java.lang.Object.hashCode()") \
   V(DoubleDoubleToRawLongBits, "long java.lang.Double.doubleToRawLongBits(double)") \
   V(DexCacheGetDexNative, "com.android.dex.Dex java.lang.DexCache.getDexNative()") \
@@ -41,9 +54,9 @@
   V(MemoryPeekInt, "int libcore.io.Memory.peekIntNative(long)") \
   V(MemoryPeekLong, "long libcore.io.Memory.peekLongNative(long)") \
   V(MemoryPeekByteArray, "void libcore.io.Memory.peekByteArray(long, byte[], int, int)") \
+  V(MethodInvoke, "java.lang.Object java.lang.reflect.Method.invoke(java.lang.Object, java.lang.Object[])") \
   V(ReferenceGetReferent, "java.lang.Object java.lang.ref.Reference.getReferent()") \
   V(RuntimeAvailableProcessors, "int java.lang.Runtime.availableProcessors()") \
-  V(SecurityGetSecurityPropertiesReader, "java.io.Reader java.security.Security.getSecurityPropertiesReader()") \
   V(StringGetCharsNoCheck, "void java.lang.String.getCharsNoCheck(int, int, char[], int)") \
   V(StringCharAt, "char java.lang.String.charAt(int)") \
   V(StringSetCharAt, "void java.lang.String.setCharAt(int, char)") \
@@ -54,7 +67,10 @@
   V(UnsafeCompareAndSwapLong, "boolean sun.misc.Unsafe.compareAndSwapLong(java.lang.Object, long, long, long)") \
   V(UnsafeCompareAndSwapObject, "boolean sun.misc.Unsafe.compareAndSwapObject(java.lang.Object, long, java.lang.Object, java.lang.Object)") \
   V(UnsafeGetObjectVolatile, "java.lang.Object sun.misc.Unsafe.getObjectVolatile(java.lang.Object, long)") \
-  V(UnsafePutOrderedObject, "void sun.misc.Unsafe.putOrderedObject(java.lang.Object, long, java.lang.Object)")
+  V(UnsafePutObjectVolatile, "void sun.misc.Unsafe.putObjectVolatile(java.lang.Object, long, java.lang.Object)") \
+  V(UnsafePutOrderedObject, "void sun.misc.Unsafe.putOrderedObject(java.lang.Object, long, java.lang.Object)") \
+  V(IntegerParseInt, "int java.lang.Integer.parseInt(java.lang.String)") \
+  V(LongParseLong, "long java.lang.Long.parseLong(java.lang.String)")
 
 // Methods that are native.
 #define UNSTARTED_RUNTIME_JNI_LIST(V)           \
@@ -79,6 +95,7 @@
   V(SystemIdentityHashCode, "int java.lang.System.identityHashCode(java.lang.Object)") \
   V(ByteOrderIsLittleEndian, "boolean java.nio.ByteOrder.isLittleEndian()") \
   V(UnsafeCompareAndSwapInt, "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") \
+  V(UnsafeGetIntVolatile, "int sun.misc.Unsafe.getIntVolatile(java.lang.Object, long)") \
   V(UnsafePutObject, "void sun.misc.Unsafe.putObject(java.lang.Object, long, java.lang.Object)") \
   V(UnsafeGetArrayBaseOffsetForComponentType, "int sun.misc.Unsafe.getArrayBaseOffsetForComponentType(java.lang.Class)") \
   V(UnsafeGetArrayIndexScaleForComponentType, "int sun.misc.Unsafe.getArrayIndexScaleForComponentType(java.lang.Class)")
diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc
index a1ae2aa..814b001 100644
--- a/runtime/interpreter/unstarted_runtime_test.cc
+++ b/runtime/interpreter/unstarted_runtime_test.cc
@@ -16,6 +16,11 @@
 
 #include "unstarted_runtime.h"
 
+#include <limits>
+#include <locale>
+
+#include "base/casts.h"
+#include "base/memory_tool.h"
 #include "class_linker.h"
 #include "common_runtime_test.h"
 #include "dex_instruction.h"
@@ -27,6 +32,7 @@
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
+#include "transaction.h"
 
 namespace art {
 namespace interpreter {
@@ -66,6 +72,129 @@
 #undef UNSTARTED_RUNTIME_DIRECT_LIST
 #undef UNSTARTED_RUNTIME_JNI_LIST
 #undef UNSTARTED_JNI
+
+  // Helpers for ArrayCopy.
+  //
+  // Note: as we have to use handles, we use StackHandleScope to transfer data. Hardcode a size
+  //       of three everywhere. That is enough to test all cases.
+
+  static mirror::ObjectArray<mirror::Object>* CreateObjectArray(
+      Thread* self,
+      mirror::Class* component_type,
+      const StackHandleScope<3>& data)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    mirror::Class* array_type = runtime->GetClassLinker()->FindArrayClass(self, &component_type);
+    CHECK(array_type != nullptr);
+    mirror::ObjectArray<mirror::Object>* result =
+        mirror::ObjectArray<mirror::Object>::Alloc(self, array_type, 3);
+    CHECK(result != nullptr);
+    for (size_t i = 0; i < 3; ++i) {
+      result->Set(static_cast<int32_t>(i), data.GetReference(i));
+      CHECK(!self->IsExceptionPending());
+    }
+    return result;
+  }
+
+  static void CheckObjectArray(mirror::ObjectArray<mirror::Object>* array,
+                               const StackHandleScope<3>& data)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    CHECK_EQ(array->GetLength(), 3);
+    CHECK_EQ(data.NumberOfReferences(), 3U);
+    for (size_t i = 0; i < 3; ++i) {
+      EXPECT_EQ(data.GetReference(i), array->Get(static_cast<int32_t>(i))) << i;
+    }
+  }
+
+  void RunArrayCopy(Thread* self,
+                    ShadowFrame* tmp,
+                    bool expect_exception,
+                    mirror::ObjectArray<mirror::Object>* src,
+                    int32_t src_pos,
+                    mirror::ObjectArray<mirror::Object>* dst,
+                    int32_t dst_pos,
+                    int32_t length)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    JValue result;
+    tmp->SetVRegReference(0, src);
+    tmp->SetVReg(1, src_pos);
+    tmp->SetVRegReference(2, dst);
+    tmp->SetVReg(3, dst_pos);
+    tmp->SetVReg(4, length);
+    UnstartedSystemArraycopy(self, tmp, &result, 0);
+    bool exception_pending = self->IsExceptionPending();
+    EXPECT_EQ(exception_pending, expect_exception);
+    if (exception_pending) {
+      self->ClearException();
+    }
+  }
+
+  void RunArrayCopy(Thread* self,
+                    ShadowFrame* tmp,
+                    bool expect_exception,
+                    mirror::Class* src_component_class,
+                    mirror::Class* dst_component_class,
+                    const StackHandleScope<3>& src_data,
+                    int32_t src_pos,
+                    const StackHandleScope<3>& dst_data,
+                    int32_t dst_pos,
+                    int32_t length,
+                    const StackHandleScope<3>& expected_result)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    StackHandleScope<3> hs_misc(self);
+    Handle<mirror::Class> dst_component_handle(hs_misc.NewHandle(dst_component_class));
+
+    Handle<mirror::ObjectArray<mirror::Object>> src_handle(
+        hs_misc.NewHandle(CreateObjectArray(self, src_component_class, src_data)));
+
+    Handle<mirror::ObjectArray<mirror::Object>> dst_handle(
+        hs_misc.NewHandle(CreateObjectArray(self, dst_component_handle.Get(), dst_data)));
+
+    RunArrayCopy(self,
+                 tmp,
+                 expect_exception,
+                 src_handle.Get(),
+                 src_pos,
+                 dst_handle.Get(),
+                 dst_pos,
+                 length);
+    CheckObjectArray(dst_handle.Get(), expected_result);
+  }
+
+  void TestCeilFloor(bool ceil,
+                     Thread* self,
+                     ShadowFrame* tmp,
+                     double const test_pairs[][2],
+                     size_t num_pairs)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    for (size_t i = 0; i < num_pairs; ++i) {
+      tmp->SetVRegDouble(0, test_pairs[i][0]);
+
+      JValue result;
+      if (ceil) {
+        UnstartedMathCeil(self, tmp, &result, 0);
+      } else {
+        UnstartedMathFloor(self, tmp, &result, 0);
+      }
+
+      ASSERT_FALSE(self->IsExceptionPending());
+
+      // We want precise results.
+      int64_t result_int64t = bit_cast<int64_t, double>(result.GetD());
+      int64_t expect_int64t = bit_cast<int64_t, double>(test_pairs[i][1]);
+      EXPECT_EQ(expect_int64t, result_int64t) << result.GetD() << " vs " << test_pairs[i][1];
+    }
+  }
+
+  // Prepare for aborts. Aborts assume that the exception class is already resolved, as the
+  // loading code doesn't work under transactions.
+  void PrepareForAborts() SHARED_REQUIRES(Locks::mutator_lock_) {
+    mirror::Object* result = Runtime::Current()->GetClassLinker()->FindClass(
+        Thread::Current(),
+        Transaction::kAbortExceptionSignature,
+        ScopedNullHandle<mirror::ClassLoader>());
+    CHECK(result != nullptr);
+  }
 };
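
(TestCeilFloor compares doubles via bit_cast so that NaN results and the sign
of zero are checked exactly; plain == would treat -0.0 as equal to 0.0 and is
always false for NaN. A self-contained sketch of the same comparison, with
BitEqual as a hypothetical helper.)

  #include <cstdint>
  #include <cstring>

  // Compare two doubles by their IEEE-754 bit patterns.
  bool BitEqual(double a, double b) {
    static_assert(sizeof(double) == sizeof(uint64_t), "double must be 64-bit");
    uint64_t ua;
    uint64_t ub;
    std::memcpy(&ua, &a, sizeof(ua));
    std::memcpy(&ub, &b, sizeof(ub));
    return ua == ub;
  }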
 
 TEST_F(UnstartedRuntimeTest, MemoryPeekByte) {
@@ -277,5 +406,467 @@
   ShadowFrame::DeleteDeoptimizedFrame(shadow_frame);
 }
 
+// Tests the exceptions that should be checked before modifying the destination.
+// (Doesn't check the object vs. primitive case at the moment.)
+TEST_F(UnstartedRuntimeTest, SystemArrayCopyObjectArrayTestExceptions) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  JValue result;
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Note: none of these tests is GC safe. Assume no GC runs here, given the few
+  //       objects we allocate.
+  StackHandleScope<2> hs_misc(self);
+  Handle<mirror::Class> object_class(
+      hs_misc.NewHandle(mirror::Class::GetJavaLangClass()->GetSuperClass()));
+
+  StackHandleScope<3> hs_data(self);
+  hs_data.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "1"));
+  hs_data.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "2"));
+  hs_data.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "3"));
+
+  Handle<mirror::ObjectArray<mirror::Object>> array(
+      hs_misc.NewHandle(CreateObjectArray(self, object_class.Get(), hs_data)));
+
+  RunArrayCopy(self, tmp, true, array.Get(), -1, array.Get(), 0, 0);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, array.Get(), -1, 0);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, array.Get(), 0, -1);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, array.Get(), 0, 4);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, array.Get(), 1, 3);
+  RunArrayCopy(self, tmp, true, array.Get(), 1, array.Get(), 0, 3);
+
+  mirror::ObjectArray<mirror::Object>* class_as_array =
+      reinterpret_cast<mirror::ObjectArray<mirror::Object>*>(object_class.Get());
+  RunArrayCopy(self, tmp, true, class_as_array, 0, array.Get(), 0, 0);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, class_as_array, 0, 0);
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+TEST_F(UnstartedRuntimeTest, SystemArrayCopyObjectArrayTest) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  JValue result;
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  StackHandleScope<1> hs_object(self);
+  Handle<mirror::Class> object_class(
+      hs_object.NewHandle(mirror::Class::GetJavaLangClass()->GetSuperClass()));
+
+  // Simple test:
+  // [1,2,3]{1 @ 2} into [4,5,6] = [4,5,2]
+  {
+    StackHandleScope<3> hs_src(self);
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "1"));
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "2"));
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "3"));
+
+    StackHandleScope<3> hs_dst(self);
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "4"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "5"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "6"));
+
+    StackHandleScope<3> hs_expected(self);
+    hs_expected.NewHandle(hs_dst.GetReference(0));
+    hs_expected.NewHandle(hs_dst.GetReference(1));
+    hs_expected.NewHandle(hs_src.GetReference(1));
+
+    RunArrayCopy(self,
+                 tmp,
+                 false,
+                 object_class.Get(),
+                 object_class.Get(),
+                 hs_src,
+                 1,
+                 hs_dst,
+                 2,
+                 1,
+                 hs_expected);
+  }
+
+  // Simple test:
+  // [1,2,3]{1 @ 1} into [4,5,6] = [4,2,6]  (with dst String[])
+  {
+    StackHandleScope<3> hs_src(self);
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "1"));
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "2"));
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "3"));
+
+    StackHandleScope<3> hs_dst(self);
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "4"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "5"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "6"));
+
+    StackHandleScope<3> hs_expected(self);
+    hs_expected.NewHandle(hs_dst.GetReference(0));
+    hs_expected.NewHandle(hs_src.GetReference(1));
+    hs_expected.NewHandle(hs_dst.GetReference(2));
+
+    RunArrayCopy(self,
+                 tmp,
+                 false,
+                 object_class.Get(),
+                 mirror::String::GetJavaLangString(),
+                 hs_src,
+                 1,
+                 hs_dst,
+                 1,
+                 1,
+                 hs_expected);
+  }
+
+  // Simple test:
+  // [1,*,3] into [4,5,6] = [1,5,6] + exc
+  {
+    StackHandleScope<3> hs_src(self);
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "1"));
+    hs_src.NewHandle(mirror::String::GetJavaLangString());
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "3"));
+
+    StackHandleScope<3> hs_dst(self);
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "4"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "5"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "6"));
+
+    StackHandleScope<3> hs_expected(self);
+    hs_expected.NewHandle(hs_src.GetReference(0));
+    hs_expected.NewHandle(hs_dst.GetReference(1));
+    hs_expected.NewHandle(hs_dst.GetReference(2));
+
+    RunArrayCopy(self,
+                 tmp,
+                 true,
+                 object_class.Get(),
+                 mirror::String::GetJavaLangString(),
+                 hs_src,
+                 0,
+                 hs_dst,
+                 0,
+                 3,
+                 hs_expected);
+  }
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+TEST_F(UnstartedRuntimeTest, IntegerParseIntTest) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Test string. Should be valid, and every suffix of it should stay within the
+  // int32_t bounds the parsing code enforces (well clear of LONG_MIN/LONG_MAX).
+  constexpr const char* test_string = "-2147483646";
+  constexpr int32_t test_values[] = {
+                6,
+               46,
+              646,
+             3646,
+            83646,
+           483646,
+          7483646,
+         47483646,
+        147483646,
+       2147483646,
+      -2147483646
+  };
+
+  static_assert(arraysize(test_values) == 11U, "test_values");
+  CHECK_EQ(strlen(test_string), 11U);
+
+  for (size_t i = 0; i <= 10; ++i) {
+    const char* test_value = &test_string[10 - i];
+
+    StackHandleScope<1> hs_str(self);
+    Handle<mirror::String> h_str(
+        hs_str.NewHandle(mirror::String::AllocFromModifiedUtf8(self, test_value)));
+    ASSERT_NE(h_str.Get(), nullptr);
+    ASSERT_FALSE(self->IsExceptionPending());
+
+    tmp->SetVRegReference(0, h_str.Get());
+
+    JValue result;
+    UnstartedIntegerParseInt(self, tmp, &result, 0);
+
+    ASSERT_FALSE(self->IsExceptionPending());
+    EXPECT_EQ(result.GetI(), test_values[i]);
+  }
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+// Right now this is the same as Integer.parseInt.
+TEST_F(UnstartedRuntimeTest, LongParseLongTest) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Test string. Should be valid, and every suffix of it should stay within the
+  // int32_t bounds the parsing code enforces (well clear of LONG_MIN/LONG_MAX).
+  constexpr const char* test_string = "-2147483646";
+  constexpr int64_t test_values[] = {
+                6,
+               46,
+              646,
+             3646,
+            83646,
+           483646,
+          7483646,
+         47483646,
+        147483646,
+       2147483646,
+      -2147483646
+  };
+
+  static_assert(arraysize(test_values) == 11U, "test_values");
+  CHECK_EQ(strlen(test_string), 11U);
+
+  for (size_t i = 0; i <= 10; ++i) {
+    const char* test_value = &test_string[10 - i];
+
+    StackHandleScope<1> hs_str(self);
+    Handle<mirror::String> h_str(
+        hs_str.NewHandle(mirror::String::AllocFromModifiedUtf8(self, test_value)));
+    ASSERT_NE(h_str.Get(), nullptr);
+    ASSERT_FALSE(self->IsExceptionPending());
+
+    tmp->SetVRegReference(0, h_str.Get());
+
+    JValue result;
+    UnstartedLongParseLong(self, tmp, &result, 0);
+
+    ASSERT_FALSE(self->IsExceptionPending());
+    EXPECT_EQ(result.GetJ(), test_values[i]);
+  }
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+TEST_F(UnstartedRuntimeTest, Ceil) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  constexpr double nan = std::numeric_limits<double>::quiet_NaN();
+  constexpr double inf = std::numeric_limits<double>::infinity();
+  constexpr double ld1 = static_cast<double>((UINT64_C(1) << 53) - 1);
+  constexpr double ld2 = static_cast<double>(UINT64_C(1) << 55);
+  constexpr double test_pairs[][2] = {
+      { -0.0, -0.0 },
+      {  0.0,  0.0 },
+      { -0.5, -0.0 },
+      { -1.0, -1.0 },
+      {  0.5,  1.0 },
+      {  1.0,  1.0 },
+      {  nan,  nan },
+      {  inf,  inf },
+      { -inf, -inf },
+      {  ld1,  ld1 },
+      {  ld2,  ld2 }
+  };
+
+  TestCeilFloor(true /* ceil */, self, tmp, test_pairs, arraysize(test_pairs));
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+TEST_F(UnstartedRuntimeTest, Floor) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  constexpr double nan = std::numeric_limits<double>::quiet_NaN();
+  constexpr double inf = std::numeric_limits<double>::infinity();
+  constexpr double ld1 = static_cast<double>((UINT64_C(1) << 53) - 1);
+  constexpr double ld2 = static_cast<double>(UINT64_C(1) << 55);
+  constexpr double test_pairs[][2] = {
+      { -0.0, -0.0 },
+      {  0.0,  0.0 },
+      { -0.5, -1.0 },
+      { -1.0, -1.0 },
+      {  0.5,  0.0 },
+      {  1.0,  1.0 },
+      {  nan,  nan },
+      {  inf,  inf },
+      { -inf, -inf },
+      {  ld1,  ld1 },
+      {  ld2,  ld2 }
+  };
+
+  TestCeilFloor(false /* floor */, self, tmp, test_pairs, arraysize(test_pairs));
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+TEST_F(UnstartedRuntimeTest, ToLowerUpper) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  std::locale c_locale("C");
+
+  // Check ASCII.
+  for (uint32_t i = 0; i < 128; ++i) {
+    bool c_upper = std::isupper(static_cast<char>(i), c_locale);
+    bool c_lower = std::islower(static_cast<char>(i), c_locale);
+    EXPECT_FALSE(c_upper && c_lower) << i;
+
+    // Check toLowerCase.
+    {
+      JValue result;
+      tmp->SetVReg(0, static_cast<int32_t>(i));
+      UnstartedCharacterToLowerCase(self, tmp, &result, 0);
+      ASSERT_FALSE(self->IsExceptionPending());
+      uint32_t lower_result = static_cast<uint32_t>(result.GetI());
+      if (c_lower) {
+        EXPECT_EQ(i, lower_result);
+      } else if (c_upper) {
+        EXPECT_EQ(static_cast<uint32_t>(std::tolower(static_cast<char>(i), c_locale)),
+                  lower_result);
+      } else {
+        EXPECT_EQ(i, lower_result);
+      }
+    }
+
+    // Check toUpperCase.
+    {
+      JValue result2;
+      tmp->SetVReg(0, static_cast<int32_t>(i));
+      UnstartedCharacterToUpperCase(self, tmp, &result2, 0);
+      ASSERT_FALSE(self->IsExceptionPending());
+      uint32_t upper_result = static_cast<uint32_t>(result2.GetI());
+      if (c_upper) {
+        EXPECT_EQ(i, upper_result);
+      } else if (c_lower) {
+        EXPECT_EQ(static_cast<uint32_t>(std::toupper(static_cast<char>(i), c_locale)),
+                  upper_result);
+      } else {
+        EXPECT_EQ(i, upper_result);
+      }
+    }
+  }
+
+  // Check abort for other things. Can't test all.
+
+  PrepareForAborts();
+
+  for (uint32_t i = 128; i < 256; ++i) {
+    {
+      JValue result;
+      tmp->SetVReg(0, static_cast<int32_t>(i));
+      Transaction transaction;
+      Runtime::Current()->EnterTransactionMode(&transaction);
+      UnstartedCharacterToLowerCase(self, tmp, &result, 0);
+      Runtime::Current()->ExitTransactionMode();
+      ASSERT_TRUE(self->IsExceptionPending());
+      ASSERT_TRUE(transaction.IsAborted());
+    }
+    {
+      JValue result;
+      tmp->SetVReg(0, static_cast<int32_t>(i));
+      Transaction transaction;
+      Runtime::Current()->EnterTransactionMode(&transaction);
+      UnstartedCharacterToUpperCase(self, tmp, &result, 0);
+      Runtime::Current()->ExitTransactionMode();
+      ASSERT_TRUE(self->IsExceptionPending());
+      ASSERT_TRUE(transaction.IsAborted());
+    }
+  }
+  for (uint64_t i = 256; i <= std::numeric_limits<uint32_t>::max(); i <<= 1) {
+    {
+      JValue result;
+      tmp->SetVReg(0, static_cast<int32_t>(i));
+      Transaction transaction;
+      Runtime::Current()->EnterTransactionMode(&transaction);
+      UnstartedCharacterToLowerCase(self, tmp, &result, 0);
+      Runtime::Current()->ExitTransactionMode();
+      ASSERT_TRUE(self->IsExceptionPending());
+      ASSERT_TRUE(transaction.IsAborted());
+    }
+    {
+      JValue result;
+      tmp->SetVReg(0, static_cast<int32_t>(i));
+      Transaction transaction;
+      Runtime::Current()->EnterTransactionMode(&transaction);
+      UnstartedCharacterToUpperCase(self, tmp, &result, 0);
+      Runtime::Current()->ExitTransactionMode();
+      ASSERT_TRUE(self->IsExceptionPending());
+      ASSERT_TRUE(transaction.IsAborted());
+    }
+  }
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
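
(The enter/exit-transaction check in the loops above repeats four times; it
could be factored into a helper along these lines. A sketch only:
ExpectTransactionAbort is a hypothetical name built from the
EnterTransactionMode/ExitTransactionMode and IsAborted calls used above.)

  template <typename Fn>
  static void ExpectTransactionAbort(Thread* self, Fn&& call)
      SHARED_REQUIRES(Locks::mutator_lock_) {
    Transaction transaction;
    Runtime::Current()->EnterTransactionMode(&transaction);
    call();  // Expected to abort the transaction and raise an exception.
    Runtime::Current()->ExitTransactionMode();
    ASSERT_TRUE(self->IsExceptionPending());
    ASSERT_TRUE(transaction.IsAborted());
  }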
+
+TEST_F(UnstartedRuntimeTest, Sin) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Test an important value, PI/6. That's the one we see in practice.
+  constexpr uint64_t lvalue = UINT64_C(0x3fe0c152382d7365);
+  tmp->SetVRegLong(0, static_cast<int64_t>(lvalue));
+
+  JValue result;
+  UnstartedMathSin(self, tmp, &result, 0);
+
+  const uint64_t lresult = static_cast<uint64_t>(result.GetJ());
+  EXPECT_EQ(UINT64_C(0x3fdfffffffffffff), lresult);
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+TEST_F(UnstartedRuntimeTest, Cos) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Test an important value, PI/6. That's the one we see in practice.
+  constexpr uint64_t lvalue = UINT64_C(0x3fe0c152382d7365);
+  tmp->SetVRegLong(0, static_cast<int64_t>(lvalue));
+
+  JValue result;
+  UnstartedMathCos(self, tmp, &result, 0);
+
+  const uint64_t lresult = static_cast<uint64_t>(result.GetJ());
+  EXPECT_EQ(UINT64_C(0x3febb67ae8584cab), lresult);
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+TEST_F(UnstartedRuntimeTest, Pow) {
+  // Valgrind's floating-point emulation seems to get this wrong. Skip under Valgrind.
+  if (RUNNING_ON_MEMORY_TOOL != 0 && kMemoryToolIsValgrind) {
+    return;
+  }
+
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Test an important pair.
+  constexpr uint64_t lvalue1 = UINT64_C(0x4079000000000000);
+  constexpr uint64_t lvalue2 = UINT64_C(0xbfe6db6dc0000000);
+
+  tmp->SetVRegLong(0, static_cast<int64_t>(lvalue1));
+  tmp->SetVRegLong(2, static_cast<int64_t>(lvalue2));
+
+  JValue result;
+  UnstartedMathPow(self, tmp, &result, 0);
+
+  const uint64_t lresult = static_cast<uint64_t>(result.GetJ());
+  EXPECT_EQ(UINT64_C(0x3f8c5c51326aa7ee), lresult);
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index a41fd45..d983a9f 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -318,6 +318,7 @@
     }
     JavaVMExt* raw_vm = reinterpret_cast<JavaVMExt*>(vm);
     delete raw_vm->GetRuntime();
+    android::ResetNativeLoader();
     return JNI_OK;
   }
 
@@ -942,6 +943,11 @@
   if (!Runtime::Create(options, ignore_unrecognized)) {
     return JNI_ERR;
   }
+
+  // Initialize native loader. This step makes sure we have
+  // everything set up before we start using JNI.
+  android::InitializeNativeLoader();
+
   Runtime* runtime = Runtime::Current();
   bool started = runtime->Start();
   if (!started) {
@@ -950,6 +956,7 @@
     LOG(WARNING) << "CreateJavaVM failed";
     return JNI_ERR;
   }
+
   *p_env = Thread::Current()->GetJniEnv();
   *p_vm = runtime->GetJavaVM();
   return JNI_OK;
diff --git a/runtime/jdwp/jdwp_adb.cc b/runtime/jdwp/jdwp_adb.cc
index 51952c4..e9d6d07 100644
--- a/runtime/jdwp/jdwp_adb.cc
+++ b/runtime/jdwp/jdwp_adb.cc
@@ -24,7 +24,7 @@
 #include "base/stringprintf.h"
 #include "jdwp/jdwp_priv.h"
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "cutils/sockets.h"
 #endif
 
@@ -224,7 +224,7 @@
        */
       int  ret = connect(control_sock_, &control_addr_.controlAddrPlain, control_addr_len_);
       if (!ret) {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
         if (!socket_peer_is_trusted(control_sock_)) {
           if (shutdown(control_sock_, SHUT_RDWR)) {
             PLOG(ERROR) << "trouble shutting down socket";
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 668d5dc..dbf04fe 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -251,7 +251,7 @@
     case kJdwpTransportSocket:
       InitSocketTransport(state.get(), options);
       break;
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     case kJdwpTransportAndroidAdb:
       InitAdbTransport(state.get(), options);
       break;
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
index d9d7a19..7cdd7c5 100644
--- a/runtime/jit/debugger_interface.cc
+++ b/runtime/jit/debugger_interface.cc
@@ -70,15 +70,19 @@
 
 static Mutex g_jit_debug_mutex("JIT debug interface lock", kJitDebugInterfaceLock);
 
-static JITCodeEntry* CreateJITCodeEntryInternal(
-    std::unique_ptr<const uint8_t[]> symfile_addr,
-    uintptr_t symfile_size)
+static JITCodeEntry* CreateJITCodeEntryInternal(std::vector<uint8_t> symfile)
     REQUIRES(g_jit_debug_mutex) {
-  DCHECK(symfile_addr.get() != nullptr);
+  DCHECK_NE(symfile.size(), 0u);
+
+  // Make a copy of the buffer; we want an exact-sized allocation (shrinking it) anyway.
+  uint8_t* symfile_copy = new uint8_t[symfile.size()];
+  CHECK(symfile_copy != nullptr);
+  memcpy(symfile_copy, symfile.data(), symfile.size());
 
   JITCodeEntry* entry = new JITCodeEntry;
-  entry->symfile_addr_ = symfile_addr.release();
-  entry->symfile_size_ = symfile_size;
+  CHECK(entry != nullptr);
+  entry->symfile_addr_ = symfile_copy;
+  entry->symfile_size_ = symfile.size();
   entry->prev_ = nullptr;
 
   entry->next_ = __jit_debug_descriptor.first_entry_;
@@ -111,11 +115,10 @@
   delete entry;
 }
 
-JITCodeEntry* CreateJITCodeEntry(std::unique_ptr<const uint8_t[]> symfile_addr,
-                                 uintptr_t symfile_size) {
+JITCodeEntry* CreateJITCodeEntry(std::vector<uint8_t> symfile) {
   Thread* self = Thread::Current();
   MutexLock mu(self, g_jit_debug_mutex);
-  return CreateJITCodeEntryInternal(std::move(symfile_addr), symfile_size);
+  return CreateJITCodeEntryInternal(std::move(symfile));
 }
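
(Taking std::vector<uint8_t> by value lets callers hand over the buffer with
std::move instead of passing a pointer/size pair. A minimal sketch of the
pattern, with Consume as a hypothetical consumer.)

  #include <cstdint>
  #include <utility>
  #include <vector>

  void Consume(std::vector<uint8_t> data) {  // By value: a sink parameter.
    // The callee now owns data; process or store it here.
  }

  void Caller() {
    std::vector<uint8_t> buf(1024, 0);
    Consume(std::move(buf));  // Moves the heap buffer; buf is left valid but unspecified.
  }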
 
 void DeleteJITCodeEntry(JITCodeEntry* entry) {
@@ -128,14 +131,12 @@
 // so that the user of the JIT interface does not have to store them.
 static std::unordered_map<uintptr_t, JITCodeEntry*> g_jit_code_entries;
 
-void CreateJITCodeEntryForAddress(uintptr_t address,
-                                  std::unique_ptr<const uint8_t[]> symfile_addr,
-                                  uintptr_t symfile_size) {
+void CreateJITCodeEntryForAddress(uintptr_t address, std::vector<uint8_t> symfile) {
   Thread* self = Thread::Current();
   MutexLock mu(self, g_jit_debug_mutex);
   DCHECK_NE(address, 0u);
   DCHECK(g_jit_code_entries.find(address) == g_jit_code_entries.end());
-  JITCodeEntry* entry = CreateJITCodeEntryInternal(std::move(symfile_addr), symfile_size);
+  JITCodeEntry* entry = CreateJITCodeEntryInternal(std::move(symfile));
   g_jit_code_entries.emplace(address, entry);
 }
 
diff --git a/runtime/jit/debugger_interface.h b/runtime/jit/debugger_interface.h
index 74469a9..d9bf331 100644
--- a/runtime/jit/debugger_interface.h
+++ b/runtime/jit/debugger_interface.h
@@ -19,6 +19,7 @@
 
 #include <inttypes.h>
 #include <memory>
+#include <vector>
 
 namespace art {
 
@@ -28,8 +29,7 @@
 
 // Notify native debugger about new JITed code by passing in-memory ELF.
 // It takes ownership of the in-memory ELF file.
-JITCodeEntry* CreateJITCodeEntry(std::unique_ptr<const uint8_t[]> symfile_addr,
-                                 uintptr_t symfile_size);
+JITCodeEntry* CreateJITCodeEntry(std::vector<uint8_t> symfile);
 
 // Notify native debugger that JITed code has been removed.
 // It also releases the associated in-memory ELF file.
@@ -38,9 +38,7 @@
 // Notify native debugger about new JITed code by passing in-memory ELF.
 // The address is used only to uniquely identify the entry.
 // It takes ownership of the in-memory ELF file.
-void CreateJITCodeEntryForAddress(uintptr_t address,
-                                  std::unique_ptr<const uint8_t[]> symfile_addr,
-                                  uintptr_t symfile_size);
+void CreateJITCodeEntryForAddress(uintptr_t address, std::vector<uint8_t> symfile);
 
 // Notify native debugger that JITed code has been removed.
 // Returns false if entry for the given address was not found.
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 7e73e5c..dcc6300 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -23,7 +23,6 @@
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "interpreter/interpreter.h"
 #include "jit_code_cache.h"
-#include "jit_instrumentation.h"
 #include "oat_file_manager.h"
 #include "oat_quick_method_header.h"
 #include "offline_profiling_info.h"
@@ -31,16 +30,28 @@
 #include "runtime.h"
 #include "runtime_options.h"
 #include "stack_map.h"
+#include "thread_list.h"
 #include "utils.h"
 
 namespace art {
 namespace jit {
 
 static constexpr bool kEnableOnStackReplacement = true;
+// At what priority to schedule JIT pool threads. 9 is the lowest foreground priority on device.
+static constexpr int kJitPoolThreadPthreadPriority = 9;
+
+// JIT compiler
+void* Jit::jit_library_handle_ = nullptr;
+void* Jit::jit_compiler_handle_ = nullptr;
+void* (*Jit::jit_load_)(bool*) = nullptr;
+void (*Jit::jit_unload_)(void*) = nullptr;
+bool (*Jit::jit_compile_method_)(void*, ArtMethod*, Thread*, bool) = nullptr;
+void (*Jit::jit_types_loaded_)(void*, mirror::Class**, size_t count) = nullptr;
+bool Jit::generate_debug_info_ = false;
 
 JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
   auto* jit_options = new JitOptions;
-  jit_options->use_jit_ = options.GetOrDefault(RuntimeArgumentMap::UseJIT);
+  jit_options->use_jit_compilation_ = options.GetOrDefault(RuntimeArgumentMap::UseJitCompilation);
 
   jit_options->code_cache_initial_capacity_ =
       options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheInitialCapacity);
@@ -77,9 +88,42 @@
     }
   }
 
+  if (options.Exists(RuntimeArgumentMap::JITPriorityThreadWeight)) {
+    jit_options->priority_thread_weight_ =
+        *options.Get(RuntimeArgumentMap::JITPriorityThreadWeight);
+    if (jit_options->priority_thread_weight_ > jit_options->warmup_threshold_) {
+      LOG(FATAL) << "Priority thread weight is above the warmup threshold.";
+    } else if (jit_options->priority_thread_weight_ == 0) {
+      LOG(FATAL) << "Priority thread weight cannot be 0.";
+    }
+  } else {
+    jit_options->priority_thread_weight_ = std::max(
+        jit_options->warmup_threshold_ / Jit::kDefaultPriorityThreadWeightRatio,
+        static_cast<size_t>(1));
+  }
+
+  if (options.Exists(RuntimeArgumentMap::JITInvokeTransitionWeight)) {
+    jit_options->invoke_transition_weight_ =
+        *options.Get(RuntimeArgumentMap::JITInvokeTransitionWeight);
+    if (jit_options->invoke_transition_weight_ > jit_options->warmup_threshold_) {
+      LOG(FATAL) << "Invoke transition weight is above the warmup threshold.";
+    } else if (jit_options->invoke_transition_weight_  == 0) {
+      LOG(FATAL) << "Invoke transition weight cannot be 0.";
+    }
+  } else {
+    jit_options->invoke_transition_weight_ = std::max(
+        jit_options->warmup_threshold_ / Jit::kDefaultInvokeTransitionWeightRatio,
+        static_cast<size_t>(1));
+  }
+
   return jit_options;
 }
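
(Both defaulted weights follow the same shape: a fixed fraction of the warmup
threshold, clamped to at least 1 so integer division cannot produce a zero
weight. A sketch of that derivation; DefaultWeight is an illustrative name.)

  #include <algorithm>
  #include <cstddef>

  size_t DefaultWeight(size_t warmup_threshold, size_t ratio) {
    return std::max(warmup_threshold / ratio, static_cast<size_t>(1));
  }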
 
+bool Jit::ShouldUsePriorityThreadWeight() {
+  return Runtime::Current()->InJankPerceptibleProcessState()
+      && Thread::Current()->IsJitSensitiveThread();
+}
+
 void Jit::DumpInfo(std::ostream& os) {
   code_cache_->Dump(os);
   cumulative_timings_.Dump(os);
@@ -87,26 +131,27 @@
   memory_use_.PrintMemoryUse(os);
 }
 
+void Jit::DumpForSigQuit(std::ostream& os) {
+  DumpInfo(os);
+  ProfileSaver::DumpInstanceInfo(os);
+}
+
 void Jit::AddTimingLogger(const TimingLogger& logger) {
   cumulative_timings_.AddLogger(logger);
 }
 
-Jit::Jit() : jit_library_handle_(nullptr),
-             jit_compiler_handle_(nullptr),
-             jit_load_(nullptr),
-             jit_compile_method_(nullptr),
-             dump_info_on_shutdown_(false),
+Jit::Jit() : dump_info_on_shutdown_(false),
              cumulative_timings_("JIT timings"),
              memory_use_("Memory used for compilation", 16),
              lock_("JIT memory use lock"),
-             save_profiling_info_(false),
-             generate_debug_info_(false) {
-}
+             use_jit_compilation_(true),
+             save_profiling_info_(false) {}
 
 Jit* Jit::Create(JitOptions* options, std::string* error_msg) {
+  DCHECK(options->UseJitCompilation() || options->GetSaveProfilingInfo());
   std::unique_ptr<Jit> jit(new Jit);
   jit->dump_info_on_shutdown_ = options->DumpJitInfoOnShutdown();
-  if (!jit->LoadCompiler(error_msg)) {
+  if (jit_compiler_handle_ == nullptr && !LoadCompiler(error_msg)) {
     return nullptr;
   }
   jit->code_cache_.reset(JitCodeCache::Create(
@@ -117,16 +162,29 @@
   if (jit->GetCodeCache() == nullptr) {
     return nullptr;
   }
+  jit->use_jit_compilation_ = options->UseJitCompilation();
   jit->save_profiling_info_ = options->GetSaveProfilingInfo();
   VLOG(jit) << "JIT created with initial_capacity="
       << PrettySize(options->GetCodeCacheInitialCapacity())
       << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity())
       << ", compile_threshold=" << options->GetCompileThreshold()
       << ", save_profiling_info=" << options->GetSaveProfilingInfo();
+
+  jit->hot_method_threshold_ = options->GetCompileThreshold();
+  jit->warm_method_threshold_ = options->GetWarmupThreshold();
+  jit->osr_method_threshold_ = options->GetOsrThreshold();
+  jit->priority_thread_weight_ = options->GetPriorityThreadWeight();
+  jit->invoke_transition_weight_ = options->GetInvokeTransitionWeight();
+
+  jit->CreateThreadPool();
+
+  // Notify native debugger about the classes already loaded before the creation of the jit.
+  jit->DumpTypeInfoForLoadedTypes(Runtime::Current()->GetClassLinker());
   return jit.release();
 }
 
-bool Jit::LoadCompiler(std::string* error_msg) {
+bool Jit::LoadCompilerLibrary(std::string* error_msg) {
   jit_library_handle_ = dlopen(
       kIsDebugBuild ? "libartd-compiler.so" : "libart-compiler.so", RTLD_NOW);
   if (jit_library_handle_ == nullptr) {
@@ -162,6 +220,13 @@
     *error_msg = "JIT couldn't find jit_types_loaded entry point";
     return false;
   }
+  return true;
+}
+
+bool Jit::LoadCompiler(std::string* error_msg) {
+  if (jit_library_handle_ == nullptr && !LoadCompilerLibrary(error_msg)) {
+    return false;
+  }
   bool will_generate_debug_symbols = false;
   VLOG(jit) << "Calling JitLoad interpreter_only="
       << Runtime::Current()->GetInstrumentation()->InterpretOnly();
@@ -176,6 +241,7 @@
 }
 
 bool Jit::CompileMethod(ArtMethod* method, Thread* self, bool osr) {
+  DCHECK(Runtime::Current()->UseJitCompilation());
   DCHECK(!method->IsRuntimeMethod());
 
   // Don't compile the method if it has breakpoints.
@@ -197,19 +263,46 @@
   if (!code_cache_->NotifyCompilationOf(method_to_compile, self, osr)) {
     return false;
   }
+
+  VLOG(jit) << "Compiling method "
+            << PrettyMethod(method_to_compile)
+            << " osr=" << std::boolalpha << osr;
   bool success = jit_compile_method_(jit_compiler_handle_, method_to_compile, self, osr);
-  code_cache_->DoneCompiling(method_to_compile, self);
+  code_cache_->DoneCompiling(method_to_compile, self, osr);
+  if (!success) {
+    VLOG(jit) << "Failed to compile method "
+              << PrettyMethod(method_to_compile)
+              << " osr=" << std::boolalpha << osr;
+  }
   return success;
 }
 
 void Jit::CreateThreadPool() {
-  CHECK(instrumentation_cache_.get() != nullptr);
-  instrumentation_cache_->CreateThreadPool();
+  // There is a DCHECK in the 'AddSamples' method to ensure the thread pool
+  // is not null when we instrument.
+  thread_pool_.reset(new ThreadPool("Jit thread pool", 1));
+  thread_pool_->SetPthreadPriority(kJitPoolThreadPthreadPriority);
+  thread_pool_->StartWorkers(Thread::Current());
 }
 
 void Jit::DeleteThreadPool() {
-  if (instrumentation_cache_.get() != nullptr) {
-    instrumentation_cache_->DeleteThreadPool(Thread::Current());
+  Thread* self = Thread::Current();
+  DCHECK(Runtime::Current()->IsShuttingDown(self));
+  if (thread_pool_ != nullptr) {
+    ThreadPool* cache = nullptr;
+    {
+      ScopedSuspendAll ssa(__FUNCTION__);
+      // Clear thread_pool_ field while the threads are suspended.
+      // A mutator in the 'AddSamples' method will check against it.
+      cache = thread_pool_.release();
+    }
+    cache->StopWorkers(self);
+    cache->RemoveAllTasks(self);
+    // We could just suspend all threads, but we know those threads
+    // will finish in a short period, so it's not worth adding suspend logic
+    // here. Besides, this is only done for shutdown.
+    cache->Wait(self, false, false);
+    delete cache;
   }
 }
 
@@ -224,15 +317,12 @@
 
 void Jit::StopProfileSaver() {
   if (save_profiling_info_ && ProfileSaver::IsStarted()) {
-    ProfileSaver::Stop();
+    ProfileSaver::Stop(dump_info_on_shutdown_);
   }
 }
 
 bool Jit::JitAtFirstUse() {
-  if (instrumentation_cache_ != nullptr) {
-    return instrumentation_cache_->HotMethodThreshold() == 0;
-  }
-  return false;
+  return HotMethodThreshold() == 0;
 }
 
 bool Jit::CanInvokeCompiledCode(ArtMethod* method) {
@@ -247,22 +337,21 @@
   DeleteThreadPool();
   if (jit_compiler_handle_ != nullptr) {
     jit_unload_(jit_compiler_handle_);
+    jit_compiler_handle_ = nullptr;
   }
   if (jit_library_handle_ != nullptr) {
     dlclose(jit_library_handle_);
+    jit_library_handle_ = nullptr;
   }
 }
 
-void Jit::CreateInstrumentationCache(size_t compile_threshold,
-                                     size_t warmup_threshold,
-                                     size_t osr_threshold) {
-  instrumentation_cache_.reset(
-      new jit::JitInstrumentationCache(compile_threshold, warmup_threshold, osr_threshold));
-}
-
 void Jit::NewTypeLoadedIfUsingJit(mirror::Class* type) {
+  if (!Runtime::Current()->UseJitCompilation()) {
+    // No need to notify if we only use the JIT to save profiles.
+    return;
+  }
   jit::Jit* jit = Runtime::Current()->GetJit();
-  if (jit != nullptr && jit->generate_debug_info_) {
+  if (jit->generate_debug_info_) {
     DCHECK(jit->jit_types_loaded_ != nullptr);
     jit->jit_types_loaded_(jit->jit_compiler_handle_, &type, 1);
   }
@@ -307,11 +396,6 @@
     return false;
   }
 
-  if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
-    VLOG(jit) << "OSR not supported on this platform: " << kRuntimeISA;
-    return false;
-  }
-
   if (UNLIKELY(__builtin_frame_address(0) < thread->GetStackEnd())) {
     // Don't attempt to do an OSR if we are close to the stack limit. Since
     // the interpreter frames are still on stack, OSR has the potential
@@ -350,7 +434,7 @@
     }
 
     CodeInfo code_info = osr_method->GetOptimizedCodeInfo();
-    StackMapEncoding encoding = code_info.ExtractEncoding();
+    CodeInfoEncoding encoding = code_info.ExtractEncoding();
 
     // Find stack map starting at the target dex_pc.
     StackMap stack_map = code_info.GetOsrStackMapForDexPc(dex_pc + dex_pc_offset, encoding);
@@ -409,7 +493,8 @@
       }
     }
 
-    native_pc = stack_map.GetNativePcOffset(encoding) + osr_method->GetEntryPoint();
+    native_pc = stack_map.GetNativePcOffset(encoding.stack_map_encoding) +
+        osr_method->GetEntryPoint();
     VLOG(jit) << "Jumping to "
               << method_name
               << "@"
@@ -448,5 +533,164 @@
   memory_use_.AddValue(bytes);
 }
 
+class JitCompileTask FINAL : public Task {
+ public:
+  enum TaskKind {
+    kAllocateProfile,
+    kCompile,
+    kCompileOsr
+  };
+
+  JitCompileTask(ArtMethod* method, TaskKind kind) : method_(method), kind_(kind) {
+    ScopedObjectAccess soa(Thread::Current());
+    // Add a global ref to the class to prevent class unloading until compilation is done.
+    klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass());
+    CHECK(klass_ != nullptr);
+  }
+
+  ~JitCompileTask() {
+    ScopedObjectAccess soa(Thread::Current());
+    soa.Vm()->DeleteGlobalRef(soa.Self(), klass_);
+  }
+
+  void Run(Thread* self) OVERRIDE {
+    ScopedObjectAccess soa(self);
+    if (kind_ == kCompile) {
+      Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ false);
+    } else if (kind_ == kCompileOsr) {
+      Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ true);
+    } else {
+      DCHECK(kind_ == kAllocateProfile);
+      if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) {
+        VLOG(jit) << "Start profiling " << PrettyMethod(method_);
+      }
+    }
+  }
+
+  void Finalize() OVERRIDE {
+    delete this;
+  }
+
+ private:
+  ArtMethod* const method_;
+  const TaskKind kind_;
+  jobject klass_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
+};
+
+void Jit::AddSamples(Thread* self, ArtMethod* method, uint16_t count, bool with_backedges) {
+  if (thread_pool_ == nullptr) {
+    // Should only see this when shutting down.
+    DCHECK(Runtime::Current()->IsShuttingDown(self));
+    return;
+  }
+
+  if (method->IsClassInitializer() || method->IsNative() || !method->IsCompilable()) {
+    // We do not want to compile such methods.
+    return;
+  }
+  DCHECK(thread_pool_ != nullptr);
+  DCHECK_GT(warm_method_threshold_, 0);
+  DCHECK_GT(hot_method_threshold_, warm_method_threshold_);
+  DCHECK_GT(osr_method_threshold_, hot_method_threshold_);
+  DCHECK_GE(priority_thread_weight_, 1);
+  DCHECK_LE(priority_thread_weight_, hot_method_threshold_);
+
+  int32_t starting_count = method->GetCounter();
+  if (Jit::ShouldUsePriorityThreadWeight()) {
+    count *= priority_thread_weight_;
+  }
+  int32_t new_count = starting_count + count;  // int32 here to avoid wrap-around.
+  if (starting_count < warm_method_threshold_) {
+    if ((new_count >= warm_method_threshold_) &&
+        (method->GetProfilingInfo(sizeof(void*)) == nullptr)) {
+      bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
+      if (success) {
+        VLOG(jit) << "Start profiling " << PrettyMethod(method);
+      }
+
+      if (thread_pool_ == nullptr) {
+        // Calling ProfilingInfo::Create might put us in a suspended state, which could
+        // lead to the thread pool being deleted when we are shutting down.
+        DCHECK(Runtime::Current()->IsShuttingDown(self));
+        return;
+      }
+
+      if (!success) {
+        // We failed allocating. Instead of doing the collection on the Java thread, we push
+        // an allocation task to a compiler thread, which will do the collection.
+        thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
+      }
+    }
+    // Avoid jumping more than one state at a time.
+    new_count = std::min(new_count, hot_method_threshold_ - 1);
+  } else if (use_jit_compilation_) {
+    if (starting_count < hot_method_threshold_) {
+      if ((new_count >= hot_method_threshold_) &&
+          !code_cache_->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+        DCHECK(thread_pool_ != nullptr);
+        thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
+      }
+      // Avoid jumping more than one state at a time.
+      new_count = std::min(new_count, osr_method_threshold_ - 1);
+    } else if (starting_count < osr_method_threshold_) {
+      if (!with_backedges) {
+        // If the samples don't contain any back edge, we don't increment the hotness.
+        return;
+      }
+      if ((new_count >= osr_method_threshold_) && !code_cache_->IsOsrCompiled(method)) {
+        DCHECK(thread_pool_ != nullptr);
+        thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
+      }
+    }
+  }
+  // Update hotness counter
+  method->SetCounter(new_count);
+}
+
+void Jit::MethodEntered(Thread* thread, ArtMethod* method) {
+  Runtime* runtime = Runtime::Current();
+  if (UNLIKELY(runtime->UseJitCompilation() && runtime->GetJit()->JitAtFirstUse())) {
+    // The compiler requires a ProfilingInfo object.
+    ProfilingInfo::Create(thread, method, /* retry_allocation */ true);
+    JitCompileTask compile_task(method, JitCompileTask::kCompile);
+    compile_task.Run(thread);
+    return;
+  }
+
+  ProfilingInfo* profiling_info = method->GetProfilingInfo(sizeof(void*));
+  // Update the entrypoint if the ProfilingInfo has one. The interpreter will call it
+  // instead of interpreting the method.
+  if ((profiling_info != nullptr) && (profiling_info->GetSavedEntryPoint() != nullptr)) {
+    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+        method, profiling_info->GetSavedEntryPoint());
+  } else {
+    AddSamples(thread, method, 1, /* with_backedges */ false);
+  }
+}
+
+void Jit::InvokeVirtualOrInterface(Thread* thread,
+                                   mirror::Object* this_object,
+                                   ArtMethod* caller,
+                                   uint32_t dex_pc,
+                                   ArtMethod* callee ATTRIBUTE_UNUSED) {
+  ScopedAssertNoThreadSuspension ants(thread, __FUNCTION__);
+  DCHECK(this_object != nullptr);
+  ProfilingInfo* info = caller->GetProfilingInfo(sizeof(void*));
+  if (info != nullptr) {
+    // Since the instrumentation is marked from the declaring class, we need to mark the card so
+    // that mod-union tables and card rescanning know about the update.
+    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass());
+    info->AddInvokeInfo(dex_pc, this_object->GetClass());
+  }
+}
+
+void Jit::WaitForCompilationToFinish(Thread* self) {
+  if (thread_pool_ != nullptr) {
+    thread_pool_->Wait(self, false, false);
+  }
+}
+
 }  // namespace jit
 }  // namespace art
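
For orientation, the tier progression implemented by the new Jit::AddSamples above can be modeled in isolation: samples taken in a jank-perceptible process state on a JIT-sensitive thread are weighted, the counter is widened to 32 bits to avoid uint16_t wrap-around, and a method may cross at most one threshold (warm, hot, OSR) per update. A minimal self-contained sketch of that logic, with assumed threshold values and without the thread pool, ProfilingInfo, or locking:

    // Standalone model of the tiered hotness logic; a sketch, not ART code.
    #include <algorithm>
    #include <cstdint>

    struct Thresholds {
      int32_t warm = 5000;   // assumed values; the real ones come from JitOptions
      int32_t hot = 10000;
      int32_t osr = 20000;
    };

    // Returns the updated hotness counter; a real caller would enqueue a compile
    // (or OSR-compile) task whenever the counter crosses a threshold.
    int32_t AddSamples(int32_t counter, uint16_t count, bool priority_thread,
                       uint16_t priority_weight, const Thresholds& t) {
      int32_t weighted = count;
      if (priority_thread) {
        weighted *= priority_weight;  // weight samples from jank-sensitive threads
      }
      int32_t new_count = counter + weighted;
      if (counter < t.warm) {
        new_count = std::min<int32_t>(new_count, t.hot - 1);  // advance at most one tier
      } else if (counter < t.hot) {
        new_count = std::min<int32_t>(new_count, t.osr - 1);
      }
      return new_count;
    }
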
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 37d0bdb..f3a6240 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -34,28 +34,32 @@
 namespace jit {
 
 class JitCodeCache;
-class JitInstrumentationCache;
 class JitOptions;
 
+static constexpr int16_t kJitCheckForOSR = -1;
+static constexpr int16_t kJitHotnessDisabled = -2;
+
 class Jit {
  public:
   static constexpr bool kStressMode = kIsDebugBuild;
   static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 10000;
+  static constexpr size_t kDefaultPriorityThreadWeightRatio = 1000;
+  static constexpr size_t kDefaultInvokeTransitionWeightRatio = 500;
 
   virtual ~Jit();
   static Jit* Create(JitOptions* options, std::string* error_msg);
   bool CompileMethod(ArtMethod* method, Thread* self, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateInstrumentationCache(size_t compile_threshold,
-                                  size_t warmup_threshold,
-                                  size_t osr_threshold);
   void CreateThreadPool();
+
   const JitCodeCache* GetCodeCache() const {
     return code_cache_.get();
   }
+
   JitCodeCache* GetCodeCache() {
     return code_cache_.get();
   }
+
   void DeleteThreadPool();
   // Dump interesting info: #methods compiled, code vs data size, compile / verify cumulative
   // loggers.
@@ -67,8 +71,56 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  JitInstrumentationCache* GetInstrumentationCache() const {
-    return instrumentation_cache_.get();
+  size_t OSRMethodThreshold() const {
+    return osr_method_threshold_;
+  }
+
+  size_t HotMethodThreshold() const {
+    return hot_method_threshold_;
+  }
+
+  size_t WarmMethodThreshold() const {
+    return warm_method_threshold_;
+  }
+
+  uint16_t PriorityThreadWeight() const {
+    return priority_thread_weight_;
+  }
+
+  // Returns false if we only need to save profile information and not compile methods.
+  bool UseJitCompilation() const {
+    return use_jit_compilation_;
+  }
+
+  bool SaveProfilingInfo() const {
+    return save_profiling_info_;
+  }
+
+  // Wait until there are no more pending compilation tasks.
+  void WaitForCompilationToFinish(Thread* self);
+
+  // Profiling methods.
+  void MethodEntered(Thread* thread, ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void AddSamples(Thread* self, ArtMethod* method, uint16_t samples, bool with_backedges)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void InvokeVirtualOrInterface(Thread* thread,
+                                mirror::Object* this_object,
+                                ArtMethod* caller,
+                                uint32_t dex_pc,
+                                ArtMethod* callee)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void NotifyInterpreterToCompiledCodeTransition(Thread* self, ArtMethod* caller)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    AddSamples(self, caller, invoke_transition_weight_, false);
+  }
+
+  void NotifyCompiledCodeToInterpreterTransition(Thread* self, ArtMethod* callee)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    AddSamples(self, callee, invoke_transition_weight_, false);
   }
 
   // Starts the profile saver if the config options allow profile recording.
@@ -84,9 +136,7 @@
                          const std::string& app_dir);
   void StopProfileSaver();
 
-  void DumpForSigQuit(std::ostream& os) REQUIRES(!lock_) {
-    DumpInfo(os);
-  }
+  void DumpForSigQuit(std::ostream& os) REQUIRES(!lock_);
 
   static void NewTypeLoadedIfUsingJit(mirror::Class* type)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -101,6 +151,9 @@
   // Return whether we can invoke JIT code for `method`.
   bool CanInvokeCompiledCode(ArtMethod* method);
 
+  // Return whether the runtime should use a priority thread weight when sampling.
+  static bool ShouldUsePriorityThreadWeight();
+
   // If an OSR compiled version is available for `method`,
   // and `dex_pc + dex_pc_offset` is an entry point of that compiled
   // version, this method will jump to the compiled code, let it run,
@@ -112,17 +165,20 @@
                                         JValue* result)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  static bool LoadCompilerLibrary(std::string* error_msg);
+
  private:
   Jit();
-  bool LoadCompiler(std::string* error_msg);
+
+  static bool LoadCompiler(std::string* error_msg);
 
   // JIT compiler
-  void* jit_library_handle_;
-  void* jit_compiler_handle_;
-  void* (*jit_load_)(bool*);
-  void (*jit_unload_)(void*);
-  bool (*jit_compile_method_)(void*, ArtMethod*, Thread*, bool);
-  void (*jit_types_loaded_)(void*, mirror::Class**, size_t count);
+  static void* jit_library_handle_;
+  static void* jit_compiler_handle_;
+  static void* (*jit_load_)(bool*);
+  static void (*jit_unload_)(void*);
+  static bool (*jit_compile_method_)(void*, ArtMethod*, Thread*, bool);
+  static void (*jit_types_loaded_)(void*, mirror::Class**, size_t count);
 
   // Performance monitoring.
   bool dump_info_on_shutdown_;
@@ -130,11 +186,17 @@
   Histogram<uint64_t> memory_use_ GUARDED_BY(lock_);
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
-  std::unique_ptr<jit::JitInstrumentationCache> instrumentation_cache_;
   std::unique_ptr<jit::JitCodeCache> code_cache_;
 
+  bool use_jit_compilation_;
   bool save_profiling_info_;
-  bool generate_debug_info_;
+  static bool generate_debug_info_;
+  uint16_t hot_method_threshold_;
+  uint16_t warm_method_threshold_;
+  uint16_t osr_method_threshold_;
+  uint16_t priority_thread_weight_;
+  uint16_t invoke_transition_weight_;
+  std::unique_ptr<ThreadPool> thread_pool_;
 
   DISALLOW_COPY_AND_ASSIGN(Jit);
 };
@@ -151,6 +213,12 @@
   size_t GetOsrThreshold() const {
     return osr_threshold_;
   }
+  uint16_t GetPriorityThreadWeight() const {
+    return priority_thread_weight_;
+  }
+  size_t GetInvokeTransitionWeight() const {
+    return invoke_transition_weight_;
+  }
   size_t GetCodeCacheInitialCapacity() const {
     return code_cache_initial_capacity_;
   }
@@ -163,32 +231,34 @@
   bool GetSaveProfilingInfo() const {
     return save_profiling_info_;
   }
-  bool UseJIT() const {
-    return use_jit_;
+  bool UseJitCompilation() const {
+    return use_jit_compilation_;
   }
-  void SetUseJIT(bool b) {
-    use_jit_ = b;
+  void SetUseJitCompilation(bool b) {
+    use_jit_compilation_ = b;
   }
   void SetSaveProfilingInfo(bool b) {
     save_profiling_info_ = b;
   }
   void SetJitAtFirstUse() {
-    use_jit_ = true;
+    use_jit_compilation_ = true;
     compile_threshold_ = 0;
   }
 
  private:
-  bool use_jit_;
+  bool use_jit_compilation_;
   size_t code_cache_initial_capacity_;
   size_t code_cache_max_capacity_;
   size_t compile_threshold_;
   size_t warmup_threshold_;
   size_t osr_threshold_;
+  uint16_t priority_thread_weight_;
+  size_t invoke_transition_weight_;
   bool dump_info_on_shutdown_;
   bool save_profiling_info_;
 
   JitOptions()
-      : use_jit_(false),
+      : use_jit_compilation_(false),
         code_cache_initial_capacity_(0),
         code_cache_max_capacity_(0),
         compile_threshold_(0),
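
The two ratio constants added to jit.h above (kDefaultPriorityThreadWeightRatio = 1000 and kDefaultInvokeTransitionWeightRatio = 500) only apply when the corresponding option is not set explicitly. As a worked example, assuming a warmup threshold of 5000 (a value not stated in this diff):

    priority_thread_weight   = max(5000 / 1000, 1) = 5
    invoke_transition_weight = max(5000 / 500, 1)  = 10

With those assumed defaults, one sample on a jank-sensitive thread counts as 5 ordinary samples, and each interpreter/compiled-code transition counts as 10.
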
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 53d645c..6b6f5a5 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -195,9 +195,7 @@
 
 uint8_t* JitCodeCache::CommitCode(Thread* self,
                                   ArtMethod* method,
-                                  const uint8_t* mapping_table,
                                   const uint8_t* vmap_table,
-                                  const uint8_t* gc_map,
                                   size_t frame_size_in_bytes,
                                   size_t core_spill_mask,
                                   size_t fp_spill_mask,
@@ -206,9 +204,7 @@
                                   bool osr) {
   uint8_t* result = CommitCodeInternal(self,
                                        method,
-                                       mapping_table,
                                        vmap_table,
-                                       gc_map,
                                        frame_size_in_bytes,
                                        core_spill_mask,
                                        fp_spill_mask,
@@ -220,9 +216,7 @@
     GarbageCollectCache(self);
     result = CommitCodeInternal(self,
                                 method,
-                                mapping_table,
                                 vmap_table,
-                                gc_map,
                                 frame_size_in_bytes,
                                 core_spill_mask,
                                 fp_spill_mask,
@@ -254,8 +248,6 @@
   // It does nothing if we are not using native debugger.
   DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
 
-  FreeData(const_cast<uint8_t*>(method_header->GetNativeGcMap()));
-  FreeData(const_cast<uint8_t*>(method_header->GetMappingTable()));
   // Use the offset directly to prevent sanity check that the method is
   // compiled with optimizing.
   // TODO(ngeoffray): Clean up.
@@ -314,9 +306,7 @@
 
 uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
                                           ArtMethod* method,
-                                          const uint8_t* mapping_table,
                                           const uint8_t* vmap_table,
-                                          const uint8_t* gc_map,
                                           size_t frame_size_in_bytes,
                                           size_t core_spill_mask,
                                           size_t fp_spill_mask,
@@ -346,9 +336,7 @@
       std::copy(code, code + code_size, code_ptr);
       method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
       new (method_header) OatQuickMethodHeader(
-          (mapping_table == nullptr) ? 0 : code_ptr - mapping_table,
           (vmap_table == nullptr) ? 0 : code_ptr - vmap_table,
-          (gc_map == nullptr) ? 0 : code_ptr - gc_map,
           frame_size_in_bytes,
           core_spill_mask,
           fp_spill_mask,
@@ -366,8 +354,7 @@
     if (osr) {
       number_of_osr_compilations_++;
       osr_code_map_.Put(method, code_ptr);
-    } else if (!Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()) {
-      // TODO(ngeoffray): Clean up instrumentation and code cache interactions.
+    } else {
       Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
           method, method_header->GetEntryPoint());
     }
@@ -378,7 +365,7 @@
     }
     last_update_time_ns_.StoreRelease(NanoTime());
     VLOG(jit)
-        << "JIT added (osr = " << std::boolalpha << osr << std::noboolalpha << ") "
+        << "JIT added (osr=" << std::boolalpha << osr << std::noboolalpha << ") "
         << PrettyMethod(method) << "@" << method
         << " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": "
         << " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": "
@@ -646,10 +633,7 @@
       bool next_collection_will_be_full = ShouldDoFullCollection();
 
       // Start polling the liveness of compiled code to prepare for the next full collection.
-      // We avoid doing this if exit stubs are installed to not mess with the instrumentation.
-      // TODO(ngeoffray): Clean up instrumentation and code cache interactions.
-      if (!Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled() &&
-          next_collection_will_be_full) {
+      if (next_collection_will_be_full) {
         // Save the entry point of methods we have compiled, and update the entry
         // point of those methods to the interpreter. If the method is invoked, the
         // interpreter will update its entry point to the compiled code and call it.
@@ -657,7 +641,8 @@
           const void* entry_point = info->GetMethod()->GetEntryPointFromQuickCompiledCode();
           if (ContainsPc(entry_point)) {
             info->SetSavedEntryPoint(entry_point);
-            info->GetMethod()->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+            Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+                info->GetMethod(), GetQuickToInterpreterBridge());
           }
         }
 
@@ -902,13 +887,15 @@
   }
 }
 
-void JitCodeCache::GetCompiledArtMethods(const std::set<std::string>& dex_base_locations,
-                                         std::vector<ArtMethod*>& methods) {
+void JitCodeCache::GetProfiledMethods(const std::set<std::string>& dex_base_locations,
+                                      std::vector<MethodReference>& methods) {
   ScopedTrace trace(__FUNCTION__);
   MutexLock mu(Thread::Current(), lock_);
-  for (auto it : method_code_map_) {
-    if (ContainsElement(dex_base_locations, it.second->GetDexFile()->GetBaseLocation())) {
-      methods.push_back(it.second);
+  for (const ProfilingInfo* info : profiling_infos_) {
+    ArtMethod* method = info->GetMethod();
+    const DexFile* dex_file = method->GetDexFile();
+    if (ContainsElement(dex_base_locations, dex_file->GetBaseLocation())) {
+      methods.emplace_back(dex_file, method->GetDexMethodIndex());
     }
   }
 }
@@ -917,15 +904,18 @@
   return last_update_time_ns_.LoadAcquire();
 }
 
+bool JitCodeCache::IsOsrCompiled(ArtMethod* method) {
+  MutexLock mu(Thread::Current(), lock_);
+  return osr_code_map_.find(method) != osr_code_map_.end();
+}
+
 bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr) {
   if (!osr && ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
-    VLOG(jit) << PrettyMethod(method) << " is already compiled";
     return false;
   }
 
   MutexLock mu(self, lock_);
   if (osr && (osr_code_map_.find(method) != osr_code_map_.end())) {
-    VLOG(jit) << PrettyMethod(method) << " is already osr compiled";
     return false;
   }
 
@@ -939,12 +929,11 @@
     return false;
   }
 
-  if (info->IsMethodBeingCompiled()) {
-    VLOG(jit) << PrettyMethod(method) << " is already being compiled";
+  if (info->IsMethodBeingCompiled(osr)) {
     return false;
   }
 
-  info->SetIsMethodBeingCompiled(true);
+  info->SetIsMethodBeingCompiled(true, osr);
   return true;
 }
 
@@ -964,10 +953,10 @@
   info->DecrementInlineUse();
 }
 
-void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED) {
+void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED, bool osr) {
   ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
-  DCHECK(info->IsMethodBeingCompiled());
-  info->SetIsMethodBeingCompiled(false);
+  DCHECK(info->IsMethodBeingCompiled(osr));
+  info->SetIsMethodBeingCompiled(false, osr);
 }
 
 size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) {
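
Threading the osr flag through NotifyCompilationOf and DoneCompiling above implies that regular and OSR compilation of the same method are now tracked independently, with one in-progress marker per mode. A hedged sketch of the bookkeeping this suggests (the real flags live in ProfilingInfo, whose change is not part of this diff):

    // Per-method compilation-in-progress flags, one per compilation kind; a sketch only.
    struct CompilationInProgress {
      bool regular = false;
      bool osr = false;

      bool IsMethodBeingCompiled(bool for_osr) const {
        return for_osr ? osr : regular;
      }
      void SetIsMethodBeingCompiled(bool value, bool for_osr) {
        (for_osr ? osr : regular) = value;
      }
    };
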
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index a54f04f..4df6762 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -26,6 +26,7 @@
 #include "gc/accounting/bitmap.h"
 #include "gc_root.h"
 #include "jni.h"
+#include "method_reference.h"
 #include "oat_file.h"
 #include "object_callbacks.h"
 #include "safe_map.h"
@@ -80,7 +81,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
-  void DoneCompiling(ArtMethod* method, Thread* self)
+  void DoneCompiling(ArtMethod* method, Thread* self, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
@@ -91,9 +92,7 @@
   // Allocate and write code and its metadata to the code cache.
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
-                      const uint8_t* mapping_table,
                       const uint8_t* vmap_table,
-                      const uint8_t* gc_map,
                       size_t frame_size_in_bytes,
                       size_t core_spill_mask,
                       size_t fp_spill_mask,
@@ -167,9 +166,9 @@
 
   void* MoreCore(const void* mspace, intptr_t increment);
 
-  // Adds to `methods` all the compiled ArtMethods which are part of any of the given dex locations.
-  void GetCompiledArtMethods(const std::set<std::string>& dex_base_locations,
-                             std::vector<ArtMethod*>& methods)
+  // Adds to `methods` all profiled methods which are part of any of the given dex locations.
+  void GetProfiledMethods(const std::set<std::string>& dex_base_locations,
+                          std::vector<MethodReference>& methods)
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -188,6 +187,8 @@
 
   void Dump(std::ostream& os) REQUIRES(!lock_);
 
+  bool IsOsrCompiled(ArtMethod* method) REQUIRES(!lock_);
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
@@ -201,9 +202,7 @@
   // allocation fails. Return null if the allocation fails.
   uint8_t* CommitCodeInternal(Thread* self,
                               ArtMethod* method,
-                              const uint8_t* mapping_table,
                               const uint8_t* vmap_table,
-                              const uint8_t* gc_map,
                               size_t frame_size_in_bytes,
                               size_t core_spill_mask,
                               size_t fp_spill_mask,
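
Taken together, the renamed entry points above suggest the following shape for a profile-saving pass. Only the method names are taken from this diff; the glue (who owns the inputs and where they come from) is assumed:

    // Hypothetical profile-saving pass built from the APIs renamed in this diff.
    bool SaveProfileOnce(art::jit::JitCodeCache* code_cache,
                         const std::set<std::string>& tracked_dex_base_locations,
                         const std::set<art::DexCacheResolvedClasses>& resolved_classes,
                         const std::string& profile_filename) {
      // Collect (dex file, method index) pairs instead of raw ArtMethod pointers.
      std::vector<art::MethodReference> methods;
      code_cache->GetProfiledMethods(tracked_dex_base_locations, methods);

      // Fold methods and resolved classes into an in-memory profile, then merge
      // it with whatever is already on disk.
      art::ProfileCompilationInfo info;
      if (!info.AddMethodsAndClasses(methods, resolved_classes)) {
        return false;
      }
      uint64_t bytes_written = 0;
      return info.MergeAndSave(profile_filename, &bytes_written, /* force */ false);
    }
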
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
deleted file mode 100644
index d751e5a..0000000
--- a/runtime/jit/jit_instrumentation.cc
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Copyright 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "jit_instrumentation.h"
-
-#include "art_method-inl.h"
-#include "jit.h"
-#include "jit_code_cache.h"
-#include "scoped_thread_state_change.h"
-#include "thread_list.h"
-
-namespace art {
-namespace jit {
-
-// At what priority to schedule jit threads. 9 is the lowest foreground priority on device.
-static constexpr int kJitPoolThreadPthreadPriority = 9;
-
-class JitCompileTask FINAL : public Task {
- public:
-  enum TaskKind {
-    kAllocateProfile,
-    kCompile,
-    kCompileOsr
-  };
-
-  JitCompileTask(ArtMethod* method, TaskKind kind) : method_(method), kind_(kind) {
-    ScopedObjectAccess soa(Thread::Current());
-    // Add a global ref to the class to prevent class unloading until compilation is done.
-    klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass());
-    CHECK(klass_ != nullptr);
-  }
-
-  ~JitCompileTask() {
-    ScopedObjectAccess soa(Thread::Current());
-    soa.Vm()->DeleteGlobalRef(soa.Self(), klass_);
-  }
-
-  void Run(Thread* self) OVERRIDE {
-    ScopedObjectAccess soa(self);
-    if (kind_ == kCompile) {
-      VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
-      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ false)) {
-        VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
-      }
-    } else if (kind_ == kCompileOsr) {
-      VLOG(jit) << "JitCompileTask compiling method osr " << PrettyMethod(method_);
-      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ true)) {
-        VLOG(jit) << "Failed to compile method osr " << PrettyMethod(method_);
-      }
-    } else {
-      DCHECK(kind_ == kAllocateProfile);
-      if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) {
-        VLOG(jit) << "Start profiling " << PrettyMethod(method_);
-      }
-    }
-  }
-
-  void Finalize() OVERRIDE {
-    delete this;
-  }
-
- private:
-  ArtMethod* const method_;
-  const TaskKind kind_;
-  jobject klass_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
-};
-
-JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
-                                                 size_t warm_method_threshold,
-                                                 size_t osr_method_threshold)
-    : hot_method_threshold_(hot_method_threshold),
-      warm_method_threshold_(warm_method_threshold),
-      osr_method_threshold_(osr_method_threshold),
-      listener_(this) {
-}
-
-void JitInstrumentationCache::CreateThreadPool() {
-  // Create the thread pool before setting the instrumentation, so that
-  // when the threads stopped being suspended, they can use it directly.
-  // There is a DCHECK in the 'AddSamples' method to ensure the tread pool
-  // is not null when we instrument.
-  thread_pool_.reset(new ThreadPool("Jit thread pool", 1));
-  thread_pool_->SetPthreadPriority(kJitPoolThreadPthreadPriority);
-  thread_pool_->StartWorkers(Thread::Current());
-  {
-    // Add Jit interpreter instrumentation, tells the interpreter when
-    // to notify the jit to compile something.
-    ScopedSuspendAll ssa(__FUNCTION__);
-    Runtime::Current()->GetInstrumentation()->AddListener(
-        &listener_, JitInstrumentationListener::kJitEvents);
-  }
-}
-
-void JitInstrumentationCache::DeleteThreadPool(Thread* self) {
-  DCHECK(Runtime::Current()->IsShuttingDown(self));
-  if (thread_pool_ != nullptr) {
-    // First remove the listener, to avoid having mutators enter
-    // 'AddSamples'.
-    ThreadPool* cache = nullptr;
-    {
-      ScopedSuspendAll ssa(__FUNCTION__);
-      Runtime::Current()->GetInstrumentation()->RemoveListener(
-          &listener_, JitInstrumentationListener::kJitEvents);
-      // Clear thread_pool_ field while the threads are suspended.
-      // A mutator in the 'AddSamples' method will check against it.
-      cache = thread_pool_.release();
-    }
-    cache->StopWorkers(self);
-    cache->RemoveAllTasks(self);
-    // We could just suspend all threads, but we know those threads
-    // will finish in a short period, so it's not worth adding a suspend logic
-    // here. Besides, this is only done for shutdown.
-    cache->Wait(self, false, false);
-    delete cache;
-  }
-}
-
-void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
-  // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
-  // than we want resulting in samples even after the method is compiled.
-  if (method->IsClassInitializer() || method->IsNative()) {
-    return;
-  }
-  DCHECK(thread_pool_ != nullptr);
-
-  uint16_t sample_count = method->IncrementCounter();
-  if (sample_count == warm_method_threshold_) {
-    bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
-    if (success) {
-      VLOG(jit) << "Start profiling " << PrettyMethod(method);
-    }
-
-    if (thread_pool_ == nullptr) {
-      // Calling ProfilingInfo::Create might put us in a suspended state, which could
-      // lead to the thread pool being deleted when we are shutting down.
-      DCHECK(Runtime::Current()->IsShuttingDown(self));
-      return;
-    }
-
-    if (!success) {
-      // We failed allocating. Instead of doing the collection on the Java thread, we push
-      // an allocation to a compiler thread, that will do the collection.
-      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
-    }
-  }
-
-  if (sample_count == hot_method_threshold_) {
-    DCHECK(thread_pool_ != nullptr);
-    thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
-  }
-
-  if (sample_count == osr_method_threshold_) {
-    DCHECK(thread_pool_ != nullptr);
-    thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
-  }
-}
-
-JitInstrumentationListener::JitInstrumentationListener(JitInstrumentationCache* cache)
-    : instrumentation_cache_(cache) {
-  CHECK(instrumentation_cache_ != nullptr);
-}
-
-void JitInstrumentationListener::MethodEntered(Thread* thread,
-                                               mirror::Object* /*this_object*/,
-                                               ArtMethod* method,
-                                               uint32_t /*dex_pc*/) {
-  if (UNLIKELY(Runtime::Current()->GetJit()->JitAtFirstUse())) {
-    // The compiler requires a ProfilingInfo object.
-    ProfilingInfo::Create(thread, method, /* retry_allocation */ true);
-    JitCompileTask compile_task(method, JitCompileTask::kCompile);
-    compile_task.Run(thread);
-    return;
-  }
-
-  ProfilingInfo* profiling_info = method->GetProfilingInfo(sizeof(void*));
-  // Update the entrypoint if the ProfilingInfo has one. The interpreter will call it
-  // instead of interpreting the method.
-  // We avoid doing this if exit stubs are installed to not mess with the instrumentation.
-  // TODO(ngeoffray): Clean up instrumentation and code cache interactions.
-  if ((profiling_info != nullptr) &&
-      (profiling_info->GetSavedEntryPoint() != nullptr) &&
-      !Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()) {
-    method->SetEntryPointFromQuickCompiledCode(profiling_info->GetSavedEntryPoint());
-  } else {
-    instrumentation_cache_->AddSamples(thread, method, 1);
-  }
-}
-
-void JitInstrumentationListener::Branch(Thread* thread,
-                                        ArtMethod* method,
-                                        uint32_t dex_pc ATTRIBUTE_UNUSED,
-                                        int32_t dex_pc_offset) {
-  if (dex_pc_offset < 0) {
-    // Increment method hotness if it is a backward branch.
-    instrumentation_cache_->AddSamples(thread, method, 1);
-  }
-}
-
-void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
-                                                          mirror::Object* this_object,
-                                                          ArtMethod* caller,
-                                                          uint32_t dex_pc,
-                                                          ArtMethod* callee ATTRIBUTE_UNUSED) {
-  // We make sure we cannot be suspended, as the profiling info can be concurrently deleted.
-  instrumentation_cache_->AddSamples(thread, caller, 1);
-  DCHECK(this_object != nullptr);
-  ProfilingInfo* info = caller->GetProfilingInfo(sizeof(void*));
-  if (info != nullptr) {
-    // Since the instrumentation is marked from the declaring class we need to mark the card so
-    // that mod-union tables and card rescanning know about the update.
-    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass());
-    info->AddInvokeInfo(dex_pc, this_object->GetClass());
-  }
-}
-
-void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) {
-  if (thread_pool_ != nullptr) {
-    thread_pool_->Wait(self, false, false);
-  }
-}
-
-}  // namespace jit
-}  // namespace art
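
With jit_instrumentation.cc deleted above, its listener callbacks become direct calls into Jit (MethodEntered, InvokeVirtualOrInterface, AddSamples). The backward-branch case is the one whose new call site is not visible in this diff; a hedged sketch of the shape it presumably takes on the interpreter side:

    // Assumed replacement for the removed Branch() listener; the actual
    // interpreter call site is outside this diff.
    void OnBranch(art::Thread* self, art::ArtMethod* method, int32_t dex_pc_offset,
                  art::jit::Jit* jit) {
      if (dex_pc_offset < 0) {
        // Backward branch: count a sample that carries back-edge information.
        jit->AddSamples(self, method, 1, /* with_backedges */ true);
      }
    }
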
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
deleted file mode 100644
index d1c5c44..0000000
--- a/runtime/jit/jit_instrumentation.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_JIT_JIT_INSTRUMENTATION_H_
-#define ART_RUNTIME_JIT_JIT_INSTRUMENTATION_H_
-
-#include <unordered_map>
-
-#include "instrumentation.h"
-
-#include "atomic.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "gc_root.h"
-#include "jni.h"
-#include "object_callbacks.h"
-#include "thread_pool.h"
-
-namespace art {
-namespace mirror {
-  class Object;
-  class Throwable;
-}  // namespace mirror
-class ArtField;
-class ArtMethod;
-union JValue;
-class Thread;
-
-namespace jit {
-
-class JitInstrumentationCache;
-
-class JitInstrumentationListener : public instrumentation::InstrumentationListener {
- public:
-  explicit JitInstrumentationListener(JitInstrumentationCache* cache);
-
-  void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
-                     ArtMethod* method, uint32_t /*dex_pc*/)
-      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
-
-  void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                    ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                    const JValue& /*return_value*/)
-      OVERRIDE { }
-  void MethodUnwind(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                    ArtMethod* /*method*/, uint32_t /*dex_pc*/) OVERRIDE { }
-  void FieldRead(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                 ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                 ArtField* /*field*/) OVERRIDE { }
-  void FieldWritten(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                    ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                    ArtField* /*field*/, const JValue& /*field_value*/)
-      OVERRIDE { }
-  void ExceptionCaught(Thread* /*thread*/,
-                       mirror::Throwable* /*exception_object*/) OVERRIDE { }
-
-  void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
-                  ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
-
-  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
-      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
-
-  void InvokeVirtualOrInterface(Thread* thread,
-                                mirror::Object* this_object,
-                                ArtMethod* caller,
-                                uint32_t dex_pc,
-                                ArtMethod* callee)
-      OVERRIDE
-      REQUIRES(Roles::uninterruptible_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  static constexpr uint32_t kJitEvents =
-      instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBranch |
-      instrumentation::Instrumentation::kInvokeVirtualOrInterface;
-
- private:
-  JitInstrumentationCache* const instrumentation_cache_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationListener);
-};
-
-// Keeps track of which methods are hot.
-class JitInstrumentationCache {
- public:
-  JitInstrumentationCache(size_t hot_method_threshold,
-                          size_t warm_method_threshold,
-                          size_t osr_method_threshold);
-  void AddSamples(Thread* self, ArtMethod* method, size_t samples)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateThreadPool();
-  void DeleteThreadPool(Thread* self);
-
-  size_t HotMethodThreshold() const {
-    return hot_method_threshold_;
-  }
-
-  // Wait until there is no more pending compilation tasks.
-  void WaitForCompilationToFinish(Thread* self);
-
- private:
-  size_t hot_method_threshold_;
-  size_t warm_method_threshold_;
-  size_t osr_method_threshold_;
-  JitInstrumentationListener listener_;
-  std::unique_ptr<ThreadPool> thread_pool_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationCache);
-};
-
-}  // namespace jit
-}  // namespace art
-
-#endif  // ART_RUNTIME_JIT_JIT_INSTRUMENTATION_H_
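
The offline_profiling_info.cc diff that follows replaces the comma-separated text profile with a little-endian binary format (AddUintToBuffer emits bytes from low to high). Assembling the layout from the Save() implementation below, a profile file looks like this; the field widths are from the diff, the framing is a summary, not normative:

    magic            'p' 'r' 'o' '\0'
    version          '0' '0' '1' '\0'
    number_of_lines  uint16
    per line:
      dex_location_size  uint16
      method_set_size    uint16
      class_set_size     uint16
      checksum           uint32
      dex_location       dex_location_size bytes
      method_ids         method_set_size x uint16
      class_ids          class_set_size x uint16
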
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index f181ca3..c99d363 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -16,7 +16,8 @@
 
 #include "offline_profiling_info.h"
 
-#include <fstream>
+#include "errno.h"
+#include <limits.h>
 #include <vector>
 #include <sys/file.h>
 #include <sys/stat.h>
@@ -34,6 +35,11 @@
 
 namespace art {
 
+const uint8_t ProfileCompilationInfo::kProfileMagic[] = { 'p', 'r', 'o', '\0' };
+const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '1', '\0' };
+
+static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
+
 // Transform the actual dex location into relative paths.
 // Note: this is OK because we don't store profiles of different apps into the same file.
 // Apps with split apks don't cause trouble because each split has a different name and will not
@@ -49,15 +55,27 @@
   }
 }
 
-bool ProfileCompilationInfo::SaveProfilingInfo(
-    const std::string& filename,
-    const std::vector<ArtMethod*>& methods,
+bool ProfileCompilationInfo::AddMethodsAndClasses(
+    const std::vector<MethodReference>& methods,
     const std::set<DexCacheResolvedClasses>& resolved_classes) {
-  if (methods.empty() && resolved_classes.empty()) {
-    VLOG(profiler) << "No info to save to " << filename;
-    return true;
+  for (const MethodReference& method : methods) {
+    if (!AddMethodIndex(GetProfileDexFileKey(method.dex_file->GetLocation()),
+                        method.dex_file->GetLocationChecksum(),
+                        method.dex_method_index)) {
+      return false;
+    }
   }
+  for (const DexCacheResolvedClasses& dex_cache : resolved_classes) {
+    if (!AddResolvedClasses(dex_cache)) {
+      return false;
+    }
+  }
+  return true;
+}
 
+bool ProfileCompilationInfo::MergeAndSave(const std::string& filename,
+                                          uint64_t* bytes_written,
+                                          bool force) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   ScopedFlock flock;
   std::string error;
@@ -68,26 +86,37 @@
 
   int fd = flock.GetFile()->Fd();
 
-  ProfileCompilationInfo info;
-  if (!info.Load(fd)) {
-    LOG(WARNING) << "Could not load previous profile data from file " << filename;
-    return false;
-  }
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    for (ArtMethod* method : methods) {
-      const DexFile* dex_file = method->GetDexFile();
-      if (!info.AddMethodIndex(GetProfileDexFileKey(dex_file->GetLocation()),
-                               dex_file->GetLocationChecksum(),
-                               method->GetDexMethodIndex())) {
+  // Load the file but keep a copy around to be able to infer if the content has changed.
+  ProfileCompilationInfo fileInfo;
+  ProfileLoadSatus status = fileInfo.LoadInternal(fd, &error);
+  if (status == kProfileLoadSuccess) {
+    // Merge the content of file into the current object.
+    if (MergeWith(fileInfo)) {
+      // If after the merge we have the same data as what is in the file, there's no point
+      // in actually doing the write. The file will be exactly the same as before.
+      if (Equals(fileInfo)) {
+        if (bytes_written != nullptr) {
+          *bytes_written = 0;
+        }
+        return true;
+      }
+    } else {
+      LOG(WARNING) << "Could not merge previous profile data from file " << filename;
+      if (!force) {
         return false;
       }
     }
-    for (const DexCacheResolvedClasses& dex_cache : resolved_classes) {
-      info.AddResolvedClasses(dex_cache);
-    }
+  } else if (force &&
+        ((status == kProfileLoadVersionMismatch) || (status == kProfileLoadBadData))) {
+    // Log a warning but don't return false. We will clear the profile anyway.
+    LOG(WARNING) << "Clearing bad or obsolete profile data from file "
+        << filename << ": " << error;
+  } else {
+    LOG(WARNING) << "Could not load profile data from file " << filename << ": " << error;
+    return false;
   }
 
+  // We need to clear the data because we don't support appending to the profiles yet.
   if (!flock.GetFile()->ClearContent()) {
     PLOG(WARNING) << "Could not clear profile file: " << filename;
     return false;
@@ -95,95 +124,118 @@
 
   // This doesn't need locking because we are trying to lock the file for exclusive
   // access and fail immediately if we can't.
-  bool result = info.Save(fd);
+  bool result = Save(fd);
   if (result) {
     VLOG(profiler) << "Successfully saved profile info to " << filename
         << " Size: " << GetFileSizeBytes(filename);
+    if (bytes_written != nullptr) {
+      *bytes_written = GetFileSizeBytes(filename);
+    }
   } else {
     VLOG(profiler) << "Failed to save profile info to " << filename;
   }
   return result;
 }
 
-static bool WriteToFile(int fd, const std::ostringstream& os) {
-  std::string data(os.str());
-  const char *p = data.c_str();
-  size_t length = data.length();
-  do {
-    int n = TEMP_FAILURE_RETRY(write(fd, p, length));
-    if (n < 0) {
-      PLOG(WARNING) << "Failed to write to descriptor: " << fd;
+// Returns true if all the bytes were successfully written to the file descriptor.
+static bool WriteBuffer(int fd, const uint8_t* buffer, size_t byte_count) {
+  while (byte_count > 0) {
+    int bytes_written = TEMP_FAILURE_RETRY(write(fd, buffer, byte_count));
+    if (bytes_written == -1) {
       return false;
     }
-    p += n;
-    length -= n;
-  } while (length > 0);
+    byte_count -= bytes_written;  // Reduce the number of remaining bytes.
+    buffer += bytes_written;  // Move the buffer forward.
+  }
   return true;
 }
 
-static constexpr const char kFieldSeparator = ',';
-static constexpr const char kLineSeparator = '\n';
-static constexpr const char* kClassesMarker = "classes";
+// Add the string bytes to the buffer.
+static void AddStringToBuffer(std::vector<uint8_t>* buffer, const std::string& value) {
+  buffer->insert(buffer->end(), value.begin(), value.end());
+}
+
+// Insert each byte, from low to high into the buffer.
+template <typename T>
+static void AddUintToBuffer(std::vector<uint8_t>* buffer, T value) {
+  for (size_t i = 0; i < sizeof(T); i++) {
+    buffer->push_back((value >> (i * kBitsPerByte)) & 0xff);
+  }
+}
+
+static constexpr size_t kLineHeaderSize =
+    3 * sizeof(uint16_t) +  // dex_location.size + method_set.size + class_set.size
+    sizeof(uint32_t);       // checksum
 
 /**
  * Serialization format:
- *    dex_location1,dex_location_checksum1,method_id11,method_id12...,classes,class_id1,class_id2...
- *    dex_location2,dex_location_checksum2,method_id21,method_id22...,classes,class_id1,class_id2...
- * e.g.
- *    app.apk,131232145,11,23,454,54,classes,1,2,4,1234
- *    app.apk:classes5.dex,218490184,39,13,49,1
+ *    magic,version,number_of_lines
+ *    dex_location1,number_of_methods1,number_of_classes1,dex_location_checksum1, \
+ *        method_id11,method_id12...,class_id1,class_id2...
+ *    dex_location2,number_of_methods2,number_of_classes2,dex_location_checksum2, \
+ *        method_id21,method_id22...,class_id1,class_id2...
+ *    .....
  **/
 bool ProfileCompilationInfo::Save(int fd) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   DCHECK_GE(fd, 0);
-  // TODO(calin): Profile this and see how much memory it takes. If too much,
-  // write to file directly.
-  std::ostringstream os;
+
+  // Cache at most 5KB before writing.
+  static constexpr size_t kMaxSizeToKeepBeforeWriting = 5 * KB;
+  // Use a vector wrapper to avoid keeping track of offsets when we add elements.
+  std::vector<uint8_t> buffer;
+  if (!WriteBuffer(fd, kProfileMagic, sizeof(kProfileMagic)) ||
+      !WriteBuffer(fd, kProfileVersion, sizeof(kProfileVersion))) {
+    return false;
+  }
+  AddUintToBuffer(&buffer, static_cast<uint16_t>(info_.size()));
+
   for (const auto& it : info_) {
+    if (buffer.size() > kMaxSizeToKeepBeforeWriting) {
+      if (!WriteBuffer(fd, buffer.data(), buffer.size())) {
+        return false;
+      }
+      buffer.clear();
+    }
     const std::string& dex_location = it.first;
     const DexFileData& dex_data = it.second;
     if (dex_data.method_set.empty() && dex_data.class_set.empty()) {
       continue;
     }
 
-    os << dex_location << kFieldSeparator << dex_data.checksum;
+    if (dex_location.size() >= kMaxDexFileKeyLength) {
+      LOG(WARNING) << "DexFileKey exceeds allocated limit";
+      return false;
+    }
+
+    // Make sure that the buffer has enough capacity to avoid repeated resizings
+    // while we add data.
+    size_t required_capacity = buffer.size() +
+        kLineHeaderSize +
+        dex_location.size() +
+        sizeof(uint16_t) * (dex_data.class_set.size() + dex_data.method_set.size());
+
+    buffer.reserve(required_capacity);
+
+    DCHECK_LE(dex_location.size(), std::numeric_limits<uint16_t>::max());
+    DCHECK_LE(dex_data.method_set.size(), std::numeric_limits<uint16_t>::max());
+    DCHECK_LE(dex_data.class_set.size(), std::numeric_limits<uint16_t>::max());
+    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_location.size()));
+    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.method_set.size()));
+    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.class_set.size()));
+    AddUintToBuffer(&buffer, dex_data.checksum);  // uint32_t
+
+    AddStringToBuffer(&buffer, dex_location);
+
     for (auto method_it : dex_data.method_set) {
-      os << kFieldSeparator << method_it;
+      AddUintToBuffer(&buffer, method_it);
     }
-    if (!dex_data.class_set.empty()) {
-      os << kFieldSeparator << kClassesMarker;
-      for (auto class_id : dex_data.class_set) {
-        os << kFieldSeparator << class_id;
-      }
+    for (auto class_id : dex_data.class_set) {
+      AddUintToBuffer(&buffer, class_id);
     }
-    os << kLineSeparator;
+    DCHECK_EQ(required_capacity, buffer.size())
+        << "Failed to add the expected number of bytes in the buffer";
   }
 
-  return WriteToFile(fd, os);
-}
-
-// TODO(calin): This a duplicate of Utils::Split fixing the case where the first character
-// is the separator. Merge the fix into Utils::Split once verified that it doesn't break its users.
-static void SplitString(const std::string& s, char separator, std::vector<std::string>* result) {
-  const char* p = s.data();
-  const char* end = p + s.size();
-  // Check if the first character is the separator.
-  if (p != end && *p ==separator) {
-    result->push_back("");
-    ++p;
-  }
-  // Process the rest of the characters.
-  while (p != end) {
-    if (*p == separator) {
-      ++p;
-    } else {
-      const char* start = p;
-      while (++p != end && *p != separator) {
-        // Skip to the next occurrence of the separator.
-      }
-      result->push_back(std::string(start, p - start));
-    }
-  }
+  return WriteBuffer(fd, buffer.data(), buffer.size());
 }
 
 ProfileCompilationInfo::DexFileData* ProfileCompilationInfo::GetOrAddDexFileData(
@@ -233,120 +285,260 @@
   return true;
 }
 
-bool ProfileCompilationInfo::ProcessLine(const std::string& line) {
-  std::vector<std::string> parts;
-  SplitString(line, kFieldSeparator, &parts);
-  if (parts.size() < 3) {
-    LOG(WARNING) << "Invalid line: " << line;
-    return false;
-  }
-
-  const std::string& dex_location = parts[0];
-  uint32_t checksum;
-  if (!ParseInt(parts[1].c_str(), &checksum)) {
-    return false;
-  }
-
-  for (size_t i = 2; i < parts.size(); i++) {
-    if (parts[i] == kClassesMarker) {
-      ++i;
-      // All of the remaining idx are class def indexes.
-      for (++i; i < parts.size(); ++i) {
-        uint32_t class_def_idx;
-        if (!ParseInt(parts[i].c_str(), &class_def_idx)) {
-          LOG(WARNING) << "Cannot parse class_def_idx " << parts[i];
-          return false;
-        } else if (class_def_idx >= std::numeric_limits<uint16_t>::max()) {
-          LOG(WARNING) << "Class def idx " << class_def_idx << " is larger than uint16_t max";
-          return false;
-        }
-        if (!AddClassIndex(dex_location, checksum, class_def_idx)) {
-          return false;
-        }
-      }
-      break;
-    }
-    uint32_t method_idx;
-    if (!ParseInt(parts[i].c_str(), &method_idx)) {
-      LOG(WARNING) << "Cannot parse method_idx " << parts[i];
-      return false;
-    }
+bool ProfileCompilationInfo::ProcessLine(SafeBuffer& line_buffer,
+                                         uint16_t method_set_size,
+                                         uint16_t class_set_size,
+                                         uint32_t checksum,
+                                         const std::string& dex_location) {
+  for (uint16_t i = 0; i < method_set_size; i++) {
+    uint16_t method_idx = line_buffer.ReadUintAndAdvance<uint16_t>();
     if (!AddMethodIndex(dex_location, checksum, method_idx)) {
       return false;
     }
   }
+
+  for (uint16_t i = 0; i < class_set_size; i++) {
+    uint16_t class_def_idx = line_buffer.ReadUintAndAdvance<uint16_t>();
+    if (!AddClassIndex(dex_location, checksum, class_def_idx)) {
+      return false;
+    }
+  }
   return true;
 }
 
-// Parses the buffer (of length n) starting from start_from and identify new lines
-// based on kLineSeparator marker.
-// Returns the first position after kLineSeparator in the buffer (starting from start_from),
-// or -1 if the marker doesn't appear.
-// The processed characters are appended to the given line.
-static int GetLineFromBuffer(char* buffer, int n, int start_from, std::string& line) {
-  if (start_from >= n) {
-    return -1;
+// Tests for EOF by trying to read 1 byte from the descriptor.
+// Returns:
+//   0 if the descriptor is at EOF,
+//  -1 if there was an IO error,
+//   1 if the descriptor has more content to read.
+static int testEOF(int fd) {
+  uint8_t buffer[1];
+  return TEMP_FAILURE_RETRY(read(fd, buffer, 1));
+}
+
+// Reads an uint value previously written with AddUintToBuffer.
+template <typename T>
+T ProfileCompilationInfo::SafeBuffer::ReadUintAndAdvance() {
+  static_assert(std::is_unsigned<T>::value, "Type is not unsigned");
+  CHECK_LE(ptr_current_ + sizeof(T), ptr_end_);
+  T value = 0;
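+  // Reassemble the value low byte first (little-endian).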
+  for (size_t i = 0; i < sizeof(T); i++) {
+    value += ptr_current_[i] << (i * kBitsPerByte);
   }
-  int new_line_pos = -1;
-  for (int i = start_from; i < n; i++) {
-    if (buffer[i] == kLineSeparator) {
-      new_line_pos = i;
-      break;
+  ptr_current_ += sizeof(T);
+  return value;
+}
+
+bool ProfileCompilationInfo::SafeBuffer::CompareAndAdvance(const uint8_t* data, size_t data_size) {
+  if (ptr_current_ + data_size > ptr_end_) {
+    return false;
+  }
+  if (memcmp(ptr_current_, data, data_size) == 0) {
+    ptr_current_ += data_size;
+    return true;
+  }
+  return false;
+}
+
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::SafeBuffer::FillFromFd(
+      int fd,
+      const std::string& source,
+      /*out*/std::string* error) {
+  size_t byte_count = ptr_end_ - ptr_current_;
+  uint8_t* buffer = ptr_current_;
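+  // read() may return fewer bytes than requested, so loop until the requested
+  // range is completely filled or an error/EOF occurs.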
+  while (byte_count > 0) {
+    int bytes_read = TEMP_FAILURE_RETRY(read(fd, buffer, byte_count));
+    if (bytes_read == 0) {
+      *error += "Profile EOF reached prematurely for " + source;
+      return kProfileLoadBadData;
+    } else if (bytes_read < 0) {
+      *error += "Profile IO error for " + source + strerror(errno);
+      return kProfileLoadIOError;
     }
+    byte_count -= bytes_read;
+    buffer += bytes_read;
   }
-  int append_limit = new_line_pos == -1 ? n : new_line_pos;
-  line.append(buffer + start_from, append_limit - start_from);
-  // Jump over kLineSeparator and return the position of the next character.
-  return new_line_pos == -1 ? new_line_pos : new_line_pos + 1;
+  return kProfileLoadSuccess;
+}
+
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::ReadProfileHeader(
+      int fd,
+      /*out*/uint16_t* number_of_lines,
+      /*out*/std::string* error) {
+  // Read magic and version
+  const size_t kMagicVersionSize =
+    sizeof(kProfileMagic) +
+    sizeof(kProfileVersion) +
+    sizeof(uint16_t);  // number of lines
+
+  SafeBuffer safe_buffer(kMagicVersionSize);
+
+  ProfileLoadStatus status = safe_buffer.FillFromFd(fd, "ReadProfileHeader", error);
+  if (status != kProfileLoadSuccess) {
+    return status;
+  }
+
+  if (!safe_buffer.CompareAndAdvance(kProfileMagic, sizeof(kProfileMagic))) {
+    *error = "Profile missing magic";
+    return kProfileLoadVersionMismatch;
+  }
+  if (!safe_buffer.CompareAndAdvance(kProfileVersion, sizeof(kProfileVersion))) {
+    *error = "Profile version mismatch";
+    return kProfileLoadVersionMismatch;
+  }
+  *number_of_lines = safe_buffer.ReadUintAndAdvance<uint16_t>();
+  return kProfileLoadSuccess;
+}
+
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::ReadProfileLineHeader(
+      int fd,
+      /*out*/ProfileLineHeader* line_header,
+      /*out*/std::string* error) {
+  SafeBuffer header_buffer(kLineHeaderSize);
+  ProfileLoadStatus status = header_buffer.FillFromFd(fd, "ReadProfileLineHeader", error);
+  if (status != kProfileLoadSuccess) {
+    return status;
+  }
+
+  uint16_t dex_location_size = header_buffer.ReadUintAndAdvance<uint16_t>();
+  line_header->method_set_size = header_buffer.ReadUintAndAdvance<uint16_t>();
+  line_header->class_set_size = header_buffer.ReadUintAndAdvance<uint16_t>();
+  line_header->checksum = header_buffer.ReadUintAndAdvance<uint32_t>();
+
+  if (dex_location_size == 0 || dex_location_size > kMaxDexFileKeyLength) {
+    *error = "DexFileKey has an invalid size: " +
+        std::to_string(static_cast<uint32_t>(dex_location_size));
+    return kProfileLoadBadData;
+  }
+
+  SafeBuffer location_buffer(dex_location_size);
+  status = location_buffer.FillFromFd(fd, "ReadProfileHeaderDexLocation", error);
+  if (status != kProfileLoadSuccess) {
+    return status;
+  }
+  line_header->dex_location.assign(
+      reinterpret_cast<char*>(location_buffer.Get()), dex_location_size);
+  return kProfileLoadSuccess;
+}
+
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::ReadProfileLine(
+      int fd,
+      const ProfileLineHeader& line_header,
+      /*out*/std::string* error) {
+  // Make sure that we don't try to read the whole profile into memory at once
+  // (in case the profile file is very large). Split the reads into chunks of
+  // at most 10KB.
+  static constexpr uint16_t kMaxNumberOfEntriesToRead = 5120;
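+  // 5120 entries of 2 bytes (uint16_t) each caps a single read at 10KB.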
+  uint16_t methods_left_to_read = line_header.method_set_size;
+  uint16_t classes_left_to_read = line_header.class_set_size;
+
+  while ((methods_left_to_read > 0) || (classes_left_to_read > 0)) {
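+    // Methods fill the chunk budget first; classes get whatever is left.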
+    uint16_t methods_to_read = std::min(kMaxNumberOfEntriesToRead, methods_left_to_read);
+    uint16_t max_classes_to_read = kMaxNumberOfEntriesToRead - methods_to_read;
+    uint16_t classes_to_read = std::min(max_classes_to_read, classes_left_to_read);
+
+    size_t line_size = sizeof(uint16_t) * (methods_to_read + classes_to_read);
+    SafeBuffer line_buffer(line_size);
+
+    ProfileLoadStatus status = line_buffer.FillFromFd(fd, "ReadProfileLine", error);
+    if (status != kProfileLoadSuccess) {
+      return status;
+    }
+    if (!ProcessLine(line_buffer,
+                     methods_to_read,
+                     classes_to_read,
+                     line_header.checksum,
+                     line_header.dex_location)) {
+      *error = "Error when reading profile file line";
+      return kProfileLoadBadData;
+    }
+    methods_left_to_read -= methods_to_read;
+    classes_left_to_read -= classes_to_read;
+  }
+  return kProfileLoadSuccess;
 }
 
 bool ProfileCompilationInfo::Load(int fd) {
+  std::string error;
+  ProfileLoadStatus status = LoadInternal(fd, &error);
+
+  if (status == kProfileLoadSuccess) {
+    return true;
+  } else {
+    PLOG(WARNING) << "Error when reading profile " << error;
+    return false;
+  }
+}
+
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::LoadInternal(
+      int fd, std::string* error) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   DCHECK_GE(fd, 0);
 
-  std::string current_line;
-  const int kBufferSize = 1024;
-  char buffer[kBufferSize];
-
-  while (true) {
-    int n = TEMP_FAILURE_RETRY(read(fd, buffer, kBufferSize));
-    if (n < 0) {
-      PLOG(WARNING) << "Error when reading profile file";
-      return false;
-    } else if (n == 0) {
-      break;
-    }
-    // Detect the new lines from the buffer. If we manage to complete a line,
-    // process it. Otherwise append to the current line.
-    int current_start_pos = 0;
-    while (current_start_pos < n) {
-      current_start_pos = GetLineFromBuffer(buffer, n, current_start_pos, current_line);
-      if (current_start_pos == -1) {
-        break;
-      }
-      if (!ProcessLine(current_line)) {
-        return false;
-      }
-      // Reset the current line (we just processed it).
-      current_line.clear();
-    }
+  struct stat stat_buffer;
+  if (fstat(fd, &stat_buffer) != 0) {
+    return kProfileLoadIOError;
   }
-  return true;
+  // We allow empty profile files.
+  // Profiles may be created by ActivityManager or installd before we manage to
+  // process them in the runtime or profman.
+  if (stat_buffer.st_size == 0) {
+    return kProfileLoadSuccess;
+  }
+  // Read profile header: magic + version + number_of_lines.
+  uint16_t number_of_lines;
+  ProfileLoadStatus status = ReadProfileHeader(fd, &number_of_lines, error);
+  if (status != kProfileLoadSuccess) {
+    return status;
+  }
+
+  while (number_of_lines > 0) {
+    ProfileLineHeader line_header;
+    // First, read the line header to get the amount of data we need to read.
+    status = ReadProfileLineHeader(fd, &line_header, error);
+    if (status != kProfileLoadSuccess) {
+      return status;
+    }
+
+    // Now read the actual profile line.
+    status = ReadProfileLine(fd, line_header, error);
+    if (status != kProfileLoadSuccess) {
+      return status;
+    }
+    number_of_lines--;
+  }
+
+  // Check that we read everything and that profiles don't contain junk data.
+  int result = testEOF(fd);
+  if (result == 0) {
+    return kProfileLoadSuccess;
+  } else if (result < 0) {
+    return kProfileLoadIOError;
+  } else {
+    *error = "Unexpected content in the profile file";
+    return kProfileLoadBadData;
+  }
 }
 
-bool ProfileCompilationInfo::Load(const ProfileCompilationInfo& other) {
+bool ProfileCompilationInfo::MergeWith(const ProfileCompilationInfo& other) {
+  // First verify that all checksums match. This will avoid adding garbage to
+  // the current profile info.
+  // Note that the number of elements should be very small, so this should not
+  // be a performance issue.
+  for (const auto& other_it : other.info_) {
+    auto info_it = info_.find(other_it.first);
+    if ((info_it != info_.end()) && (info_it->second.checksum != other_it.second.checksum)) {
+      LOG(WARNING) << "Checksum mismatch for dex " << other_it.first;
+      return false;
+    }
+  }
+  // All checksums match. Import the data.
   for (const auto& other_it : other.info_) {
     const std::string& other_dex_location = other_it.first;
     const DexFileData& other_dex_data = other_it.second;
-
     auto info_it = info_.find(other_dex_location);
     if (info_it == info_.end()) {
       info_it = info_.Put(other_dex_location, DexFileData(other_dex_data.checksum));
     }
-    if (info_it->second.checksum != other_dex_data.checksum) {
-      LOG(WARNING) << "Checksum mismatch for dex " << other_dex_location;
-      return false;
-    }
     info_it->second.method_set.insert(other_dex_data.method_set.begin(),
                                       other_dex_data.method_set.end());
     info_it->second.class_set.insert(other_dex_data.class_set.begin(),
@@ -387,6 +579,14 @@
   return total;
 }
 
+uint32_t ProfileCompilationInfo::GetNumberOfResolvedClasses() const {
+  uint32_t total = 0;
+  for (const auto& it : info_) {
+    total += it.second.class_set.size();
+  }
+  return total;
+}
+
 std::string ProfileCompilationInfo::DumpInfo(const std::vector<const DexFile*>* dex_files,
                                              bool print_full_dex_location) const {
   std::ostringstream os;
@@ -408,19 +608,29 @@
       std::string multidex_suffix = DexFile::GetMultiDexSuffix(location);
       os << (multidex_suffix.empty() ? kFirstDexFileKeySubstitute : multidex_suffix);
     }
-    for (const auto method_it : dex_data.method_set) {
-      if (dex_files != nullptr) {
-        const DexFile* dex_file = nullptr;
-        for (size_t i = 0; i < dex_files->size(); i++) {
-          if (location == (*dex_files)[i]->GetLocation()) {
-            dex_file = (*dex_files)[i];
-          }
-        }
-        if (dex_file != nullptr) {
-          os << "\n  " << PrettyMethod(method_it, *dex_file, true);
+    const DexFile* dex_file = nullptr;
+    if (dex_files != nullptr) {
+      for (size_t i = 0; i < dex_files->size(); i++) {
+        if (location == (*dex_files)[i]->GetLocation()) {
+          dex_file = (*dex_files)[i];
         }
       }
-      os << "\n  " << method_it;
+    }
+    os << "\n\tmethods: ";
+    for (const auto method_it : dex_data.method_set) {
+      if (dex_file != nullptr) {
+        os << "\n\t\t" << PrettyMethod(method_it, *dex_file, true);
+      } else {
+        os << method_it << ",";
+      }
+    }
+    os << "\n\tclasses: ";
+    for (const auto class_it : dex_data.class_set) {
+      if (dex_file != nullptr) {
+        os << "\n\t\t" << PrettyType(class_it, *dex_file);
+      } else {
+        os << class_it << ",";
+      }
     }
   }
   return os.str();
@@ -435,11 +645,18 @@
   for (auto&& pair : info_) {
     const std::string& profile_key = pair.first;
     const DexFileData& data = pair.second;
-    DexCacheResolvedClasses classes(profile_key, data.checksum);
+    // TODO: Is it OK to use the same location for both base and dex location here?
+    DexCacheResolvedClasses classes(profile_key, profile_key, data.checksum);
     classes.AddClasses(data.class_set.begin(), data.class_set.end());
     ret.insert(classes);
   }
   return ret;
 }
 
+void ProfileCompilationInfo::ClearResolvedClasses() {
+  for (auto& pair : info_) {
+    pair.second.class_set.clear();
+  }
+}
+
 }  // namespace art
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index df03244..5a07da7 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -28,9 +28,6 @@
 
 namespace art {
 
-class ArtMethod;
-class DexCacheProfileData;
-
 // TODO: rename file.
 /**
  * Profile information in a format suitable to be queried by the compiler and
@@ -41,21 +38,29 @@
  */
 class ProfileCompilationInfo {
  public:
-  // Saves profile information about the given methods in the given file.
-  // Note that the saving proceeds only if the file can be locked for exclusive access.
-  // If not (the locking is not blocking), the function does not save and returns false.
-  static bool SaveProfilingInfo(const std::string& filename,
-                                const std::vector<ArtMethod*>& methods,
-                                const std::set<DexCacheResolvedClasses>& resolved_classes);
+  static const uint8_t kProfileMagic[];
+  static const uint8_t kProfileVersion[];
 
+  // Add the given methods and classes to the current profile object.
+  bool AddMethodsAndClasses(const std::vector<MethodReference>& methods,
+                            const std::set<DexCacheResolvedClasses>& resolved_classes);
   // Loads profile information from the given file descriptor.
   bool Load(int fd);
-  // Loads the data from another ProfileCompilationInfo object.
-  bool Load(const ProfileCompilationInfo& info);
+  // Merge the data from another ProfileCompilationInfo into the current object.
+  bool MergeWith(const ProfileCompilationInfo& info);
   // Saves the profile data to the given file descriptor.
   bool Save(int fd);
+  // Loads and merges profile information from the given file into the current
+  // object and tries to save it back to disk.
+  // If `force` is true then the save will go through even if the given file
+  // has bad data or its version does not match. In these cases the profile content
+  // is ignored.
+  bool MergeAndSave(const std::string& filename, uint64_t* bytes_written, bool force);
+
   // Returns the number of methods that were profiled.
   uint32_t GetNumberOfMethods() const;
+  // Returns the number of resolved classes that were profiled.
+  uint32_t GetNumberOfResolvedClasses() const;
 
   // Returns true if the method reference is present in the profiling info.
   bool ContainsMethod(const MethodReference& method_ref) const;
@@ -70,8 +75,8 @@
   std::string DumpInfo(const std::vector<const DexFile*>* dex_files,
                        bool print_full_dex_location = true) const;
 
-  // For testing purposes.
   bool Equals(const ProfileCompilationInfo& other);
+
   static std::string GetProfileDexFileKey(const std::string& dex_location);
 
   // Returns the class descriptors for all of the classes in the profiles' class sets.
@@ -79,7 +84,17 @@
   // profile info stuff to generate a map back to the dex location.
   std::set<DexCacheResolvedClasses> GetResolvedClasses() const;
 
+  // Clears the resolved classes from the current object.
+  void ClearResolvedClasses();
+
  private:
+  enum ProfileLoadStatus {
+    kProfileLoadIOError,
+    kProfileLoadVersionMismatch,
+    kProfileLoadBadData,
+    kProfileLoadSuccess
+  };
+
   struct DexFileData {
     explicit DexFileData(uint32_t location_checksum) : checksum(location_checksum) {}
     uint32_t checksum;
@@ -96,9 +111,65 @@
   DexFileData* GetOrAddDexFileData(const std::string& dex_location, uint32_t checksum);
   bool AddMethodIndex(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
   bool AddClassIndex(const std::string& dex_location, uint32_t checksum, uint16_t class_idx);
-  bool AddResolvedClasses(const DexCacheResolvedClasses& classes)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool ProcessLine(const std::string& line);
+  bool AddResolvedClasses(const DexCacheResolvedClasses& classes);
+
+  // Parsing functionality.
+
+  struct ProfileLineHeader {
+    std::string dex_location;
+    uint16_t method_set_size;
+    uint16_t class_set_size;
+    uint32_t checksum;
+  };
+
+  // A helper structure to make sure we don't read past our buffers in the loops.
+  struct SafeBuffer {
+   public:
+    explicit SafeBuffer(size_t size) : storage_(new uint8_t[size]) {
+      ptr_current_ = storage_.get();
+      ptr_end_ = ptr_current_ + size;
+    }
+
+    // Fills the buffer, starting at the current position, with data read from the descriptor.
+    ProfileLoadStatus FillFromFd(int fd,
+                                const std::string& source,
+                                /*out*/std::string* error);
+
+    // Reads a uint value (stored low byte first, i.e. little-endian) and
+    // advances the current pointer by the number of bytes read.
+    template <typename T> T ReadUintAndAdvance();
+
+    // Compares the given data with the content at the current pointer. If the contents are
+    // equal it advances the current pointer by data_size.
+    bool CompareAndAdvance(const uint8_t* data, size_t data_size);
+
+    // Get the underlying raw buffer.
+    uint8_t* Get() { return storage_.get(); }
+
+   private:
+    std::unique_ptr<uint8_t> storage_;
+    uint8_t* ptr_current_;
+    uint8_t* ptr_end_;
+  };
+
+  ProfileLoadStatus LoadInternal(int fd, std::string* error);
+
+  ProfileLoadStatus ReadProfileHeader(int fd,
+                                     /*out*/uint16_t* number_of_lines,
+                                     /*out*/std::string* error);
+
+  ProfileLoadStatus ReadProfileLineHeader(int fd,
+                                         /*out*/ProfileLineHeader* line_header,
+                                         /*out*/std::string* error);
+  ProfileLoadStatus ReadProfileLine(int fd,
+                                   const ProfileLineHeader& line_header,
+                                   /*out*/std::string* error);
+
+  bool ProcessLine(SafeBuffer& line_buffer,
+                   uint16_t method_set_size,
+                   uint16_t class_set_size,
+                   uint32_t checksum,
+                   const std::string& dex_location);
 
   friend class ProfileCompilationInfoTest;
   friend class CompilerDriverProfileTest;
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
index fdd8c6e..c8f4d94 100644
--- a/runtime/jit/profile_compilation_info_test.cc
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -21,6 +21,7 @@
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
+#include "method_reference.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "handle_scope-inl.h"
@@ -49,16 +50,44 @@
     return methods;
   }
 
-  bool AddData(const std::string& dex_location,
-               uint32_t checksum,
-               uint16_t method_index,
-               ProfileCompilationInfo* info) {
+  bool AddMethod(const std::string& dex_location,
+                 uint32_t checksum,
+                 uint16_t method_index,
+                 ProfileCompilationInfo* info) {
     return info->AddMethodIndex(dex_location, checksum, method_index);
   }
 
+  bool AddClass(const std::string& dex_location,
+                uint32_t checksum,
+                uint16_t class_index,
+                ProfileCompilationInfo* info) {
+    return info->AddClassIndex(dex_location, checksum, class_index);
+  }
+
   uint32_t GetFd(const ScratchFile& file) {
     return static_cast<uint32_t>(file.GetFd());
   }
+
+  bool SaveProfilingInfo(
+      const std::string& filename,
+      const std::vector<ArtMethod*>& methods,
+      const std::set<DexCacheResolvedClasses>& resolved_classes) {
+    ProfileCompilationInfo info;
+    std::vector<MethodReference> method_refs;
+    ScopedObjectAccess soa(Thread::Current());
+    for (ArtMethod* method : methods) {
+      method_refs.emplace_back(method->GetDexFile(), method->GetDexMethodIndex());
+    }
+    if (!info.AddMethodsAndClasses(method_refs, resolved_classes)) {
+      return false;
+    }
+    return info.MergeAndSave(filename, nullptr, false);
+  }
+
+  // The magic and version arrays are declared with an incomplete type in the
+  // header, so we cannot use sizeof on them; hardcode the sizes here.
+  // They should not change anyway.
+  static constexpr int kProfileMagicSize = 4;
+  static constexpr int kProfileVersionSize = 4;
 };
 
 TEST_F(ProfileCompilationInfoTest, SaveArtMethods) {
@@ -75,9 +104,7 @@
   // Save virtual methods from Main.
   std::set<DexCacheResolvedClasses> resolved_classes;
   std::vector<ArtMethod*> main_methods = GetVirtualMethods(class_loader, "LMain;");
-  ASSERT_TRUE(ProfileCompilationInfo::SaveProfilingInfo(profile.GetFilename(),
-                                                        main_methods,
-                                                        resolved_classes));
+  ASSERT_TRUE(SaveProfilingInfo(profile.GetFilename(), main_methods, resolved_classes));
 
   // Check that what we saved is in the profile.
   ProfileCompilationInfo info1;
@@ -92,9 +119,7 @@
 
   // Save virtual methods from Second.
   std::vector<ArtMethod*> second_methods = GetVirtualMethods(class_loader, "LSecond;");
-  ASSERT_TRUE(ProfileCompilationInfo::SaveProfilingInfo(profile.GetFilename(),
-                                                        second_methods,
-                                                        resolved_classes));
+  ASSERT_TRUE(SaveProfilingInfo(profile.GetFilename(), second_methods, resolved_classes));
 
   // Check that what we saved is in the profile (methods form Main and Second).
   ProfileCompilationInfo info2;
@@ -118,8 +143,8 @@
   ProfileCompilationInfo saved_info;
   // Save a few methods.
   for (uint16_t i = 0; i < 10; i++) {
-    ASSERT_TRUE(AddData("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
-    ASSERT_TRUE(AddData("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
   }
   ASSERT_TRUE(saved_info.Save(GetFd(profile)));
   ASSERT_EQ(0, profile.GetFile()->Flush());
@@ -132,9 +157,9 @@
 
   // Save more methods.
   for (uint16_t i = 0; i < 100; i++) {
-    ASSERT_TRUE(AddData("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
-    ASSERT_TRUE(AddData("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
-    ASSERT_TRUE(AddData("dex_location3", /* checksum */ 3, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location3", /* checksum */ 3, /* method_idx */ i, &saved_info));
   }
   ASSERT_TRUE(profile.GetFile()->ResetOffset());
   ASSERT_TRUE(saved_info.Save(GetFd(profile)));
@@ -147,25 +172,156 @@
   ASSERT_TRUE(loaded_info2.Equals(saved_info));
 }
 
-TEST_F(ProfileCompilationInfoTest, AddDataFail) {
+TEST_F(ProfileCompilationInfoTest, AddMethodsAndClassesFail) {
   ScratchFile profile;
 
   ProfileCompilationInfo info;
-  ASSERT_TRUE(AddData("dex_location", /* checksum */ 1, /* method_idx */ 1, &info));
+  ASSERT_TRUE(AddMethod("dex_location", /* checksum */ 1, /* method_idx */ 1, &info));
   // Trying to add info for an existing file but with a different checksum.
-  ASSERT_FALSE(AddData("dex_location", /* checksum */ 2, /* method_idx */ 2, &info));
+  ASSERT_FALSE(AddMethod("dex_location", /* checksum */ 2, /* method_idx */ 2, &info));
 }
 
-TEST_F(ProfileCompilationInfoTest, LoadFail) {
+TEST_F(ProfileCompilationInfoTest, MergeFail) {
   ScratchFile profile;
 
   ProfileCompilationInfo info1;
-  ASSERT_TRUE(AddData("dex_location", /* checksum */ 1, /* method_idx */ 1, &info1));
+  ASSERT_TRUE(AddMethod("dex_location", /* checksum */ 1, /* method_idx */ 1, &info1));
   // Use the same file, change the checksum.
   ProfileCompilationInfo info2;
-  ASSERT_TRUE(AddData("dex_location", /* checksum */ 2, /* method_idx */ 2, &info2));
+  ASSERT_TRUE(AddMethod("dex_location", /* checksum */ 2, /* method_idx */ 2, &info2));
 
-  ASSERT_FALSE(info1.Load(info2));
+  ASSERT_FALSE(info1.MergeWith(info2));
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveMaxMethods) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save the maximum number of methods
+  for (uint16_t i = 0; i < std::numeric_limits<uint16_t>::max(); i++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+  }
+  // Save the maximum number of classes
+  for (uint16_t i = 0; i < std::numeric_limits<uint16_t>::max(); i++) {
+    ASSERT_TRUE(AddClass("dex_location1", /* checksum */ 1, /* class_idx */ i, &saved_info));
+    ASSERT_TRUE(AddClass("dex_location2", /* checksum */ 2, /* class_idx */ i, &saved_info));
+  }
+
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveEmpty) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadEmpty) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo empty_info;
+
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(empty_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, BadMagic) {
+  ScratchFile profile;
+  uint8_t buffer[] = { 1, 2, 3, 4 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(buffer, sizeof(buffer)));
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, BadVersion) {
+  ScratchFile profile;
+
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileMagic, kProfileMagicSize));
+  uint8_t version[] = { 'v', 'e', 'r', 's', 'i', 'o', 'n' };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(version, sizeof(version)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, Incomplete) {
+  ScratchFile profile;
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileMagic, kProfileMagicSize));
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileVersion, kProfileVersionSize));
+  // Write that we have at least one line.
+  uint8_t line_number[] = { 0, 1 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(line_number, sizeof(line_number)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, TooLongDexLocation) {
+  ScratchFile profile;
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileMagic, kProfileMagicSize));
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileVersion, kProfileVersionSize));
+  // Write that we have at least one line.
+  uint8_t line_number[] = { 0, 1 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(line_number, sizeof(line_number)));
+
+  // dex_location_size, methods_size, classes_size, checksum.
+  // Dex location size is too big and should be rejected.
+  uint8_t line[] = { 255, 255, 0, 1, 0, 1, 0, 0, 0, 0 };
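+  // The first two bytes decode little-endian to 0xFFFF (65535), which should
+  // exceed kMaxDexFileKeyLength.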
+  ASSERT_TRUE(profile.GetFile()->WriteFully(line, sizeof(line)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, UnexpectedContent) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save a few methods.
+  for (uint16_t i = 0; i < 10; i++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+
+  uint8_t random_data[] = { 1, 2, 3 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(random_data, sizeof(random_data)));
+
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we fail because of unexpected data at the end of the file.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
 }
 
 }  // namespace art
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 6fe17db..cf46893 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -22,25 +22,25 @@
 
 #include "art_method-inl.h"
 #include "base/systrace.h"
-#include "scoped_thread_state_change.h"
+#include "base/time_utils.h"
+#include "compiler_filter.h"
 #include "oat_file_manager.h"
+#include "scoped_thread_state_change.h"
+
 
 namespace art {
 
-// An arbitrary value to throttle save requests. Set to 2s for now.
-static constexpr const uint64_t kMilisecondsToNano = 1000000;
-static constexpr const uint64_t kMinimumTimeBetweenCodeCacheUpdatesNs = 2000 * kMilisecondsToNano;
-
 // TODO: read the constants from ProfileOptions,
 // Add a random delay each time we go to sleep so that we don't hammer the CPU
 // with all profile savers running at the same time.
-static constexpr const uint64_t kRandomDelayMaxMs = 20 * 1000;  // 20 seconds
-static constexpr const uint64_t kMaxBackoffMs = 5 * 60 * 1000;  // 5 minutes
-static constexpr const uint64_t kSavePeriodMs = 10 * 1000;  // 10 seconds
-static constexpr const uint64_t kInitialDelayMs = 2 * 1000;  // 2 seconds
-static constexpr const double kBackoffCoef = 1.5;
+static constexpr const uint64_t kRandomDelayMaxMs = 30 * 1000;  // 30 seconds
+static constexpr const uint64_t kMaxBackoffMs = 10 * 60 * 1000;  // 10 minutes
+static constexpr const uint64_t kSavePeriodMs = 20 * 1000;  // 20 seconds
+static constexpr const uint64_t kSaveResolvedClassesDelayMs = 2 * 1000;  // 2 seconds
+static constexpr const double kBackoffCoef = 2.0;
 
-static constexpr const uint32_t kMinimumNrOrMethodsToSave = 10;
+static constexpr const uint32_t kMinimumNumberOfMethodsToSave = 10;
+static constexpr const uint32_t kMinimumNumberOfClassesToSave = 10;
 
 ProfileSaver* ProfileSaver::instance_ = nullptr;
 pthread_t ProfileSaver::profiler_pthread_ = 0U;
@@ -52,13 +52,21 @@
                            const std::string& app_data_dir)
     : jit_code_cache_(jit_code_cache),
       foreign_dex_profile_path_(foreign_dex_profile_path),
-      code_cache_last_update_time_ns_(0),
       shutting_down_(false),
-      first_profile_(true),
+      last_save_number_of_methods_(0),
+      last_save_number_of_classes_(0),
       wait_lock_("ProfileSaver wait lock"),
-      period_condition_("ProfileSaver period condition", wait_lock_) {
-  AddTrackedLocations(output_filename, code_paths);
-  app_data_dir_ = "";
+      period_condition_("ProfileSaver period condition", wait_lock_),
+      total_bytes_written_(0),
+      total_number_of_writes_(0),
+      total_number_of_code_cache_queries_(0),
+      total_number_of_skipped_writes_(0),
+      total_number_of_failed_writes_(0),
+      total_ms_of_sleep_(0),
+      total_ns_of_work_(0),
+      total_number_of_foreign_dex_marks_(0),
+      max_number_of_profile_entries_cached_(0) {
+  AddTrackedLocations(output_filename, app_data_dir, code_paths);
   if (!app_data_dir.empty()) {
     // The application directory is used to determine which dex files are owned by app.
     // Since it could be a symlink (e.g. /data/data instead of /data/user/0), and we
@@ -66,9 +74,9 @@
     // store it's canonical form to be sure we use the same base when comparing.
     UniqueCPtr<const char[]> app_data_dir_real_path(realpath(app_data_dir.c_str(), nullptr));
     if (app_data_dir_real_path != nullptr) {
-      app_data_dir_.assign(app_data_dir_real_path.get());
+      app_data_dirs_.emplace(app_data_dir_real_path.get());
     } else {
-      LOG(WARNING) << "Failed to get the real path for app dir: " << app_data_dir_
+      LOG(WARNING) << "Failed to get the real path for app dir: " << app_data_dir
           << ". The app dir will not be used to determine which dex files belong to the app";
     }
   }
@@ -80,14 +88,13 @@
 
   uint64_t save_period_ms = kSavePeriodMs;
   VLOG(profiler) << "Save profiling information every " << save_period_ms << " ms";
-
-  bool first_iteration = true;
+  bool cache_resolved_classes = true;
   while (!ShuttingDown(self)) {
     uint64_t sleep_time_ms;
-    if (first_iteration) {
+    if (cache_resolved_classes) {
       // Sleep less long for the first iteration since we want to record loaded classes shortly
       // after app launch.
-      sleep_time_ms = kInitialDelayMs;
+      sleep_time_ms = kSaveResolvedClassesDelayMs;
     } else {
       const uint64_t random_sleep_delay_ms = rand() % kRandomDelayMaxMs;
       sleep_time_ms = save_period_ms + random_sleep_delay_ms;
@@ -96,86 +103,166 @@
       MutexLock mu(self, wait_lock_);
       period_condition_.TimedWait(self, sleep_time_ms, 0);
     }
-
+    total_ms_of_sleep_ += sleep_time_ms;
     if (ShuttingDown(self)) {
       break;
     }
 
-    if (!ProcessProfilingInfo() && save_period_ms < kMaxBackoffMs) {
-      // If we don't need to save now it is less likely that we will need to do
-      // so in the future. Increase the time between saves according to the
-      // kBackoffCoef, but make it no larger than kMaxBackoffMs.
-      save_period_ms = static_cast<uint64_t>(kBackoffCoef * save_period_ms);
+    uint64_t start = NanoTime();
+    if (cache_resolved_classes) {
+      // TODO(calin) This only considers the case of the primary profile file.
+      // Anything that gets loaded in the same VM will not have its resolved
+      // classes saved (unless it started before the initial saving was done).
+      FetchAndCacheResolvedClasses();
     } else {
-      // Reset the period to the initial value as it's highly likely to JIT again.
-      save_period_ms = kSavePeriodMs;
+      bool profile_saved_to_disk = ProcessProfilingInfo();
+      if (profile_saved_to_disk) {
+        // Reset the period to the initial value as it's highly likely to JIT again.
+        save_period_ms = kSavePeriodMs;
+        VLOG(profiler) << "Profile saver: saved something, period reset to: " << save_period_ms;
+      } else {
+        // If we don't need to save now it is less likely that we will need to do
+        // so in the future. Increase the time between saves according to the
+        // kBackoffCoef, but make it no larger than kMaxBackoffMs.
+        save_period_ms = std::min(kMaxBackoffMs,
+                                  static_cast<uint64_t>(kBackoffCoef * save_period_ms));
+        VLOG(profiler) << "Profile saver: nothing to save, delaying period to: " << save_period_ms;
+      }
     }
-    first_iteration = false;
+    cache_resolved_classes = false;
+
+    total_ns_of_work_ += (NanoTime() - start);
   }
 }
 
+ProfileCompilationInfo* ProfileSaver::GetCachedProfiledInfo(const std::string& filename) {
+  auto info_it = profile_cache_.find(filename);
+  if (info_it == profile_cache_.end()) {
+    info_it = profile_cache_.Put(filename, ProfileCompilationInfo());
+  }
+  return &info_it->second;
+}
+
+void ProfileSaver::FetchAndCacheResolvedClasses() {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  std::set<DexCacheResolvedClasses> resolved_classes =
+      class_linker->GetResolvedClasses(/*ignore boot classes*/ true);
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  uint64_t total_number_of_profile_entries_cached = 0;
+
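+  // For each tracked profile file, cache only the resolved classes whose dex
+  // base location is among that file's tracked locations.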
+  for (const auto& it : tracked_dex_base_locations_) {
+    std::set<DexCacheResolvedClasses> resolved_classes_for_location;
+    const std::string& filename = it.first;
+    const std::set<std::string>& locations = it.second;
+
+    for (const DexCacheResolvedClasses& classes : resolved_classes) {
+      if (locations.find(classes.GetBaseLocation()) != locations.end()) {
+        VLOG(profiler) << "Added classes for location " << classes.GetBaseLocation()
+                       << " (" << classes.GetDexLocation() << ")";
+        resolved_classes_for_location.insert(classes);
+      } else {
+        VLOG(profiler) << "Location not found " << classes.GetBaseLocation()
+                       << " (" << classes.GetDexLocation() << ")";
+      }
+    }
+    ProfileCompilationInfo* info = GetCachedProfiledInfo(filename);
+    info->AddMethodsAndClasses(std::vector<MethodReference>(), resolved_classes_for_location);
+    total_number_of_profile_entries_cached += resolved_classes_for_location.size();
+  }
+  max_number_of_profile_entries_cached_ = std::max(
+      max_number_of_profile_entries_cached_,
+      total_number_of_profile_entries_cached);
+}
+
 bool ProfileSaver::ProcessProfilingInfo() {
   ScopedTrace trace(__PRETTY_FUNCTION__);
-  uint64_t last_update_time_ns = jit_code_cache_->GetLastUpdateTimeNs();
-  if (!first_profile_ && last_update_time_ns - code_cache_last_update_time_ns_
-          < kMinimumTimeBetweenCodeCacheUpdatesNs) {
-    VLOG(profiler) << "Not enough time has passed since the last code cache update."
-        << "Last update: " << last_update_time_ns
-        << " Last save: " << code_cache_last_update_time_ns_;
-    return false;
-  }
-
-  uint64_t start = NanoTime();
-  code_cache_last_update_time_ns_ = last_update_time_ns;
   SafeMap<std::string, std::set<std::string>> tracked_locations;
   {
     // Make a copy so that we don't hold the lock while doing I/O.
     MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
     tracked_locations = tracked_dex_base_locations_;
   }
+
+  bool profile_file_saved = false;
+  uint64_t total_number_of_profile_entries_cached = 0;
   for (const auto& it : tracked_locations) {
     if (ShuttingDown(Thread::Current())) {
       return true;
     }
     const std::string& filename = it.first;
     const std::set<std::string>& locations = it.second;
-    std::vector<ArtMethod*> methods;
+    std::vector<MethodReference> methods;
     {
       ScopedObjectAccess soa(Thread::Current());
-      jit_code_cache_->GetCompiledArtMethods(locations, methods);
+      jit_code_cache_->GetProfiledMethods(locations, methods);
+      total_number_of_code_cache_queries_++;
     }
-    // Always save for the first one for loaded classes profile.
-    if (methods.size() < kMinimumNrOrMethodsToSave && !first_profile_) {
+
+    ProfileCompilationInfo* cached_info = GetCachedProfiledInfo(filename);
+    cached_info->AddMethodsAndClasses(methods, std::set<DexCacheResolvedClasses>());
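+    // Resolved classes were already added to the same cached info by
+    // FetchAndCacheResolvedClasses(), so only methods are passed here.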
+    int64_t delta_number_of_methods =
+        cached_info->GetNumberOfMethods() -
+        static_cast<int64_t>(last_save_number_of_methods_);
+    int64_t delta_number_of_classes =
+        cached_info->GetNumberOfResolvedClasses() -
+        static_cast<int64_t>(last_save_number_of_classes_);
+
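+    // Skip the write unless enough new methods or classes have accumulated
+    // since the last save.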
+    if (delta_number_of_methods < kMinimumNumberOfMethodsToSave &&
+        delta_number_of_classes < kMinimumNumberOfClassesToSave) {
       VLOG(profiler) << "Not enough information to save to: " << filename
-          <<" Nr of methods: " << methods.size();
-      return false;
+          << " Nr of methods: " << delta_number_of_methods
+          << " Nr of classes: " << delta_number_of_classes;
+      total_number_of_skipped_writes_++;
+      continue;
     }
-
-    std::set<DexCacheResolvedClasses> resolved_classes;
-    if (first_profile_) {
-      ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-      resolved_classes = class_linker->GetResolvedClasses(/*ignore boot classes*/true);
-    }
-
-    if (!ProfileCompilationInfo::SaveProfilingInfo(filename, methods, resolved_classes)) {
+    uint64_t bytes_written;
+    // Force the save. In case the profile data is corrupted or the profile
+    // has the wrong version, this will "fix" the file to the correct format.
+    if (cached_info->MergeAndSave(filename, &bytes_written, /*force*/ true)) {
+      last_save_number_of_methods_ = cached_info->GetNumberOfMethods();
+      last_save_number_of_classes_ = cached_info->GetNumberOfResolvedClasses();
+      // Clear resolved classes. No need to store them around as
+      // they don't change after the first write.
+      cached_info->ClearResolvedClasses();
+      if (bytes_written > 0) {
+        total_number_of_writes_++;
+        total_bytes_written_ += bytes_written;
+        profile_file_saved = true;
+      } else {
+        // At this point we could still have avoided the write.
+        // We load and merge the data from the file lazily at its first ever
+        // save attempt. So, whatever we are trying to save could already be
+        // in the file.
+        total_number_of_skipped_writes_++;
+      }
+    } else {
       LOG(WARNING) << "Could not save profiling info to " << filename;
-      return false;
+      total_number_of_failed_writes_++;
     }
-
-    VLOG(profiler) << "Profile process time: " << PrettyDuration(NanoTime() - start);
+    total_number_of_profile_entries_cached +=
+        cached_info->GetNumberOfMethods() +
+        cached_info->GetNumberOfResolvedClasses();
   }
-  first_profile_ = false;
-  return true;
+  max_number_of_profile_entries_cached_ = std::max(
+      max_number_of_profile_entries_cached_,
+      total_number_of_profile_entries_cached);
+  return profile_file_saved;
 }
 
 void* ProfileSaver::RunProfileSaverThread(void* arg) {
   Runtime* runtime = Runtime::Current();
-  ProfileSaver* profile_saver = reinterpret_cast<ProfileSaver*>(arg);
 
-  CHECK(runtime->AttachCurrentThread("Profile Saver",
-                                     /*as_daemon*/true,
-                                     runtime->GetSystemThreadGroup(),
-                                     /*create_peer*/true));
+  bool attached = runtime->AttachCurrentThread("Profile Saver",
+                                               /*as_daemon*/true,
+                                               runtime->GetSystemThreadGroup(),
+                                               /*create_peer*/true);
+  if (!attached) {
+    CHECK(runtime->IsShuttingDown(Thread::Current()));
+    return nullptr;
+  }
+
+  ProfileSaver* profile_saver = reinterpret_cast<ProfileSaver*>(arg);
   profile_saver->Run();
 
   runtime->DetachCurrentThread();
@@ -183,15 +270,47 @@
   return nullptr;
 }
 
+static bool ShouldProfileLocation(const std::string& location) {
+  OatFileManager& oat_manager = Runtime::Current()->GetOatFileManager();
+  const OatFile* oat_file = oat_manager.FindOpenedOatFileFromDexLocation(location);
+  if (oat_file == nullptr) {
+    // This can happen if we fall back to running code directly from the APK.
+    // Profile it with the hope that the background dexopt will get us back into
+    // a good state.
+    VLOG(profiler) << "Asked to profile a location without an oat file:" << location;
+    return true;
+  }
+  CompilerFilter::Filter filter = oat_file->GetCompilerFilter();
+  if ((filter == CompilerFilter::kSpeed) || (filter == CompilerFilter::kEverything)) {
+    VLOG(profiler)
+        << "Skip profiling oat file because it's already speed|everything compiled: "
+        << location << " oat location: " << oat_file->GetLocation();
+    return false;
+  }
+  return true;
+}
+
 void ProfileSaver::Start(const std::string& output_filename,
                          jit::JitCodeCache* jit_code_cache,
                          const std::vector<std::string>& code_paths,
                          const std::string& foreign_dex_profile_path,
                          const std::string& app_data_dir) {
-  DCHECK(Runtime::Current()->UseJit());
+  DCHECK(Runtime::Current()->SaveProfileInfo());
   DCHECK(!output_filename.empty());
   DCHECK(jit_code_cache != nullptr);
 
+  std::vector<std::string> code_paths_to_profile;
+
+  for (const std::string& location : code_paths) {
+    if (ShouldProfileLocation(location))  {
+      code_paths_to_profile.push_back(location);
+    }
+  }
+  if (code_paths_to_profile.empty()) {
+    VLOG(profiler) << "No code paths should be profiled.";
+    return;
+  }
+
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   if (instance_ != nullptr) {
     // If we already have an instance, make sure it uses the same jit_code_cache.
@@ -199,16 +318,16 @@
     // apps which share the same runtime).
     DCHECK_EQ(instance_->jit_code_cache_, jit_code_cache);
     // Add the code_paths to the tracked locations.
-    instance_->AddTrackedLocations(output_filename, code_paths);
+    instance_->AddTrackedLocations(output_filename, app_data_dir, code_paths_to_profile);
     return;
   }
 
   VLOG(profiler) << "Starting profile saver using output file: " << output_filename
-      << ". Tracking: " << Join(code_paths, ':');
+      << ". Tracking: " << Join(code_paths_to_profile, ':');
 
   instance_ = new ProfileSaver(output_filename,
                                jit_code_cache,
-                               code_paths,
+                               code_paths_to_profile,
                                foreign_dex_profile_path,
                                app_data_dir);
 
@@ -219,7 +338,7 @@
       "Profile saver thread");
 }
 
-void ProfileSaver::Stop() {
+void ProfileSaver::Stop(bool dump_info) {
   ProfileSaver* profile_saver = nullptr;
   pthread_t profiler_pthread = 0U;
 
@@ -237,6 +356,9 @@
       return;
     }
     instance_->shutting_down_ = true;
+    if (dump_info) {
+      instance_->DumpInfo(LOG(INFO));
+    }
   }
 
   {
@@ -267,49 +389,62 @@
 }
 
 void ProfileSaver::AddTrackedLocations(const std::string& output_filename,
+                                       const std::string& app_data_dir,
                                        const std::vector<std::string>& code_paths) {
   auto it = tracked_dex_base_locations_.find(output_filename);
   if (it == tracked_dex_base_locations_.end()) {
     tracked_dex_base_locations_.Put(output_filename,
                                     std::set<std::string>(code_paths.begin(), code_paths.end()));
+    app_data_dirs_.insert(app_data_dir);
   } else {
     it->second.insert(code_paths.begin(), code_paths.end());
   }
 }
 
 void ProfileSaver::NotifyDexUse(const std::string& dex_location) {
+  if (!ShouldProfileLocation(dex_location)) {
+    return;
+  }
   std::set<std::string> app_code_paths;
   std::string foreign_dex_profile_path;
-  std::string app_data_dir;
+  std::set<std::string> app_data_dirs;
   {
     MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
-    DCHECK(instance_ != nullptr);
+    if (instance_ == nullptr) {
+      return;
+    }
     // Make a copy so that we don't hold the lock while doing I/O.
     for (const auto& it : instance_->tracked_dex_base_locations_) {
       app_code_paths.insert(it.second.begin(), it.second.end());
     }
     foreign_dex_profile_path = instance_->foreign_dex_profile_path_;
-    app_data_dir = instance_->app_data_dir_;
+    app_data_dirs.insert(instance_->app_data_dirs_.begin(), instance_->app_data_dirs_.end());
   }
 
-  MaybeRecordDexUseInternal(dex_location,
-                            app_code_paths,
-                            foreign_dex_profile_path,
-                            app_data_dir);
+  bool mark_created = MaybeRecordDexUseInternal(dex_location,
+                                                app_code_paths,
+                                                foreign_dex_profile_path,
+                                                app_data_dirs);
+  if (mark_created) {
+    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+    if (instance_ != nullptr) {
+      instance_->total_number_of_foreign_dex_marks_++;
+    }
+  }
 }
 
-void ProfileSaver::MaybeRecordDexUseInternal(
+bool ProfileSaver::MaybeRecordDexUseInternal(
       const std::string& dex_location,
       const std::set<std::string>& app_code_paths,
       const std::string& foreign_dex_profile_path,
-      const std::string& app_data_dir) {
+      const std::set<std::string>& app_data_dirs) {
   if (dex_location.empty()) {
     LOG(WARNING) << "Asked to record foreign dex use with an empty dex location.";
-    return;
+    return false;
   }
   if (foreign_dex_profile_path.empty()) {
     LOG(WARNING) << "Asked to record foreign dex use without a valid profile path ";
-    return;
+    return false;
   }
 
   UniqueCPtr<const char[]> dex_location_real_path(realpath(dex_location.c_str(), nullptr));
@@ -320,14 +455,14 @@
     ? dex_location.c_str()
     : dex_location_real_path.get());
 
-  if (dex_location_real_path_str.compare(0, app_data_dir.length(), app_data_dir) == 0) {
+  if (app_data_dirs.find(dex_location_real_path_str) != app_data_dirs.end()) {
     // The dex location is under the application folder. Nothing to record.
-    return;
+    return false;
   }
 
   if (app_code_paths.find(dex_location) != app_code_paths.end()) {
     // The dex location belongs to the application code paths. Nothing to record.
-    return;
+    return false;
   }
   // Do another round of checks with the real paths.
   // Note that we could cache all the real locations in the saver (since it's an expensive
@@ -344,7 +479,7 @@
         : real_app_code_location.get());
     if (real_app_code_location_str == dex_location_real_path_str) {
       // The dex location belongs to the application code paths. Nothing to record.
-      return;
+      return false;
     }
   }
 
@@ -355,19 +490,75 @@
   // frameworks/base/services/core/java/com/android/server/pm/PackageDexOptimizer.java)
   std::replace(dex_location_real_path_str.begin(), dex_location_real_path_str.end(), '/', '@');
   std::string flag_path = foreign_dex_profile_path + "/" + dex_location_real_path_str;
-  // No need to give any sort of access to flag_path. The system has enough permissions
-  // to test for its existence.
-  int fd = TEMP_FAILURE_RETRY(open(flag_path.c_str(), O_CREAT | O_EXCL, 0));
+  // We use O_RDONLY as the access mode because we must supply some access
+  // mode, and there is no access mode that means 'create but do not read' the
+  // file. We will not actually read from the file.
+  int fd = TEMP_FAILURE_RETRY(open(flag_path.c_str(),
+        O_CREAT | O_RDONLY | O_EXCL | O_CLOEXEC | O_NOFOLLOW, 0));
   if (fd != -1) {
     if (close(fd) != 0) {
       PLOG(WARNING) << "Could not close file after flagging foreign dex use " << flag_path;
     }
+    return true;
   } else {
-    if (errno != EEXIST) {
-      // Another app could have already created the file.
+    if (errno != EEXIST && errno != EACCES) {
+      // Another app could have already created the file, and selinux may not
+      // allow the read access to the file implied by the call to open.
       PLOG(WARNING) << "Could not create foreign dex use mark " << flag_path;
+      return false;
+    }
+    return true;
+  }
+}
+
+void ProfileSaver::DumpInstanceInfo(std::ostream& os) {
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  if (instance_ != nullptr) {
+    instance_->DumpInfo(os);
+  }
+}
+
+void ProfileSaver::DumpInfo(std::ostream& os) {
+  os << "ProfileSaver total_bytes_written=" << total_bytes_written_ << '\n'
+     << "ProfileSaver total_number_of_writes=" << total_number_of_writes_ << '\n'
+     << "ProfileSaver total_number_of_code_cache_queries="
+     << total_number_of_code_cache_queries_ << '\n'
+     << "ProfileSaver total_number_of_skipped_writes=" << total_number_of_skipped_writes_ << '\n'
+     << "ProfileSaver total_number_of_failed_writes=" << total_number_of_failed_writes_ << '\n'
+     << "ProfileSaver total_ms_of_sleep=" << total_ms_of_sleep_ << '\n'
+     << "ProfileSaver total_ms_of_work=" << NsToMs(total_ns_of_work_) << '\n'
+     << "ProfileSaver total_number_of_foreign_dex_marks="
+     << total_number_of_foreign_dex_marks_ << '\n'
+     << "ProfileSaver max_number_profile_entries_cached="
+    << max_number_of_profile_entries_cached_ << '\n';
+}
+
+
+void ProfileSaver::ForceProcessProfiles() {
+  ProfileSaver* saver = nullptr;
+  {
+    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+    saver = instance_;
+  }
+  // TODO(calin): this is not actually thread safe as the instance_ may have been deleted,
+  // but we only use this in testing when we know this won't happen.
+  // Refactor the way we handle the instance so that we don't end up in this situation.
+  if (saver != nullptr) {
+    saver->ProcessProfilingInfo();
+  }
+}
+
+bool ProfileSaver::HasSeenMethod(const std::string& profile,
+                                 const DexFile* dex_file,
+                                 uint16_t method_idx) {
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  if (instance_ != nullptr) {
+    ProfileCompilationInfo* info = instance_->GetCachedProfiledInfo(profile);
+    if (info != nullptr) {
+      return info->ContainsMethod(MethodReference(dex_file, method_idx));
     }
   }
+  return false;
 }
 
 }   // namespace art
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index e7eab95..4f3cdc2 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -37,7 +37,7 @@
 
   // Stops the profile saver thread.
   // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
-  static void Stop()
+  static void Stop(bool dump_info)
       REQUIRES(!Locks::profiler_lock_, !wait_lock_)
       NO_THREAD_SAFETY_ANALYSIS;
 
@@ -46,6 +46,15 @@
 
   static void NotifyDexUse(const std::string& dex_location);
 
+  // If the profile saver is running, dumps statistics to the `os`. Otherwise it does nothing.
+  static void DumpInstanceInfo(std::ostream& os);
+
+  // Just for testing purposes.
+  static void ForceProcessProfiles();
+  static bool HasSeenMethod(const std::string& profile,
+                            const DexFile* dex_file,
+                            uint16_t method_idx);
+
  private:
   ProfileSaver(const std::string& output_filename,
                jit::JitCodeCache* jit_code_cache,
@@ -62,19 +71,33 @@
   void Run() REQUIRES(!Locks::profiler_lock_, !wait_lock_);
   // Processes the existing profiling info from the jit code cache and returns
   // true if it needed to be saved to disk.
-  bool ProcessProfilingInfo();
+  bool ProcessProfilingInfo()
+    REQUIRES(!Locks::profiler_lock_)
+    REQUIRES(!Locks::mutator_lock_);
+
   // Returns true if the saver is shutting down (ProfileSaver::Stop() has been called).
   bool ShuttingDown(Thread* self) REQUIRES(!Locks::profiler_lock_);
 
   void AddTrackedLocations(const std::string& output_filename,
+                           const std::string& app_data_dir,
                            const std::vector<std::string>& code_paths)
       REQUIRES(Locks::profiler_lock_);
 
-  static void MaybeRecordDexUseInternal(
+  // Retrieves the cached profile compilation info for the given profile file.
+  // If no entry exists, a new empty one will be created, added to the cache and
+  // then returned.
+  ProfileCompilationInfo* GetCachedProfiledInfo(const std::string& filename);
+  // Fetches the current resolved classes from the ClassLinker and stores them
+  // in the profile_cache_ for later save.
+  void FetchAndCacheResolvedClasses();
+
+  static bool MaybeRecordDexUseInternal(
       const std::string& dex_location,
       const std::set<std::string>& tracked_locations,
       const std::string& foreign_dex_profile_path,
-      const std::string& app_data_dir);
+      const std::set<std::string>& app_data_dirs);
+
+  void DumpInfo(std::ostream& os);
 
   // The only instance of the saver.
   static ProfileSaver* instance_ GUARDED_BY(Locks::profiler_lock_);
@@ -82,18 +105,44 @@
   static pthread_t profiler_pthread_ GUARDED_BY(Locks::profiler_lock_);
 
   jit::JitCodeCache* jit_code_cache_;
+
+  // Collection of code paths that the profile saver tracks.
+  // It maps profile locations to code paths (dex base locations).
   SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_
       GUARDED_BY(Locks::profiler_lock_);
+  // The directory where we should store the code paths.
   std::string foreign_dex_profile_path_;
-  std::string app_data_dir_;
-  uint64_t code_cache_last_update_time_ns_;
+
+  // A list of application directories, used to infer if a loaded dex belongs
+  // to the application or not. Multiple application data directories are possible when
+  // different apps share the same runtime.
+  std::set<std::string> app_data_dirs_ GUARDED_BY(Locks::profiler_lock_);
+
   bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
-  bool first_profile_ = true;
+  uint32_t last_save_number_of_methods_;
+  uint32_t last_save_number_of_classes_;
+
+  // A local cache for the profile information. Maps each tracked file to its
+  // profile information. The cache is usually very small, topping out at a few
+  // hundred entries in the ProfileCompilationInfo objects. It helps avoid
+  // unnecessary writes to disk.
+  SafeMap<std::string, ProfileCompilationInfo> profile_cache_;
 
   // Save period condition support.
   Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ConditionVariable period_condition_ GUARDED_BY(wait_lock_);
 
+  uint64_t total_bytes_written_;
+  uint64_t total_number_of_writes_;
+  uint64_t total_number_of_code_cache_queries_;
+  uint64_t total_number_of_skipped_writes_;
+  uint64_t total_number_of_failed_writes_;
+  uint64_t total_ms_of_sleep_;
+  uint64_t total_ns_of_work_;
+  uint64_t total_number_of_foreign_dex_marks_;
+  // TODO(calin): replace with an actual size.
+  uint64_t max_number_of_profile_entries_cached_;
+
   DISALLOW_COPY_AND_ASSIGN(ProfileSaver);
 };
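
The new profile_cache_ member is what feeds total_number_of_skipped_writes_: a save is only attempted when the freshly collected data grows past what was last persisted. A simplified sketch of that policy (names and the std::map are illustrative; the real saver keys ProfileCompilationInfo objects by profile file and uses SafeMap):

    #include <map>
    #include <string>

    struct ProfileCounts {
      size_t methods = 0;
      size_t classes = 0;
    };

    class SaverSketch {
     public:
      // Returns true only if something was actually flushed to disk.
      bool MaybeSave(const std::string& file, const ProfileCounts& current) {
        ProfileCounts& last = last_saved_[file];  // Empty entry on first use.
        if (current.methods <= last.methods && current.classes <= last.classes) {
          return false;  // Nothing new since the last save: skip the write.
        }
        // ... serialize `current` to `file` here ...
        last = current;
        return true;
      }

     private:
      std::map<std::string, ProfileCounts> last_saved_;
    };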
 
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index 55d627a..d04d2de 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -119,12 +119,18 @@
 
   InlineCache* GetInlineCache(uint32_t dex_pc);
 
-  bool IsMethodBeingCompiled() const {
-    return is_method_being_compiled_;
+  bool IsMethodBeingCompiled(bool osr) const {
+    return osr
+        ? is_osr_method_being_compiled_
+        : is_method_being_compiled_;
   }
 
-  void SetIsMethodBeingCompiled(bool value) {
-    is_method_being_compiled_ = value;
+  void SetIsMethodBeingCompiled(bool value, bool osr) {
+    if (osr) {
+      is_osr_method_being_compiled_ = value;
+    } else {
+      is_method_being_compiled_ = value;
+    }
   }
 
   void SetSavedEntryPoint(const void* entry_point) {
@@ -155,7 +161,8 @@
   }
 
   bool IsInUseByCompiler() const {
-    return IsMethodBeingCompiled() || (current_inline_uses_ > 0);
+    return IsMethodBeingCompiled(/*osr*/ true) || IsMethodBeingCompiled(/*osr*/ false) ||
+        (current_inline_uses_ > 0);
   }
 
  private:
@@ -163,6 +170,7 @@
       : number_of_inline_caches_(entries.size()),
         method_(method),
         is_method_being_compiled_(false),
+        is_osr_method_being_compiled_(false),
         current_inline_uses_(0),
         saved_entry_point_(nullptr) {
     memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
@@ -181,6 +189,7 @@
   // is implicitly guarded by the JIT code cache lock.
   // TODO: Make the JIT code cache lock global.
   bool is_method_being_compiled_;
+  bool is_osr_method_being_compiled_;
 
   // When the compiler inlines the method associated to this ProfilingInfo,
   // it updates this counter so that the GC does not try to clear the inline caches.
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index c718466..04ba8df 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -2286,16 +2286,16 @@
 // Test the offset computation of JNIEnvExt offsets. b/26071368.
 TEST_F(JniInternalTest, JNIEnvExtOffsets) {
   EXPECT_EQ(OFFSETOF_MEMBER(JNIEnvExt, local_ref_cookie),
-            JNIEnvExt::LocalRefCookieOffset(sizeof(void*)).Int32Value());
+            JNIEnvExt::LocalRefCookieOffset(sizeof(void*)).Uint32Value());
 
-  EXPECT_EQ(OFFSETOF_MEMBER(JNIEnvExt, self), JNIEnvExt::SelfOffset(sizeof(void*)).Int32Value());
+  EXPECT_EQ(OFFSETOF_MEMBER(JNIEnvExt, self), JNIEnvExt::SelfOffset(sizeof(void*)).Uint32Value());
 
   // segment_state_ is private in the IndirectReferenceTable. So this test isn't as good as we'd
   // hope it to be.
-  int32_t segment_state_now =
+  uint32_t segment_state_now =
       OFFSETOF_MEMBER(JNIEnvExt, locals) +
-      IndirectReferenceTable::SegmentStateOffset(sizeof(void*)).Int32Value();
-  int32_t segment_state_computed = JNIEnvExt::SegmentStateOffset(sizeof(void*)).Int32Value();
+      IndirectReferenceTable::SegmentStateOffset(sizeof(void*)).Uint32Value();
+  uint32_t segment_state_computed = JNIEnvExt::SegmentStateOffset(sizeof(void*)).Uint32Value();
   EXPECT_EQ(segment_state_now, segment_state_computed);
 }
 
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 421641c..771f8ed 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -302,8 +302,9 @@
 
   if (use_ashmem) {
     if (!kIsTargetBuild) {
-      // When not on Android ashmem is faked using files in /tmp. Ensure that such files won't
-      // fail due to ulimit restrictions. If they will then use a regular mmap.
+      // When not on Android (either on the host, or when targeting Linux) ashmem is faked
+      // using files in /tmp. Ensure that such files won't fail due to ulimit restrictions.
+      // If they would, fall back to a regular mmap.
       struct rlimit rlimit_fsize;
       CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0);
       use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) ||
@@ -426,7 +427,9 @@
     if (error_msg != nullptr) {
       auto saved_errno = errno;
 
-      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      if (kIsDebugBuild || VLOG_IS_ON(oat)) {
+        PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      }
 
       *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
                                 ") of file '%s' failed: %s. See process maps in the log.",
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 3eaf576..597f0d4 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -68,7 +68,7 @@
                               bool low_4gb,
                               bool reuse,
                               std::string* error_msg,
-                              bool use_ashmem = true);
+                              bool use_ashmem = !kIsTargetLinux);
 
   // Create placeholder for a region allocated by direct call to mmap.
   // This is useful when we do not have control over the code calling mmap,
@@ -172,7 +172,7 @@
                      const char* tail_name,
                      int tail_prot,
                      std::string* error_msg,
-                     bool use_ashmem = true);
+                     bool use_ashmem = !kIsTargetLinux);
 
   static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map)
       REQUIRES(!Locks::mem_maps_lock_);
diff --git a/runtime/memory_region.h b/runtime/memory_region.h
index 13c69ac..fbb0441 100644
--- a/runtime/memory_region.h
+++ b/runtime/memory_region.h
@@ -138,7 +138,7 @@
   // bit of the stored `value`.  `value` must not be larger than `length`
   // bits.
   void StoreBits(uintptr_t bit_offset, uint32_t value, size_t length) {
-    CHECK_LT(value, 2u << length);
+    CHECK_LE(value, MaxInt<uint32_t>(length));
     for (size_t i = 0; i < length; ++i) {
       bool ith_bit = value & (1 << i);
       StoreBit(bit_offset + i, ith_bit);
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 7900eac..b4a23ba 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -880,9 +880,10 @@
   DCHECK(!IsInterface());
   mirror::Class* common_super_class = this;
   while (!common_super_class->IsAssignableFrom(klass.Get())) {
-    common_super_class = common_super_class->GetSuperClass();
+    mirror::Class* old_common = common_super_class;
+    common_super_class = old_common->GetSuperClass();
+    DCHECK(common_super_class != nullptr) << PrettyClass(old_common);
   }
-  DCHECK(common_super_class != nullptr);
   return common_super_class;
 }
 
@@ -1023,8 +1024,8 @@
 
 // TODO: Move this to java_lang_Class.cc?
 ArtMethod* Class::GetDeclaredConstructor(
-    Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args) {
-  for (auto& m : GetDirectMethods(sizeof(void*))) {
+    Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args, size_t pointer_size) {
+  for (auto& m : GetDirectMethods(pointer_size)) {
     // Skip <clinit> which is a static constructor, as well as non constructors.
     if (m.IsStatic() || !m.IsConstructor()) {
       continue;
@@ -1138,5 +1139,42 @@
                                                        mirror::String* name,
                                                        mirror::ObjectArray<mirror::Class>* args);
 
+template <bool kTransactionActive>
+mirror::Constructor* Class::GetDeclaredConstructorInternal(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args) {
+  StackHandleScope<1> hs(self);
+  const size_t pointer_size = kTransactionActive
+                                  ? Runtime::Current()->GetClassLinker()->GetImagePointerSize()
+                                  : sizeof(void*);
+  ArtMethod* result = klass->GetDeclaredConstructor(self, hs.NewHandle(args), pointer_size);
+  return result != nullptr
+      ? mirror::Constructor::CreateFromArtMethod<kTransactionActive>(self, result)
+      : nullptr;
+}
+
+template mirror::Constructor* Class::GetDeclaredConstructorInternal<false>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args);
+template mirror::Constructor* Class::GetDeclaredConstructorInternal<true>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args);
+
+int32_t Class::GetInnerClassFlags(Handle<Class> h_this, int32_t default_value) {
+  if (h_this->IsProxyClass() || h_this->GetDexCache() == nullptr) {
+    return default_value;
+  }
+  uint32_t flags;
+  if (!h_this->GetDexFile().GetInnerClassFlags(h_this, &flags)) {
+    return default_value;
+  }
+  return flags;
+}
+
 }  // namespace mirror
 }  // namespace art
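
GetDeclaredConstructorInternal uses the explicit-instantiation pattern: the template definition stays in the .cc file, and the two trailing template ...<false>/<true> lines stamp out the only variants callers may link against. In isolation the pattern looks like this (generic illustration, not ART code):

    #include <stdexcept>

    // In the header: only the declaration is visible to callers.
    template <bool kChecked>
    int ParseDigit(char c);

    // In the .cc file: the definition...
    template <bool kChecked>
    int ParseDigit(char c) {
      if (kChecked && (c < '0' || c > '9')) {
        throw std::invalid_argument("not a digit");
      }
      return c - '0';
    }

    // ...plus the explicit instantiations. Without these two lines, callers
    // in other translation units would fail to link.
    template int ParseDigit<false>(char c);
    template int ParseDigit<true>(char c);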
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 7082c88..5b6ded1 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -299,7 +299,9 @@
   // Mutually exclusive from whether or not each method is allowed to skip access checks.
   void SetVerificationAttempted() SHARED_REQUIRES(Locks::mutator_lock_) {
     uint32_t flags = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
-    SetAccessFlags(flags | kAccVerificationAttempted);
+    if ((flags & kAccVerificationAttempted) == 0) {
+      SetAccessFlags(flags | kAccVerificationAttempted);
+    }
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -766,6 +768,11 @@
                                            mirror::String* name,
                                            mirror::ObjectArray<mirror::Class>* args)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  template <bool kTransactionActive = false>
+  static Constructor* GetDeclaredConstructorInternal(Thread* self,
+                                                     mirror::Class* klass,
+                                                     mirror::ObjectArray<mirror::Class>* args)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredVirtualMethodsSlice(size_t pointer_size)
@@ -1213,7 +1220,10 @@
 
   // May cause thread suspension due to EqualParameters.
   ArtMethod* GetDeclaredConstructor(
-      Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args)
+      Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args, size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  static int32_t GetInnerClassFlags(Handle<Class> h_this, int32_t default_value)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Used to initialize a class in the allocation code path to ensure it is guarded by a StoreStore
diff --git a/runtime/mirror/class_loader-inl.h b/runtime/mirror/class_loader-inl.h
index 84fa80f..cc910b0 100644
--- a/runtime/mirror/class_loader-inl.h
+++ b/runtime/mirror/class_loader-inl.h
@@ -34,7 +34,6 @@
   VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
   if (kVisitClasses) {
     // Visit classes loaded after.
-    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
     ClassTable* const class_table = GetClassTable();
     if (class_table != nullptr) {
       class_table->VisitRoots(visitor);
diff --git a/runtime/mirror/method.cc b/runtime/mirror/method.cc
index 97973e6..9838b71 100644
--- a/runtime/mirror/method.cc
+++ b/runtime/mirror/method.cc
@@ -96,14 +96,18 @@
   array_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
+template <bool kTransactionActive>
 Constructor* Constructor::CreateFromArtMethod(Thread* self, ArtMethod* method) {
   DCHECK(method->IsConstructor()) << PrettyMethod(method);
   auto* ret = down_cast<Constructor*>(StaticClass()->AllocObject(self));
   if (LIKELY(ret != nullptr)) {
-    static_cast<AbstractMethod*>(ret)->CreateFromArtMethod(method);
+    static_cast<AbstractMethod*>(ret)->CreateFromArtMethod<kTransactionActive>(method);
   }
   return ret;
 }
 
+template Constructor* Constructor::CreateFromArtMethod<false>(Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<true>(Thread* self, ArtMethod* method);
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/method.h b/runtime/mirror/method.h
index 12a72fe..0b56964 100644
--- a/runtime/mirror/method.h
+++ b/runtime/mirror/method.h
@@ -60,6 +60,7 @@
 // C++ mirror of java.lang.reflect.Constructor.
 class MANAGED Constructor: public AbstractMethod {
  public:
+  template <bool kTransactionActive = false>
   static Constructor* CreateFromArtMethod(Thread* self, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 3f739df..0ee46c3 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_MIRROR_OBJECT_H_
 #define ART_RUNTIME_MIRROR_OBJECT_H_
 
+#include "base/casts.h"
 #include "globals.h"
 #include "object_reference.h"
 #include "offsets.h"
@@ -490,7 +491,7 @@
           field_offset, static_cast<int32_t>(ptr));
     } else {
       SetField64<kTransactionActive, kCheckTransaction, kVerifyFlags>(
-          field_offset, static_cast<int64_t>(reinterpret_cast<uintptr_t>(new_value)));
+          field_offset, reinterpret_cast64<int64_t>(new_value));
     }
   }
   // TODO fix thread safety analysis broken by the use of template. This should be
@@ -531,9 +532,7 @@
       return reinterpret_cast<T>(GetField32<kVerifyFlags, kIsVolatile>(field_offset));
     } else {
       int64_t v = GetField64<kVerifyFlags, kIsVolatile>(field_offset);
-      // Check that we dont lose any non 0 bits.
-      DCHECK_EQ(static_cast<int64_t>(static_cast<uintptr_t>(v)), v);
-      return reinterpret_cast<T>(static_cast<uintptr_t>(v));
+      return reinterpret_cast64<T>(v);
     }
   }
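
reinterpret_cast64, pulled in via the new base/casts.h include, centralizes the cast-through-uintptr_t dance the old code spelled out inline, including the lost-bits check on 32-bit targets. A plausible simplified sketch of the two directions (helpers renamed so as not to overstate the real header's API):

    #include <cassert>
    #include <cstdint>
    #include <type_traits>

    // int64_t -> pointer: narrow through uintptr_t, asserting that no non-zero
    // bits are dropped on 32-bit targets (the DCHECK the old inline code did).
    template <typename Dest>
    Dest PointerFromInt64(int64_t v) {
      static_assert(std::is_pointer<Dest>::value, "Dest must be a pointer type");
      assert(static_cast<int64_t>(static_cast<uintptr_t>(v)) == v && "lost bits");
      return reinterpret_cast<Dest>(static_cast<uintptr_t>(v));
    }

    // pointer -> int64_t: widen through uintptr_t so the pointer value is not
    // sign-extended as if it were a signed quantity.
    template <typename Src>
    int64_t Int64FromPointer(Src p) {
      static_assert(std::is_pointer<Src>::value, "Src must be a pointer type");
      return static_cast<int64_t>(reinterpret_cast<uintptr_t>(p));
    }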
 
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 6f9d642..c3c5231 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -197,6 +197,7 @@
 }
 
 template<class T>
+template<bool kTransactionActive>
 inline void ObjectArray<T>::AssignableCheckingMemcpy(int32_t dst_pos, ObjectArray<T>* src,
                                                      int32_t src_pos, int32_t count,
                                                      bool throw_exception) {
@@ -215,15 +216,15 @@
     o = src->GetWithoutChecks(src_pos + i);
     if (o == nullptr) {
       // Null is always assignable.
-      SetWithoutChecks<false>(dst_pos + i, nullptr);
+      SetWithoutChecks<kTransactionActive>(dst_pos + i, nullptr);
     } else {
       // TODO: use the underlying class reference to avoid uncompression when not necessary.
       Class* o_class = o->GetClass();
       if (LIKELY(lastAssignableElementClass == o_class)) {
-        SetWithoutChecks<false>(dst_pos + i, o);
+        SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
       } else if (LIKELY(dst_class->IsAssignableFrom(o_class))) {
         lastAssignableElementClass = o_class;
-        SetWithoutChecks<false>(dst_pos + i, o);
+        SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
       } else {
         // Can't put this element into the array, break to perform write-barrier and throw
         // exception.
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index 1b1295c..4257396 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -78,6 +78,7 @@
                         int32_t count) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Copy src into this array with assignability checks.
+  template<bool kTransactionActive>
   void AssignableCheckingMemcpy(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,
                                 int32_t count, bool throw_exception)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index cdf468c..6285542 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -33,8 +33,8 @@
 namespace mirror {
 
 inline uint32_t String::ClassSize(size_t pointer_size) {
-  uint32_t vtable_entries = Object::kVTableLength + 53;
-  return Class::ComputeClassSize(true, vtable_entries, 0, 2, 0, 1, 2, pointer_size);
+  uint32_t vtable_entries = Object::kVTableLength + 56;
+  return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 1, 2, pointer_size);
 }
 
 // Sets string count in the allocation code path to ensure it is guarded by a CAS.
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index c31b22e..fd7a125 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -60,7 +60,13 @@
 // This is set by the class linker during LinkInterfaceMethods. Prior to that point we do not know
 // if any particular method needs to be a default conflict. Used to figure out at runtime if
 // invoking this method will throw an exception.
-static constexpr uint32_t kAccDefaultConflict =      0x00800000;  // method (runtime)
+static constexpr uint32_t kAccDefaultConflict =       0x00800000;  // method (runtime)
+
+// Set by the verifier for a method we do not want the compiler to compile.
+static constexpr uint32_t kAccCompileDontBother =     0x01000000;  // method (runtime)
+
+// Set by the verifier for a method that could not be verified to follow structured locking.
+static constexpr uint32_t kAccMustCountLocks =        0x02000000;  // method (runtime)
 
 // Special runtime-only flags.
 // Interface and all its super-interfaces with default methods have been recursively initialized.
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index a262c7a..f4bc222 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -67,19 +67,10 @@
  * at any given time.
  */
 
-bool (*Monitor::is_sensitive_thread_hook_)() = nullptr;
 uint32_t Monitor::lock_profiling_threshold_ = 0;
 
-bool Monitor::IsSensitiveThread() {
-  if (is_sensitive_thread_hook_ != nullptr) {
-    return (*is_sensitive_thread_hook_)();
-  }
-  return false;
-}
-
-void Monitor::Init(uint32_t lock_profiling_threshold, bool (*is_sensitive_thread_hook)()) {
+void Monitor::Init(uint32_t lock_profiling_threshold) {
   lock_profiling_threshold_ = lock_profiling_threshold;
-  is_sensitive_thread_hook_ = is_sensitive_thread_hook;
 }
 
 Monitor::Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
@@ -224,6 +215,105 @@
   obj_ = GcRoot<mirror::Object>(object);
 }
 
+// Note: Adapted from CurrentMethodVisitor in thread.cc. We must not resolve here.
+
+struct NthCallerWithDexPcVisitor FINAL : public StackVisitor {
+  explicit NthCallerWithDexPcVisitor(Thread* thread, size_t frame)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFramesNoResolve),
+        method_(nullptr),
+        dex_pc_(0),
+        current_frame_number_(0),
+        wanted_frame_number_(frame) {}
+  bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    if (m == nullptr || m->IsRuntimeMethod()) {
+      // Runtime method, upcall, or resolution issue. Skip.
+      return true;
+    }
+
+    // Is this the requested frame?
+    if (current_frame_number_ == wanted_frame_number_) {
+      method_ = m;
+      dex_pc_ = GetDexPc(false /* abort_on_error */);
+      return false;
+    }
+
+    // Look for more.
+    current_frame_number_++;
+    return true;
+  }
+
+  ArtMethod* method_;
+  uint32_t dex_pc_;
+
+ private:
+  size_t current_frame_number_;
+  const size_t wanted_frame_number_;
+};
+
+// This function is inlined so that each potential tracing point only pays for the cheap
+// VLOG and ATRACE checks; the expensive work stays out of line in the Impl function.
+void Monitor::AtraceMonitorLock(Thread* self, mirror::Object* obj, bool is_wait) {
+  if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging) && ATRACE_ENABLED())) {
+    AtraceMonitorLockImpl(self, obj, is_wait);
+  }
+}
+
+void Monitor::AtraceMonitorLockImpl(Thread* self, mirror::Object* obj, bool is_wait) {
+  // Wait() requires a deeper call stack to be useful. Otherwise you'll see "Waiting at
+  // Object.java". Assume that we'll wait a nontrivial amount, so it's OK to do a longer
+  // stack walk than if !is_wait.
+  NthCallerWithDexPcVisitor visitor(self, is_wait ? 1U : 0U);
+  visitor.WalkStack(false);
+  const char* prefix = is_wait ? "Waiting on " : "Locking ";
+
+  const char* filename;
+  int32_t line_number;
+  TranslateLocation(visitor.method_, visitor.dex_pc_, &filename, &line_number);
+
+  // It would be nice to have a stable "ID" for the object here. However, the only stable thing
+  // would be the identity hashcode. But we cannot use IdentityHashcode here: For one, there are
+  // times when it is unsafe to make that call (see stack dumping for an explanation). More
+  // importantly, we would have to give up on thin-locking when adding systrace locks, as the
+  // identity hashcode is stored in the lockword normally (so can't be used with thin-locks).
+  //
+  // Because of thin-locks we also cannot use the monitor id (as there is no monitor). Monitor ids
+  // also do not have to be stable, as the monitor may be deflated.
+  std::string tmp = StringPrintf("%s%d at %s:%d",
+      prefix,
+      (obj == nullptr ? -1 : static_cast<int32_t>(reinterpret_cast<uintptr_t>(obj))),
+      (filename != nullptr ? filename : "null"),
+      line_number);
+  ATRACE_BEGIN(tmp.c_str());
+}
+
+void Monitor::AtraceMonitorUnlock() {
+  if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) {
+    ATRACE_END();
+  }
+}
+
+std::string Monitor::PrettyContentionInfo(const std::string& owner_name,
+                                          pid_t owner_tid,
+                                          ArtMethod* owners_method,
+                                          uint32_t owners_dex_pc,
+                                          size_t num_waiters) {
+  const char* owners_filename;
+  int32_t owners_line_number = 0;
+  if (owners_method != nullptr) {
+    TranslateLocation(owners_method, owners_dex_pc, &owners_filename, &owners_line_number);
+  }
+  std::ostringstream oss;
+  oss << "monitor contention with owner " << owner_name << " (" << owner_tid << ")";
+  if (owners_method != nullptr) {
+    oss << " at " << PrettyMethod(owners_method);
+    oss << "(" << owners_filename << ":" << owners_line_number << ")";
+  }
+  oss << " waiters=" << num_waiters;
+  return oss.str();
+}
+
 void Monitor::Lock(Thread* self) {
   MutexLock mu(self, monitor_lock_);
   while (true) {
@@ -235,10 +325,10 @@
       if (lock_profiling_threshold_ != 0) {
         locking_method_ = self->GetCurrentMethod(&locking_dex_pc_);
       }
-      return;
+      break;
     } else if (owner_ == self) {  // Recursive.
       lock_count_++;
-      return;
+      break;
     }
     // Contended.
     const bool log_contention = (lock_profiling_threshold_ != 0);
@@ -251,36 +341,86 @@
     monitor_lock_.Unlock(self);  // Let go of locks in order.
     self->SetMonitorEnterObject(GetObject());
     {
+      uint32_t original_owner_thread_id = 0u;
       ScopedThreadStateChange tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
-      // Reacquire monitor_lock_ without mutator_lock_ for Wait.
-      MutexLock mu2(self, monitor_lock_);
-      if (owner_ != nullptr) {  // Did the owner_ give the lock up?
-        if (ATRACE_ENABLED()) {
-          std::string name;
-          owner_->GetThreadName(name);
-          ATRACE_BEGIN(("Contended on monitor with owner " + name).c_str());
+      {
+        // Reacquire monitor_lock_ without mutator_lock_ for Wait.
+        MutexLock mu2(self, monitor_lock_);
+        if (owner_ != nullptr) {  // Did the owner_ give the lock up?
+          original_owner_thread_id = owner_->GetThreadId();
+          if (ATRACE_ENABLED()) {
+            std::ostringstream oss;
+            std::string name;
+            owner_->GetThreadName(name);
+            oss << PrettyContentionInfo(name,
+                                        owner_->GetTid(),
+                                        owners_method,
+                                        owners_dex_pc,
+                                        num_waiters);
+            // Add info for contending thread.
+            uint32_t pc;
+            ArtMethod* m = self->GetCurrentMethod(&pc);
+            const char* filename;
+            int32_t line_number;
+            TranslateLocation(m, pc, &filename, &line_number);
+            oss << " blocking from "
+                << PrettyMethod(m) << "(" << (filename != nullptr ? filename : "null") << ":"
+                << line_number << ")";
+            ATRACE_BEGIN(oss.str().c_str());
+          }
+          monitor_contenders_.Wait(self);  // Still contended so wait.
         }
-        monitor_contenders_.Wait(self);  // Still contended so wait.
+      }
+      if (original_owner_thread_id != 0u) {
         // Woken from contention.
         if (log_contention) {
-          uint64_t wait_ms = MilliTime() - wait_start_ms;
-          uint32_t sample_percent;
-          if (wait_ms >= lock_profiling_threshold_) {
-            sample_percent = 100;
-          } else {
-            sample_percent = 100 * wait_ms / lock_profiling_threshold_;
-          }
-          if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
-            const char* owners_filename;
-            int32_t owners_line_number;
-            TranslateLocation(owners_method, owners_dex_pc, &owners_filename, &owners_line_number);
-            if (wait_ms > kLongWaitMs && owners_method != nullptr) {
-              LOG(WARNING) << "Long monitor contention event with owner method="
-                  << PrettyMethod(owners_method) << " from " << owners_filename << ":"
-                  << owners_line_number << " waiters=" << num_waiters << " for "
-                  << PrettyDuration(MsToNs(wait_ms));
+          uint32_t original_owner_tid = 0;
+          std::string original_owner_name;
+          {
+            MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
+            // Re-find the owner in case the thread got killed.
+            Thread* original_owner = Runtime::Current()->GetThreadList()->FindThreadByThreadId(
+                original_owner_thread_id);
+            // Do not do any work that requires the mutator lock.
+            if (original_owner != nullptr) {
+              original_owner_tid = original_owner->GetTid();
+              original_owner->GetThreadName(original_owner_name);
             }
-            LogContentionEvent(self, wait_ms, sample_percent, owners_filename, owners_line_number);
+          }
+
+          if (original_owner_tid != 0u) {
+            uint64_t wait_ms = MilliTime() - wait_start_ms;
+            uint32_t sample_percent;
+            if (wait_ms >= lock_profiling_threshold_) {
+              sample_percent = 100;
+            } else {
+              sample_percent = 100 * wait_ms / lock_profiling_threshold_;
+            }
+            if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
+              if (wait_ms > kLongWaitMs && owners_method != nullptr) {
+                uint32_t pc;
+                ArtMethod* m = self->GetCurrentMethod(&pc);
+                // TODO: We should maybe check that original_owner is still a live thread.
+                LOG(WARNING) << "Long "
+                    << PrettyContentionInfo(original_owner_name,
+                                            original_owner_tid,
+                                            owners_method,
+                                            owners_dex_pc,
+                                            num_waiters)
+                    << " in " << PrettyMethod(m) << " for " << PrettyDuration(MsToNs(wait_ms));
+              }
+              const char* owners_filename;
+              int32_t owners_line_number;
+              TranslateLocation(owners_method,
+                                owners_dex_pc,
+                                &owners_filename,
+                                &owners_line_number);
+              LogContentionEvent(self,
+                                 wait_ms,
+                                 sample_percent,
+                                 owners_filename,
+                                 owners_line_number);
+            }
           }
         }
         ATRACE_END();
@@ -290,6 +430,8 @@
     monitor_lock_.Lock(self);  // Reacquire locks in order.
     --num_waiters_;
   }
+
+  AtraceMonitorLock(self, GetObject(), false /* is_wait */);
 }
 
 static void ThrowIllegalMonitorStateExceptionF(const char* fmt, ...)
@@ -320,25 +462,34 @@
   return oss.str();
 }
 
-void Monitor::FailedUnlock(mirror::Object* o, Thread* expected_owner, Thread* found_owner,
+void Monitor::FailedUnlock(mirror::Object* o,
+                           uint32_t expected_owner_thread_id,
+                           uint32_t found_owner_thread_id,
                            Monitor* monitor) {
-  Thread* current_owner = nullptr;
+  // Acquire thread list lock so threads won't disappear from under us.
   std::string current_owner_string;
   std::string expected_owner_string;
   std::string found_owner_string;
+  uint32_t current_owner_thread_id = 0u;
   {
-    // TODO: isn't this too late to prevent threads from disappearing?
-    // Acquire thread list lock so threads won't disappear from under us.
     MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+    ThreadList* const thread_list = Runtime::Current()->GetThreadList();
+    Thread* expected_owner = thread_list->FindThreadByThreadId(expected_owner_thread_id);
+    Thread* found_owner = thread_list->FindThreadByThreadId(found_owner_thread_id);
+
     // Re-read owner now that we hold lock.
-    current_owner = (monitor != nullptr) ? monitor->GetOwner() : nullptr;
+    Thread* current_owner = (monitor != nullptr) ? monitor->GetOwner() : nullptr;
+    if (current_owner != nullptr) {
+      current_owner_thread_id = current_owner->GetThreadId();
+    }
     // Get short descriptions of the threads involved.
     current_owner_string = ThreadToString(current_owner);
-    expected_owner_string = ThreadToString(expected_owner);
-    found_owner_string = ThreadToString(found_owner);
+    expected_owner_string = expected_owner != nullptr ? ThreadToString(expected_owner) : "unnamed";
+    found_owner_string = found_owner != nullptr ? ThreadToString(found_owner) : "unnamed";
   }
-  if (current_owner == nullptr) {
-    if (found_owner == nullptr) {
+
+  if (current_owner_thread_id == 0u) {
+    if (found_owner_thread_id == 0u) {
       ThrowIllegalMonitorStateExceptionF("unlock of unowned monitor on object of type '%s'"
                                          " on thread '%s'",
                                          PrettyTypeOf(o).c_str(),
@@ -352,7 +503,7 @@
                                          expected_owner_string.c_str());
     }
   } else {
-    if (found_owner == nullptr) {
+    if (found_owner_thread_id == 0u) {
       // Race: originally there was no owner, there is now
       ThrowIllegalMonitorStateExceptionF("unlock of monitor owned by '%s' on object of type '%s'"
                                          " (originally believed to be unowned) on thread '%s'",
@@ -360,7 +511,7 @@
                                          PrettyTypeOf(o).c_str(),
                                          expected_owner_string.c_str());
     } else {
-      if (found_owner != current_owner) {
+      if (found_owner_thread_id != current_owner_thread_id) {
         // Race: originally found and current owner have changed
         ThrowIllegalMonitorStateExceptionF("unlock of monitor originally owned by '%s' (now"
                                            " owned by '%s') on object of type '%s' on thread '%s'",
@@ -381,27 +532,32 @@
 
 bool Monitor::Unlock(Thread* self) {
   DCHECK(self != nullptr);
-  MutexLock mu(self, monitor_lock_);
-  Thread* owner = owner_;
-  if (owner == self) {
-    // We own the monitor, so nobody else can be in here.
-    if (lock_count_ == 0) {
-      owner_ = nullptr;
-      locking_method_ = nullptr;
-      locking_dex_pc_ = 0;
-      // Wake a contender.
-      monitor_contenders_.Signal(self);
-    } else {
-      --lock_count_;
+  uint32_t owner_thread_id = 0u;
+  {
+    MutexLock mu(self, monitor_lock_);
+    Thread* owner = owner_;
+    if (owner != nullptr) {
+      owner_thread_id = owner->GetThreadId();
     }
-  } else {
-    // We don't own this, so we're not allowed to unlock it.
-    // The JNI spec says that we should throw IllegalMonitorStateException
-    // in this case.
-    FailedUnlock(GetObject(), self, owner, this);
-    return false;
+    if (owner == self) {
+      // We own the monitor, so nobody else can be in here.
+      AtraceMonitorUnlock();
+      if (lock_count_ == 0) {
+        owner_ = nullptr;
+        locking_method_ = nullptr;
+        locking_dex_pc_ = 0;
+        // Wake a contender.
+        monitor_contenders_.Signal(self);
+      } else {
+        --lock_count_;
+      }
+      return true;
+    }
   }
-  return true;
+  // We don't own this, so we're not allowed to unlock it.
+  // The JNI spec says that we should throw IllegalMonitorStateException in this case.
+  FailedUnlock(GetObject(), self->GetThreadId(), owner_thread_id, this);
+  return false;
 }
 
 void Monitor::Wait(Thread* self, int64_t ms, int32_t ns,
@@ -452,6 +608,11 @@
   uintptr_t saved_dex_pc = locking_dex_pc_;
   locking_dex_pc_ = 0;
 
+  AtraceMonitorUnlock();  // For the implicit Unlock() just above. This will only end the deepest
+                          // nesting, but that is enough for the visualization, and corresponds to
+                          // the single Lock() we do afterwards.
+  AtraceMonitorLock(self, GetObject(), true /* is_wait */);
+
   bool was_interrupted = false;
   {
     // Update thread state. If the GC wakes up, it'll ignore us, knowing
@@ -515,6 +676,8 @@
     self->ThrowNewException("Ljava/lang/InterruptedException;", nullptr);
   }
 
+  AtraceMonitorUnlock();  // End Wait().
+
   // Re-acquire the monitor and lock.
   Lock(self);
   monitor_lock_.Lock(self);
@@ -704,6 +867,7 @@
       case LockWord::kUnlocked: {
         LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.ReadBarrierState()));
         if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) {
+          AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
           // CasLockWord enforces more than the acquire ordering we need here.
           return h_obj.Get();  // Success!
         }
@@ -719,10 +883,12 @@
                                                           lock_word.ReadBarrierState()));
             if (!kUseReadBarrier) {
               h_obj->SetLockWord(thin_locked, true);
+              AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
               return h_obj.Get();  // Success!
             } else {
               // Use CAS to preserve the read barrier state.
               if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) {
+                AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
                 return h_obj.Get();  // Success!
               }
             }
@@ -759,7 +925,7 @@
         continue;  // Start from the beginning.
       default: {
         LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
-        return h_obj.Get();
+        UNREACHABLE();
       }
     }
   }
@@ -778,16 +944,13 @@
       case LockWord::kHashCode:
         // Fall-through.
       case LockWord::kUnlocked:
-        FailedUnlock(h_obj.Get(), self, nullptr, nullptr);
+        FailedUnlock(h_obj.Get(), self->GetThreadId(), 0u, nullptr);
         return false;  // Failure.
       case LockWord::kThinLocked: {
         uint32_t thread_id = self->GetThreadId();
         uint32_t owner_thread_id = lock_word.ThinLockOwner();
         if (owner_thread_id != thread_id) {
-          // TODO: there's a race here with the owner dying while we unlock.
-          Thread* owner =
-              Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
-          FailedUnlock(h_obj.Get(), self, owner, nullptr);
+          FailedUnlock(h_obj.Get(), thread_id, owner_thread_id, nullptr);
           return false;  // Failure.
         } else {
           // We own the lock, decrease the recursion count.
@@ -801,11 +964,17 @@
           if (!kUseReadBarrier) {
             DCHECK_EQ(new_lw.ReadBarrierState(), 0U);
             h_obj->SetLockWord(new_lw, true);
+            if (ATRACE_ENABLED()) {
+              ATRACE_END();
+            }
             // Success!
             return true;
           } else {
             // Use CAS to preserve the read barrier state.
             if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, new_lw)) {
+              if (ATRACE_ENABLED()) {
+                ATRACE_END();
+              }
               // Success!
               return true;
             }
@@ -1081,8 +1250,10 @@
   return owner_ != nullptr;
 }
 
-void Monitor::TranslateLocation(ArtMethod* method, uint32_t dex_pc,
-                                const char** source_file, int32_t* line_number) const {
+void Monitor::TranslateLocation(ArtMethod* method,
+                                uint32_t dex_pc,
+                                const char** source_file,
+                                int32_t* line_number) {
   // If method is null, location is unknown
   if (method == nullptr) {
     *source_file = "";
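
The FailedUnlock and contention-logging rework above follows one pattern throughout: never hold a raw Thread* across code that does not own thread_list_lock_; carry the thread id instead and re-resolve it under the lock, accepting that the thread may already be gone. Roughly (registry and types invented for illustration):

    #include <cstdint>
    #include <map>
    #include <mutex>
    #include <string>

    struct ThreadInfo { std::string name; };

    class ThreadRegistrySketch {
     public:
      // Returns the thread's name, or "unnamed" if it already exited. The whole
      // lookup happens under the registry lock, so the caller never touches a
      // thread object that could be concurrently destroyed.
      std::string NameForId(uint32_t thread_id) {
        std::lock_guard<std::mutex> lock(mutex_);
        auto it = threads_.find(thread_id);
        return it != threads_.end() ? it->second.name : "unnamed";
      }

     private:
      std::mutex mutex_;
      std::map<uint32_t, ThreadInfo> threads_;
    };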
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 707d0f1..7b4b8f9 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -55,8 +55,7 @@
 
   ~Monitor();
 
-  static bool IsSensitiveThread();
-  static void Init(uint32_t lock_profiling_threshold, bool (*is_sensitive_thread_hook)());
+  static void Init(uint32_t lock_profiling_threshold);
 
   // Return the thread id of the lock owner or 0 when there is no owner.
   static uint32_t GetLockOwnerThreadId(mirror::Object* obj)
@@ -186,9 +185,12 @@
                           const char* owner_filename, int32_t owner_line_number)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static void FailedUnlock(mirror::Object* obj, Thread* expected_owner, Thread* found_owner,
+  static void FailedUnlock(mirror::Object* obj,
+                           uint32_t expected_owner_thread_id,
+                           uint32_t found_owner_thread_id,
                            Monitor* mon)
-      REQUIRES(!Locks::thread_list_lock_)
+      REQUIRES(!Locks::thread_list_lock_,
+               !monitor_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void Lock(Thread* self)
@@ -209,6 +211,13 @@
       REQUIRES(!monitor_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  static std::string PrettyContentionInfo(const std::string& owner_name,
+                                          pid_t owner_tid,
+                                          ArtMethod* owners_method,
+                                          uint32_t owners_dex_pc,
+                                          size_t num_waiters)
+      REQUIRES(!Locks::thread_list_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Wait on a monitor until timeout, interrupt, or notification.  Used for Object.wait() and
   // (somewhat indirectly) Thread.sleep() and Thread.join().
@@ -234,13 +243,24 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Translates the provided method and pc into its declaring class' source file and line number.
-  void TranslateLocation(ArtMethod* method, uint32_t pc,
-                         const char** source_file, int32_t* line_number) const
+  static void TranslateLocation(ArtMethod* method, uint32_t pc,
+                                const char** source_file,
+                                int32_t* line_number)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   uint32_t GetOwnerThreadId() REQUIRES(!monitor_lock_);
 
-  static bool (*is_sensitive_thread_hook_)();
+  // Support for systrace output of monitor operations.
+  ALWAYS_INLINE static void AtraceMonitorLock(Thread* self,
+                                              mirror::Object* obj,
+                                              bool is_wait)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  static void AtraceMonitorLockImpl(Thread* self,
+                                    mirror::Object* obj,
+                                    bool is_wait)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE static void AtraceMonitorUnlock();
+
   static uint32_t lock_profiling_threshold_;
 
   Mutex monitor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
diff --git a/runtime/monitor_android.cc b/runtime/monitor_android.cc
index 82ef2d8..671cb60 100644
--- a/runtime/monitor_android.cc
+++ b/runtime/monitor_android.cc
@@ -66,7 +66,7 @@
   cp = EventLogWriteString(cp, procName, len);
 
   // Emit the sensitive thread ("main thread") status, 5 bytes.
-  cp = EventLogWriteInt(cp, Monitor::IsSensitiveThread());
+  cp = EventLogWriteInt(cp, Thread::IsSensitiveThread());
 
   // Emit self thread name string, <= 37 bytes.
   std::string thread_name;
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
index ce38e4f..a47a4b2 100644
--- a/runtime/monitor_pool.cc
+++ b/runtime/monitor_pool.cc
@@ -28,7 +28,11 @@
 }  // namespace mirror
 
 MonitorPool::MonitorPool()
-    : num_chunks_(0), capacity_(0), first_free_(nullptr) {
+    : current_chunk_list_index_(0), num_chunks_(0), current_chunk_list_capacity_(0),
+      first_free_(nullptr) {
+  for (size_t i = 0; i < kMaxChunkLists; ++i) {
+    monitor_chunks_[i] = nullptr;  // Keep unused slots null; AllocateChunk's DCHECK relies on it.
+  }
   AllocateChunk();  // Get our first chunk.
 }
 
@@ -37,24 +41,19 @@
 void MonitorPool::AllocateChunk() {
   DCHECK(first_free_ == nullptr);
 
-  // Do we need to resize?
-  if (num_chunks_ == capacity_) {
-    if (capacity_ == 0U) {
-      // Initialization.
-      capacity_ = kInitialChunkStorage;
-      uintptr_t* new_backing = new uintptr_t[capacity_]();
-      DCHECK(monitor_chunks_.LoadRelaxed() == nullptr);
-      monitor_chunks_.StoreRelaxed(new_backing);
-    } else {
-      size_t new_capacity = 2 * capacity_;
-      uintptr_t* new_backing = new uintptr_t[new_capacity]();
-      uintptr_t* old_backing = monitor_chunks_.LoadRelaxed();
-      memcpy(new_backing, old_backing, sizeof(uintptr_t) * capacity_);
-      monitor_chunks_.StoreRelaxed(new_backing);
-      capacity_ = new_capacity;
-      old_chunk_arrays_.push_back(std::unique_ptr<uintptr_t[]>(old_backing));
-      VLOG(monitor) << "Resizing to capacity " << capacity_;
-    }
+  // Do we need to allocate another chunk list?
+  if (num_chunks_ == current_chunk_list_capacity_) {
+    if (current_chunk_list_capacity_ != 0U) {
+      ++current_chunk_list_index_;
+      CHECK_LT(current_chunk_list_index_, kMaxChunkLists) << "Out of space for inflated monitors";
+      VLOG(monitor) << "Expanding to capacity "
+          << 2 * ChunkListCapacity(current_chunk_list_index_) - kInitialChunkStorage;
+    }  // else we're initializing
+    current_chunk_list_capacity_ = ChunkListCapacity(current_chunk_list_index_);
+    uintptr_t* new_list = new uintptr_t[current_chunk_list_capacity_]();
+    DCHECK(monitor_chunks_[current_chunk_list_index_] == nullptr);
+    monitor_chunks_[current_chunk_list_index_] = new_list;
+    num_chunks_ = 0;
   }
 
   // Allocate the chunk.
@@ -65,7 +64,7 @@
   CHECK_EQ(0U, reinterpret_cast<uintptr_t>(chunk) % kMonitorAlignment);
 
   // Add the chunk.
-  *(monitor_chunks_.LoadRelaxed() + num_chunks_) = reinterpret_cast<uintptr_t>(chunk);
+  monitor_chunks_[current_chunk_list_index_][num_chunks_] = reinterpret_cast<uintptr_t>(chunk);
   num_chunks_++;
 
   // Set up the free list
@@ -73,8 +72,8 @@
                                              (kChunkCapacity - 1) * kAlignedMonitorSize);
   last->next_free_ = nullptr;
   // Eagerly compute id.
-  last->monitor_id_ = OffsetToMonitorId((num_chunks_ - 1) * kChunkSize +
-                                        (kChunkCapacity - 1) * kAlignedMonitorSize);
+  last->monitor_id_ = OffsetToMonitorId(current_chunk_list_index_ * (kMaxListSize * kChunkSize)
+      + (num_chunks_ - 1) * kChunkSize + (kChunkCapacity - 1) * kAlignedMonitorSize);
   for (size_t i = 0; i < kChunkCapacity - 1; ++i) {
     Monitor* before = reinterpret_cast<Monitor*>(reinterpret_cast<uintptr_t>(last) -
                                                  kAlignedMonitorSize);
@@ -91,21 +90,19 @@
 
 void MonitorPool::FreeInternal() {
   // This is on shutdown with NO_THREAD_SAFETY_ANALYSIS, can't/don't need to lock.
-  uintptr_t* backing = monitor_chunks_.LoadRelaxed();
-  DCHECK(backing != nullptr);
-  DCHECK_GT(capacity_, 0U);
-  DCHECK_GT(num_chunks_, 0U);
-
-  for (size_t i = 0; i < capacity_; ++i) {
-    if (i < num_chunks_) {
-      DCHECK_NE(backing[i], 0U);
-      allocator_.deallocate(reinterpret_cast<uint8_t*>(backing[i]), kChunkSize);
-    } else {
-      DCHECK_EQ(backing[i], 0U);
+  DCHECK_NE(current_chunk_list_capacity_, 0UL);
+  for (size_t i = 0; i <= current_chunk_list_index_; ++i) {
+    DCHECK_NE(monitor_chunks_[i], static_cast<uintptr_t*>(nullptr));
+    for (size_t j = 0; j < ChunkListCapacity(i); ++j) {
+      if (i < current_chunk_list_index_ || j < num_chunks_) {
+        DCHECK_NE(monitor_chunks_[i][j], 0U);
+        allocator_.deallocate(reinterpret_cast<uint8_t*>(monitor_chunks_[i][j]), kChunkSize);
+      } else {
+        DCHECK_EQ(monitor_chunks_[i][j], 0U);
+      }
     }
+    delete[] monitor_chunks_[i];
   }
-
-  delete[] backing;
 }
 
 Monitor* MonitorPool::CreateMonitorInPool(Thread* self, Thread* owner, mirror::Object* obj,
diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h
index 875b3fe..99810e0 100644
--- a/runtime/monitor_pool.h
+++ b/runtime/monitor_pool.h
@@ -128,12 +128,17 @@
   void ReleaseMonitorToPool(Thread* self, Monitor* monitor);
   void ReleaseMonitorsToPool(Thread* self, MonitorList::Monitors* monitors);
 
-  // Note: This is safe as we do not ever move chunks.
+  // Note: This is safe as we do not ever move chunks.  All needed entries in the monitor_chunks_
+  // data structure are read-only once we get here.  Updates happen-before this call because
+  // the lock word was stored with release semantics and we read it with acquire semantics to
+  // retrieve the id.
   Monitor* LookupMonitor(MonitorId mon_id) {
     size_t offset = MonitorIdToOffset(mon_id);
     size_t index = offset / kChunkSize;
+    size_t top_index = index / kMaxListSize;
+    size_t list_index = index % kMaxListSize;
     size_t offset_in_chunk = offset % kChunkSize;
-    uintptr_t base = *(monitor_chunks_.LoadRelaxed()+index);
+    uintptr_t base = monitor_chunks_[top_index][list_index];
     return reinterpret_cast<Monitor*>(base + offset_in_chunk);
   }
 
@@ -142,28 +147,37 @@
     return base_addr <= mon_ptr && (mon_ptr - base_addr < kChunkSize);
   }
 
-  // Note: This is safe as we do not ever move chunks.
   MonitorId ComputeMonitorIdInPool(Monitor* mon, Thread* self) {
     MutexLock mu(self, *Locks::allocated_monitor_ids_lock_);
-    for (size_t index = 0; index < num_chunks_; ++index) {
-      uintptr_t chunk_addr = *(monitor_chunks_.LoadRelaxed() + index);
-      if (IsInChunk(chunk_addr, mon)) {
-        return OffsetToMonitorId(
-            reinterpret_cast<uintptr_t>(mon) - chunk_addr + index * kChunkSize);
+    for (size_t i = 0; i <= current_chunk_list_index_; ++i) {
+      for (size_t j = 0; j < ChunkListCapacity(i); ++j) {
+        if (j >= num_chunks_ && i == current_chunk_list_index_) {
+          break;
+        }
+        uintptr_t chunk_addr = monitor_chunks_[i][j];
+        if (IsInChunk(chunk_addr, mon)) {
+          return OffsetToMonitorId(
+              reinterpret_cast<uintptr_t>(mon) - chunk_addr
+              + i * (kMaxListSize * kChunkSize) + j * kChunkSize);
+        }
       }
     }
     LOG(FATAL) << "Did not find chunk that contains monitor.";
     return 0;
   }
 
-  static size_t MonitorIdToOffset(MonitorId id) {
+  static constexpr size_t MonitorIdToOffset(MonitorId id) {
     return id << 3;
   }
 
-  static MonitorId OffsetToMonitorId(size_t offset) {
+  static constexpr MonitorId OffsetToMonitorId(size_t offset) {
     return static_cast<MonitorId>(offset >> 3);
   }
 
+  static constexpr size_t ChunkListCapacity(size_t index) {
+    return kInitialChunkStorage << index;
+  }
+
   // TODO: There are assumptions in the code that monitor addresses are 8B aligned (>>3).
   static constexpr size_t kMonitorAlignment = 8;
   // Size of a monitor, rounded up to a multiple of alignment.
@@ -174,20 +188,47 @@
   // Chunk size that is referenced in the id. We can collapse this to the actually used storage
   // in a chunk, i.e., kChunkCapacity * kAlignedMonitorSize, but this will mean proper divisions.
   static constexpr size_t kChunkSize = kPageSize;
-  // The number of initial chunks storable in monitor_chunks_. The number is large enough to make
-  // resizing unlikely, but small enough to not waste too much memory.
-  static constexpr size_t kInitialChunkStorage = 8U;
+  static_assert(IsPowerOfTwo(kChunkSize), "kChunkSize must be power of 2");
+  // The number of chunks of storage that can be referenced by the initial chunk list.
+  // The total number of usable monitor chunks is typically 255 times this number, so it
+  // should be large enough that we don't run out. We run out of address bits if it's > 512.
+  // Currently we set it a bit smaller, to save half a page per process.  We make it tiny in
+  // debug builds to catch growth errors. The only value we really expect to tune.
+  static constexpr size_t kInitialChunkStorage = kIsDebugBuild ? 1U : 256U;
+  static_assert(IsPowerOfTwo(kInitialChunkStorage), "kInitialChunkStorage must be power of 2");
+  // The number of lists, each containing pointers to storage chunks.
+  static constexpr size_t kMaxChunkLists = 8;  //  Dictated by 3 bit index. Don't increase above 8.
+  static_assert(IsPowerOfTwo(kMaxChunkLists), "kMaxChunkLists must be power of 2");
+  static constexpr size_t kMaxListSize = kInitialChunkStorage << (kMaxChunkLists - 1);
+  // We lose 3 bits in monitor id due to 3 bit monitor_chunks_ index, and gain it back from
+  // the 3 bit alignment constraint on monitors:
+  static_assert(kMaxListSize * kChunkSize < (1 << LockWord::kMonitorIdSize),
+      "Monitor id bits don't fit");
+  static_assert(IsPowerOfTwo(kMaxListSize), "kMaxListSize must be power of 2");
 
-  // List of memory chunks. Each chunk is kChunkSize.
-  Atomic<uintptr_t*> monitor_chunks_;
-  // Number of chunks stored.
+  // Array of pointers to lists (again arrays) of pointers to chunks containing monitors.
+  // Zeroth entry points to a list (array) of kInitialChunkStorage pointers to chunks.
+  // Each subsequent list is twice as large as the preceding one.
+  // Monitor Ids are interpreted as follows:
+  //     Top 3 bits (of 28): index into monitor_chunks_.
+  //     Next 16 bits: index into the chunk list, i.e. monitor_chunks_[i].
+  //     Last 9 bits: offset within chunk, expressed as multiple of kMonitorAlignment.
+  // If we set kInitialChunkStorage to 512, this would allow us to use roughly 128K chunks of
+  // monitors, which is 0.5GB of monitors.  With this maximum setting, the largest chunk list
+  // contains 64K entries, and we make full use of the available index space. With a
+  // kInitialChunkStorage value of 256, this is proportionately reduced to 0.25GB of monitors.
+  // Updates to monitor_chunks_ are guarded by allocated_monitor_ids_lock_ .
+  // No field in this entire data structure is ever updated once a monitor id whose lookup
+  // requires it has been made visible to another thread.  Thus readers never race with
+  // updates, in spite of the fact that they acquire no locks.
+  uintptr_t* monitor_chunks_[kMaxChunkLists];  //  uintptr_t is really a Monitor* .
+  // Highest currently used index in monitor_chunks_ . Used for newly allocated chunks.
+  size_t current_chunk_list_index_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+  // Number of chunk pointers stored in monitor_chunks_[current_chunk_list_index_] so far.
   size_t num_chunks_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
-  // Number of chunks storable.
-  size_t capacity_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
-
-  // To avoid race issues when resizing, we keep all the previous arrays.
-  std::vector<std::unique_ptr<uintptr_t[]>> old_chunk_arrays_
-      GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+  // After the initial allocation, this is always equal to
+  // ChunkListCapacity(current_chunk_list_index_).
+  size_t current_chunk_list_capacity_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
 
   typedef TrackingAllocator<uint8_t, kAllocatorTagMonitorPool> Allocator;
   Allocator allocator_;
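
The id layout documented above decomposes mechanically. The following is a small standalone sketch, not ART code: the bit widths and the 8-byte alignment are taken from the comments above, and all other names are illustrative.

    #include <cstdint>
    #include <cstdio>

    // Assumed layout, per the monitor_pool.h comment: 3 bits of chunk-list
    // index, 16 bits of chunk index, 9 bits of offset in units of the
    // 8-byte monitor alignment (28 bits total).
    constexpr uint32_t kListIndexBits = 3;
    constexpr uint32_t kChunkIndexBits = 16;
    constexpr uint32_t kOffsetBits = 9;
    constexpr uint32_t kMonitorAlignmentSketch = 8;

    struct DecodedId {
      uint32_t list_index;   // which list in monitor_chunks_
      uint32_t chunk_index;  // which chunk pointer within that list
      uint32_t byte_offset;  // byte offset of the monitor within the chunk
    };

    constexpr DecodedId Decode(uint32_t id) {
      return DecodedId{
          (id >> (kChunkIndexBits + kOffsetBits)) & ((1u << kListIndexBits) - 1u),
          (id >> kOffsetBits) & ((1u << kChunkIndexBits) - 1u),
          (id & ((1u << kOffsetBits) - 1u)) * kMonitorAlignmentSketch};
    }

    int main() {
      // Highest list, chunk 5, third aligned slot:
      // 0b111'0000000000000101'000000011.
      constexpr DecodedId d = Decode((7u << 25) | (5u << 9) | 3u);
      std::printf("list=%u chunk=%u offset=%u\n",
                  d.list_index, d.chunk_index, d.byte_offset);
      // Prints: list=7 chunk=5 offset=24
      return 0;
    }

With kInitialChunkStorage = 256, only 15 of the 16 chunk-index bits are ever used, which matches the "proportionately reduced" note in the comment above.
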
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 3397989..0126b4d 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -16,16 +16,20 @@
 
 #include "dalvik_system_DexFile.h"
 
+#include <sstream>
+
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/stringprintf.h"
 #include "class_linker.h"
 #include "common_throws.h"
+#include "compiler_filter.h"
 #include "dex_file-inl.h"
 #include "jni_internal.h"
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
+#include "oat_file.h"
 #include "oat_file_assistant.h"
 #include "oat_file_manager.h"
 #include "os.h"
@@ -386,11 +390,67 @@
   return oat_file_assistant.GetDexOptNeeded(filter);
 }
 
+static jstring DexFile_getDexFileStatus(JNIEnv* env,
+                                        jclass,
+                                        jstring javaFilename,
+                                        jstring javaInstructionSet) {
+  ScopedUtfChars filename(env, javaFilename);
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  ScopedUtfChars instruction_set(env, javaInstructionSet);
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  const InstructionSet target_instruction_set = GetInstructionSetFromString(
+      instruction_set.c_str());
+  if (target_instruction_set == kNone) {
+    ScopedLocalRef<jclass> iae(env, env->FindClass("java/lang/IllegalArgumentException"));
+    std::string message(StringPrintf("Instruction set %s is invalid.", instruction_set.c_str()));
+    env->ThrowNew(iae.get(), message.c_str());
+    return nullptr;
+  }
+
+  OatFileAssistant oat_file_assistant(filename.c_str(), target_instruction_set,
+                                      false /* profile_changed */,
+                                      false /* load_executable */);
+
+  std::ostringstream status;
+  bool oat_file_exists = false;
+  bool odex_file_exists = false;
+  if (oat_file_assistant.OatFileExists()) {
+    oat_file_exists = true;
+    status << *oat_file_assistant.OatFileName() << " [compilation_filter=";
+    status << CompilerFilter::NameOfFilter(oat_file_assistant.OatFileCompilerFilter());
+    status << ", status=" << oat_file_assistant.OatFileStatus();
+  }
+
+  if (oat_file_assistant.OdexFileExists()) {
+    odex_file_exists = true;
+    if (oat_file_exists) {
+      status << "] ";
+    }
+    status << *oat_file_assistant.OdexFileName() << " [compilation_filter=";
+    status << CompilerFilter::NameOfFilter(oat_file_assistant.OdexFileCompilerFilter());
+    status << ", status=" << oat_file_assistant.OdexFileStatus();
+  }
+
+  if (!oat_file_exists && !odex_file_exists) {
+    status << "invalid[";
+  }
+
+  status << "]";
+  return env->NewStringUTF(status.str().c_str());
+}
+
 static jint DexFile_getDexOptNeeded(JNIEnv* env,
                                     jclass,
                                     jstring javaFilename,
                                     jstring javaInstructionSet,
-                                    jint javaTargetCompilationTypeMask) {
+                                    jstring javaTargetCompilerFilter,
+                                    jboolean newProfile) {
   ScopedUtfChars filename(env, javaFilename);
   if (env->ExceptionCheck()) {
     return -1;
@@ -401,18 +461,16 @@
     return -1;
   }
 
-  // TODO: Take profile changed and compiler filter as arguments.
-  // For now, we use "speed" by default, unless EXTRACT_ONLY = 0x4 was
-  // included in the mask.
-  const char* compiler_filter = "speed";
-  if (javaTargetCompilationTypeMask & 0x4) {
-    compiler_filter = "verify-at-runtime";
+  ScopedUtfChars target_compiler_filter(env, javaTargetCompilerFilter);
+  if (env->ExceptionCheck()) {
+    return -1;
   }
+
   return GetDexOptNeeded(env,
                          filename.c_str(),
                          instruction_set.c_str(),
-                         compiler_filter,
-                         /*profile_changed*/false);
+                         target_compiler_filter.c_str(),
+                         newProfile == JNI_TRUE);
 }
 
 // public API
@@ -428,6 +486,69 @@
   return (status != OatFileAssistant::kNoDexOptNeeded) ? JNI_TRUE : JNI_FALSE;
 }
 
+static jboolean DexFile_isValidCompilerFilter(JNIEnv* env,
+                                              jclass javaDexFileClass ATTRIBUTE_UNUSED,
+                                              jstring javaCompilerFilter) {
+  ScopedUtfChars compiler_filter(env, javaCompilerFilter);
+  if (env->ExceptionCheck()) {
+    return JNI_FALSE;
+  }
+
+  CompilerFilter::Filter filter;
+  return CompilerFilter::ParseCompilerFilter(compiler_filter.c_str(), &filter)
+      ? JNI_TRUE : JNI_FALSE;
+}
+
+static jboolean DexFile_isProfileGuidedCompilerFilter(JNIEnv* env,
+                                                      jclass javaDexFileClass ATTRIBUTE_UNUSED,
+                                                      jstring javaCompilerFilter) {
+  ScopedUtfChars compiler_filter(env, javaCompilerFilter);
+  if (env->ExceptionCheck()) {
+    return JNI_FALSE;
+  }
+
+  CompilerFilter::Filter filter;
+  if (!CompilerFilter::ParseCompilerFilter(compiler_filter.c_str(), &filter)) {
+    return JNI_FALSE;
+  }
+  return CompilerFilter::DependsOnProfile(filter) ? JNI_TRUE : JNI_FALSE;
+}
+
+static jstring DexFile_getNonProfileGuidedCompilerFilter(JNIEnv* env,
+                                                         jclass javaDexFileClass ATTRIBUTE_UNUSED,
+                                                         jstring javaCompilerFilter) {
+  ScopedUtfChars compiler_filter(env, javaCompilerFilter);
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  CompilerFilter::Filter filter;
+  if (!CompilerFilter::ParseCompilerFilter(compiler_filter.c_str(), &filter)) {
+    return javaCompilerFilter;
+  }
+
+  CompilerFilter::Filter new_filter = CompilerFilter::GetNonProfileDependentFilterFrom(filter);
+
+  // Filter stayed the same, return input.
+  if (filter == new_filter) {
+    return javaCompilerFilter;
+  }
+
+  // Create a new string object and return.
+  std::string new_filter_str = CompilerFilter::NameOfFilter(new_filter);
+  return env->NewStringUTF(new_filter_str.c_str());
+}
+
+static jboolean DexFile_isBackedByOatFile(JNIEnv* env, jclass, jobject cookie) {
+  const OatFile* oat_file = nullptr;
+  std::vector<const DexFile*> dex_files;
+  if (!ConvertJavaArrayToDexFiles(env, cookie, /*out*/ dex_files, /*out*/ oat_file)) {
+    DCHECK(env->ExceptionCheck());
+    return false;
+  }
+  return oat_file != nullptr;
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)Z"),
   NATIVE_METHOD(DexFile,
@@ -440,7 +561,7 @@
   NATIVE_METHOD(DexFile, getClassNameList, "(Ljava/lang/Object;)[Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"),
   NATIVE_METHOD(DexFile, getDexOptNeeded,
-                "(Ljava/lang/String;Ljava/lang/String;I)I"),
+                "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"),
   NATIVE_METHOD(DexFile, openDexFileNative,
                 "(Ljava/lang/String;"
                 "Ljava/lang/String;"
@@ -448,6 +569,14 @@
                 "Ljava/lang/ClassLoader;"
                 "[Ldalvik/system/DexPathList$Element;"
                 ")Ljava/lang/Object;"),
+  NATIVE_METHOD(DexFile, isValidCompilerFilter, "(Ljava/lang/String;)Z"),
+  NATIVE_METHOD(DexFile, isProfileGuidedCompilerFilter, "(Ljava/lang/String;)Z"),
+  NATIVE_METHOD(DexFile,
+                getNonProfileGuidedCompilerFilter,
+                "(Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(DexFile, isBackedByOatFile, "(Ljava/lang/Object;)Z"),
+  NATIVE_METHOD(DexFile, getDexFileStatus,
+                "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;")
 };
 
 void register_dalvik_system_DexFile(JNIEnv* env) {
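
The new natives lean on CompilerFilter::ParseCompilerFilter to turn a Java string into a filter enum before anything else happens. A self-contained approximation of that validate-then-map flow follows; the enum values, table, and function here are stand-ins for ART's CompilerFilter API, not the real thing.

    #include <map>
    #include <string>

    enum class Filter { kVerifyNone, kVerifyAtRuntime, kInterpretOnly, kSpeed, kEverything };

    // Returns true and fills *filter if `name` is a known filter name,
    // mirroring the contract DexFile_isValidCompilerFilter relies on.
    bool ParseFilter(const std::string& name, Filter* filter) {
      static const std::map<std::string, Filter> kFilters = {
          {"verify-none", Filter::kVerifyNone},
          {"verify-at-runtime", Filter::kVerifyAtRuntime},
          {"interpret-only", Filter::kInterpretOnly},
          {"speed", Filter::kSpeed},
          {"everything", Filter::kEverything},
      };
      auto it = kFilters.find(name);
      if (it == kFilters.end()) {
        return false;  // unknown name: isValidCompilerFilter returns JNI_FALSE
      }
      *filter = it->second;
      return true;
    }
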
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index d22c0c7..5ba8df7 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -16,7 +16,7 @@
 
 #include "dalvik_system_VMRuntime.h"
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 extern "C" void android_set_application_target_sdk_version(uint32_t version);
 #endif
 #include <limits.h>
@@ -196,7 +196,7 @@
   // Note that targetSdkVersion may be 0, meaning "current".
   Runtime::Current()->SetTargetSdkVersion(target_sdk_version);
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   // This part is letting libc/dynamic linker know about current app's
   // target sdk version to enable compatibility workarounds.
   android_set_application_target_sdk_version(static_cast<uint32_t>(target_sdk_version));
@@ -212,6 +212,10 @@
   Runtime::Current()->GetHeap()->RegisterNativeAllocation(env, static_cast<size_t>(bytes));
 }
 
+static void VMRuntime_registerSensitiveThread(JNIEnv*, jobject) {
+  Runtime::Current()->RegisterSensitiveThread();
+}
+
 static void VMRuntime_registerNativeFree(JNIEnv* env, jobject, jint bytes) {
   if (UNLIKELY(bytes < 0)) {
     ScopedObjectAccess soa(env);
@@ -643,6 +647,7 @@
   NATIVE_METHOD(VMRuntime, properties, "()[Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, setTargetSdkVersionNative, "(I)V"),
   NATIVE_METHOD(VMRuntime, registerNativeAllocation, "(I)V"),
+  NATIVE_METHOD(VMRuntime, registerSensitiveThread, "()V"),
   NATIVE_METHOD(VMRuntime, registerNativeFree, "(I)V"),
   NATIVE_METHOD(VMRuntime, requestConcurrentGC, "()V"),
   NATIVE_METHOD(VMRuntime, requestHeapTrim, "()V"),
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 887eee0..1aa789f 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -209,9 +209,21 @@
   }
 }
 
+static void ZygoteHooks_startZygoteNoThreadCreation(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                    jclass klass ATTRIBUTE_UNUSED) {
+  Runtime::Current()->SetZygoteNoThreadSection(true);
+}
+
+static void ZygoteHooks_stopZygoteNoThreadCreation(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                   jclass klass ATTRIBUTE_UNUSED) {
+  Runtime::Current()->SetZygoteNoThreadSection(false);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(ZygoteHooks, nativePreFork, "()J"),
   NATIVE_METHOD(ZygoteHooks, nativePostForkChild, "(JIZLjava/lang/String;)V"),
+  NATIVE_METHOD(ZygoteHooks, startZygoteNoThreadCreation, "()V"),
+  NATIVE_METHOD(ZygoteHooks, stopZygoteNoThreadCreation, "()V"),
 };
 
 void register_dalvik_system_ZygoteHooks(JNIEnv* env) {
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index bf24de5..0624da3 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -238,12 +238,13 @@
   DCHECK(name != nullptr);
   DCHECK(self != nullptr);
 
-  StackHandleScope<1> hs(self);
+  StackHandleScope<2> hs(self);
   MutableHandle<mirror::Class> h_clazz(hs.NewHandle(clazz));
+  Handle<mirror::String> h_name(hs.NewHandle(name));
 
   // We search the current class, its direct interfaces then its superclass.
   while (h_clazz.Get() != nullptr) {
-    mirror::Field* result = GetDeclaredField(self, h_clazz.Get(), name);
+    mirror::Field* result = GetDeclaredField(self, h_clazz.Get(), h_name.Get());
     if ((result != nullptr) && (result->GetAccessFlags() & kAccPublic)) {
       return result;
     } else if (UNLIKELY(self->IsExceptionPending())) {
@@ -258,7 +259,7 @@
         self->AssertPendingException();
         return nullptr;
       }
-      result = GetPublicFieldRecursive(self, iface, name);
+      result = GetPublicFieldRecursive(self, iface, h_name.Get());
       if (result != nullptr) {
         DCHECK(result->GetAccessFlags() & kAccPublic);
         return result;
@@ -322,15 +323,11 @@
 static jobject Class_getDeclaredConstructorInternal(
     JNIEnv* env, jobject javaThis, jobjectArray args) {
   ScopedFastNativeObjectAccess soa(env);
-  auto* klass = DecodeClass(soa, javaThis);
-  auto* params = soa.Decode<mirror::ObjectArray<mirror::Class>*>(args);
-  StackHandleScope<1> hs(soa.Self());
-  auto* declared_constructor = klass->GetDeclaredConstructor(soa.Self(), hs.NewHandle(params));
-  if (declared_constructor != nullptr) {
-    return soa.AddLocalReference<jobject>(
-        mirror::Constructor::CreateFromArtMethod(soa.Self(), declared_constructor));
-  }
-  return nullptr;
+  mirror::Constructor* result = mirror::Class::GetDeclaredConstructorInternal(
+      soa.Self(),
+      DecodeClass(soa, javaThis),
+      soa.Decode<mirror::ObjectArray<mirror::Class>*>(args));
+  return soa.AddLocalReference<jobject>(result);
 }
 
 static ALWAYS_INLINE inline bool MethodMatchesConstructor(ArtMethod* m, bool public_only)
@@ -521,14 +518,7 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
-  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
-    return defaultValue;
-  }
-  uint32_t flags;
-  if (!klass->GetDexFile().GetInnerClassFlags(klass, &flags)) {
-    return defaultValue;
-  }
-  return flags;
+  return mirror::Class::GetInnerClassFlags(klass, defaultValue);
 }
 
 static jstring Class_getInnerClassName(JNIEnv* env, jobject javaThis) {
@@ -545,6 +535,17 @@
   return soa.AddLocalReference<jstring>(class_name);
 }
 
+static jobjectArray Class_getSignatureAnnotation(JNIEnv* env, jobject javaThis) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+    return nullptr;
+  }
+  return soa.AddLocalReference<jobjectArray>(
+      klass->GetDexFile().GetSignatureAnnotationForClass(klass));
+}
+
 static jboolean Class_isAnonymousClass(JNIEnv* env, jobject javaThis) {
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
@@ -608,7 +609,8 @@
   }
   auto* constructor = klass->GetDeclaredConstructor(
       soa.Self(),
-      ScopedNullHandle<mirror::ObjectArray<mirror::Class>>());
+      ScopedNullHandle<mirror::ObjectArray<mirror::Class>>(),
+      sizeof(void*));
   if (UNLIKELY(constructor == nullptr)) {
     soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
                                    "%s has no zero argument constructor",
@@ -692,6 +694,7 @@
   NATIVE_METHOD(Class, getNameNative, "!()Ljava/lang/String;"),
   NATIVE_METHOD(Class, getProxyInterfaces, "!()[Ljava/lang/Class;"),
   NATIVE_METHOD(Class, getPublicDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
+  NATIVE_METHOD(Class, getSignatureAnnotation, "!()[Ljava/lang/String;"),
   NATIVE_METHOD(Class, isAnonymousClass, "!()Z"),
   NATIVE_METHOD(Class, isDeclaredAnnotationPresent, "!(Ljava/lang/Class;)Z"),
   NATIVE_METHOD(Class, newInstance, "!()Ljava/lang/Object;"),
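
The StackHandleScope<2> change above is the usual moving-GC discipline: GetDeclaredField can allocate, an allocation can trigger a collection, and a collection can move `name`, so the raw mirror::String* must live behind a handle for the duration of the loop. A toy model of the indirection (ART's real Handle/HandleScope machinery is considerably more involved):

    // Toy model only: a "handle" is a double indirection through a slot
    // the GC knows about, so Get() always observes the object's current
    // address even after a (hypothetical) moving collector relocates it.
    template <typename T>
    class ToyHandle {
     public:
      explicit ToyHandle(T** gc_visible_slot) : slot_(gc_visible_slot) {}
      T* Get() const { return *slot_; }  // re-read after any possible GC
     private:
      T** slot_;  // updated in place by the collector, not by the mutator
    };
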
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index d9863c5..9e2d68d 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -149,7 +149,9 @@
     dstObjArray->AssignableMemcpy(dstPos, srcObjArray, srcPos, count);
     return;
   }
-  dstObjArray->AssignableCheckingMemcpy(dstPos, srcObjArray, srcPos, count, true);
+  // This code is never run under a transaction.
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  dstObjArray->AssignableCheckingMemcpy<false>(dstPos, srcObjArray, srcPos, count, true);
 }
 
 // Template to convert general array to that of its specific primitive type.
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 13edd67..a742e81 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -47,6 +47,15 @@
 
 static void Thread_nativeCreate(JNIEnv* env, jclass, jobject java_thread, jlong stack_size,
                                 jboolean daemon) {
+  // There are sections in the zygote that forbid thread creation.
+  Runtime* runtime = Runtime::Current();
+  if (runtime->IsZygote() && runtime->IsZygoteNoThreadSection()) {
+    jclass internal_error = env->FindClass("java/lang/InternalError");
+    CHECK(internal_error != nullptr);
+    env->ThrowNew(internal_error, "Cannot create threads in zygote");
+    return;
+  }
+
   Thread::CreateNativeThread(env, java_thread, stack_size, daemon == JNI_TRUE);
 }
 
diff --git a/runtime/native/java_lang_reflect_AbstractMethod.cc b/runtime/native/java_lang_reflect_AbstractMethod.cc
new file mode 100644
index 0000000..7e11c11
--- /dev/null
+++ b/runtime/native/java_lang_reflect_AbstractMethod.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "java_lang_reflect_AbstractMethod.h"
+
+#include "art_method-inl.h"
+#include "jni_internal.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
+#include "reflection.h"
+#include "scoped_fast_native_object_access.h"
+#include "well_known_classes.h"
+
+namespace art {
+
+static jobjectArray AbstractMethod_getDeclaredAnnotations(JNIEnv* env, jobject javaMethod) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  if (method->GetDeclaringClass()->IsProxyClass()) {
+    // Return an empty array instead of a null pointer.
+    mirror::Class* annotation_array_class =
+        soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_annotation_Annotation__array);
+    mirror::ObjectArray<mirror::Object>* empty_array =
+        mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), annotation_array_class, 0);
+    return soa.AddLocalReference<jobjectArray>(empty_array);
+  }
+  return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetAnnotationsForMethod(method));
+}
+
+static jobjectArray AbstractMethod_getSignatureAnnotation(JNIEnv* env, jobject javaMethod) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  if (method->GetDeclaringClass()->IsProxyClass()) {
+    return nullptr;
+  }
+  StackHandleScope<1> hs(soa.Self());
+  return soa.AddLocalReference<jobjectArray>(
+      method->GetDexFile()->GetSignatureAnnotationForMethod(method));
+}
+
+static jboolean AbstractMethod_isAnnotationPresentNative(JNIEnv* env, jobject javaMethod,
+                                                         jclass annotationType) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  if (method->GetDeclaringClass()->IsProxyClass()) {
+    return false;
+  }
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
+  return method->GetDexFile()->IsMethodAnnotationPresent(method, klass);
+}
+
+static JNINativeMethod gMethods[] = {
+  NATIVE_METHOD(AbstractMethod, getDeclaredAnnotations, "!()[Ljava/lang/annotation/Annotation;"),
+  NATIVE_METHOD(AbstractMethod, getSignatureAnnotation, "!()[Ljava/lang/String;"),
+  NATIVE_METHOD(AbstractMethod, isAnnotationPresentNative, "!(Ljava/lang/Class;)Z"),
+};
+
+void register_java_lang_reflect_AbstractMethod(JNIEnv* env) {
+  REGISTER_NATIVE_METHODS("java/lang/reflect/AbstractMethod");
+}
+
+}  // namespace art
diff --git a/compiler/utils/arena_allocator_test.cc b/runtime/native/java_lang_reflect_AbstractMethod.h
similarity index 61%
rename from compiler/utils/arena_allocator_test.cc
rename to runtime/native/java_lang_reflect_AbstractMethod.h
index 7f67ef1..222e5a0 100644
--- a/compiler/utils/arena_allocator_test.cc
+++ b/runtime/native/java_lang_reflect_AbstractMethod.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,20 +14,15 @@
  * limitations under the License.
  */
 
-#include "base/arena_allocator.h"
-#include "base/arena_bit_vector.h"
-#include "gtest/gtest.h"
+#ifndef ART_RUNTIME_NATIVE_JAVA_LANG_REFLECT_ABSTRACTMETHOD_H_
+#define ART_RUNTIME_NATIVE_JAVA_LANG_REFLECT_ABSTRACTMETHOD_H_
+
+#include <jni.h>
 
 namespace art {
 
-TEST(ArenaAllocator, Test) {
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
-  ArenaBitVector bv(&arena, 10, true);
-  bv.SetBit(5);
-  EXPECT_EQ(1U, bv.GetStorageSize());
-  bv.SetBit(35);
-  EXPECT_EQ(2U, bv.GetStorageSize());
-}
+void register_java_lang_reflect_AbstractMethod(JNIEnv* env);
 
 }  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_JAVA_LANG_REFLECT_ABSTRACTMETHOD_H_
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index ddcaade..54b8afd 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -34,20 +34,38 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
-  return soa.AddLocalReference<jobject>(
-      method->GetDexFile()->GetAnnotationForMethod(method, klass));
+  if (method->IsProxyMethod()) {
+    return nullptr;
+  } else {
+    Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
+    return soa.AddLocalReference<jobject>(
+        method->GetDexFile()->GetAnnotationForMethod(method, klass));
+  }
 }
 
 static jobjectArray Constructor_getDeclaredAnnotations(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetAnnotationsForMethod(method));
+  if (method->IsProxyMethod()) {
+    mirror::Class* class_class = mirror::Class::GetJavaLangClass();
+    mirror::Class* class_array_class =
+        Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
+    if (class_array_class == nullptr) {
+      return nullptr;
+    }
+    mirror::ObjectArray<mirror::Class>* empty_array =
+        mirror::ObjectArray<mirror::Class>::Alloc(soa.Self(), class_array_class, 0);
+    return soa.AddLocalReference<jobjectArray>(empty_array);
+  } else {
+    return soa.AddLocalReference<jobjectArray>(
+        method->GetDexFile()->GetAnnotationsForMethod(method));
+  }
 }
 
 static jobjectArray Constructor_getExceptionTypes(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
-  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod)
+      ->GetInterfaceMethodIfProxy(sizeof(void*));
   mirror::ObjectArray<mirror::Class>* result_array =
       method->GetDexFile()->GetExceptionTypesForMethod(method);
   if (result_array == nullptr) {
@@ -69,7 +87,12 @@
 static jobjectArray Constructor_getParameterAnnotationsNative(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetParameterAnnotations(method));
+  if (method->IsProxyMethod()) {
+    return nullptr;
+  } else {
+    return soa.AddLocalReference<jobjectArray>(
+        method->GetDexFile()->GetParameterAnnotations(method));
+  }
 }
 
 static jboolean Constructor_isAnnotationPresentNative(JNIEnv* env, jobject javaMethod,
@@ -77,6 +100,10 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  if (method->IsProxyMethod()) {
+    // Proxies have no annotations.
+    return false;
+  }
   Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
   return method->GetDexFile()->IsMethodAnnotationPresent(method, klass);
 }
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index d7cf62e..78999c2 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -41,20 +41,6 @@
       method->GetDexFile()->GetAnnotationForMethod(method, klass));
 }
 
-static jobjectArray Method_getDeclaredAnnotations(JNIEnv* env, jobject javaMethod) {
-  ScopedFastNativeObjectAccess soa(env);
-  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  if (method->GetDeclaringClass()->IsProxyClass()) {
-    // Return an empty array instead of a null pointer.
-    mirror::Class* annotation_array_class =
-        soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_annotation_Annotation__array);
-    mirror::ObjectArray<mirror::Object>* empty_array =
-        mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), annotation_array_class, 0);
-    return soa.AddLocalReference<jobjectArray>(empty_array);
-  }
-  return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetAnnotationsForMethod(method));
-}
-
 static jobject Method_getDefaultValue(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
@@ -116,27 +102,13 @@
   return InvokeMethod(soa, javaMethod, javaReceiver, javaArgs);
 }
 
-static jboolean Method_isAnnotationPresentNative(JNIEnv* env, jobject javaMethod,
-                                                 jclass annotationType) {
-  ScopedFastNativeObjectAccess soa(env);
-  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  if (method->GetDeclaringClass()->IsProxyClass()) {
-    return false;
-  }
-  StackHandleScope<1> hs(soa.Self());
-  Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
-  return method->GetDexFile()->IsMethodAnnotationPresent(method, klass);
-}
-
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Method, getAnnotationNative,
                 "!(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
-  NATIVE_METHOD(Method, getDeclaredAnnotations, "!()[Ljava/lang/annotation/Annotation;"),
   NATIVE_METHOD(Method, getDefaultValue, "!()Ljava/lang/Object;"),
   NATIVE_METHOD(Method, getExceptionTypes, "!()[Ljava/lang/Class;"),
   NATIVE_METHOD(Method, getParameterAnnotationsNative, "!()[[Ljava/lang/annotation/Annotation;"),
   NATIVE_METHOD(Method, invoke, "!(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object;"),
-  NATIVE_METHOD(Method, isAnnotationPresentNative, "!(Ljava/lang/Class;)Z"),
 };
 
 void register_java_lang_reflect_Method(JNIEnv* env) {
diff --git a/runtime/oat.cc b/runtime/oat.cc
index d13999a..80231f3 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -462,6 +462,10 @@
   return IsKeyEnabled(OatHeader::kPicKey);
 }
 
+bool OatHeader::HasPatchInfo() const {
+  return IsKeyEnabled(OatHeader::kHasPatchInfoKey);
+}
+
 bool OatHeader::IsDebuggable() const {
   return IsKeyEnabled(OatHeader::kDebuggableKey);
 }
diff --git a/runtime/oat.h b/runtime/oat.h
index 0dcc52e..543d99f 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,12 +32,13 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '6', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '7', '9', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDex2OatHostKey = "dex2oat-host";
   static constexpr const char* kPicKey = "pic";
+  static constexpr const char* kHasPatchInfoKey = "has-patch-info";
   static constexpr const char* kDebuggableKey = "debuggable";
   static constexpr const char* kNativeDebuggableKey = "native-debuggable";
   static constexpr const char* kCompilerFilter = "compiler-filter";
@@ -109,6 +110,7 @@
 
   size_t GetHeaderSize() const;
   bool IsPic() const;
+  bool HasPatchInfo() const;
   bool IsDebuggable() const;
   bool IsNativeDebuggable() const;
   CompilerFilter::Filter GetCompilerFilter() const;
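
kHasPatchInfoKey follows the existing pattern for boolean oat header properties such as kPicKey: the flag lives in the header's key/value store and counts as enabled when the stored value is "true". Roughly, as a sketch of the lookup contract only (OatHeader's actual storage is a packed byte buffer, not a std::map):

    #include <map>
    #include <string>

    class HeaderSketch {
     public:
      void Set(const std::string& key, const std::string& value) {
        store_[key] = value;
      }
      // A key is enabled iff it is present with the value "true".
      bool IsKeyEnabled(const char* key) const {
        auto it = store_.find(key);
        return it != store_.end() && it->second == "true";
      }
      bool HasPatchInfo() const { return IsKeyEnabled("has-patch-info"); }

     private:
      std::map<std::string, std::string> store_;
    };
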
diff --git a/runtime/oat_file-inl.h b/runtime/oat_file-inl.h
index 7b92120..d7d0c4f 100644
--- a/runtime/oat_file-inl.h
+++ b/runtime/oat_file-inl.h
@@ -71,44 +71,6 @@
   return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FpSpillMask();
 }
 
-inline const uint8_t* OatFile::OatMethod::GetGcMap() const {
-  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
-  if (code == nullptr) {
-    return nullptr;
-  }
-  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].gc_map_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code) - offset;
-}
-
-inline uint32_t OatFile::OatMethod::GetGcMapOffset() const {
-  const uint8_t* gc_map = GetGcMap();
-  return static_cast<uint32_t>(gc_map != nullptr ? gc_map - begin_ : 0u);
-}
-
-inline uint32_t OatFile::OatMethod::GetGcMapOffsetOffset() const {
-  const OatQuickMethodHeader* method_header = GetOatQuickMethodHeader();
-  if (method_header == nullptr) {
-    return 0u;
-  }
-  return reinterpret_cast<const uint8_t*>(&method_header->gc_map_offset_) - begin_;
-}
-
-inline uint32_t OatFile::OatMethod::GetMappingTableOffset() const {
-  const uint8_t* mapping_table = GetMappingTable();
-  return static_cast<uint32_t>(mapping_table != nullptr ? mapping_table - begin_ : 0u);
-}
-
-inline uint32_t OatFile::OatMethod::GetMappingTableOffsetOffset() const {
-  const OatQuickMethodHeader* method_header = GetOatQuickMethodHeader();
-  if (method_header == nullptr) {
-    return 0u;
-  }
-  return reinterpret_cast<const uint8_t*>(&method_header->mapping_table_offset_) - begin_;
-}
-
 inline uint32_t OatFile::OatMethod::GetVmapTableOffset() const {
   const uint8_t* vmap_table = GetVmapTable();
   return static_cast<uint32_t>(vmap_table != nullptr ? vmap_table - begin_ : 0u);
@@ -122,18 +84,6 @@
   return reinterpret_cast<const uint8_t*>(&method_header->vmap_table_offset_) - begin_;
 }
 
-inline const uint8_t* OatFile::OatMethod::GetMappingTable() const {
-  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
-  if (code == nullptr) {
-    return nullptr;
-  }
-  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].mapping_table_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code) - offset;
-}
-
 inline const uint8_t* OatFile::OatMethod::GetVmapTable() const {
   const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 9ae033f..ae84019 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -28,7 +28,7 @@
 #include <sstream>
 
 // dlopen_ext support from bionic.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include "android/dlext.h"
 #endif
 
@@ -50,7 +50,6 @@
 #include "type_lookup_table.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
-#include "vmap_table.h"
 
 namespace art {
 
@@ -173,7 +172,7 @@
   }
   if (requested_base != nullptr && begin_ != requested_base) {
     // Host can fail this check. Do not dump there to avoid polluting the output.
-    if (kIsTargetBuild) {
+    if (kIsTargetBuild && (kIsDebugBuild || VLOG_IS_ON(oat))) {
       PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
     }
     *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
@@ -624,7 +623,7 @@
       *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
       return false;
     }
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     android_dlextinfo extinfo;
     extinfo.flags = ANDROID_DLEXT_FORCE_LOAD |                  // Force-load, don't reuse handle
                                                                 //   (open oat files multiple
@@ -639,7 +638,7 @@
 #else
     dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
     UNUSED(oat_file_begin);
-#endif
+#endif  // ART_TARGET_ANDROID
   }
   if (dlopen_handle_ == nullptr) {
     *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
@@ -1248,6 +1247,10 @@
   method->SetEntryPointFromQuickCompiledCode(GetQuickCode());
 }
 
+bool OatFile::HasPatchInfo() const {
+  return GetOatHeader().HasPatchInfo();
+}
+
 bool OatFile::IsPic() const {
   return GetOatHeader().IsPic();
   // TODO: Check against oat_patches. b/18144996
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 21aeab4..9470624 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -87,6 +87,8 @@
     return is_executable_;
   }
 
+  bool HasPatchInfo() const;
+
   bool IsPic() const;
 
   // Indicates whether the oat file was compiled with full debugging capability.
@@ -121,18 +123,10 @@
     uint32_t GetCoreSpillMask() const;
     uint32_t GetFpSpillMask() const;
 
-    const uint8_t* GetMappingTable() const;
-    uint32_t GetMappingTableOffset() const;
-    uint32_t GetMappingTableOffsetOffset() const;
-
     const uint8_t* GetVmapTable() const;
     uint32_t GetVmapTableOffset() const;
     uint32_t GetVmapTableOffsetOffset() const;
 
-    const uint8_t* GetGcMap() const;
-    uint32_t GetGcMapOffset() const;
-    uint32_t GetGcMapOffsetOffset() const;
-
     // Create an OatMethod with offsets relative to the given base address
     OatMethod(const uint8_t* base, const uint32_t code_offset)
         : begin_(base), code_offset_(code_offset) {
@@ -376,6 +370,10 @@
     return lookup_table_data_;
   }
 
+  const uint8_t* GetDexFilePointer() const {
+    return dex_file_pointer_;
+  }
+
   ~OatDexFile();
 
  private:
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 096296b..713e2f3 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -30,6 +30,7 @@
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
+#include "compiler_filter.h"
 #include "class_linker.h"
 #include "gc/heap.h"
 #include "gc/space/image_space.h"
@@ -43,6 +44,24 @@
 
 namespace art {
 
+std::ostream& operator << (std::ostream& stream, const OatFileAssistant::OatStatus status) {
+  switch (status) {
+    case OatFileAssistant::kOatOutOfDate:
+      stream << "kOatOutOfDate";
+      break;
+    case OatFileAssistant::kOatUpToDate:
+      stream << "kOatUpToDate";
+      break;
+    case OatFileAssistant::kOatNeedsRelocation:
+      stream << "kOatNeedsRelocation";
+      break;
+    default:
+      UNREACHABLE();
+  }
+
+  return stream;
+}
+
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const InstructionSet isa,
                                    bool profile_changed,
@@ -77,7 +96,7 @@
 OatFileAssistant::~OatFileAssistant() {
   // Clean up the lock file.
   if (flock_.HasFile()) {
-    TEMP_FAILURE_RETRY(unlink(flock_.GetFile()->GetPath().c_str()));
+    unlink(flock_.GetFile()->GetPath().c_str());
   }
 }
 
@@ -109,7 +128,7 @@
   std::string lock_file_name = *OatFileName() + ".flock";
 
   if (!flock_.Init(lock_file_name.c_str(), error_msg)) {
-    TEMP_FAILURE_RETRY(unlink(lock_file_name.c_str()));
+    unlink(lock_file_name.c_str());
     return false;
   }
   return true;
@@ -166,15 +185,11 @@
 
   // See if we can get an up-to-date file by running patchoat.
   if (compilation_desired) {
-    if (odex_okay && OdexFileNeedsRelocation()) {
-      // TODO: don't return kPatchOatNeeded if the odex file contains no
-      // patch information.
+    if (odex_okay && OdexFileNeedsRelocation() && OdexFileHasPatchInfo()) {
       return kPatchOatNeeded;
     }
 
-    if (oat_okay && OatFileNeedsRelocation()) {
-      // TODO: don't return kSelfPatchOatNeeded if the oat file contains no
-      // patch information.
+    if (oat_okay && OatFileNeedsRelocation() && OatFileHasPatchInfo()) {
       return kSelfPatchOatNeeded;
     }
   }
@@ -183,10 +198,38 @@
   return HasOriginalDexFiles() ? kDex2OatNeeded : kNoDexOptNeeded;
 }
 
-bool OatFileAssistant::MakeUpToDate(CompilerFilter::Filter target, std::string* error_msg) {
+// Figure out the currently specified compiler filter option in the runtime.
+// Returns true on success, false if the compiler filter is invalid, in which
+// case error_msg describes the problem.
+static bool GetRuntimeCompilerFilterOption(CompilerFilter::Filter* filter,
+                                           std::string* error_msg) {
+  CHECK(filter != nullptr);
+  CHECK(error_msg != nullptr);
+
+  *filter = CompilerFilter::kDefaultCompilerFilter;
+  for (StringPiece option : Runtime::Current()->GetCompilerOptions()) {
+    if (option.starts_with("--compiler-filter=")) {
+      const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
+      if (!CompilerFilter::ParseCompilerFilter(compiler_filter_string, filter)) {
+        *error_msg = std::string("Unknown --compiler-filter value: ")
+                   + std::string(compiler_filter_string);
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+OatFileAssistant::ResultOfAttemptToUpdate
+OatFileAssistant::MakeUpToDate(std::string* error_msg) {
+  CompilerFilter::Filter target;
+  if (!GetRuntimeCompilerFilterOption(&target, error_msg)) {
+    return kUpdateNotAttempted;
+  }
+
   switch (GetDexOptNeeded(target)) {
-    case kNoDexOptNeeded: return true;
-    case kDex2OatNeeded: return GenerateOatFile(target, error_msg);
+    case kNoDexOptNeeded: return kUpdateSucceeded;
+    case kDex2OatNeeded: return GenerateOatFile(error_msg);
     case kPatchOatNeeded: return RelocateOatFile(OdexFileName(), error_msg);
     case kSelfPatchOatNeeded: return RelocateOatFile(OatFileName(), error_msg);
   }
@@ -353,6 +396,13 @@
   return cached_odex_file_is_up_to_date_;
 }
 
+CompilerFilter::Filter OatFileAssistant::OdexFileCompilerFilter() {
+  const OatFile* odex_file = GetOdexFile();
+  CHECK(odex_file != nullptr);
+
+  return odex_file->GetCompilerFilter();
+}
+
 std::string OatFileAssistant::ArtFileName(const OatFile* oat_file) const {
   const std::string oat_file_location = oat_file->GetLocation();
   // Replace extension with .art
@@ -431,6 +480,13 @@
   return cached_oat_file_is_up_to_date_;
 }
 
+CompilerFilter::Filter OatFileAssistant::OatFileCompilerFilter() {
+  const OatFile* oat_file = GetOatFile();
+  CHECK(oat_file != nullptr);
+
+  return oat_file->GetCompilerFilter();
+}
+
 OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& file) {
   // TODO: This could cause GivenOatFileIsOutOfDate to be called twice, which
   // is more work than we need to do. If performance becomes a concern, and
@@ -495,10 +551,21 @@
     const ImageInfo* image_info = GetImageInfo();
     if (image_info == nullptr) {
       VLOG(oat) << "No image for oat image checksum to match against.";
-      return true;
-    }
 
-    if (file.GetOatHeader().GetImageFileLocationOatChecksum() != image_info->oat_checksum) {
+      if (HasOriginalDexFiles()) {
+        return true;
+      }
+
+      // If there is no original dex file to fall back to, grudgingly accept
+      // the oat file. This could technically lead to crashes, but there's no
+      // way we could find a better oat file to use for this dex location,
+      // and it's better than being stuck in a boot loop with no way out.
+      // The problem will hopefully resolve itself the next time the runtime
+      // starts up.
+      LOG(WARNING) << "Dex location " << dex_location_ << " does not seem to include a dex file. "
+        << "Allowing oat file use. This is potentially dangerous.";
+    } else if (file.GetOatHeader().GetImageFileLocationOatChecksum()
+        != GetCombinedImageChecksum()) {
       VLOG(oat) << "Oat image checksum does not match image checksum.";
       return true;
     }
@@ -573,21 +640,21 @@
   return true;
 }
 
-bool OatFileAssistant::RelocateOatFile(const std::string* input_file,
-                                       std::string* error_msg) {
+OatFileAssistant::ResultOfAttemptToUpdate
+OatFileAssistant::RelocateOatFile(const std::string* input_file, std::string* error_msg) {
   CHECK(error_msg != nullptr);
 
   if (input_file == nullptr) {
     *error_msg = "Patching of oat file for dex location " + dex_location_
       + " not attempted because the input file name could not be determined.";
-    return false;
+    return kUpdateNotAttempted;
   }
   const std::string& input_file_name = *input_file;
 
   if (OatFileName() == nullptr) {
     *error_msg = "Patching of oat file for dex location " + dex_location_
       + " not attempted because the oat file name could not be determined.";
-    return false;
+    return kUpdateNotAttempted;
   }
   const std::string& oat_file_name = *OatFileName();
 
@@ -596,13 +663,13 @@
   if (image_info == nullptr) {
     *error_msg = "Patching of oat file " + oat_file_name
       + " not attempted because no image location was found.";
-    return false;
+    return kUpdateNotAttempted;
   }
 
   if (!runtime->IsDex2OatEnabled()) {
     *error_msg = "Patching of oat file " + oat_file_name
       + " not attempted because dex2oat is disabled";
-    return false;
+    return kUpdateNotAttempted;
   }
 
   std::vector<std::string> argv;
@@ -616,29 +683,30 @@
   if (!Exec(argv, error_msg)) {
     // Manually delete the file. This ensures there is no garbage left over if
     // the process unexpectedly died.
-    TEMP_FAILURE_RETRY(unlink(oat_file_name.c_str()));
-    return false;
+    unlink(oat_file_name.c_str());
+    return kUpdateFailed;
   }
 
   // Mark that the oat file has changed and we should try to reload.
   ClearOatFileCache();
-  return true;
+  return kUpdateSucceeded;
 }
 
-bool OatFileAssistant::GenerateOatFile(CompilerFilter::Filter target, std::string* error_msg) {
+OatFileAssistant::ResultOfAttemptToUpdate
+OatFileAssistant::GenerateOatFile(std::string* error_msg) {
   CHECK(error_msg != nullptr);
 
   Runtime* runtime = Runtime::Current();
   if (!runtime->IsDex2OatEnabled()) {
     *error_msg = "Generation of oat file for dex location " + dex_location_
       + " not attempted because dex2oat is disabled.";
-    return false;
+    return kUpdateNotAttempted;
   }
 
   if (OatFileName() == nullptr) {
     *error_msg = "Generation of oat file for dex location " + dex_location_
       + " not attempted because the oat file name could not be determined.";
-    return false;
+    return kUpdateNotAttempted;
   }
   const std::string& oat_file_name = *OatFileName();
 
@@ -647,7 +715,7 @@
   // TODO: Why does dex2oat behave that way?
   if (!OS::FileExists(dex_location_.c_str())) {
     *error_msg = "Dex location " + dex_location_ + " does not exist.";
-    return false;
+    return kUpdateNotAttempted;
   }
 
   std::unique_ptr<File> oat_file;
@@ -655,39 +723,38 @@
   if (oat_file.get() == nullptr) {
     *error_msg = "Generation of oat file " + oat_file_name
       + " not attempted because the oat file could not be created.";
-    return false;
+    return kUpdateNotAttempted;
   }
 
   if (fchmod(oat_file->Fd(), 0644) != 0) {
     *error_msg = "Generation of oat file " + oat_file_name
       + " not attempted because the oat file could not be made world readable.";
     oat_file->Erase();
-    return false;
+    return kUpdateNotAttempted;
   }
 
   std::vector<std::string> args;
   args.push_back("--dex-file=" + dex_location_);
   args.push_back("--oat-fd=" + std::to_string(oat_file->Fd()));
   args.push_back("--oat-location=" + oat_file_name);
-  args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(target));
 
   if (!Dex2Oat(args, error_msg)) {
     // Manually delete the file. This ensures there is no garbage left over if
     // the process unexpectedly died.
     oat_file->Erase();
-    TEMP_FAILURE_RETRY(unlink(oat_file_name.c_str()));
-    return false;
+    unlink(oat_file_name.c_str());
+    return kUpdateFailed;
   }
 
   if (oat_file->FlushCloseOrErase() != 0) {
     *error_msg = "Unable to close oat file " + oat_file_name;
-    TEMP_FAILURE_RETRY(unlink(oat_file_name.c_str()));
-    return false;
+    unlink(oat_file_name.c_str());
+    return kUpdateFailed;
   }
 
   // Mark that the oat file has changed and we should try to reload.
   ClearOatFileCache();
-  return true;
+  return kUpdateSucceeded;
 }
 
 bool OatFileAssistant::Dex2Oat(const std::vector<std::string>& args,
@@ -863,6 +930,11 @@
   return (odex_file != nullptr && odex_file->IsExecutable());
 }
 
+bool OatFileAssistant::OdexFileHasPatchInfo() {
+  const OatFile* odex_file = GetOdexFile();
+  return (odex_file != nullptr && odex_file->HasPatchInfo());
+}
+
 void OatFileAssistant::ClearOdexFileCache() {
   odex_file_load_attempted_ = false;
   cached_odex_file_.reset();
@@ -899,6 +971,11 @@
   return (oat_file != nullptr && oat_file->IsExecutable());
 }
 
+bool OatFileAssistant::OatFileHasPatchInfo() {
+  const OatFile* oat_file = GetOatFile();
+  return (oat_file != nullptr && oat_file->HasPatchInfo());
+}
+
 void OatFileAssistant::ClearOatFileCache() {
   oat_file_load_attempted_ = false;
   cached_oat_file_.reset();
@@ -923,8 +1000,7 @@
         cached_image_info_.patch_delta = image_header.GetPatchDelta();
       } else {
         std::unique_ptr<ImageHeader> image_header(
-            gc::space::ImageSpace::ReadImageHeaderOrDie(
-                cached_image_info_.location.c_str(), isa_));
+            gc::space::ImageSpace::ReadImageHeaderOrDie(cached_image_info_.location.c_str(), isa_));
         cached_image_info_.oat_checksum = image_header->GetOatChecksum();
         cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(
             image_header->GetOatDataBegin());
@@ -932,10 +1008,39 @@
       }
     }
     image_info_load_succeeded_ = (!image_spaces.empty());
+
+    combined_image_checksum_ = CalculateCombinedImageChecksum(isa_);
   }
   return image_info_load_succeeded_ ? &cached_image_info_ : nullptr;
 }
 
+// TODO: Use something better than xor.
+uint32_t OatFileAssistant::CalculateCombinedImageChecksum(InstructionSet isa) {
+  uint32_t checksum = 0;
+  std::vector<gc::space::ImageSpace*> image_spaces =
+      Runtime::Current()->GetHeap()->GetBootImageSpaces();
+  if (isa == kRuntimeISA) {
+    for (gc::space::ImageSpace* image_space : image_spaces) {
+      checksum ^= image_space->GetImageHeader().GetOatChecksum();
+    }
+  } else {
+    for (gc::space::ImageSpace* image_space : image_spaces) {
+      std::string location = image_space->GetImageLocation();
+      std::unique_ptr<ImageHeader> image_header(
+          gc::space::ImageSpace::ReadImageHeaderOrDie(location.c_str(), isa));
+      checksum ^= image_header->GetOatChecksum();
+    }
+  }
+  return checksum;
+}
+
+uint32_t OatFileAssistant::GetCombinedImageChecksum() {
+  if (!image_info_load_attempted_) {
+    GetImageInfo();
+  }
+  return combined_image_checksum_;
+}
+
 gc::space::ImageSpace* OatFileAssistant::OpenImageSpace(const OatFile* oat_file) {
   DCHECK(oat_file != nullptr);
   std::string art_file = ArtFileName(oat_file);
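
CalculateCombinedImageChecksum reduces the multi-image boot image to one 32-bit value by XOR-folding the per-image oat checksums, which is order-independent but weak (hence the TODO). The folding step, isolated as a sketch:

    #include <cstdint>
    #include <vector>

    // XOR-fold per-image oat checksums into a single value, as
    // CalculateCombinedImageChecksum does above.
    uint32_t CombineChecksums(const std::vector<uint32_t>& per_image_checksums) {
      uint32_t combined = 0;
      for (uint32_t checksum : per_image_checksums) {
        combined ^= checksum;
      }
      return combined;
    }

    // Example: CombineChecksums({0xAABB0011, 0x00FF1234}) == 0xAA441225,
    // regardless of the order of the two images.
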
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index 452cd84..f48cdf3 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -19,6 +19,7 @@
 
 #include <cstdint>
 #include <memory>
+#include <sstream>
 #include <string>
 
 #include "arch/instruction_set.h"
@@ -148,14 +149,23 @@
   // given compiler filter.
   DexOptNeeded GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter);
 
+  // Return code used when attempting to generate updated code.
+  enum ResultOfAttemptToUpdate {
+    kUpdateFailed,        // We tried making the code up to date, but
+                          // encountered an unexpected failure.
+    kUpdateNotAttempted,  // We wanted to update the code, but determined we
+                          // should not make the attempt.
+    kUpdateSucceeded      // We successfully made the code up to date
+                          // (possibly by doing nothing).
+  };
+
   // Attempts to generate or relocate the oat file as needed to make it up to
-  // date with in a way that is at least as good as an oat file generated with
-  // the given compiler filter.
-  // Returns true on success.
+  // date based on the current runtime and compiler options.
   //
-  // If there is a failure, the value of error_msg will be set to a string
-  // describing why there was failure. error_msg must not be null.
-  bool MakeUpToDate(CompilerFilter::Filter target_compiler_filter, std::string* error_msg);
+  // If the result is not kUpdateSucceeded, the value of error_msg will be set
+  // to a string describing why there was a failure or the update was not
+  // attempted. error_msg must not be null.
+  ResultOfAttemptToUpdate MakeUpToDate(std::string* error_msg);
 
   // Returns an oat file that can be used for loading dex files.
   // Returns null if no suitable oat file was found.
@@ -202,6 +212,9 @@
   bool OdexFileIsOutOfDate();
   bool OdexFileNeedsRelocation();
   bool OdexFileIsUpToDate();
+  // Must only be called if the associated odex file exists, i.e., if
+  // |OdexFileExists() == true|.
+  CompilerFilter::Filter OdexFileCompilerFilter();
 
   // When the dex file is compiled on the target device, the oat file is the
   // result. The oat file will have been relocated to some
@@ -218,6 +231,9 @@
   bool OatFileIsOutOfDate();
   bool OatFileNeedsRelocation();
   bool OatFileIsUpToDate();
+  // Must only be called if the associated oat file exists, i.e., if
+  // |OatFileExists() == true|.
+  CompilerFilter::Filter OatFileCompilerFilter();
 
   // Return image file name. Does not cache since it relies on the oat file.
   std::string ArtFileName(const OatFile* oat_file) const;
@@ -232,22 +248,21 @@
   // Generates the oat file by relocation from the named input file.
   // This does not check the current status before attempting to relocate the
   // oat file.
-  // Returns true on success.
-  // This will fail if dex2oat is not enabled in the current runtime.
   //
-  // If there is a failure, the value of error_msg will be set to a string
-  // describing why there was failure. error_msg must not be null.
-  bool RelocateOatFile(const std::string* input_file, std::string* error_msg);
+  // If the result is not kUpdateSucceeded, the value of error_msg will be set
+  // to a string describing why there was a failure or the update was not
+  // attempted. error_msg must not be null.
+  ResultOfAttemptToUpdate RelocateOatFile(const std::string* input_file, std::string* error_msg);
 
-  // Generate the oat file from the dex file using the given compiler filter.
+  // Generate the oat file from the dex file using the current runtime
+  // compiler options.
   // This does not check the current status before attempting to generate the
   // oat file.
-  // Returns true on success.
-  // This will fail if dex2oat is not enabled in the current runtime.
   //
-  // If there is a failure, the value of error_msg will be set to a string
-  // describing why there was failure. error_msg must not be null.
-  bool GenerateOatFile(CompilerFilter::Filter filter, std::string* error_msg);
+  // If the result is not kUpdateSucceeded, the value of error_msg will be set
+  // to a string describing why there was a failure or the update was not
+  // attempted. error_msg must not be null.
+  ResultOfAttemptToUpdate GenerateOatFile(std::string* error_msg);
 
   // Executes dex2oat using the current runtime configuration overridden with
   // the given arguments. This does not check to see if dex2oat is enabled in
@@ -269,6 +284,8 @@
   static bool DexFilenameToOdexFilename(const std::string& location,
       InstructionSet isa, std::string* odex_filename, std::string* error_msg);
 
+  static uint32_t CalculateCombinedImageChecksum(InstructionSet isa = kRuntimeISA);
+
  private:
   struct ImageInfo {
     uint32_t oat_checksum = 0;
@@ -310,6 +327,9 @@
   // Returns true if the odex file is opened executable.
   bool OdexFileIsExecutable();
 
+  // Returns true if the odex file has patch info required to run patchoat.
+  bool OdexFileHasPatchInfo();
+
   // Clear any cached information about the odex file that depends on the
   // contents of the file.
   void ClearOdexFileCache();
@@ -326,6 +346,9 @@
   // Returns true if the oat file is opened executable.
   bool OatFileIsExecutable();
 
+  // Returns true if the oat file has patch info required to run patchoat.
+  bool OatFileHasPatchInfo();
+
   // Clear any cached information about the oat file that depends on the
   // contents of the file.
   void ClearOatFileCache();
@@ -336,6 +359,8 @@
   // The caller shouldn't clean up or free the returned pointer.
   const ImageInfo* GetImageInfo();
 
+  uint32_t GetCombinedImageChecksum();
+
   // To implement Lock(), we lock a dummy file where the oat file would go
   // (adding ".flock" to the target file name) and retain the lock for the
   // remaining lifetime of the OatFileAssistant object.
@@ -407,6 +432,7 @@
   bool image_info_load_attempted_ = false;
   bool image_info_load_succeeded_ = false;
   ImageInfo cached_image_info_;
+  uint32_t combined_image_checksum_ = 0;
 
   // For debugging only.
   // If this flag is set, the oat or odex file has been released to the user
@@ -417,6 +443,8 @@
   DISALLOW_COPY_AND_ASSIGN(OatFileAssistant);
 };
 
+std::ostream& operator << (std::ostream& stream, const OatFileAssistant::OatStatus status);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_OAT_FILE_ASSISTANT_H_
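
Callers of MakeUpToDate now have three outcomes to distinguish instead of a bool. A plausible call site, purely hypothetical, would branch like this, with error_msg populated for both non-success cases:

    #include <string>

    enum ResultOfAttemptToUpdate { kUpdateFailed, kUpdateNotAttempted, kUpdateSucceeded };

    // Hypothetical caller: only kUpdateFailed is an unexpected error;
    // kUpdateNotAttempted means the update was deliberately skipped
    // (e.g. dex2oat disabled), and error_msg says why.
    bool HandleUpdate(ResultOfAttemptToUpdate result, const std::string& error_msg) {
      switch (result) {
        case kUpdateSucceeded:
          return true;
        case kUpdateNotAttempted:
          // Log and fall back to whatever code is already usable.
          // e.g.: LOG(INFO) << "Update not attempted: " << error_msg;
          return false;
        case kUpdateFailed:
          // e.g.: LOG(WARNING) << "Update failed: " << error_msg;
          return false;
      }
      return false;
    }
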
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 634e048..764b969 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -230,6 +230,7 @@
                                                      &error_msg));
     ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
     EXPECT_FALSE(odex_file->IsPic());
+    EXPECT_TRUE(odex_file->HasPatchInfo());
     EXPECT_EQ(filter, odex_file->GetCompilerFilter());
 
     if (CompilerFilter::IsCompilationEnabled(filter)) {
@@ -238,7 +239,8 @@
       ASSERT_TRUE(!image_spaces.empty() && image_spaces[0] != nullptr);
       const ImageHeader& image_header = image_spaces[0]->GetImageHeader();
       const OatHeader& oat_header = odex_file->GetOatHeader();
-      EXPECT_EQ(image_header.GetOatChecksum(), oat_header.GetImageFileLocationOatChecksum());
+      uint32_t combined_checksum = OatFileAssistant::CalculateCombinedImageChecksum();
+      EXPECT_EQ(combined_checksum, oat_header.GetImageFileLocationOatChecksum());
       EXPECT_NE(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()),
           oat_header.GetImageFileLocationOatDataBegin());
       EXPECT_NE(image_header.GetPatchDelta(), oat_header.GetImagePatchDelta());
@@ -277,6 +279,44 @@
     EXPECT_EQ(filter, odex_file->GetCompilerFilter());
   }
 
+  // Generate a non-PIC odex file without patch information, for test
+  // purposes. The generated odex file will be unrelocated.
+  // TODO: This won't work correctly if we depend on the boot image being
+  // randomly relocated by a non-zero amount. We should have a better solution
+  // for avoiding that flakiness and for sharing the code used to generate
+  // odex and oat files for tests.
+  void GenerateNoPatchOdexForTest(const std::string& dex_location,
+                                  const std::string& odex_location,
+                                  CompilerFilter::Filter filter) {
+    // Temporarily redirect the dalvik cache so dex2oat doesn't find the
+    // relocated image file.
+    std::string android_data_tmp = GetScratchDir() + "AndroidDataTmp";
+    setenv("ANDROID_DATA", android_data_tmp.c_str(), 1);
+    std::vector<std::string> args;
+    args.push_back("--dex-file=" + dex_location);
+    args.push_back("--oat-file=" + odex_location);
+    args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
+    args.push_back("--runtime-arg");
+    args.push_back("-Xnorelocate");
+    std::string error_msg;
+    ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
+    setenv("ANDROID_DATA", android_data_.c_str(), 1);
+
+    // Verify the odex file was generated as expected.
+    std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
+                                                     odex_location.c_str(),
+                                                     nullptr,
+                                                     nullptr,
+                                                     false,
+                                                     /*low_4gb*/false,
+                                                     dex_location.c_str(),
+                                                     &error_msg));
+    ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
+    EXPECT_FALSE(odex_file->IsPic());
+    EXPECT_FALSE(odex_file->HasPatchInfo());
+    EXPECT_EQ(filter, odex_file->GetCompilerFilter());
+  }
+
  private:
   // Reserve memory around where the image will be loaded so other memory
   // won't conflict when it comes time to load the image.
@@ -413,7 +453,7 @@
 
   // Trying to make the oat file up to date should not fail or crash.
   std::string error_msg;
-  EXPECT_TRUE(oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg));
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, oat_file_assistant.MakeUpToDate(&error_msg));
 
   // Trying to get the best oat file should fail, but not crash.
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
@@ -664,7 +704,9 @@
 
   // Make the oat file up to date.
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg)) << error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -726,7 +768,9 @@
 
   // Make the oat file up to date.
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg)) << error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -782,7 +826,9 @@
 
   // Make the oat file up to date. This should have no effect.
   std::string error_msg;
-  EXPECT_TRUE(oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg)) << error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -832,7 +878,9 @@
 
   // Make the oat file up to date.
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg)) << error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -856,6 +904,39 @@
   EXPECT_EQ(1u, dex_files.size());
 }
 
+// Case: We have a DEX file, no ODEX file and an OAT file that needs
+// relocation but doesn't have patch info.
+// Expect: The status is kDex2OatNeeded, because we can't run patchoat.
+TEST_F(OatFileAssistantTest, NoSelfRelocation) {
+  std::string dex_location = GetScratchDir() + "/NoSelfRelocation.jar";
+  std::string oat_location = GetOdexDir() + "/NoSelfRelocation.oat";
+
+  // Create the dex and odex files.
+  Copy(GetDexSrc1(), dex_location);
+  GenerateNoPatchOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      oat_location.c_str(), kRuntimeISA, false, true);
+
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  // Make the oat file up to date.
+  std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
+  ASSERT_TRUE(oat_file.get() != nullptr);
+  EXPECT_TRUE(oat_file->IsExecutable());
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str());
+  EXPECT_EQ(1u, dex_files.size());
+}
+
 // Case: We have a DEX file, an ODEX file and an OAT file, where the ODEX and
 // OAT files both have patch delta of 0.
 // Expect: It shouldn't crash, and status is kPatchOatNeeded.
@@ -1023,7 +1104,9 @@
   OatFileAssistant oat_file_assistant(
       dex_location.c_str(), oat_location.c_str(), kRuntimeISA, false, true);
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg)) << error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -1053,7 +1136,9 @@
   OatFileAssistant oat_file_assistant(
       dex_location.c_str(), oat_location.c_str(), kRuntimeISA, false, true);
   std::string error_msg;
-  ASSERT_FALSE(oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg));
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  ASSERT_EQ(OatFileAssistant::kUpdateNotAttempted,
+      oat_file_assistant.MakeUpToDate(&error_msg));
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() == nullptr);
@@ -1068,7 +1153,9 @@
   OatFileAssistant oat_file_assistant(
       dex_location.c_str(), oat_location.c_str(), kRuntimeISA, false, true);
   std::string error_msg;
-  ASSERT_FALSE(oat_file_assistant.GenerateOatFile(CompilerFilter::kSpeed, &error_msg));
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
+      oat_file_assistant.GenerateOatFile(&error_msg));
 }
 
 // Turn an absolute path into a path relative to the current working
@@ -1147,7 +1234,9 @@
 
   // Trying to make it up to date should have no effect.
   std::string error_msg;
-  EXPECT_TRUE(oat_file_assistant.MakeUpToDate(CompilerFilter::kSpeed, &error_msg));
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(&error_msg));
   EXPECT_TRUE(error_msg.empty());
 }
 
@@ -1287,6 +1376,34 @@
   EXPECT_EQ(2u, dex_files.size());
 }
 
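+// Case: We have a DEX file, and we pass --compiler-filter options to
+// MakeUpToDate via the runtime compiler options.
+// Expect: MakeUpToDate compiles with the requested filter and refuses to
+// attempt an update for an unrecognized filter name.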
+TEST_F(OatFileAssistantTest, RuntimeCompilerFilterOptionUsed) {
+  std::string dex_location = GetScratchDir() + "/RuntimeCompilerFilterOptionUsed.jar";
+  Copy(GetDexSrc1(), dex_location);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+
+  std::string error_msg;
+  Runtime::Current()->AddCompilerOption("--compiler-filter=interpret-only");
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  Runtime::Current()->AddCompilerOption("--compiler-filter=bogus");
+  EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
+      oat_file_assistant.MakeUpToDate(&error_msg));
+}
+
 TEST(OatFileAssistantUtilsTest, DexFilenameToOdexFilename) {
   std::string error_msg;
   std::string odex_file;
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 2f13f55..bc01da4 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -44,8 +44,6 @@
 // If true, then we attempt to load the application image if it exists.
 static constexpr bool kEnableAppImage = true;
 
-CompilerFilter::Filter OatFileManager::filter_ = CompilerFilter::Filter::kSpeed;
-
 const OatFile* OatFileManager::RegisterOatFile(std::unique_ptr<const OatFile> oat_file) {
   WriterMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
   DCHECK(oat_file != nullptr);
@@ -74,6 +72,20 @@
   compare.release();
 }
 
+const OatFile* OatFileManager::FindOpenedOatFileFromDexLocation(
+    const std::string& dex_base_location) const {
+  ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+  for (const std::unique_ptr<const OatFile>& oat_file : oat_files_) {
+    const std::vector<const OatDexFile*>& oat_dex_files = oat_file->GetOatDexFiles();
+    for (const OatDexFile* oat_dex_file : oat_dex_files) {
+      if (DexFile::GetBaseLocation(oat_dex_file->GetDexFileLocation()) == dex_base_location) {
+        return oat_file.get();
+      }
+    }
+  }
+  return nullptr;
+}
+
 const OatFile* OatFileManager::FindOpenedOatFileFromOatLocation(const std::string& oat_location)
     const {
   ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
@@ -327,10 +339,23 @@
 
   const OatFile* source_oat_file = nullptr;
 
-  // Update the oat file on disk if we can. This may fail, but that's okay.
-  // Best effort is all that matters here.
-  if (!oat_file_assistant.MakeUpToDate(filter_, /*out*/ &error_msg)) {
-    LOG(INFO) << error_msg;
+  // Update the oat file on disk if we can, based on the --compiler-filter
+  // option derived from the current runtime options.
+  // This may fail, but that's okay. Best effort is all that matters here.
+  switch (oat_file_assistant.MakeUpToDate(/*out*/ &error_msg)) {
+    case OatFileAssistant::kUpdateFailed:
+      LOG(WARNING) << error_msg;
+      break;
+
+    case OatFileAssistant::kUpdateNotAttempted:
+      // Avoid spamming the logs if we decided not to attempt making the oat
+      // file up to date.
+      VLOG(oat) << error_msg;
+      break;
+
+    case OatFileAssistant::kUpdateSucceeded:
+      // Nothing to do.
+      break;
   }
 
   // Get the oat file on disk.
@@ -352,7 +377,7 @@
 
       // However, if the app was part of /system and preopted, there is no original dex file
       // available. In that case grudgingly accept the oat file.
-      if (!DexFile::MaybeDex(dex_location)) {
+      if (!oat_file_assistant.HasOriginalDexFiles()) {
         accept_oat_file = true;
         LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
                      << "Allow oat file use. This is potentially dangerous.";
@@ -437,7 +462,8 @@
       if (Runtime::Current()->IsDexFileFallbackEnabled()) {
         if (!DexFile::Open(dex_location, dex_location, /*out*/ &error_msg, &dex_files)) {
           LOG(WARNING) << error_msg;
-          error_msgs->push_back("Failed to open dex files from " + std::string(dex_location));
+          error_msgs->push_back("Failed to open dex files from " + std::string(dex_location)
+                                + " because: " + error_msg);
         }
       } else {
         error_msgs->push_back("Fallback mode disabled, skipping dex files.");
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
index 574d0e2..7017dfc 100644
--- a/runtime/oat_file_manager.h
+++ b/runtime/oat_file_manager.h
@@ -25,7 +25,6 @@
 
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "compiler_filter.h"
 #include "jni.h"
 
 namespace art {
@@ -60,6 +59,11 @@
   const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location) const
       REQUIRES(!Locks::oat_file_manager_lock_);
 
+  // Find the oat file that contains a dex file with the given dex base location.
+  // Returns null if there is none.
+  const OatFile* FindOpenedOatFileFromDexLocation(const std::string& dex_base_location) const
+      REQUIRES(!Locks::oat_file_manager_lock_);
+
   // Attempt to reserve a location; returns false if it is already reserved or already in use by
   // an oat file.
   bool RegisterOatFileLocation(const std::string& oat_location)
@@ -111,10 +115,6 @@
 
   void DumpForSigQuit(std::ostream& os);
 
-  static void SetCompilerFilter(CompilerFilter::Filter filter) {
-    filter_ = filter;
-  }
-
  private:
   // Check for duplicate class definitions of the given oat file against all open oat files.
   // Return true if there are any class definition collisions in the oat_file.
@@ -128,9 +128,6 @@
   std::unordered_map<std::string, size_t> oat_file_count_ GUARDED_BY(Locks::oat_file_count_lock_);
   bool have_non_pic_oat_file_;
 
-  // The compiler filter used for oat files loaded by the oat file manager.
-  static CompilerFilter::Filter filter_;
-
   DISALLOW_COPY_AND_ASSIGN(OatFileManager);
 };
 
diff --git a/runtime/oat_quick_method_header.cc b/runtime/oat_quick_method_header.cc
index 9786c05..0ab2bfe 100644
--- a/runtime/oat_quick_method_header.cc
+++ b/runtime/oat_quick_method_header.cc
@@ -17,23 +17,18 @@
 #include "oat_quick_method_header.h"
 
 #include "art_method.h"
-#include "mapping_table.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 
 namespace art {
 
 OatQuickMethodHeader::OatQuickMethodHeader(
-    uint32_t mapping_table_offset,
     uint32_t vmap_table_offset,
-    uint32_t gc_map_offset,
     uint32_t frame_size_in_bytes,
     uint32_t core_spill_mask,
     uint32_t fp_spill_mask,
     uint32_t code_size)
-    : mapping_table_offset_(mapping_table_offset),
-      vmap_table_offset_(vmap_table_offset),
-      gc_map_offset_(gc_map_offset),
+    : vmap_table_offset_(vmap_table_offset),
       frame_info_(frame_size_in_bytes, core_spill_mask, fp_spill_mask),
       code_size_(code_size) {}
 
@@ -46,34 +41,14 @@
   uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
   if (IsOptimized()) {
     CodeInfo code_info = GetOptimizedCodeInfo();
-    StackMapEncoding encoding = code_info.ExtractEncoding();
+    CodeInfoEncoding encoding = code_info.ExtractEncoding();
     StackMap stack_map = code_info.GetStackMapForNativePcOffset(sought_offset, encoding);
     if (stack_map.IsValid()) {
-      return stack_map.GetDexPc(encoding);
+      return stack_map.GetDexPc(encoding.stack_map_encoding);
     }
   } else {
-    MappingTable table(GetMappingTable());
-    // NOTE: Special methods (see Mir2Lir::GenSpecialCase()) have an empty mapping
-    // but they have no suspend checks and, consequently, we never call ToDexPc() for them.
-    if (table.TotalSize() == 0) {
-      DCHECK(method->IsNative());
-      return DexFile::kDexNoIndex;
-    }
-
-    // Assume the caller wants a pc-to-dex mapping so check here first.
-    typedef MappingTable::PcToDexIterator It;
-    for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-      if (cur.NativePcOffset() == sought_offset) {
-        return cur.DexPc();
-      }
-    }
-    // Now check dex-to-pc mappings.
-    typedef MappingTable::DexToPcIterator It2;
-    for (It2 cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-      if (cur.NativePcOffset() == sought_offset) {
-        return cur.DexPc();
-      }
-    }
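+    // With the mapping tables gone, all compiled managed code is optimized;
+    // only native methods reach this branch, and they have no dex pc to map.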
+    DCHECK(method->IsNative());
+    return DexFile::kDexNoIndex;
   }
   if (abort_on_failure) {
     ScopedObjectAccess soa(Thread::Current());
@@ -91,43 +66,22 @@
                                                 bool is_for_catch_handler,
                                                 bool abort_on_failure) const {
   const void* entry_point = GetEntryPoint();
-  if (IsOptimized()) {
-    // Optimized code does not have a mapping table. Search for the dex-to-pc
-    // mapping in stack maps.
-    CodeInfo code_info = GetOptimizedCodeInfo();
-    StackMapEncoding encoding = code_info.ExtractEncoding();
+  DCHECK(!method->IsNative());
+  DCHECK(IsOptimized());
+  // Search for the dex-to-pc mapping in stack maps.
+  CodeInfo code_info = GetOptimizedCodeInfo();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
 
-    // All stack maps are stored in the same CodeItem section, safepoint stack
-    // maps first, then catch stack maps. We use `is_for_catch_handler` to select
-    // the order of iteration.
-    StackMap stack_map =
-        LIKELY(is_for_catch_handler) ? code_info.GetCatchStackMapForDexPc(dex_pc, encoding)
-                                     : code_info.GetStackMapForDexPc(dex_pc, encoding);
-    if (stack_map.IsValid()) {
-      return reinterpret_cast<uintptr_t>(entry_point) + stack_map.GetNativePcOffset(encoding);
-    }
-  } else {
-    MappingTable table(GetMappingTable());
-    if (table.TotalSize() == 0) {
-      DCHECK_EQ(dex_pc, 0U);
-      return 0;   // Special no mapping/pc == 0 case
-    }
-    // Assume the caller wants a dex-to-pc mapping so check here first.
-    typedef MappingTable::DexToPcIterator It;
-    for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-      if (cur.DexPc() == dex_pc) {
-        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
-      }
-    }
-    // Now check pc-to-dex mappings.
-    typedef MappingTable::PcToDexIterator It2;
-    for (It2 cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-      if (cur.DexPc() == dex_pc) {
-        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
-      }
-    }
+  // All stack maps are stored in the same CodeItem section, safepoint stack
+  // maps first, then catch stack maps. We use `is_for_catch_handler` to select
+  // the order of iteration.
+  StackMap stack_map =
+      LIKELY(is_for_catch_handler) ? code_info.GetCatchStackMapForDexPc(dex_pc, encoding)
+                                   : code_info.GetStackMapForDexPc(dex_pc, encoding);
+  if (stack_map.IsValid()) {
+    return reinterpret_cast<uintptr_t>(entry_point) +
+           stack_map.GetNativePcOffset(encoding.stack_map_encoding);
   }
-
   if (abort_on_failure) {
     ScopedObjectAccess soa(Thread::Current());
     LOG(FATAL) << "Failed to find native offset for dex pc 0x" << std::hex << dex_pc
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index daabc6e..abddc6d 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -30,9 +30,7 @@
 // OatQuickMethodHeader precedes the raw code chunk generated by the compiler.
 class PACKED(4) OatQuickMethodHeader {
  public:
-  OatQuickMethodHeader(uint32_t mapping_table_offset = 0U,
-                       uint32_t vmap_table_offset = 0U,
-                       uint32_t gc_map_offset = 0U,
+  OatQuickMethodHeader(uint32_t vmap_table_offset = 0U,
                        uint32_t frame_size_in_bytes = 0U,
                        uint32_t core_spill_mask = 0U,
                        uint32_t fp_spill_mask = 0U,
@@ -60,7 +58,7 @@
   }
 
   bool IsOptimized() const {
-    return gc_map_offset_ == 0 && vmap_table_offset_ != 0;
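+    // The gc map and mapping table fields are gone, so optimized code is now
+    // recognized by having both a code size and a vmap table.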
+    return code_size_ != 0 && vmap_table_offset_ != 0;
   }
 
   const void* GetOptimizedCodeInfoPtr() const {
@@ -81,14 +79,6 @@
     return code_size_;
   }
 
-  const uint8_t* GetNativeGcMap() const {
-    return (gc_map_offset_ == 0) ? nullptr : code_ - gc_map_offset_;
-  }
-
-  const uint8_t* GetMappingTable() const {
-    return (mapping_table_offset_ == 0) ? nullptr : code_ - mapping_table_offset_;
-  }
-
   const uint8_t* GetVmapTable() const {
     CHECK(!IsOptimized()) << "Unimplemented vmap table for optimizing compiler";
     return (vmap_table_offset_ == 0) ? nullptr : code_ - vmap_table_offset_;
@@ -135,12 +125,8 @@
 
   uint32_t ToDexPc(ArtMethod* method, const uintptr_t pc, bool abort_on_failure = true) const;
 
-  // The offset in bytes from the start of the mapping table to the end of the header.
-  uint32_t mapping_table_offset_;
   // The offset in bytes from the start of the vmap table to the end of the header.
   uint32_t vmap_table_offset_;
-  // The offset in bytes from the start of the gc map to the end of the header.
-  uint32_t gc_map_offset_;
   // The stack frame information.
   QuickMethodFrameInfo frame_info_;
   // The code size in bytes.
diff --git a/runtime/openjdkjvm/OpenjdkJvm.cc b/runtime/openjdkjvm/OpenjdkJvm.cc
index aff9b61..ca5efe5 100644
--- a/runtime/openjdkjvm/OpenjdkJvm.cc
+++ b/runtime/openjdkjvm/OpenjdkJvm.cc
@@ -58,10 +58,10 @@
 #include <sys/socket.h>
 #include <sys/ioctl.h>
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 // This function is provided by android linker.
 extern "C" void android_update_LD_LIBRARY_PATH(const char* ld_library_path);
-#endif  // __ANDROID__
+#endif  // ART_TARGET_ANDROID
 
 #undef LOG_TAG
 #define LOG_TAG "artopenjdk"
@@ -116,7 +116,18 @@
 
 /* posix lseek() */
 JNIEXPORT jlong JVM_Lseek(jint fd, jlong offset, jint whence) {
-    return TEMP_FAILURE_RETRY(lseek(fd, offset, whence));
+#if !defined(__APPLE__)
+    // NOTE: Using TEMP_FAILURE_RETRY here is busted for LP32 on glibc - the return
+    // value will be coerced into an int32_t.
+    //
+    // lseek64 isn't specified to return EINTR so it shouldn't be necessary
+    // anyway.
+    return lseek64(fd, offset, whence);
+#else
+    // NOTE: This code is compiled for Mac OS but isn't ever run on that
+    // platform.
+    return lseek(fd, offset, whence);
+#endif
 }
 
 /*
@@ -314,7 +325,7 @@
 }
 
 static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPath) {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   if (javaLdLibraryPath != nullptr) {
     ScopedUtfChars ldLibraryPath(env, javaLdLibraryPath);
     if (ldLibraryPath.c_str() != nullptr) {
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 48c91f6..eac5b43 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -153,7 +153,7 @@
       .Define("-Xusejit:_")
           .WithType<bool>()
           .WithValueMap({{"false", false}, {"true", true}})
-          .IntoKey(M::UseJIT)
+          .IntoKey(M::UseJitCompilation)
       .Define("-Xjitinitialsize:_")
           .WithType<MemoryKiB>()
           .IntoKey(M::JITCodeCacheInitialCapacity)
@@ -169,6 +169,12 @@
       .Define("-Xjitosrthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::JITOsrThreshold)
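+      // Going by the option names, the two weights below scale how heavily
+      // priority-thread samples and invoke transitions count toward the JIT
+      // thresholds above.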
+      .Define("-Xjitprithreadweight:_")
+          .WithType<unsigned int>()
+          .IntoKey(M::JITPriorityThreadWeight)
+      .Define("-Xjittransitionweight:_")
+          .WithType<unsigned int>()
+          .IntoKey(M::JITInvokeTransitionWeight)
       .Define("-Xjitsaveprofilinginfo")
           .WithValue(true)
           .IntoKey(M::JITSaveProfilingInfo)
@@ -286,9 +292,6 @@
           .IntoKey(M::Experimental)
       .Define("-Xforce-nb-testing")
           .IntoKey(M::ForceNativeBridge)
-      .Define("-XOatFileManagerCompilerFilter:_")
-          .WithType<std::string>()
-          .IntoKey(M::OatFileManagerCompilerFilter)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
@@ -468,6 +471,11 @@
     LOG(INFO) << "setting boot class path to " << *args.Get(M::BootClassPath);
   }
 
+  if (args.GetOrDefault(M::UseJitCompilation) && args.GetOrDefault(M::Interpret)) {
+    Usage("-Xusejit:true and -Xint cannot be specified together");
+    Exit(0);
+  }
+
   // Set a default boot class path if we didn't get an explicit one via command line.
   if (getenv("BOOTCLASSPATH") != nullptr) {
     args.SetIfMissing(M::BootClassPath, std::string(getenv("BOOTCLASSPATH")));
@@ -702,6 +710,7 @@
   UsageMessage(stream, "  -Xjitmaxsize:N\n");
   UsageMessage(stream, "  -Xjitwarmupthreshold:integervalue\n");
   UsageMessage(stream, "  -Xjitosrthreshold:integervalue\n");
+  UsageMessage(stream, "  -Xjitprithreadweight:integervalue\n");
   UsageMessage(stream, "  -X[no]relocate\n");
   UsageMessage(stream, "  -X[no]dex2oat (Whether to invoke dex2oat on the application)\n");
   UsageMessage(stream, "  -X[no]image-dex2oat (Whether to create and use a boot image)\n");
diff --git a/runtime/prebuilt_tools_test.cc b/runtime/prebuilt_tools_test.cc
index eb226d4..c2b34c8 100644
--- a/runtime/prebuilt_tools_test.cc
+++ b/runtime/prebuilt_tools_test.cc
@@ -23,7 +23,7 @@
 namespace art {
 
 // Run the tests only on host.
-#ifndef __ANDROID__
+#ifndef ART_TARGET_ANDROID
 
 class PrebuiltToolsTest : public CommonRuntimeTest {
 };
@@ -61,6 +61,6 @@
   }
 }
 
-#endif  // __ANDROID__
+#endif  // ART_TARGET_ANDROID
 
 }  // namespace art
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index c7ccee2..1dea562 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -434,7 +434,7 @@
 bool InlineMethodAnalyser::AnalyseMethodCode(verifier::MethodVerifier* verifier,
                                              InlineMethod* result) {
   DCHECK(verifier != nullptr);
-  if (!Runtime::Current()->UseJit()) {
+  if (!Runtime::Current()->UseJitCompilation()) {
     DCHECK_EQ(verifier->CanLoadClasses(), result != nullptr);
   }
 
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 6317f5e..a3e1f00 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -220,7 +220,7 @@
 
   const size_t number_of_vregs = handler_method_->GetCodeItem()->registers_size_;
   CodeInfo code_info = handler_method_header_->GetOptimizedCodeInfo();
-  StackMapEncoding encoding = code_info.ExtractEncoding();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
 
   // Find stack map of the catch block.
   StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc(), encoding);
@@ -342,11 +342,7 @@
         updated_vregs = GetThread()->GetUpdatedVRegFlags(frame_id);
         DCHECK(updated_vregs != nullptr);
       }
-      if (GetCurrentOatQuickMethodHeader()->IsOptimized()) {
-        HandleOptimizingDeoptimization(method, new_frame, updated_vregs);
-      } else {
-        HandleQuickDeoptimization(method, new_frame, updated_vregs);
-      }
+      HandleOptimizingDeoptimization(method, new_frame, updated_vregs);
       if (updated_vregs != nullptr) {
         // Calling Thread::RemoveDebuggerShadowFrameMapping will also delete the updated_vregs
         // array so this must come after we processed the frame.
@@ -386,11 +382,10 @@
     const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
     CodeInfo code_info = method_header->GetOptimizedCodeInfo();
     uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
-    StackMapEncoding encoding = code_info.ExtractEncoding();
+    CodeInfoEncoding encoding = code_info.ExtractEncoding();
     StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
     const size_t number_of_vregs = m->GetCodeItem()->registers_size_;
-    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
-    uint32_t register_mask = stack_map.GetRegisterMask(encoding);
+    uint32_t register_mask = stack_map.GetRegisterMask(encoding.stack_map_encoding);
     DexRegisterMap vreg_map = IsInInlinedFrame()
         ? code_info.GetDexRegisterMapAtDepth(GetCurrentInliningDepth() - 1,
                                              code_info.GetInlineInfoOf(stack_map, encoding),
@@ -423,7 +418,8 @@
           const uint8_t* addr = reinterpret_cast<const uint8_t*>(GetCurrentQuickFrame()) + offset;
           value = *reinterpret_cast<const uint32_t*>(addr);
           uint32_t bit = (offset >> 2);
-          if (stack_mask.size_in_bits() > bit && stack_mask.LoadBit(bit)) {
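+          // Consult the stack mask in the stack map to tell whether this
+          // stack slot holds a reference.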
+          if (stack_map.GetNumberOfStackMaskBits(encoding.stack_map_encoding) > bit &&
+              stack_map.GetStackMaskBit(encoding.stack_map_encoding, bit)) {
             is_reference = true;
           }
           break;
@@ -475,132 +471,6 @@
     return static_cast<VRegKind>(kinds.at(reg * 2));
   }
 
-  void HandleQuickDeoptimization(ArtMethod* m,
-                                 ShadowFrame* new_frame,
-                                 const bool* updated_vregs)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    const DexFile::CodeItem* code_item = m->GetCodeItem();
-    CHECK(code_item != nullptr) << "No code item for " << PrettyMethod(m);
-    uint16_t num_regs = code_item->registers_size_;
-    uint32_t dex_pc = GetDexPc();
-    StackHandleScope<2> hs(GetThread());  // Dex cache and class loader.
-    mirror::Class* declaring_class = m->GetDeclaringClass();
-    Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
-    Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
-    verifier::MethodVerifier verifier(GetThread(), h_dex_cache->GetDexFile(), h_dex_cache,
-                                      h_class_loader, &m->GetClassDef(), code_item,
-                                      m->GetDexMethodIndex(), m, m->GetAccessFlags(), true, true,
-                                      true, true);
-    bool verifier_success = verifier.Verify();
-    CHECK(verifier_success) << PrettyMethod(m);
-    {
-      ScopedStackedShadowFramePusher pusher(GetThread(), new_frame,
-                                            StackedShadowFrameType::kShadowFrameUnderConstruction);
-      const std::vector<int32_t> kinds(verifier.DescribeVRegs(dex_pc));
-
-      // Markers for dead values, used when the verifier knows a Dex register is undefined,
-      // or when the compiler knows the register has not been initialized, or is not used
-      // anymore in the method.
-      static constexpr uint32_t kDeadValue = 0xEBADDE09;
-      static constexpr uint64_t kLongDeadValue = 0xEBADDE09EBADDE09;
-      for (uint16_t reg = 0; reg < num_regs; ++reg) {
-        if (updated_vregs != nullptr && updated_vregs[reg]) {
-          // Keep the value set by debugger.
-          continue;
-        }
-        VRegKind kind = GetVRegKind(reg, kinds);
-        switch (kind) {
-          case kUndefined:
-            new_frame->SetVReg(reg, kDeadValue);
-            break;
-          case kConstant:
-            new_frame->SetVReg(reg, kinds.at((reg * 2) + 1));
-            break;
-          case kReferenceVReg: {
-            uint32_t value = 0;
-            // Check IsReferenceVReg in case the compiled GC map doesn't agree with the verifier.
-            // We don't want to copy a stale reference into the shadow frame as a reference.
-            // b/20736048
-            if (GetVReg(m, reg, kind, &value) && IsReferenceVReg(m, reg)) {
-              new_frame->SetVRegReference(reg, reinterpret_cast<mirror::Object*>(value));
-            } else {
-              new_frame->SetVReg(reg, kDeadValue);
-            }
-            break;
-          }
-          case kLongLoVReg:
-            if (GetVRegKind(reg + 1, kinds) == kLongHiVReg) {
-              // Treat it as a "long" register pair.
-              uint64_t value = 0;
-              if (GetVRegPair(m, reg, kLongLoVReg, kLongHiVReg, &value)) {
-                new_frame->SetVRegLong(reg, value);
-              } else {
-                new_frame->SetVRegLong(reg, kLongDeadValue);
-              }
-            } else {
-              uint32_t value = 0;
-              if (GetVReg(m, reg, kind, &value)) {
-                new_frame->SetVReg(reg, value);
-              } else {
-                new_frame->SetVReg(reg, kDeadValue);
-              }
-            }
-            break;
-          case kLongHiVReg:
-            if (GetVRegKind(reg - 1, kinds) == kLongLoVReg) {
-              // Nothing to do: we treated it as a "long" register pair.
-            } else {
-              uint32_t value = 0;
-              if (GetVReg(m, reg, kind, &value)) {
-                new_frame->SetVReg(reg, value);
-              } else {
-                new_frame->SetVReg(reg, kDeadValue);
-              }
-            }
-            break;
-          case kDoubleLoVReg:
-            if (GetVRegKind(reg + 1, kinds) == kDoubleHiVReg) {
-              uint64_t value = 0;
-              if (GetVRegPair(m, reg, kDoubleLoVReg, kDoubleHiVReg, &value)) {
-                // Treat it as a "double" register pair.
-                new_frame->SetVRegLong(reg, value);
-              } else {
-                new_frame->SetVRegLong(reg, kLongDeadValue);
-              }
-            } else {
-              uint32_t value = 0;
-              if (GetVReg(m, reg, kind, &value)) {
-                new_frame->SetVReg(reg, value);
-              } else {
-                new_frame->SetVReg(reg, kDeadValue);
-              }
-            }
-            break;
-          case kDoubleHiVReg:
-            if (GetVRegKind(reg - 1, kinds) == kDoubleLoVReg) {
-              // Nothing to do: we treated it as a "double" register pair.
-            } else {
-              uint32_t value = 0;
-              if (GetVReg(m, reg, kind, &value)) {
-                new_frame->SetVReg(reg, value);
-              } else {
-                new_frame->SetVReg(reg, kDeadValue);
-              }
-            }
-            break;
-          default:
-            uint32_t value = 0;
-            if (GetVReg(m, reg, kind, &value)) {
-              new_frame->SetVReg(reg, value);
-            } else {
-              new_frame->SetVReg(reg, kDeadValue);
-            }
-            break;
-        }
-      }
-    }
-  }
-
   QuickExceptionHandler* const exception_handler_;
   ShadowFrame* prev_shadow_frame_;
   bool stacked_shadow_frame_pushed_;
@@ -639,7 +509,7 @@
   // Compiled code made an explicit deoptimization.
   ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
   DCHECK(deopt_method != nullptr);
-  if (Runtime::Current()->UseJit()) {
+  if (Runtime::Current()->UseJitCompilation()) {
     Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
         deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
   } else {
@@ -741,7 +611,7 @@
 // Prints out methods with their type of frame.
 class DumpFramesWithTypeStackVisitor FINAL : public StackVisitor {
  public:
-  DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false)
+  explicit DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         show_details_(show_details) {}
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index 6234720..a098bf0 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -506,8 +506,6 @@
 };
 
 TEST_F(ReflectionTest, StaticMainMethod) {
-  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
-  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
   TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Main");
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 96f41b3..ca8f8bb 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -60,7 +60,6 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "compiler_callbacks.h"
-#include "compiler_filter.h"
 #include "debugger.h"
 #include "elf_file.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
@@ -103,6 +102,7 @@
 #include "native/java_lang_VMClassLoader.h"
 #include "native/java_lang_ref_FinalizerReference.h"
 #include "native/java_lang_ref_Reference.h"
+#include "native/java_lang_reflect_AbstractMethod.h"
 #include "native/java_lang_reflect_Array.h"
 #include "native/java_lang_reflect_Constructor.h"
 #include "native/java_lang_reflect_Field.h"
@@ -203,6 +203,7 @@
       implicit_so_checks_(false),
       implicit_suspend_checks_(false),
       no_sig_chain_(false),
+      force_native_bridge_(false),
       is_native_bridge_loaded_(false),
       is_native_debuggable_(false),
       zygote_max_failed_boots_(0),
@@ -210,9 +211,11 @@
       oat_file_manager_(nullptr),
       is_low_memory_mode_(false),
       safe_mode_(false),
+      dump_native_stack_on_sig_quit_(true),
       pruned_dalvik_cache_(false),
       // Initially assume we perceive jank in case the process state is never updated.
-      process_state_(kProcessStateJankPerceptible) {
+      process_state_(kProcessStateJankPerceptible),
+      zygote_no_threads_(false) {
   CheckAsmSupportOffsetsAndSizes();
   std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u);
   interpreter::CheckInterpreterAsmConstants();
@@ -541,7 +544,7 @@
 
   // If a debug host build, disable ptrace restriction for debugging and test timeout thread dump.
   // Only 64-bit as prctl() may fail in 32 bit userspace on a 64-bit kernel.
-#if defined(__linux__) && !defined(__ANDROID__) && defined(__x86_64__)
+#if defined(__linux__) && !defined(ART_TARGET_ANDROID) && defined(__x86_64__)
   if (kIsDebugBuild) {
     CHECK_EQ(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY), 0);
   }
@@ -554,30 +557,34 @@
 
   started_ = true;
 
-  // Use !IsAotCompiler so that we get test coverage, tests are never the zygote.
-  if (!IsAotCompiler()) {
-    ScopedObjectAccess soa(self);
-    {
-      ScopedTrace trace2("AddImageStringsToTable");
-      GetInternTable()->AddImagesStringsToTable(heap_->GetBootImageSpaces());
+  // Create the JIT if we need JIT compilation or need to save profiling info.
+  // TODO(calin): We use the JIT class as a proxy for JIT compilation and for
+  // recording profiles. Maybe we should consider changing the name to make it
+  // clear that it is not only about compiling. b/28295073.
+  if (jit_options_->UseJitCompilation() || jit_options_->GetSaveProfilingInfo()) {
+    std::string error_msg;
+    if (!IsZygote()) {
+      // If we are the zygote then we need to wait until after forking to create the code cache
+      // due to SELinux restrictions on r/w/x memory regions.
+      CreateJit();
+    } else if (jit_options_->UseJitCompilation()) {
+      // Try to load the compiler library pre-zygote to reduce PSS. b/27744947
+      if (!jit::Jit::LoadCompilerLibrary(&error_msg)) {
+        LOG(WARNING) << "Failed to load JIT compiler with error " << error_msg;
+      }
     }
-    {
-      ScopedTrace trace2("MoveImageClassesToClassTable");
-      GetClassLinker()->AddBootImageClassesToClassTable();
-    }
-  }
-
-  // If we are the zygote then we need to wait until after forking to create the code cache
-  // due to SELinux restrictions on r/w/x memory regions.
-  if (!IsZygote() && jit_options_->UseJIT()) {
-    CreateJit();
   }
 
   if (!IsImageDex2OatEnabled() || !GetHeap()->HasBootImageSpace()) {
     ScopedObjectAccess soa(self);
-    StackHandleScope<1> hs(soa.Self());
-    auto klass(hs.NewHandle<mirror::Class>(mirror::Class::GetJavaLangClass()));
-    class_linker_->EnsureInitialized(soa.Self(), klass, true, true);
+    StackHandleScope<2> hs(soa.Self());
+
+    auto class_class(hs.NewHandle<mirror::Class>(mirror::Class::GetJavaLangClass()));
+    auto field_class(hs.NewHandle<mirror::Class>(mirror::Field::StaticClass()));
+
+    class_linker_->EnsureInitialized(soa.Self(), class_class, true, true);
+    // Field class is needed for register_java_net_InetAddress in libcore, b/28153851.
+    class_linker_->EnsureInitialized(soa.Self(), field_class, true, true);
   }
 
   // InitNativeMethods needs to be after started_ so that the classes
@@ -716,7 +723,11 @@
   // before fork aren't attributed to an app.
   heap_->ResetGcPerformanceInfo();
 
-  if (!is_system_server && !safe_mode_ && jit_options_->UseJIT() && jit_.get() == nullptr) {
+
+  if (!is_system_server &&
+      !safe_mode_ &&
+      (jit_options_->UseJitCompilation() || jit_options_->GetSaveProfilingInfo()) &&
+      jit_.get() == nullptr) {
     // Note that when running ART standalone (not zygote, nor zygote fork),
     // the jit may have already been created.
     CreateJit();
@@ -906,8 +917,8 @@
 
   oat_file_manager_ = new OatFileManager;
 
-  Monitor::Init(runtime_options.GetOrDefault(Opt::LockProfThreshold),
-                runtime_options.GetOrDefault(Opt::HookIsSensitiveThread));
+  Thread::SetSensitiveThreadHook(runtime_options.GetOrDefault(Opt::HookIsSensitiveThread));
+  Monitor::Init(runtime_options.GetOrDefault(Opt::LockProfThreshold));
 
   boot_class_path_string_ = runtime_options.ReleaseOrDefault(Opt::BootClassPath);
   class_path_string_ = runtime_options.ReleaseOrDefault(Opt::ClassPath);
@@ -960,16 +971,6 @@
   experimental_flags_ = runtime_options.GetOrDefault(Opt::Experimental);
   is_low_memory_mode_ = runtime_options.Exists(Opt::LowMemoryMode);
 
-  {
-    CompilerFilter::Filter filter;
-    std::string filter_str = runtime_options.GetOrDefault(Opt::OatFileManagerCompilerFilter);
-    if (!CompilerFilter::ParseCompilerFilter(filter_str.c_str(), &filter)) {
-      LOG(ERROR) << "Cannot parse compiler filter " << filter_str;
-      return false;
-    }
-    OatFileManager::SetCompilerFilter(filter);
-  }
-
   XGcOption xgc_option = runtime_options.GetOrDefault(Opt::GcOption);
   heap_ = new gc::Heap(runtime_options.GetOrDefault(Opt::MemoryInitialSize),
                        runtime_options.GetOrDefault(Opt::HeapGrowthLimit),
@@ -1019,7 +1020,8 @@
     // this case.
     // If runtime_options doesn't have UseJIT set to true then CreateFromRuntimeArguments returns
     // null and we don't create the jit.
-    jit_options_->SetUseJIT(false);
+    jit_options_->SetUseJitCompilation(false);
+    jit_options_->SetSaveProfilingInfo(false);
   }
 
   // Allocate a global table of boxed lambda objects <-> closures.
@@ -1134,6 +1136,14 @@
       }
       boot_class_path_string_ = Join(dex_locations, ':');
     }
+    {
+      ScopedTrace trace2("AddImageStringsToTable");
+      GetInternTable()->AddImagesStringsToTable(heap_->GetBootImageSpaces());
+    }
+    {
+      ScopedTrace trace2("MoveImageClassesToClassTable");
+      GetClassLinker()->AddBootImageClassesToClassTable();
+    }
   } else {
     std::vector<std::string> dex_filenames;
     Split(boot_class_path_string_, ':', &dex_filenames);
@@ -1348,6 +1358,7 @@
   register_java_lang_DexCache(env);
   register_java_lang_Object(env);
   register_java_lang_ref_FinalizerReference(env);
+  register_java_lang_reflect_AbstractMethod(env);
   register_java_lang_reflect_Array(env);
   register_java_lang_reflect_Constructor(env);
   register_java_lang_reflect_Field(env);
@@ -1607,18 +1618,19 @@
   }
 }
 
-static ImtConflictTable::Entry empty_entry = { nullptr, nullptr };
-
 ArtMethod* Runtime::CreateImtConflictMethod(LinearAlloc* linear_alloc) {
-  auto* method = Runtime::Current()->GetClassLinker()->CreateRuntimeMethod(linear_alloc);
+  ClassLinker* const class_linker = GetClassLinker();
+  ArtMethod* method = class_linker->CreateRuntimeMethod(linear_alloc);
   // When compiling, the code pointer will get set later when the image is loaded.
+  const size_t pointer_size = GetInstructionSetPointerSize(instruction_set_);
   if (IsAotCompiler()) {
-    size_t pointer_size = GetInstructionSetPointerSize(instruction_set_);
     method->SetEntryPointFromQuickCompiledCodePtrSize(nullptr, pointer_size);
   } else {
     method->SetEntryPointFromQuickCompiledCode(GetQuickImtConflictStub());
-    method->SetImtConflictTable(reinterpret_cast<ImtConflictTable*>(&empty_entry));
   }
+  // Create empty conflict table.
+  method->SetImtConflictTable(class_linker->CreateImtConflictTable(/*count*/0u, linear_alloc),
+                              pointer_size);
   return method;
 }
 
@@ -1626,9 +1638,6 @@
   CHECK(method != nullptr);
   CHECK(method->IsRuntimeMethod());
   imt_conflict_method_ = method;
-  if (!IsAotCompiler()) {
-    method->SetImtConflictTable(reinterpret_cast<ImtConflictTable*>(&empty_entry));
-  }
 }
 
 ArtMethod* Runtime::CreateResolutionMethod() {
@@ -1909,21 +1918,12 @@
 
 void Runtime::CreateJit() {
   CHECK(!IsAotCompiler());
-  if (GetInstrumentation()->IsForcedInterpretOnly()) {
-    // Don't create JIT if forced interpret only.
-    return;
+  if (kIsDebugBuild && GetInstrumentation()->IsForcedInterpretOnly()) {
+    DCHECK(!jit_options_->UseJitCompilation());
   }
   std::string error_msg;
   jit_.reset(jit::Jit::Create(jit_options_.get(), &error_msg));
-  if (jit_.get() != nullptr) {
-    jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
-                                     jit_options_->GetWarmupThreshold(),
-                                     jit_options_->GetOsrThreshold());
-    jit_->CreateThreadPool();
-
-    // Notify native debugger about the classes already loaded before the creation of the jit.
-    jit_->DumpTypeInfoForLoadedTypes(GetClassLinker());
-  } else {
+  if (jit_.get() == nullptr) {
     LOG(WARNING) << "Failed to create JIT " << error_msg;
   }
 }
@@ -1946,8 +1946,20 @@
   CHECK(method != nullptr);
   CHECK(method->IsRuntimeMethod());
   imt_unimplemented_method_ = method;
-  if (!IsAotCompiler()) {
-    method->SetImtConflictTable(reinterpret_cast<ImtConflictTable*>(&empty_entry));
+}
+
+void Runtime::FixupConflictTables() {
+  // We can only do this after the class linker is created.
+  const size_t pointer_size = GetClassLinker()->GetImagePointerSize();
+  if (imt_unimplemented_method_->GetImtConflictTable(pointer_size) == nullptr) {
+    imt_unimplemented_method_->SetImtConflictTable(
+        ClassLinker::CreateImtConflictTable(/*count*/0u, GetLinearAlloc(), pointer_size),
+        pointer_size);
+  }
+  if (imt_conflict_method_->GetImtConflictTable(pointer_size) == nullptr) {
+    imt_conflict_method_->SetImtConflictTable(
+        ClassLinker::CreateImtConflictTable(/*count*/0u, GetLinearAlloc(), pointer_size),
+        pointer_size);
   }
 }
 
@@ -1983,4 +1995,18 @@
   GetHeap()->UpdateProcessState(old_process_state, process_state);
 }
 
+void Runtime::RegisterSensitiveThread() const {
+  Thread::SetJitSensitiveThread();
+}
+
+// Returns true if JIT compilation is enabled. GetJit() will not be null in this case.
+bool Runtime::UseJitCompilation() const {
+  return (jit_ != nullptr) && jit_->UseJitCompilation();
+}
+
+// Returns true if profile saving is enabled. GetJit() will not be null in this case.
+bool Runtime::SaveProfileInfo() const {
+  return (jit_ != nullptr) && jit_->SaveProfilingInfo();
+}
+
 }  // namespace art
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 6a6fdb7..1394462 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -127,7 +127,7 @@
 
   // IsAotCompiler for compilers that don't have a running runtime. Only dex2oat currently.
   bool IsAotCompiler() const {
-    return !UseJit() && IsCompiler();
+    return !UseJitCompilation() && IsCompiler();
   }
 
   // IsCompiler is any runtime which has a running compiler, either dex2oat or JIT.
@@ -383,6 +383,7 @@
     return imt_conflict_method_ != nullptr;
   }
 
+  void FixupConflictTables();
   void SetImtConflictMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
   void SetImtUnimplementedMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -451,9 +452,11 @@
   jit::Jit* GetJit() {
     return jit_.get();
   }
-  bool UseJit() const {
-    return jit_.get() != nullptr;
-  }
+
+  // Returns true if JIT compilation is enabled. GetJit() will not be null in this case.
+  bool UseJitCompilation() const;
+  // Returns true if profile saving is enabled. GetJit() will not be null in this case.
+  bool SaveProfileInfo() const;
 
   void PreZygoteFork();
   bool InitZygote();
@@ -635,6 +638,16 @@
     return process_state_ == kProcessStateJankPerceptible;
   }
 
+  void RegisterSensitiveThread() const;
+
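+  // Marks whether zygote code is in a section where starting new threads is
+  // not allowed (see zygote_no_threads_ below).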
+  void SetZygoteNoThreadSection(bool val) {
+    zygote_no_threads_ = val;
+  }
+
+  bool IsZygoteNoThreadSection() const {
+    return zygote_no_threads_;
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -856,6 +869,9 @@
   // Whether or not we currently care about pause times.
   ProcessState process_state_;
 
+  // Whether zygote code is in a section that should not start threads.
+  bool zygote_no_threads_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 6a50ffa..635ff51 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -66,11 +66,13 @@
 RUNTIME_OPTIONS_KEY (Unit,                LowMemoryMode)
 RUNTIME_OPTIONS_KEY (bool,                UseTLAB,                        (kUseTlab || kUseReadBarrier))
 RUNTIME_OPTIONS_KEY (bool,                EnableHSpaceCompactForOOM,      true)
-RUNTIME_OPTIONS_KEY (bool,                UseJIT,                         false)
+RUNTIME_OPTIONS_KEY (bool,                UseJitCompilation,              false)
 RUNTIME_OPTIONS_KEY (bool,                DumpNativeStackOnSigQuit,       true)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITCompileThreshold,            jit::Jit::kDefaultCompileThreshold)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITOsrThreshold)
+RUNTIME_OPTIONS_KEY (unsigned int,        JITPriorityThreadWeight)
+RUNTIME_OPTIONS_KEY (unsigned int,        JITInvokeTransitionWeight)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheInitialCapacity,    jit::JitCodeCache::kInitialCapacity)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheMaxCapacity,        jit::JitCodeCache::kMaxCapacity)
 RUNTIME_OPTIONS_KEY (bool,                JITSaveProfilingInfo,           false)
@@ -127,11 +129,12 @@
 RUNTIME_OPTIONS_KEY (bool (*)(),          HookIsSensitiveThread)
 RUNTIME_OPTIONS_KEY (int32_t (*)(FILE* stream, const char* format, va_list ap), \
                                           HookVfprintf,                   vfprintf)
+// Use _exit instead of exit so that we won't get DCHECK failures in global data
+// destructors. b/28106055.
 RUNTIME_OPTIONS_KEY (void (*)(int32_t status), \
-                                          HookExit,                       exit)
+                                          HookExit,                       _exit)
                                                                           // We don't call abort(3) by default; see
                                                                           // Runtime::Abort.
 RUNTIME_OPTIONS_KEY (void (*)(),          HookAbort,                      nullptr)
-RUNTIME_OPTIONS_KEY (std::string,         OatFileManagerCompilerFilter,   "speed")
 
 #undef RUNTIME_OPTIONS_KEY
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index 0e5b503..49f80f3 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -19,6 +19,7 @@
 
 #include <map>
 #include <memory>
+#include <type_traits>
 
 #include "base/allocator.h"
 #include "base/logging.h"
@@ -124,6 +125,18 @@
     return result.first;
   }
 
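+  // Returns the value mapped at k if it is present; otherwise inserts the
+  // result of create() at k and returns that. create() is invoked only on a
+  // miss, e.g.:
+  //   V v = map.GetOrCreate(k, []() { return ExpensiveDefault(); });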
+  template <typename CreateFn>
+  V GetOrCreate(const K& k, CreateFn create) {
+    static_assert(std::is_same<V, typename std::result_of<CreateFn()>::type>::value,
+                  "Argument `create` should return a value of type V.");
+    auto lb = lower_bound(k);
+    if (lb != end() && !key_comp()(k, lb->first)) {
+      return lb->second;
+    }
+    auto it = PutBefore(lb, k, create());
+    return it->second;
+  }
+
   bool Equals(const Self& rhs) const {
     return map_ == rhs.map_;
   }
diff --git a/runtime/stack.cc b/runtime/stack.cc
index ee5da8e..a5ca527 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -21,7 +21,6 @@
 #include "base/hex_dump.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
-#include "gc_map.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "jit/jit.h"
@@ -36,7 +35,6 @@
 #include "thread.h"
 #include "thread_list.h"
 #include "verify_object-inl.h"
-#include "vmap_table.h"
 
 namespace art {
 
@@ -117,7 +115,7 @@
   const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
   uint32_t native_pc_offset = method_header->NativeQuickPcOffset(cur_quick_frame_pc_);
   CodeInfo code_info = method_header->GetOptimizedCodeInfo();
-  StackMapEncoding encoding = code_info.ExtractEncoding();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
   StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
   DCHECK(stack_map.IsValid());
   return code_info.GetInlineInfoOf(stack_map, encoding);
@@ -130,7 +128,19 @@
     if (IsInInlinedFrame()) {
       size_t depth_in_stack_map = current_inlining_depth_ - 1;
       InlineInfo inline_info = GetCurrentInlineInfo();
-      return GetResolvedMethod(*GetCurrentQuickFrame(), inline_info, depth_in_stack_map);
+      const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+      CodeInfoEncoding encoding = method_header->GetOptimizedCodeInfo().ExtractEncoding();
+      DCHECK(walk_kind_ != StackWalkKind::kSkipInlinedFrames);
+      bool allow_resolve = walk_kind_ != StackWalkKind::kIncludeInlinedFramesNoResolve;
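+      // GetResolvedMethod is templated on whether it may resolve, so the
+      // kIncludeInlinedFramesNoResolve walk kind always takes the
+      // non-resolving path.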
+      return allow_resolve
+          ? GetResolvedMethod<true>(*GetCurrentQuickFrame(),
+                                    inline_info,
+                                    encoding.inline_info_encoding,
+                                    depth_in_stack_map)
+          : GetResolvedMethod<false>(*GetCurrentQuickFrame(),
+                                     inline_info,
+                                     encoding.inline_info_encoding,
+                                     depth_in_stack_map);
     } else {
       return *cur_quick_frame_;
     }
@@ -144,7 +154,10 @@
   } else if (cur_quick_frame_ != nullptr) {
     if (IsInInlinedFrame()) {
       size_t depth_in_stack_map = current_inlining_depth_ - 1;
-      return GetCurrentInlineInfo().GetDexPcAtDepth(depth_in_stack_map);
+      const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+      CodeInfoEncoding encoding = method_header->GetOptimizedCodeInfo().ExtractEncoding();
+      return GetCurrentInlineInfo().GetDexPcAtDepth(encoding.inline_info_encoding,
+                                                    depth_in_stack_map);
     } else if (cur_oat_quick_method_header_ == nullptr) {
       return DexFile::kDexNoIndex;
     } else {
@@ -200,33 +213,6 @@
   return GetCurrentOatQuickMethodHeader()->NativeQuickPcOffset(cur_quick_frame_pc_);
 }
 
-bool StackVisitor::IsReferenceVReg(ArtMethod* m, uint16_t vreg) {
-  DCHECK_EQ(m, GetMethod());
-  // Process register map (which native and runtime methods don't have)
-  if (m->IsNative() || m->IsRuntimeMethod() || m->IsProxyMethod()) {
-    return false;
-  }
-  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-  if (method_header->IsOptimized()) {
-    return true;  // TODO: Implement.
-  }
-  const uint8_t* native_gc_map = method_header->GetNativeGcMap();
-  CHECK(native_gc_map != nullptr) << PrettyMethod(m);
-  const DexFile::CodeItem* code_item = m->GetCodeItem();
-  // Can't be null or how would we compile its instructions?
-  DCHECK(code_item != nullptr) << PrettyMethod(m);
-  NativePcOffsetToReferenceMap map(native_gc_map);
-  size_t num_regs = std::min(map.RegWidth() * 8, static_cast<size_t>(code_item->registers_size_));
-  const uint8_t* reg_bitmap = nullptr;
-  if (num_regs > 0) {
-    uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
-    reg_bitmap = map.FindBitMap(native_pc_offset);
-    DCHECK(reg_bitmap != nullptr);
-  }
-  // Does this register hold a reference?
-  return vreg < num_regs && TestBitmap(vreg, reg_bitmap);
-}
-
 bool StackVisitor::GetVRegFromDebuggerShadowFrame(uint16_t vreg,
                                                   VRegKind kind,
                                                   uint32_t* val) const {
@@ -258,11 +244,8 @@
     if (GetVRegFromDebuggerShadowFrame(vreg, kind, val)) {
       return true;
     }
-    if (cur_oat_quick_method_header_->IsOptimized()) {
-      return GetVRegFromOptimizedCode(m, vreg, kind, val);
-    } else {
-      return GetVRegFromQuickCode(m, vreg, kind, val);
-    }
+    DCHECK(cur_oat_quick_method_header_->IsOptimized());
+    return GetVRegFromOptimizedCode(m, vreg, kind, val);
   } else {
     DCHECK(cur_shadow_frame_ != nullptr);
     if (kind == kReferenceVReg) {
@@ -275,29 +258,6 @@
   }
 }
 
-bool StackVisitor::GetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
-                                        uint32_t* val) const {
-  DCHECK_EQ(m, GetMethod());
-  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
-  const VmapTable vmap_table(method_header->GetVmapTable());
-  uint32_t vmap_offset;
-  // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
-    bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-    uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-    uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
-    return GetRegisterIfAccessible(reg, kind, val);
-  } else {
-    const DexFile::CodeItem* code_item = m->GetCodeItem();
-    DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                      // its instructions?
-    *val = *GetVRegAddrFromQuickCode(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
-                                     frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
-    return true;
-  }
-}
-
 bool StackVisitor::GetVRegFromOptimizedCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
                                             uint32_t* val) const {
   DCHECK_EQ(m, GetMethod());
@@ -308,7 +268,7 @@
   DCHECK_LT(vreg, code_item->registers_size_);
   const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
   CodeInfo code_info = method_header->GetOptimizedCodeInfo();
-  StackMapEncoding encoding = code_info.ExtractEncoding();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
 
   uint32_t native_pc_offset = method_header->NativeQuickPcOffset(cur_quick_frame_pc_);
   StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
@@ -417,11 +377,8 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably read registers without a context.
     DCHECK(m == GetMethod());
-    if (cur_oat_quick_method_header_->IsOptimized()) {
-      return GetVRegPairFromOptimizedCode(m, vreg, kind_lo, kind_hi, val);
-    } else {
-      return GetVRegPairFromQuickCode(m, vreg, kind_lo, kind_hi, val);
-    }
+    DCHECK(cur_oat_quick_method_header_->IsOptimized());
+    return GetVRegPairFromOptimizedCode(m, vreg, kind_lo, kind_hi, val);
   } else {
     DCHECK(cur_shadow_frame_ != nullptr);
     *val = cur_shadow_frame_->GetVRegLong(vreg);
@@ -429,33 +386,6 @@
   }
 }
 
-bool StackVisitor::GetVRegPairFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind kind_lo,
-                                            VRegKind kind_hi, uint64_t* val) const {
-  DCHECK_EQ(m, GetMethod());
-  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
-  const VmapTable vmap_table(method_header->GetVmapTable());
-  uint32_t vmap_offset_lo, vmap_offset_hi;
-  // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind_lo, &vmap_offset_lo) &&
-      vmap_table.IsInContext(vreg + 1, kind_hi, &vmap_offset_hi)) {
-    bool is_float = (kind_lo == kDoubleLoVReg);
-    uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-    uint32_t reg_lo = vmap_table.ComputeRegister(spill_mask, vmap_offset_lo, kind_lo);
-    uint32_t reg_hi = vmap_table.ComputeRegister(spill_mask, vmap_offset_hi, kind_hi);
-    return GetRegisterPairIfAccessible(reg_lo, reg_hi, kind_lo, val);
-  } else {
-    const DexFile::CodeItem* code_item = m->GetCodeItem();
-    DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                      // its instructions?
-    uint32_t* addr = GetVRegAddrFromQuickCode(
-        cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
-        frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
-    *val = *reinterpret_cast<uint64_t*>(addr);
-    return true;
-  }
-}
-
 bool StackVisitor::GetVRegPairFromOptimizedCode(ArtMethod* m, uint16_t vreg,
                                                 VRegKind kind_lo, VRegKind kind_hi,
                                                 uint64_t* val) const {
@@ -707,8 +637,8 @@
 
   // If we are the JIT then we may have just compiled the method after the
   // IsQuickToInterpreterBridge check.
-  jit::Jit* const jit = Runtime::Current()->GetJit();
-  if (jit != nullptr && jit->GetCodeCache()->ContainsPc(code)) {
+  Runtime* runtime = Runtime::Current();
+  if (runtime->UseJitCompilation() && runtime->GetJit()->GetCodeCache()->ContainsPc(code)) {
     return;
   }
 
@@ -748,8 +678,10 @@
           if (space->IsImageSpace()) {
             auto* image_space = space->AsImageSpace();
             const auto& header = image_space->GetImageHeader();
-            const auto* methods = &header.GetMethodsSection();
-            if (methods->Contains(reinterpret_cast<const uint8_t*>(method) - image_space->Begin())) {
+            const ImageSection& methods = header.GetMethodsSection();
+            const ImageSection& runtime_methods = header.GetRuntimeMethodsSection();
+            const size_t offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
+            if (methods.Contains(offset) || runtime_methods.Contains(offset)) {
               in_image = true;
               break;
             }
@@ -859,18 +791,19 @@
         cur_oat_quick_method_header_ = method->GetOatQuickMethodHeader(cur_quick_frame_pc_);
         SanityCheckFrame();
 
-        if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames)
+        if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames ||
+             walk_kind_ == StackWalkKind::kIncludeInlinedFramesNoResolve)
             && (cur_oat_quick_method_header_ != nullptr)
             && cur_oat_quick_method_header_->IsOptimized()) {
           CodeInfo code_info = cur_oat_quick_method_header_->GetOptimizedCodeInfo();
-          StackMapEncoding encoding = code_info.ExtractEncoding();
+          CodeInfoEncoding encoding = code_info.ExtractEncoding();
           uint32_t native_pc_offset =
               cur_oat_quick_method_header_->NativeQuickPcOffset(cur_quick_frame_pc_);
           StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
-          if (stack_map.IsValid() && stack_map.HasInlineInfo(encoding)) {
+          if (stack_map.IsValid() && stack_map.HasInlineInfo(encoding.stack_map_encoding)) {
             InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
             DCHECK_EQ(current_inlining_depth_, 0u);
-            for (current_inlining_depth_ = inline_info.GetDepth();
+            for (current_inlining_depth_ = inline_info.GetDepth(encoding.inline_info_encoding);
                  current_inlining_depth_ != 0;
                  --current_inlining_depth_) {
               bool should_continue = VisitFrame();
@@ -1017,7 +950,7 @@
   }
 }
 
-void LockCountData::AddMonitorInternal(Thread* self, mirror::Object* obj) {
+void LockCountData::AddMonitor(Thread* self, mirror::Object* obj) {
   if (obj == nullptr) {
     return;
   }
@@ -1034,7 +967,7 @@
   monitors_->push_back(obj);
 }
 
-void LockCountData::RemoveMonitorInternal(Thread* self, const mirror::Object* obj) {
+void LockCountData::RemoveMonitorOrThrow(Thread* self, const mirror::Object* obj) {
   if (obj == nullptr) {
     return;
   }
@@ -1067,7 +1000,7 @@
   obj->MonitorExit(self);
 }
 
-bool LockCountData::CheckAllMonitorsReleasedInternal(Thread* self) {
+bool LockCountData::CheckAllMonitorsReleasedOrThrow(Thread* self) {
   DCHECK(self != nullptr);
   if (monitors_ != nullptr) {
     if (!monitors_->empty()) {
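The GetResolvedMethod<true>/<false> split in the stack.cc hunk above turns a runtime flag
into two template instantiations. A freestanding sketch of that dispatch idiom (the names
and bodies here are illustrative, not ART's):

    #include <iostream>

    // Runtime-to-compile-time bool dispatch: the ternary picks one of two
    // instantiations, and inside each instantiation the flag is a constant
    // the compiler can fold away.
    template <bool kAllowResolve>
    int ResolveMethod(int method_index) {
      if (kAllowResolve) {
        return method_index * 2;  // Stand-in for the actual resolution work.
      }
      return method_index;        // Resolution suppressed.
    }

    int Dispatch(bool allow_resolve, int method_index) {
      return allow_resolve ? ResolveMethod<true>(method_index)
                           : ResolveMethod<false>(method_index);
    }

    int main() {
      std::cout << Dispatch(true, 21) << " " << Dispatch(false, 21) << "\n";
      return 0;  // Prints: 42 21
    }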
diff --git a/runtime/stack.h b/runtime/stack.h
index 4fa1a4f..e77ab46 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -80,39 +80,18 @@
  public:
   // Add the given object to the list of monitors, that is, objects that have been locked. This
   // will not throw (but will be skipped if there is an exception pending on entry).
-  template <bool kLockCounting>
-  void AddMonitor(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(self != nullptr);
-    if (!kLockCounting) {
-      return;
-    }
-    AddMonitorInternal(self, obj);
-  }
+  void AddMonitor(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Try to remove the given object from the monitor list, indicating an unlock operation.
   // This will throw an IllegalMonitorStateException (clearing any already pending exception) if
   // no lock was recorded for the object.
-  template <bool kLockCounting>
   void RemoveMonitorOrThrow(Thread* self,
-                            const mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(self != nullptr);
-    if (!kLockCounting) {
-      return;
-    }
-    RemoveMonitorInternal(self, obj);
-  }
+                            const mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Check whether all acquired monitors have been released. This will potentially throw an
   // IllegalMonitorStateException, clearing any already pending exception. Returns true if the
   // check shows that everything is OK with respect to lock counting, false otherwise.
-  template <bool kLockCounting>
-  bool CheckAllMonitorsReleasedOrThrow(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(self != nullptr);
-    if (!kLockCounting) {
-      return true;
-    }
-    return CheckAllMonitorsReleasedInternal(self);
-  }
+  bool CheckAllMonitorsReleasedOrThrow(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
 
   template <typename T, typename... Args>
   void VisitMonitors(T visitor, Args&&... args) SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -125,12 +104,6 @@
   }
 
  private:
-  // Internal implementations.
-  void AddMonitorInternal(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
-  void RemoveMonitorInternal(Thread* self, const mirror::Object* obj)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool CheckAllMonitorsReleasedInternal(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
-
   // Stores references to the locked-on objects. As noted, this should be visited during thread
   // marking.
   std::unique_ptr<std::vector<mirror::Object*>> monitors_;
@@ -187,6 +160,22 @@
     return (dex_pc_ptr_ == nullptr) ? dex_pc_ : dex_pc_ptr_ - code_item_->insns_;
   }
 
+  int16_t GetCachedHotnessCountdown() const {
+    return cached_hotness_countdown_;
+  }
+
+  void SetCachedHotnessCountdown(int16_t cached_hotness_countdown) {
+    cached_hotness_countdown_ = cached_hotness_countdown;
+  }
+
+  int16_t GetHotnessCountdown() const {
+    return hotness_countdown_;
+  }
+
+  void SetHotnessCountdown(int16_t hotness_countdown) {
+    hotness_countdown_ = hotness_countdown;
+  }
+
   void SetDexPC(uint32_t dex_pc) {
     dex_pc_ = dex_pc;
     dex_pc_ptr_ = nullptr;
@@ -397,6 +386,14 @@
     return OFFSETOF_MEMBER(ShadowFrame, code_item_);
   }
 
+  static size_t CachedHotnessCountdownOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, cached_hotness_countdown_);
+  }
+
+  static size_t HotnessCountdownOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, hotness_countdown_);
+  }
+
   // Create ShadowFrame for interpreter using provided memory.
   static ShadowFrame* CreateShadowFrameImpl(uint32_t num_vregs,
                                             ShadowFrame* link,
@@ -406,7 +403,7 @@
     return new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true);
   }
 
-  uint16_t* GetDexPCPtr() {
+  const uint16_t* GetDexPCPtr() {
     return dex_pc_ptr_;
   }
 
@@ -443,11 +440,13 @@
   ShadowFrame* link_;
   ArtMethod* method_;
   JValue* result_register_;
-  uint16_t* dex_pc_ptr_;
+  const uint16_t* dex_pc_ptr_;
   const DexFile::CodeItem* code_item_;
   LockCountData lock_count_data_;  // This may contain GC roots when lock counting is active.
   const uint32_t number_of_vregs_;
   uint32_t dex_pc_;
+  int16_t cached_hotness_countdown_;
+  int16_t hotness_countdown_;
 
   // This is a two-part array:
   //  - [0..number_of_vregs) holds the raw virtual registers, and each element here is always 4
@@ -569,6 +568,7 @@
   // when walking the stack.
   enum class StackWalkKind {
     kIncludeInlinedFrames,
+    kIncludeInlinedFramesNoResolve,
     kSkipInlinedFrames,
   };
 
@@ -633,9 +633,6 @@
   bool GetNextMethodAndDexPc(ArtMethod** next_method, uint32_t* next_dex_pc)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool IsReferenceVReg(ArtMethod* m, uint16_t vreg)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   bool GetVReg(ArtMethod* m, uint16_t vreg, VRegKind kind, uint32_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -797,9 +794,6 @@
 
   bool GetVRegFromDebuggerShadowFrame(uint16_t vreg, VRegKind kind, uint32_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool GetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
-                            uint32_t* val) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
   bool GetVRegFromOptimizedCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
                                 uint32_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -807,9 +801,6 @@
   bool GetVRegPairFromDebuggerShadowFrame(uint16_t vreg, VRegKind kind_lo, VRegKind kind_hi,
                                           uint64_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool GetVRegPairFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind kind_lo,
-                                VRegKind kind_hi, uint64_t* val) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
   bool GetVRegPairFromOptimizedCode(ArtMethod* m, uint16_t vreg,
                                     VRegKind kind_lo, VRegKind kind_hi,
                                     uint64_t* val) const
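The new CachedHotnessCountdownOffset()/HotnessCountdownOffset() accessors in the stack.h
hunk above exist so generated code can address ShadowFrame fields by raw byte offset. A
hypothetical sketch of the pattern (the struct below is an assumption for illustration,
not ART's ShadowFrame layout):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Toy frame object exposing a byte offset the way OFFSETOF_MEMBER-style
    // accessors do; JIT'd code adds the offset to the frame pointer directly.
    struct Frame {
      uint32_t dex_pc;
      int16_t cached_hotness_countdown;
      int16_t hotness_countdown;

      static size_t HotnessCountdownOffset() {
        return offsetof(Frame, hotness_countdown);
      }
    };

    int main() {
      Frame f{0, 0, 100};
      auto* raw = reinterpret_cast<int16_t*>(
          reinterpret_cast<uint8_t*>(&f) + Frame::HotnessCountdownOffset());
      std::cout << *raw << "\n";  // 100
      return 0;
    }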
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index 3093436..a7e7c21 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -56,66 +56,29 @@
     uint16_t dex_register_number,
     uint16_t number_of_dex_registers,
     const CodeInfo& code_info,
-    const StackMapEncoding& enc) const {
+    const CodeInfoEncoding& enc) const {
   DexRegisterLocationCatalog dex_register_location_catalog =
       code_info.GetDexRegisterLocationCatalog(enc);
   size_t location_catalog_entry_index = GetLocationCatalogEntryIndex(
       dex_register_number,
       number_of_dex_registers,
-      code_info.GetNumberOfLocationCatalogEntries());
+      code_info.GetNumberOfLocationCatalogEntries(enc));
   return dex_register_location_catalog.GetLocationInternalKind(location_catalog_entry_index);
 }
 
 DexRegisterLocation DexRegisterMap::GetDexRegisterLocation(uint16_t dex_register_number,
                                                            uint16_t number_of_dex_registers,
                                                            const CodeInfo& code_info,
-                                                           const StackMapEncoding& enc) const {
+                                                           const CodeInfoEncoding& enc) const {
   DexRegisterLocationCatalog dex_register_location_catalog =
       code_info.GetDexRegisterLocationCatalog(enc);
   size_t location_catalog_entry_index = GetLocationCatalogEntryIndex(
       dex_register_number,
       number_of_dex_registers,
-      code_info.GetNumberOfLocationCatalogEntries());
+      code_info.GetNumberOfLocationCatalogEntries(enc));
   return dex_register_location_catalog.GetDexRegisterLocation(location_catalog_entry_index);
 }
 
-uint32_t StackMap::LoadAt(size_t number_of_bytes, size_t offset, bool check_max) const {
-  if (number_of_bytes == 0u) {
-    DCHECK(!check_max);
-    return 0;
-  } else if (number_of_bytes == 1u) {
-    uint8_t value = region_.LoadUnaligned<uint8_t>(offset);
-    return (check_max && value == 0xFF) ? -1 : value;
-  } else if (number_of_bytes == 2u) {
-    uint16_t value = region_.LoadUnaligned<uint16_t>(offset);
-    return (check_max && value == 0xFFFF) ? -1 : value;
-  } else if (number_of_bytes == 3u) {
-    uint16_t low = region_.LoadUnaligned<uint16_t>(offset);
-    uint16_t high = region_.LoadUnaligned<uint8_t>(offset + sizeof(uint16_t));
-    uint32_t value = (high << 16) + low;
-    return (check_max && value == 0xFFFFFF) ? -1 : value;
-  } else {
-    DCHECK_EQ(number_of_bytes, 4u);
-    return region_.LoadUnaligned<uint32_t>(offset);
-  }
-}
-
-void StackMap::StoreAt(size_t number_of_bytes, size_t offset, uint32_t value) const {
-  if (number_of_bytes == 0u) {
-    DCHECK_EQ(value, 0u);
-  } else if (number_of_bytes == 1u) {
-    region_.StoreUnaligned<uint8_t>(offset, value);
-  } else if (number_of_bytes == 2u) {
-    region_.StoreUnaligned<uint16_t>(offset, value);
-  } else if (number_of_bytes == 3u) {
-    region_.StoreUnaligned<uint16_t>(offset, Low16Bits(value));
-    region_.StoreUnaligned<uint8_t>(offset + sizeof(uint16_t), High16Bits(value));
-  } else {
-    region_.StoreUnaligned<uint32_t>(offset, value);
-    DCHECK_EQ(number_of_bytes, 4u);
-  }
-}
-
 static void DumpRegisterMapping(std::ostream& os,
                                 size_t dex_register_num,
                                 DexRegisterLocation location,
@@ -126,25 +89,44 @@
      << " (" << location.GetValue() << ")" << suffix << '\n';
 }
 
+void StackMapEncoding::Dump(VariableIndentationOutputStream* vios) const {
+  vios->Stream()
+      << "StackMapEncoding"
+      << " (native_pc_bit_offset=" << static_cast<uint32_t>(kNativePcBitOffset)
+      << ", dex_pc_bit_offset=" << static_cast<uint32_t>(dex_pc_bit_offset_)
+      << ", dex_register_map_bit_offset=" << static_cast<uint32_t>(dex_register_map_bit_offset_)
+      << ", inline_info_bit_offset=" << static_cast<uint32_t>(inline_info_bit_offset_)
+      << ", register_mask_bit_offset=" << static_cast<uint32_t>(register_mask_bit_offset_)
+      << ", stack_mask_bit_offset=" << static_cast<uint32_t>(stack_mask_bit_offset_)
+      << ")\n";
+}
+
+void InlineInfoEncoding::Dump(VariableIndentationOutputStream* vios) const {
+  vios->Stream()
+      << "InlineInfoEncoding"
+      << " (method_index_bit_offset=" << static_cast<uint32_t>(kMethodIndexBitOffset)
+      << ", dex_pc_bit_offset=" << static_cast<uint32_t>(dex_pc_bit_offset_)
+      << ", invoke_type_bit_offset=" << static_cast<uint32_t>(invoke_type_bit_offset_)
+      << ", dex_register_map_bit_offset=" << static_cast<uint32_t>(dex_register_map_bit_offset_)
+      << ", total_bit_size=" << static_cast<uint32_t>(total_bit_size_)
+      << ")\n";
+}
+
 void CodeInfo::Dump(VariableIndentationOutputStream* vios,
                     uint32_t code_offset,
                     uint16_t number_of_dex_registers,
                     bool dump_stack_maps) const {
-  StackMapEncoding encoding = ExtractEncoding();
-  uint32_t code_info_size = GetOverallSize();
-  size_t number_of_stack_maps = GetNumberOfStackMaps();
+  CodeInfoEncoding encoding = ExtractEncoding();
+  size_t number_of_stack_maps = GetNumberOfStackMaps(encoding);
   vios->Stream()
-      << "Optimized CodeInfo (size=" << code_info_size
-      << ", number_of_dex_registers=" << number_of_dex_registers
+      << "Optimized CodeInfo (number_of_dex_registers=" << number_of_dex_registers
       << ", number_of_stack_maps=" << number_of_stack_maps
-      << ", has_inline_info=" << encoding.HasInlineInfo()
-      << ", number_of_bytes_for_inline_info=" << encoding.NumberOfBytesForInlineInfo()
-      << ", number_of_bytes_for_dex_register_map=" << encoding.NumberOfBytesForDexRegisterMap()
-      << ", number_of_bytes_for_dex_pc=" << encoding.NumberOfBytesForDexPc()
-      << ", number_of_bytes_for_native_pc=" << encoding.NumberOfBytesForNativePc()
-      << ", number_of_bytes_for_register_mask=" << encoding.NumberOfBytesForRegisterMask()
       << ")\n";
   ScopedIndentation indent1(vios);
+  encoding.stack_map_encoding.Dump(vios);
+  if (HasInlineInfo(encoding)) {
+    encoding.inline_info_encoding.Dump(vios);
+  }
   // Display the Dex register location catalog.
   GetDexRegisterLocationCatalog(encoding).Dump(vios, *this);
   // Display stack maps along with (live) Dex register maps.
@@ -165,8 +147,8 @@
 
 void DexRegisterLocationCatalog::Dump(VariableIndentationOutputStream* vios,
                                       const CodeInfo& code_info) {
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  size_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  size_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
   size_t location_catalog_size_in_bytes = code_info.GetDexRegisterLocationCatalogSize(encoding);
   vios->Stream()
       << "DexRegisterLocationCatalog (number_of_entries=" << number_of_location_catalog_entries
@@ -181,8 +163,8 @@
 void DexRegisterMap::Dump(VariableIndentationOutputStream* vios,
                           const CodeInfo& code_info,
                           uint16_t number_of_dex_registers) const {
-  StackMapEncoding encoding = code_info.ExtractEncoding();
-  size_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  size_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
   // TODO: Display the bit mask of live Dex registers.
   for (size_t j = 0; j < number_of_dex_registers; ++j) {
     if (IsDexRegisterLive(j)) {
@@ -202,32 +184,32 @@
 
 void StackMap::Dump(VariableIndentationOutputStream* vios,
                     const CodeInfo& code_info,
-                    const StackMapEncoding& encoding,
+                    const CodeInfoEncoding& encoding,
                     uint32_t code_offset,
                     uint16_t number_of_dex_registers,
                     const std::string& header_suffix) const {
+  StackMapEncoding stack_map_encoding = encoding.stack_map_encoding;
   vios->Stream()
       << "StackMap" << header_suffix
       << std::hex
-      << " [native_pc=0x" << code_offset + GetNativePcOffset(encoding) << "]"
-      << " (dex_pc=0x" << GetDexPc(encoding)
-      << ", native_pc_offset=0x" << GetNativePcOffset(encoding)
-      << ", dex_register_map_offset=0x" << GetDexRegisterMapOffset(encoding)
-      << ", inline_info_offset=0x" << GetInlineDescriptorOffset(encoding)
-      << ", register_mask=0x" << GetRegisterMask(encoding)
+      << " [native_pc=0x" << code_offset + GetNativePcOffset(stack_map_encoding) << "]"
+      << " (dex_pc=0x" << GetDexPc(stack_map_encoding)
+      << ", native_pc_offset=0x" << GetNativePcOffset(stack_map_encoding)
+      << ", dex_register_map_offset=0x" << GetDexRegisterMapOffset(stack_map_encoding)
+      << ", inline_info_offset=0x" << GetInlineDescriptorOffset(stack_map_encoding)
+      << ", register_mask=0x" << GetRegisterMask(stack_map_encoding)
       << std::dec
       << ", stack_mask=0b";
-  MemoryRegion stack_mask = GetStackMask(encoding);
-  for (size_t i = 0, e = stack_mask.size_in_bits(); i < e; ++i) {
-    vios->Stream() << stack_mask.LoadBit(e - i - 1);
+  for (size_t i = 0, e = GetNumberOfStackMaskBits(stack_map_encoding); i < e; ++i) {
+    vios->Stream() << GetStackMaskBit(stack_map_encoding, e - i - 1);
   }
   vios->Stream() << ")\n";
-  if (HasDexRegisterMap(encoding)) {
+  if (HasDexRegisterMap(stack_map_encoding)) {
     DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(
         *this, encoding, number_of_dex_registers);
     dex_register_map.Dump(vios, code_info, number_of_dex_registers);
   }
-  if (HasInlineInfo(encoding)) {
+  if (HasInlineInfo(stack_map_encoding)) {
     InlineInfo inline_info = code_info.GetInlineInfoOf(*this, encoding);
     // We do not know the length of the dex register maps of inlined frames
     // at this level, so we just pass null to `InlineInfo::Dump` to tell
@@ -239,19 +221,23 @@
 void InlineInfo::Dump(VariableIndentationOutputStream* vios,
                       const CodeInfo& code_info,
                       uint16_t number_of_dex_registers[]) const {
-  vios->Stream() << "InlineInfo with depth " << static_cast<uint32_t>(GetDepth()) << "\n";
+  InlineInfoEncoding inline_info_encoding = code_info.ExtractEncoding().inline_info_encoding;
+  vios->Stream() << "InlineInfo with depth "
+                 << static_cast<uint32_t>(GetDepth(inline_info_encoding))
+                 << "\n";
 
-  for (size_t i = 0; i < GetDepth(); ++i) {
+  for (size_t i = 0; i < GetDepth(inline_info_encoding); ++i) {
     vios->Stream()
         << " At depth " << i
         << std::hex
-        << " (dex_pc=0x" << GetDexPcAtDepth(i)
+        << " (dex_pc=0x" << GetDexPcAtDepth(inline_info_encoding, i)
         << std::dec
-        << ", method_index=" << GetMethodIndexAtDepth(i)
-        << ", invoke_type=" << static_cast<InvokeType>(GetInvokeTypeAtDepth(i))
+        << ", method_index=" << GetMethodIndexAtDepth(inline_info_encoding, i)
+        << ", invoke_type=" << static_cast<InvokeType>(GetInvokeTypeAtDepth(inline_info_encoding,
+                                                                            i))
         << ")\n";
-    if (HasDexRegisterMapAtDepth(i) && (number_of_dex_registers != nullptr)) {
-      StackMapEncoding encoding = code_info.ExtractEncoding();
+    if (HasDexRegisterMapAtDepth(inline_info_encoding, i) && (number_of_dex_registers != nullptr)) {
+      CodeInfoEncoding encoding = code_info.ExtractEncoding();
       DexRegisterMap dex_register_map =
           code_info.GetDexRegisterMapAtDepth(i, *this, encoding, number_of_dex_registers[i]);
       ScopedIndentation indent1(vios);
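The stack-mask loop in StackMap::Dump above indexes bits as e - i - 1, i.e. it prints
most-significant-first. A toy version of that ordering (mask value and width made up):

    #include <cstddef>
    #include <iostream>

    int main() {
      unsigned mask = 0b1011;  // Hypothetical 6-bit stack mask.
      size_t bits = 6;
      std::cout << "stack_mask=0b";
      for (size_t i = 0; i < bits; ++i) {
        std::cout << ((mask >> (bits - i - 1)) & 1u);  // The e - i - 1 indexing.
      }
      std::cout << "\n";  // Prints: stack_mask=0b001011
      return 0;
    }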
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index dbf23aa..7c50f97 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -20,15 +20,10 @@
 #include "base/bit_vector.h"
 #include "base/bit_utils.h"
 #include "memory_region.h"
+#include "leb128.h"
 
 namespace art {
 
-#define ELEMENT_BYTE_OFFSET_AFTER(PreviousElement) \
-  k ## PreviousElement ## Offset + sizeof(PreviousElement ## Type)
-
-#define ELEMENT_BIT_OFFSET_AFTER(PreviousElement) \
-  k ## PreviousElement ## BitOffset + PreviousElement ## BitSize
-
 class VariableIndentationOutputStream;
 
 // Size of a frame slot, in bytes.  This constant is a signed value,
@@ -39,13 +34,9 @@
 // Size of Dex virtual registers.
 static constexpr size_t kVRegSize = 4;
 
-// We encode the number of bytes needed for writing a value on 3 bits
-// (i.e. up to 8 values), for values that we know are maximum 32-bit
-// long.
-static constexpr size_t kNumberOfBitForNumberOfBytesForEncoding = 3;
-
 class CodeInfo;
 class StackMapEncoding;
+struct CodeInfoEncoding;
 
 /**
  * Classes in the following file are wrapper on stack map information backed
@@ -459,7 +450,7 @@
   DexRegisterLocation::Kind GetLocationKind(uint16_t dex_register_number,
                                             uint16_t number_of_dex_registers,
                                             const CodeInfo& code_info,
-                                            const StackMapEncoding& enc) const {
+                                            const CodeInfoEncoding& enc) const {
     return DexRegisterLocation::ConvertToSurfaceKind(
         GetLocationInternalKind(dex_register_number, number_of_dex_registers, code_info, enc));
   }
@@ -468,18 +459,18 @@
   DexRegisterLocation::Kind GetLocationInternalKind(uint16_t dex_register_number,
                                                     uint16_t number_of_dex_registers,
                                                     const CodeInfo& code_info,
-                                                    const StackMapEncoding& enc) const;
+                                                    const CodeInfoEncoding& enc) const;
 
   // Get the Dex register location `dex_register_number`.
   DexRegisterLocation GetDexRegisterLocation(uint16_t dex_register_number,
                                              uint16_t number_of_dex_registers,
                                              const CodeInfo& code_info,
-                                             const StackMapEncoding& enc) const;
+                                             const CodeInfoEncoding& enc) const;
 
   int32_t GetStackOffsetInBytes(uint16_t dex_register_number,
                                 uint16_t number_of_dex_registers,
                                 const CodeInfo& code_info,
-                                const StackMapEncoding& enc) const {
+                                const CodeInfoEncoding& enc) const {
     DexRegisterLocation location =
         GetDexRegisterLocation(dex_register_number, number_of_dex_registers, code_info, enc);
     DCHECK(location.GetKind() == DexRegisterLocation::Kind::kInStack);
@@ -490,7 +481,7 @@
   int32_t GetConstant(uint16_t dex_register_number,
                       uint16_t number_of_dex_registers,
                       const CodeInfo& code_info,
-                      const StackMapEncoding& enc) const {
+                      const CodeInfoEncoding& enc) const {
     DexRegisterLocation location =
         GetDexRegisterLocation(dex_register_number, number_of_dex_registers, code_info, enc);
     DCHECK_EQ(location.GetKind(), DexRegisterLocation::Kind::kConstant);
@@ -500,7 +491,7 @@
   int32_t GetMachineRegister(uint16_t dex_register_number,
                              uint16_t number_of_dex_registers,
                              const CodeInfo& code_info,
-                             const StackMapEncoding& enc) const {
+                             const CodeInfoEncoding& enc) const {
     DexRegisterLocation location =
         GetDexRegisterLocation(dex_register_number, number_of_dex_registers, code_info, enc);
     DCHECK(location.GetInternalKind() == DexRegisterLocation::Kind::kInRegister ||
@@ -657,109 +648,131 @@
   friend class StackMapStream;
 };
 
+// Represents the bit range of a bit-packed integer field.
+// We reuse the idea from ULEB128p1 to support encoding of -1 (aka 0xFFFFFFFF).
+// If min_value is set to -1, we implicitly subtract one from any loaded value
+// and add one to any stored value. This generalizes to any negative value:
+// min_value acts as a base to which the stored value is added.
+struct FieldEncoding {
+  FieldEncoding(size_t start_offset, size_t end_offset, int32_t min_value = 0)
+      : start_offset_(start_offset), end_offset_(end_offset), min_value_(min_value) {
+    DCHECK_LE(start_offset_, end_offset_);
+    DCHECK_LE(BitSize(), 32u);
+  }
+
+  ALWAYS_INLINE size_t BitSize() const { return end_offset_ - start_offset_; }
+
+  ALWAYS_INLINE int32_t Load(const MemoryRegion& region) const {
+    DCHECK_LE(end_offset_, region.size_in_bits());
+    const size_t bit_count = BitSize();
+    if (bit_count == 0) {
+      // Do not touch any memory if the range is empty.
+      return min_value_;
+    }
+    uint8_t* address = region.start() + start_offset_ / kBitsPerByte;
+    const uint32_t shift = start_offset_ & (kBitsPerByte - 1);
+    // Load the value (reading only the strictly needed bytes).
+    const uint32_t load_bit_count = shift + bit_count;
+    uint32_t value = *address++ >> shift;
+    if (load_bit_count > 8) {
+      value |= static_cast<uint32_t>(*address++) << (8 - shift);
+      if (load_bit_count > 16) {
+        value |= static_cast<uint32_t>(*address++) << (16 - shift);
+        if (load_bit_count > 24) {
+          value |= static_cast<uint32_t>(*address++) << (24 - shift);
+          if (load_bit_count > 32) {
+            value |= static_cast<uint32_t>(*address++) << (32 - shift);
+          }
+        }
+      }
+    }
+    // Clear unwanted most significant bits.
+    uint32_t clear_bit_count = 32 - bit_count;
+    value = (value << clear_bit_count) >> clear_bit_count;
+    return value + min_value_;
+  }
+
+  ALWAYS_INLINE void Store(MemoryRegion region, int32_t value) const {
+    region.StoreBits(start_offset_, value - min_value_, BitSize());
+    DCHECK_EQ(Load(region), value);
+  }
+
+ private:
+  size_t start_offset_;
+  size_t end_offset_;
+  int32_t min_value_;
+};
+
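A worked sketch of the min_value trick on a single 32-bit word (BitField below is a
simplified stand-in for FieldEncoding; the offsets and widths are arbitrary). With
min_value = -1, an 8-bit field round-trips -1 (e.g. kNoInlineInfo) as raw 0:

    #include <cstdint>
    #include <iostream>

    struct BitField {
      uint32_t start;     // First bit of the field.
      uint32_t size;      // Width in bits.
      int32_t min_value;  // Base added to the raw stored value.

      int32_t Load(uint32_t word) const {
        if (size == 0) return min_value;  // Empty range: only min_value fits.
        uint32_t raw = (word >> start) & ((1u << size) - 1u);
        return static_cast<int32_t>(raw) + min_value;
      }

      uint32_t Store(uint32_t word, int32_t value) const {
        uint32_t raw = static_cast<uint32_t>(value - min_value);
        uint32_t mask = ((1u << size) - 1u) << start;
        return (word & ~mask) | ((raw << start) & mask);
      }
    };

    int main() {
      BitField inline_info{4, 8, -1};  // 8 bits at offset 4, base -1.
      uint32_t word = 0;
      word = inline_info.Store(word, -1);           // Encoded as raw 0.
      std::cout << inline_info.Load(word) << "\n";  // -1
      word = inline_info.Store(word, 200);          // Raw 201 still fits 8 bits.
      std::cout << inline_info.Load(word) << "\n";  // 200
      return 0;
    }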
 class StackMapEncoding {
  public:
   StackMapEncoding() {}
 
-  StackMapEncoding(size_t stack_mask_size,
-                   size_t bytes_for_inline_info,
-                   size_t bytes_for_dex_register_map,
-                   size_t bytes_for_dex_pc,
-                   size_t bytes_for_native_pc,
-                   size_t bytes_for_register_mask)
-      : bytes_for_stack_mask_(stack_mask_size),
-        bytes_for_inline_info_(bytes_for_inline_info),
-        bytes_for_dex_register_map_(bytes_for_dex_register_map),
-        bytes_for_dex_pc_(bytes_for_dex_pc),
-        bytes_for_native_pc_(bytes_for_native_pc),
-        bytes_for_register_mask_(bytes_for_register_mask) {}
+  // Set the stack map bit layout based on the given sizes.
+  // Returns the size of one stack map entry in bytes.
+  size_t SetFromSizes(size_t native_pc_max,
+                      size_t dex_pc_max,
+                      size_t dex_register_map_size,
+                      size_t inline_info_size,
+                      size_t register_mask_max,
+                      size_t stack_mask_bit_size) {
+    size_t bit_offset = 0;
+    DCHECK_EQ(kNativePcBitOffset, bit_offset);
+    bit_offset += MinimumBitsToStore(native_pc_max);
 
-  static StackMapEncoding CreateFromSizes(size_t stack_mask_size,
-                                          size_t inline_info_size,
-                                          size_t dex_register_map_size,
-                                          size_t dex_pc_max,
-                                          size_t native_pc_max,
-                                          size_t register_mask_max) {
-    return StackMapEncoding(
-        stack_mask_size,
-        // + 1 to also encode kNoInlineInfo: if an inline info offset
-        // is at 0xFF, we want to overflow to a larger encoding, because it will
-        // conflict with kNoInlineInfo.
-        // The offset is relative to the dex register map. TODO: Change this.
-        inline_info_size == 0
-          ? 0
-          : EncodingSizeInBytes(dex_register_map_size + inline_info_size + 1),
-        // + 1 to also encode kNoDexRegisterMap: if a dex register map offset
-        // is at 0xFF, we want to overflow to a larger encoding, because it will
-        // conflict with kNoDexRegisterMap.
-        EncodingSizeInBytes(dex_register_map_size + 1),
-        EncodingSizeInBytes(dex_pc_max),
-        EncodingSizeInBytes(native_pc_max),
-        EncodingSizeInBytes(register_mask_max));
+    dex_pc_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
+    bit_offset += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
+
+    // We also need +1 for kNoDexRegisterMap, but since the size is strictly
+    // greater than any offset we might try to encode, we already implicitly have it.
+    dex_register_map_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
+    bit_offset += MinimumBitsToStore(dex_register_map_size);
+
+    // We also need +1 for kNoInlineInfo, but since the inline_info_size is strictly
+    // greater than the offset we might try to encode, we already implicitly have it.
+    // If inline_info_size is zero, we can encode only kNoInlineInfo (in zero bits).
+    inline_info_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
+    if (inline_info_size != 0) {
+      bit_offset += MinimumBitsToStore(dex_register_map_size + inline_info_size);
+    }
+
+    register_mask_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
+    bit_offset += MinimumBitsToStore(register_mask_max);
+
+    stack_mask_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
+    bit_offset += stack_mask_bit_size;
+
+    return RoundUp(bit_offset, kBitsPerByte) / kBitsPerByte;
   }
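To see how SetFromSizes() arrives at a per-entry byte size, here is a worked example with
hypothetical maxima (MinimumBitsToStore below is a freestanding stand-in for ART's helper
of the same name):

    #include <cstdint>
    #include <iostream>

    static uint32_t MinimumBitsToStore(uint32_t value) {
      uint32_t bits = 0;
      while (value != 0) { value >>= 1; ++bits; }
      return bits;
    }

    int main() {
      uint32_t bit_offset = 0;
      bit_offset += MinimumBitsToStore(0x3FF);    // native_pc_max           -> 10 bits
      bit_offset += MinimumBitsToStore(1 + 100);  // kNoDexPc + dex_pc_max   ->  7 bits
      bit_offset += MinimumBitsToStore(50);       // dex_register_map_size   ->  6 bits
      bit_offset += MinimumBitsToStore(50 + 120); // map + inline_info_size  ->  8 bits
      bit_offset += MinimumBitsToStore(0xF0);     // register_mask_max       ->  8 bits
      bit_offset += 12;                           // stack_mask_bit_size     -> 12 bits
      // RoundUp(51, kBitsPerByte) / kBitsPerByte == 7 bytes per stack map entry.
      std::cout << bit_offset << " bits -> " << (bit_offset + 7) / 8 << " bytes\n";
      return 0;
    }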
 
-  // Get the size of one stack map of this CodeInfo object, in bytes.
-  // All stack maps of a CodeInfo have the same size.
-  size_t ComputeStackMapSize() const {
-    return bytes_for_register_mask_
-         + bytes_for_stack_mask_
-         + bytes_for_inline_info_
-         + bytes_for_dex_register_map_
-         + bytes_for_dex_pc_
-         + bytes_for_native_pc_;
+  ALWAYS_INLINE FieldEncoding GetNativePcEncoding() const {
+    return FieldEncoding(kNativePcBitOffset, dex_pc_bit_offset_);
+  }
+  ALWAYS_INLINE FieldEncoding GetDexPcEncoding() const {
+    return FieldEncoding(dex_pc_bit_offset_, dex_register_map_bit_offset_, -1 /* min_value */);
+  }
+  ALWAYS_INLINE FieldEncoding GetDexRegisterMapEncoding() const {
+    return FieldEncoding(dex_register_map_bit_offset_, inline_info_bit_offset_, -1 /* min_value */);
+  }
+  ALWAYS_INLINE FieldEncoding GetInlineInfoEncoding() const {
+    return FieldEncoding(inline_info_bit_offset_, register_mask_bit_offset_, -1 /* min_value */);
+  }
+  ALWAYS_INLINE FieldEncoding GetRegisterMaskEncoding() const {
+    return FieldEncoding(register_mask_bit_offset_, stack_mask_bit_offset_);
+  }
+  ALWAYS_INLINE size_t GetStackMaskBitOffset() const {
+    // The end offset is not encoded. It is implicitly the end of the stack map entry.
+    return stack_mask_bit_offset_;
   }
 
-  bool HasInlineInfo() const { return bytes_for_inline_info_ > 0; }
-
-  size_t NumberOfBytesForStackMask() const { return bytes_for_stack_mask_; }
-  size_t NumberOfBytesForInlineInfo() const { return bytes_for_inline_info_; }
-  size_t NumberOfBytesForDexRegisterMap() const { return bytes_for_dex_register_map_; }
-  size_t NumberOfBytesForDexPc() const { return bytes_for_dex_pc_; }
-  size_t NumberOfBytesForNativePc() const { return bytes_for_native_pc_; }
-  size_t NumberOfBytesForRegisterMask() const { return bytes_for_register_mask_; }
-
-  size_t ComputeStackMapRegisterMaskOffset() const {
-    return kRegisterMaskOffset;
-  }
-
-  size_t ComputeStackMapStackMaskOffset() const {
-    return ComputeStackMapRegisterMaskOffset() + bytes_for_register_mask_;
-  }
-
-  size_t ComputeStackMapDexPcOffset() const {
-    return ComputeStackMapStackMaskOffset() + bytes_for_stack_mask_;
-  }
-
-  size_t ComputeStackMapNativePcOffset() const {
-    return ComputeStackMapDexPcOffset() + bytes_for_dex_pc_;
-  }
-
-  size_t ComputeStackMapDexRegisterMapOffset() const {
-    return ComputeStackMapNativePcOffset() + bytes_for_native_pc_;
-  }
-
-  size_t ComputeStackMapInlineInfoOffset() const {
-    return ComputeStackMapDexRegisterMapOffset() + bytes_for_dex_register_map_;
-  }
+  void Dump(VariableIndentationOutputStream* vios) const;
 
  private:
-  static size_t EncodingSizeInBytes(size_t max_element) {
-    DCHECK(IsUint<32>(max_element));
-    return (max_element == 0) ? 0
-         : IsUint<8>(max_element) ? 1
-         : IsUint<16>(max_element) ? 2
-         : IsUint<24>(max_element) ? 3
-         : 4;
-  }
-
-  static constexpr int kRegisterMaskOffset = 0;
-
-  size_t bytes_for_stack_mask_;
-  size_t bytes_for_inline_info_;
-  size_t bytes_for_dex_register_map_;
-  size_t bytes_for_dex_pc_;
-  size_t bytes_for_native_pc_;
-  size_t bytes_for_register_mask_;
+  static constexpr size_t kNativePcBitOffset = 0;
+  uint8_t dex_pc_bit_offset_;
+  uint8_t dex_register_map_bit_offset_;
+  uint8_t inline_info_bit_offset_;
+  uint8_t register_mask_bit_offset_;
+  uint8_t stack_mask_bit_offset_;
 };
 
 /**
@@ -772,7 +785,7 @@
  *
  * The information is of the form:
  *
- *   [dex_pc, native_pc_offset, dex_register_map_offset, inlining_info_offset, register_mask,
+ *   [native_pc_offset, dex_pc, dex_register_map_offset, inlining_info_offset, register_mask,
  *   stack_mask].
  */
 class StackMap {
@@ -780,89 +793,75 @@
   StackMap() {}
   explicit StackMap(MemoryRegion region) : region_(region) {}
 
-  bool IsValid() const { return region_.pointer() != nullptr; }
+  ALWAYS_INLINE bool IsValid() const { return region_.pointer() != nullptr; }
 
-  uint32_t GetDexPc(const StackMapEncoding& encoding) const {
-    return LoadAt(encoding.NumberOfBytesForDexPc(), encoding.ComputeStackMapDexPcOffset());
+  ALWAYS_INLINE uint32_t GetDexPc(const StackMapEncoding& encoding) const {
+    return encoding.GetDexPcEncoding().Load(region_);
   }
 
-  void SetDexPc(const StackMapEncoding& encoding, uint32_t dex_pc) {
-    StoreAt(encoding.NumberOfBytesForDexPc(), encoding.ComputeStackMapDexPcOffset(), dex_pc);
+  ALWAYS_INLINE void SetDexPc(const StackMapEncoding& encoding, uint32_t dex_pc) {
+    encoding.GetDexPcEncoding().Store(region_, dex_pc);
   }
 
-  uint32_t GetNativePcOffset(const StackMapEncoding& encoding) const {
-    return LoadAt(encoding.NumberOfBytesForNativePc(), encoding.ComputeStackMapNativePcOffset());
+  ALWAYS_INLINE uint32_t GetNativePcOffset(const StackMapEncoding& encoding) const {
+    return encoding.GetNativePcEncoding().Load(region_);
   }
 
-  void SetNativePcOffset(const StackMapEncoding& encoding, uint32_t native_pc_offset) {
-    StoreAt(encoding.NumberOfBytesForNativePc(),
-            encoding.ComputeStackMapNativePcOffset(),
-            native_pc_offset);
+  ALWAYS_INLINE void SetNativePcOffset(const StackMapEncoding& encoding, uint32_t native_pc_offset) {
+    encoding.GetNativePcEncoding().Store(region_, native_pc_offset);
   }
 
-  uint32_t GetDexRegisterMapOffset(const StackMapEncoding& encoding) const {
-    return LoadAt(encoding.NumberOfBytesForDexRegisterMap(),
-                  encoding.ComputeStackMapDexRegisterMapOffset(),
-                  /* check_max */ true);
+  ALWAYS_INLINE uint32_t GetDexRegisterMapOffset(const StackMapEncoding& encoding) const {
+    return encoding.GetDexRegisterMapEncoding().Load(region_);
   }
 
-  void SetDexRegisterMapOffset(const StackMapEncoding& encoding, uint32_t offset) {
-    StoreAt(encoding.NumberOfBytesForDexRegisterMap(),
-            encoding.ComputeStackMapDexRegisterMapOffset(),
-            offset);
+  ALWAYS_INLINE void SetDexRegisterMapOffset(const StackMapEncoding& encoding, uint32_t offset) {
+    encoding.GetDexRegisterMapEncoding().Store(region_, offset);
   }
 
-  uint32_t GetInlineDescriptorOffset(const StackMapEncoding& encoding) const {
-    if (!encoding.HasInlineInfo()) return kNoInlineInfo;
-    return LoadAt(encoding.NumberOfBytesForInlineInfo(),
-                  encoding.ComputeStackMapInlineInfoOffset(),
-                  /* check_max */ true);
+  ALWAYS_INLINE uint32_t GetInlineDescriptorOffset(const StackMapEncoding& encoding) const {
+    return encoding.GetInlineInfoEncoding().Load(region_);
   }
 
-  void SetInlineDescriptorOffset(const StackMapEncoding& encoding, uint32_t offset) {
-    DCHECK(encoding.HasInlineInfo());
-    StoreAt(encoding.NumberOfBytesForInlineInfo(),
-            encoding.ComputeStackMapInlineInfoOffset(),
-            offset);
+  ALWAYS_INLINE void SetInlineDescriptorOffset(const StackMapEncoding& encoding, uint32_t offset) {
+    encoding.GetInlineInfoEncoding().Store(region_, offset);
   }
 
-  uint32_t GetRegisterMask(const StackMapEncoding& encoding) const {
-    return LoadAt(encoding.NumberOfBytesForRegisterMask(),
-                  encoding.ComputeStackMapRegisterMaskOffset());
+  ALWAYS_INLINE uint32_t GetRegisterMask(const StackMapEncoding& encoding) const {
+    return encoding.GetRegisterMaskEncoding().Load(region_);
   }
 
-  void SetRegisterMask(const StackMapEncoding& encoding, uint32_t mask) {
-    StoreAt(encoding.NumberOfBytesForRegisterMask(),
-            encoding.ComputeStackMapRegisterMaskOffset(),
-            mask);
+  ALWAYS_INLINE void SetRegisterMask(const StackMapEncoding& encoding, uint32_t mask) {
+    encoding.GetRegisterMaskEncoding().Store(region_, mask);
   }
 
-  MemoryRegion GetStackMask(const StackMapEncoding& encoding) const {
-    return region_.Subregion(encoding.ComputeStackMapStackMaskOffset(),
-                             encoding.NumberOfBytesForStackMask());
+  ALWAYS_INLINE size_t GetNumberOfStackMaskBits(const StackMapEncoding& encoding) const {
+    return region_.size_in_bits() - encoding.GetStackMaskBitOffset();
   }
 
-  void SetStackMask(const StackMapEncoding& encoding, const BitVector& sp_map) {
-    MemoryRegion region = GetStackMask(encoding);
-    sp_map.CopyTo(region.start(), region.size());
+  ALWAYS_INLINE bool GetStackMaskBit(const StackMapEncoding& encoding, size_t index) const {
+    return region_.LoadBit(encoding.GetStackMaskBitOffset() + index);
   }
 
-  bool HasDexRegisterMap(const StackMapEncoding& encoding) const {
+  ALWAYS_INLINE void SetStackMaskBit(const StackMapEncoding& encoding, size_t index, bool value) {
+    region_.StoreBit(encoding.GetStackMaskBitOffset() + index, value);
+  }
+
+  ALWAYS_INLINE bool HasDexRegisterMap(const StackMapEncoding& encoding) const {
     return GetDexRegisterMapOffset(encoding) != kNoDexRegisterMap;
   }
 
-  bool HasInlineInfo(const StackMapEncoding& encoding) const {
+  ALWAYS_INLINE bool HasInlineInfo(const StackMapEncoding& encoding) const {
     return GetInlineDescriptorOffset(encoding) != kNoInlineInfo;
   }
 
-  bool Equals(const StackMap& other) const {
-    return region_.pointer() == other.region_.pointer()
-       && region_.size() == other.region_.size();
+  ALWAYS_INLINE bool Equals(const StackMap& other) const {
+    return region_.pointer() == other.region_.pointer() && region_.size() == other.region_.size();
   }
 
   void Dump(VariableIndentationOutputStream* vios,
             const CodeInfo& code_info,
-            const StackMapEncoding& encoding,
+            const CodeInfoEncoding& encoding,
             uint32_t code_offset,
             uint16_t number_of_dex_registers,
             const std::string& header_suffix = "") const;
@@ -878,250 +877,275 @@
  private:
   static constexpr int kFixedSize = 0;
 
-  // Loads `number_of_bytes` at the given `offset` and assemble a uint32_t. If `check_max` is true,
-  // this method converts a maximum value of size `number_of_bytes` into a uint32_t 0xFFFFFFFF.
-  uint32_t LoadAt(size_t number_of_bytes, size_t offset, bool check_max = false) const;
-  void StoreAt(size_t number_of_bytes, size_t offset, uint32_t value) const;
-
   MemoryRegion region_;
 
   friend class StackMapStream;
 };
 
+class InlineInfoEncoding {
+ public:
+  void SetFromSizes(size_t method_index_max,
+                    size_t dex_pc_max,
+                    size_t invoke_type_max,
+                    size_t dex_register_map_size) {
+    total_bit_size_ = kMethodIndexBitOffset;
+    total_bit_size_ += MinimumBitsToStore(method_index_max);
+
+    dex_pc_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
+    total_bit_size_ += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
+
+    invoke_type_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
+    total_bit_size_ += MinimumBitsToStore(invoke_type_max);
+
+    // We also need +1 for kNoDexRegisterMap, but since the size is strictly
+    // greater than any offset we might try to encode, we already implicitly have it.
+    dex_register_map_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
+    total_bit_size_ += MinimumBitsToStore(dex_register_map_size);
+  }
+
+  ALWAYS_INLINE FieldEncoding GetMethodIndexEncoding() const {
+    return FieldEncoding(kMethodIndexBitOffset, dex_pc_bit_offset_);
+  }
+  ALWAYS_INLINE FieldEncoding GetDexPcEncoding() const {
+    return FieldEncoding(dex_pc_bit_offset_, invoke_type_bit_offset_, -1 /* min_value */);
+  }
+  ALWAYS_INLINE FieldEncoding GetInvokeTypeEncoding() const {
+    return FieldEncoding(invoke_type_bit_offset_, dex_register_map_bit_offset_);
+  }
+  ALWAYS_INLINE FieldEncoding GetDexRegisterMapEncoding() const {
+    return FieldEncoding(dex_register_map_bit_offset_, total_bit_size_, -1 /* min_value */);
+  }
+  ALWAYS_INLINE size_t GetEntrySize() const {
+    return RoundUp(total_bit_size_, kBitsPerByte) / kBitsPerByte;
+  }
+
+  void Dump(VariableIndentationOutputStream* vios) const;
+
+ private:
+  static constexpr uint8_t kIsLastBitOffset = 0;
+  static constexpr uint8_t kMethodIndexBitOffset = 1;
+  uint8_t dex_pc_bit_offset_;
+  uint8_t invoke_type_bit_offset_;
+  uint8_t dex_register_map_bit_offset_;
+  uint8_t total_bit_size_;
+};
+
 /**
  * Inline information for a specific PC. The information is of the form:
  *
- *   [inlining_depth, entry+]
- *
- * where `entry` is of the form:
- *
- *   [dex_pc, method_index, dex_register_map_offset].
+ *   [is_last, method_index, dex_pc, invoke_type, dex_register_map_offset]+.
  */
 class InlineInfo {
  public:
-  // Memory layout: fixed contents.
-  typedef uint8_t DepthType;
-  // Memory layout: single entry contents.
-  typedef uint32_t MethodIndexType;
-  typedef uint32_t DexPcType;
-  typedef uint8_t InvokeTypeType;
-  typedef uint32_t DexRegisterMapType;
-
-  explicit InlineInfo(MemoryRegion region) : region_(region) {}
-
-  DepthType GetDepth() const {
-    return region_.LoadUnaligned<DepthType>(kDepthOffset);
+  explicit InlineInfo(MemoryRegion region) : region_(region) {
   }
 
-  void SetDepth(DepthType depth) {
-    region_.StoreUnaligned<DepthType>(kDepthOffset, depth);
+  ALWAYS_INLINE uint32_t GetDepth(const InlineInfoEncoding& encoding) const {
+    size_t depth = 0;
+    while (!GetRegionAtDepth(encoding, depth++).LoadBit(0)) { }  // Check is_last bit.
+    return depth;
   }
 
-  MethodIndexType GetMethodIndexAtDepth(DepthType depth) const {
-    return region_.LoadUnaligned<MethodIndexType>(
-        kFixedSize + depth * SingleEntrySize() + kMethodIndexOffset);
+  ALWAYS_INLINE void SetDepth(const InlineInfoEncoding& encoding, uint32_t depth) {
+    DCHECK_GT(depth, 0u);
+    for (size_t d = 0; d < depth; ++d) {
+      GetRegionAtDepth(encoding, d).StoreBit(0, d == depth - 1);  // Set is_last bit.
+    }
   }
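The inlining depth is no longer stored explicitly: GetDepth() above counts fixed-size
entries until it finds one whose is_last bit is set, and SetDepth() writes that bit. A
byte-array sketch of the scheme (the entry size here is made up):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
      const size_t kEntrySize = 4;  // Hypothetical bytes per inline entry.
      std::vector<uint8_t> region(3 * kEntrySize, 0);

      // SetDepth(3): mark only the final entry with the is_last bit.
      for (size_t d = 0; d < 3; ++d) {
        region[d * kEntrySize] |= (d == 2) ? 1 : 0;
      }

      // GetDepth(): scan entries until the is_last bit is found.
      size_t depth = 0;
      while ((region[depth++ * kEntrySize] & 1) == 0) { }
      std::cout << "depth = " << depth << "\n";  // 3
      return 0;
    }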
 
-  void SetMethodIndexAtDepth(DepthType depth, MethodIndexType index) {
-    region_.StoreUnaligned<MethodIndexType>(
-        kFixedSize + depth * SingleEntrySize() + kMethodIndexOffset, index);
+  ALWAYS_INLINE uint32_t GetMethodIndexAtDepth(const InlineInfoEncoding& encoding,
+                                               uint32_t depth) const {
+    return encoding.GetMethodIndexEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  DexPcType GetDexPcAtDepth(DepthType depth) const {
-    return region_.LoadUnaligned<DexPcType>(
-        kFixedSize + depth * SingleEntrySize() + kDexPcOffset);
+  ALWAYS_INLINE void SetMethodIndexAtDepth(const InlineInfoEncoding& encoding,
+                                           uint32_t depth,
+                                           uint32_t index) {
+    encoding.GetMethodIndexEncoding().Store(GetRegionAtDepth(encoding, depth), index);
   }
 
-  void SetDexPcAtDepth(DepthType depth, DexPcType dex_pc) {
-    region_.StoreUnaligned<DexPcType>(
-        kFixedSize + depth * SingleEntrySize() + kDexPcOffset, dex_pc);
+  ALWAYS_INLINE uint32_t GetDexPcAtDepth(const InlineInfoEncoding& encoding,
+                                         uint32_t depth) const {
+    return encoding.GetDexPcEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  InvokeTypeType GetInvokeTypeAtDepth(DepthType depth) const {
-    return region_.LoadUnaligned<InvokeTypeType>(
-        kFixedSize + depth * SingleEntrySize() + kInvokeTypeOffset);
+  ALWAYS_INLINE void SetDexPcAtDepth(const InlineInfoEncoding& encoding,
+                                     uint32_t depth,
+                                     uint32_t dex_pc) {
+    encoding.GetDexPcEncoding().Store(GetRegionAtDepth(encoding, depth), dex_pc);
   }
 
-  void SetInvokeTypeAtDepth(DepthType depth, InvokeTypeType invoke_type) {
-    region_.StoreUnaligned<InvokeTypeType>(
-        kFixedSize + depth * SingleEntrySize() + kInvokeTypeOffset, invoke_type);
+  ALWAYS_INLINE uint32_t GetInvokeTypeAtDepth(const InlineInfoEncoding& encoding,
+                                              uint32_t depth) const {
+    return encoding.GetInvokeTypeEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  DexRegisterMapType GetDexRegisterMapOffsetAtDepth(DepthType depth) const {
-    return region_.LoadUnaligned<DexRegisterMapType>(
-        kFixedSize + depth * SingleEntrySize() + kDexRegisterMapOffset);
+  ALWAYS_INLINE void SetInvokeTypeAtDepth(const InlineInfoEncoding& encoding,
+                                          uint32_t depth,
+                                          uint32_t invoke_type) {
+    encoding.GetInvokeTypeEncoding().Store(GetRegionAtDepth(encoding, depth), invoke_type);
   }
 
-  void SetDexRegisterMapOffsetAtDepth(DepthType depth, DexRegisterMapType offset) {
-    region_.StoreUnaligned<DexRegisterMapType>(
-        kFixedSize + depth * SingleEntrySize() + kDexRegisterMapOffset, offset);
+  ALWAYS_INLINE uint32_t GetDexRegisterMapOffsetAtDepth(const InlineInfoEncoding& encoding,
+                                                        uint32_t depth) const {
+    return encoding.GetDexRegisterMapEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  bool HasDexRegisterMapAtDepth(DepthType depth) const {
-    return GetDexRegisterMapOffsetAtDepth(depth) != StackMap::kNoDexRegisterMap;
+  ALWAYS_INLINE void SetDexRegisterMapOffsetAtDepth(const InlineInfoEncoding& encoding,
+                                                    uint32_t depth,
+                                                    uint32_t offset) {
+    encoding.GetDexRegisterMapEncoding().Store(GetRegionAtDepth(encoding, depth), offset);
   }
 
-  static size_t SingleEntrySize() {
-    return kFixedEntrySize;
+  ALWAYS_INLINE bool HasDexRegisterMapAtDepth(const InlineInfoEncoding& encoding,
+                                              uint32_t depth) const {
+    return GetDexRegisterMapOffsetAtDepth(encoding, depth) != StackMap::kNoDexRegisterMap;
   }
 
   void Dump(VariableIndentationOutputStream* vios,
-            const CodeInfo& info, uint16_t* number_of_dex_registers) const;
-
+            const CodeInfo& info,
+            uint16_t* number_of_dex_registers) const;
 
  private:
-  static constexpr int kDepthOffset = 0;
-  static constexpr int kFixedSize = ELEMENT_BYTE_OFFSET_AFTER(Depth);
-
-  static constexpr int kMethodIndexOffset = 0;
-  static constexpr int kDexPcOffset = ELEMENT_BYTE_OFFSET_AFTER(MethodIndex);
-  static constexpr int kInvokeTypeOffset = ELEMENT_BYTE_OFFSET_AFTER(DexPc);
-  static constexpr int kDexRegisterMapOffset = ELEMENT_BYTE_OFFSET_AFTER(InvokeType);
-  static constexpr int kFixedEntrySize = ELEMENT_BYTE_OFFSET_AFTER(DexRegisterMap);
+  ALWAYS_INLINE MemoryRegion GetRegionAtDepth(const InlineInfoEncoding& encoding,
+                                              uint32_t depth) const {
+    size_t entry_size = encoding.GetEntrySize();
+    DCHECK_GT(entry_size, 0u);
+    return region_.Subregion(depth * entry_size, entry_size);
+  }
 
   MemoryRegion region_;
+};
 
-  friend class CodeInfo;
-  friend class StackMap;
-  friend class StackMapStream;
+// Most of the fields are encoded as ULEB128 to save space.
+struct CodeInfoEncoding {
+  uint32_t non_header_size;
+  uint32_t number_of_stack_maps;
+  uint32_t stack_map_size_in_bytes;
+  uint32_t number_of_location_catalog_entries;
+  StackMapEncoding stack_map_encoding;
+  InlineInfoEncoding inline_info_encoding;
+  uint8_t header_size;
+
+  CodeInfoEncoding() { }
+
+  explicit CodeInfoEncoding(const void* data) {
+    const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data);
+    non_header_size = DecodeUnsignedLeb128(&ptr);
+    number_of_stack_maps = DecodeUnsignedLeb128(&ptr);
+    stack_map_size_in_bytes = DecodeUnsignedLeb128(&ptr);
+    number_of_location_catalog_entries = DecodeUnsignedLeb128(&ptr);
+    static_assert(alignof(StackMapEncoding) == 1,
+                  "StackMapEncoding should not require alignment");
+    stack_map_encoding = *reinterpret_cast<const StackMapEncoding*>(ptr);
+    ptr += sizeof(StackMapEncoding);
+    if (stack_map_encoding.GetInlineInfoEncoding().BitSize() > 0) {
+      static_assert(alignof(InlineInfoEncoding) == 1,
+                    "InlineInfoEncoding should not require alignment");
+      inline_info_encoding = *reinterpret_cast<const InlineInfoEncoding*>(ptr);
+      ptr += sizeof(InlineInfoEncoding);
+    } else {
+      inline_info_encoding = InlineInfoEncoding{};  // NOLINT.
+    }
+    header_size = dchecked_integral_cast<uint8_t>(ptr - reinterpret_cast<const uint8_t*>(data));
+  }
+
+  template<typename Vector>
+  void Compress(Vector* dest) const {
+    EncodeUnsignedLeb128(dest, non_header_size);
+    EncodeUnsignedLeb128(dest, number_of_stack_maps);
+    EncodeUnsignedLeb128(dest, stack_map_size_in_bytes);
+    EncodeUnsignedLeb128(dest, number_of_location_catalog_entries);
+    const uint8_t* stack_map_ptr = reinterpret_cast<const uint8_t*>(&stack_map_encoding);
+    dest->insert(dest->end(), stack_map_ptr, stack_map_ptr + sizeof(StackMapEncoding));
+    if (stack_map_encoding.GetInlineInfoEncoding().BitSize() > 0) {
+      const uint8_t* inline_info_ptr = reinterpret_cast<const uint8_t*>(&inline_info_encoding);
+      dest->insert(dest->end(), inline_info_ptr, inline_info_ptr + sizeof(InlineInfoEncoding));
+    }
+  }
 };
 
 /**
  * Wrapper around all compiler information collected for a method.
  * The information is of the form:
  *
- *   [overall_size, encoding_info, number_of_location_catalog_entries, number_of_stack_maps,
- *   stack_mask_size, DexRegisterLocationCatalog+, StackMap+, DexRegisterMap+, InlineInfo*]
+ *   [CodeInfoEncoding, StackMap+, DexRegisterLocationCatalog+, DexRegisterMap+, InlineInfo*]
  *
- * where `encoding_info` is of the form:
+ * where CodeInfoEncoding is of the form:
  *
- *  [has_inline_info, inline_info_size_in_bytes, dex_register_map_size_in_bytes,
- *  dex_pc_size_in_bytes, native_pc_size_in_bytes, register_mask_size_in_bytes].
+ *   [non_header_size, number_of_stack_maps, stack_map_size_in_bytes,
+ *    number_of_location_catalog_entries, StackMapEncoding]
  */
 class CodeInfo {
  public:
-  // Memory layout: fixed contents.
-  typedef uint32_t OverallSizeType;
-  typedef uint16_t EncodingInfoType;
-  typedef uint32_t NumberOfLocationCatalogEntriesType;
-  typedef uint32_t NumberOfStackMapsType;
-  typedef uint32_t StackMaskSizeType;
-
-  // Memory (bit) layout: encoding info.
-  static constexpr int HasInlineInfoBitSize = 1;
-  static constexpr int InlineInfoBitSize = kNumberOfBitForNumberOfBytesForEncoding;
-  static constexpr int DexRegisterMapBitSize = kNumberOfBitForNumberOfBytesForEncoding;
-  static constexpr int DexPcBitSize = kNumberOfBitForNumberOfBytesForEncoding;
-  static constexpr int NativePcBitSize = kNumberOfBitForNumberOfBytesForEncoding;
-  static constexpr int RegisterMaskBitSize = kNumberOfBitForNumberOfBytesForEncoding;
-
-  explicit CodeInfo(MemoryRegion region) : region_(region) {}
+  explicit CodeInfo(MemoryRegion region) : region_(region) {
+  }
 
   explicit CodeInfo(const void* data) {
-    uint32_t size = reinterpret_cast<const uint32_t*>(data)[0];
-    region_ = MemoryRegion(const_cast<void*>(data), size);
+    CodeInfoEncoding encoding = CodeInfoEncoding(data);
+    region_ = MemoryRegion(const_cast<void*>(data),
+                           encoding.header_size + encoding.non_header_size);
   }
 
-  StackMapEncoding ExtractEncoding() const {
-    return StackMapEncoding(region_.LoadUnaligned<uint32_t>(kStackMaskSizeOffset),
-                            GetNumberOfBytesForEncoding(kInlineInfoBitOffset),
-                            GetNumberOfBytesForEncoding(kDexRegisterMapBitOffset),
-                            GetNumberOfBytesForEncoding(kDexPcBitOffset),
-                            GetNumberOfBytesForEncoding(kNativePcBitOffset),
-                            GetNumberOfBytesForEncoding(kRegisterMaskBitOffset));
+  CodeInfoEncoding ExtractEncoding() const {
+    return CodeInfoEncoding(region_.start());
   }
 
-  void SetEncoding(const StackMapEncoding& encoding) {
-    region_.StoreUnaligned<uint32_t>(kStackMaskSizeOffset, encoding.NumberOfBytesForStackMask());
-    region_.StoreBit(kHasInlineInfoBitOffset, encoding.NumberOfBytesForInlineInfo() != 0);
-    SetEncodingAt(kInlineInfoBitOffset, encoding.NumberOfBytesForInlineInfo());
-    SetEncodingAt(kDexRegisterMapBitOffset, encoding.NumberOfBytesForDexRegisterMap());
-    SetEncodingAt(kDexPcBitOffset, encoding.NumberOfBytesForDexPc());
-    SetEncodingAt(kNativePcBitOffset, encoding.NumberOfBytesForNativePc());
-    SetEncodingAt(kRegisterMaskBitOffset, encoding.NumberOfBytesForRegisterMask());
+  bool HasInlineInfo(const CodeInfoEncoding& encoding) const {
+    return encoding.stack_map_encoding.GetInlineInfoEncoding().BitSize() > 0;
   }
 
-  void SetEncodingAt(size_t bit_offset, size_t number_of_bytes) {
-    region_.StoreBits(bit_offset, number_of_bytes, kNumberOfBitForNumberOfBytesForEncoding);
-  }
-
-  size_t GetNumberOfBytesForEncoding(size_t bit_offset) const {
-    return region_.LoadBits(bit_offset, kNumberOfBitForNumberOfBytesForEncoding);
-  }
-
-  bool HasInlineInfo() const {
-    return region_.LoadBit(kHasInlineInfoBitOffset);
-  }
-
-  DexRegisterLocationCatalog GetDexRegisterLocationCatalog(const StackMapEncoding& encoding) const {
+  DexRegisterLocationCatalog GetDexRegisterLocationCatalog(const CodeInfoEncoding& encoding) const {
     return DexRegisterLocationCatalog(region_.Subregion(
         GetDexRegisterLocationCatalogOffset(encoding),
         GetDexRegisterLocationCatalogSize(encoding)));
   }
 
-  StackMap GetStackMapAt(size_t i, const StackMapEncoding& encoding) const {
-    size_t stack_map_size = encoding.ComputeStackMapSize();
+  StackMap GetStackMapAt(size_t i, const CodeInfoEncoding& encoding) const {
+    size_t stack_map_size = encoding.stack_map_size_in_bytes;
     return StackMap(GetStackMaps(encoding).Subregion(i * stack_map_size, stack_map_size));
   }
 
-  OverallSizeType GetOverallSize() const {
-    return region_.LoadUnaligned<OverallSizeType>(kOverallSizeOffset);
+  uint32_t GetNumberOfLocationCatalogEntries(const CodeInfoEncoding& encoding) const {
+    return encoding.number_of_location_catalog_entries;
   }
 
-  void SetOverallSize(OverallSizeType size) {
-    region_.StoreUnaligned<OverallSizeType>(kOverallSizeOffset, size);
-  }
-
-  NumberOfLocationCatalogEntriesType GetNumberOfLocationCatalogEntries() const {
-    return region_.LoadUnaligned<NumberOfLocationCatalogEntriesType>(
-        kNumberOfLocationCatalogEntriesOffset);
-  }
-
-  void SetNumberOfLocationCatalogEntries(NumberOfLocationCatalogEntriesType num_entries) {
-    region_.StoreUnaligned<NumberOfLocationCatalogEntriesType>(
-        kNumberOfLocationCatalogEntriesOffset, num_entries);
-  }
-
-  uint32_t GetDexRegisterLocationCatalogSize(const StackMapEncoding& encoding) const {
+  uint32_t GetDexRegisterLocationCatalogSize(const CodeInfoEncoding& encoding) const {
     return ComputeDexRegisterLocationCatalogSize(GetDexRegisterLocationCatalogOffset(encoding),
-                                                 GetNumberOfLocationCatalogEntries());
+                                                 GetNumberOfLocationCatalogEntries(encoding));
   }
 
-  NumberOfStackMapsType GetNumberOfStackMaps() const {
-    return region_.LoadUnaligned<NumberOfStackMapsType>(kNumberOfStackMapsOffset);
-  }
-
-  void SetNumberOfStackMaps(NumberOfStackMapsType number_of_stack_maps) {
-    region_.StoreUnaligned<NumberOfStackMapsType>(kNumberOfStackMapsOffset, number_of_stack_maps);
+  uint32_t GetNumberOfStackMaps(const CodeInfoEncoding& encoding) const {
+    return encoding.number_of_stack_maps;
   }
 
   // Get the size of all the stack maps of this CodeInfo object, in bytes.
-  size_t GetStackMapsSize(const StackMapEncoding& encoding) const {
-    return encoding.ComputeStackMapSize() * GetNumberOfStackMaps();
+  size_t GetStackMapsSize(const CodeInfoEncoding& encoding) const {
+    return encoding.stack_map_size_in_bytes * GetNumberOfStackMaps(encoding);
   }
 
-  uint32_t GetDexRegisterLocationCatalogOffset(const StackMapEncoding& encoding) const {
-    return GetStackMapsOffset() + GetStackMapsSize(encoding);
+  uint32_t GetDexRegisterLocationCatalogOffset(const CodeInfoEncoding& encoding) const {
+    return GetStackMapsOffset(encoding) + GetStackMapsSize(encoding);
   }
 
-  size_t GetDexRegisterMapsOffset(const StackMapEncoding& encoding) const {
+  size_t GetDexRegisterMapsOffset(const CodeInfoEncoding& encoding) const {
     return GetDexRegisterLocationCatalogOffset(encoding)
          + GetDexRegisterLocationCatalogSize(encoding);
   }
 
-  uint32_t GetStackMapsOffset() const {
-    return kFixedSize;
+  uint32_t GetStackMapsOffset(const CodeInfoEncoding& encoding) const {
+    return encoding.header_size;
   }
 
   DexRegisterMap GetDexRegisterMapOf(StackMap stack_map,
-                                     const StackMapEncoding& encoding,
+                                     const CodeInfoEncoding& encoding,
                                      uint32_t number_of_dex_registers) const {
-    if (!stack_map.HasDexRegisterMap(encoding)) {
+    if (!stack_map.HasDexRegisterMap(encoding.stack_map_encoding)) {
       return DexRegisterMap();
     } else {
       uint32_t offset = GetDexRegisterMapsOffset(encoding)
-                        + stack_map.GetDexRegisterMapOffset(encoding);
-      size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
+                        + stack_map.GetDexRegisterMapOffset(encoding.stack_map_encoding);
+      size_t size = ComputeDexRegisterMapSizeOf(encoding, offset, number_of_dex_registers);
       return DexRegisterMap(region_.Subregion(offset, size));
     }
   }
@@ -1129,31 +1153,29 @@
   // Return the `DexRegisterMap` pointed by `inline_info` at depth `depth`.
   DexRegisterMap GetDexRegisterMapAtDepth(uint8_t depth,
                                           InlineInfo inline_info,
-                                          const StackMapEncoding& encoding,
+                                          const CodeInfoEncoding& encoding,
                                           uint32_t number_of_dex_registers) const {
-    if (!inline_info.HasDexRegisterMapAtDepth(depth)) {
+    if (!inline_info.HasDexRegisterMapAtDepth(encoding.inline_info_encoding, depth)) {
       return DexRegisterMap();
     } else {
-      uint32_t offset = GetDexRegisterMapsOffset(encoding)
-                        + inline_info.GetDexRegisterMapOffsetAtDepth(depth);
-      size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
+      uint32_t offset = GetDexRegisterMapsOffset(encoding) +
+          inline_info.GetDexRegisterMapOffsetAtDepth(encoding.inline_info_encoding, depth);
+      size_t size = ComputeDexRegisterMapSizeOf(encoding, offset, number_of_dex_registers);
       return DexRegisterMap(region_.Subregion(offset, size));
     }
   }
 
-  InlineInfo GetInlineInfoOf(StackMap stack_map, const StackMapEncoding& encoding) const {
-    DCHECK(stack_map.HasInlineInfo(encoding));
-    uint32_t offset = stack_map.GetInlineDescriptorOffset(encoding)
+  InlineInfo GetInlineInfoOf(StackMap stack_map, const CodeInfoEncoding& encoding) const {
+    DCHECK(stack_map.HasInlineInfo(encoding.stack_map_encoding));
+    uint32_t offset = stack_map.GetInlineDescriptorOffset(encoding.stack_map_encoding)
                       + GetDexRegisterMapsOffset(encoding);
-    uint8_t depth = region_.LoadUnaligned<uint8_t>(offset);
-    return InlineInfo(region_.Subregion(offset,
-        InlineInfo::kFixedSize + depth * InlineInfo::SingleEntrySize()));
+    return InlineInfo(region_.Subregion(offset, region_.size() - offset));
   }
 
-  StackMap GetStackMapForDexPc(uint32_t dex_pc, const StackMapEncoding& encoding) const {
-    for (size_t i = 0, e = GetNumberOfStackMaps(); i < e; ++i) {
+  StackMap GetStackMapForDexPc(uint32_t dex_pc, const CodeInfoEncoding& encoding) const {
+    for (size_t i = 0, e = GetNumberOfStackMaps(encoding); i < e; ++i) {
       StackMap stack_map = GetStackMapAt(i, encoding);
-      if (stack_map.GetDexPc(encoding) == dex_pc) {
+      if (stack_map.GetDexPc(encoding.stack_map_encoding) == dex_pc) {
         return stack_map;
       }
     }
@@ -1162,37 +1184,39 @@
 
   // Searches the stack map list backwards because catch stack maps are stored
   // at the end.
-  StackMap GetCatchStackMapForDexPc(uint32_t dex_pc, const StackMapEncoding& encoding) const {
-    for (size_t i = GetNumberOfStackMaps(); i > 0; --i) {
+  StackMap GetCatchStackMapForDexPc(uint32_t dex_pc, const CodeInfoEncoding& encoding) const {
+    for (size_t i = GetNumberOfStackMaps(encoding); i > 0; --i) {
       StackMap stack_map = GetStackMapAt(i - 1, encoding);
-      if (stack_map.GetDexPc(encoding) == dex_pc) {
+      if (stack_map.GetDexPc(encoding.stack_map_encoding) == dex_pc) {
         return stack_map;
       }
     }
     return StackMap();
   }
 
-  StackMap GetOsrStackMapForDexPc(uint32_t dex_pc, const StackMapEncoding& encoding) const {
-    size_t e = GetNumberOfStackMaps();
+  StackMap GetOsrStackMapForDexPc(uint32_t dex_pc, const CodeInfoEncoding& encoding) const {
+    size_t e = GetNumberOfStackMaps(encoding);
     if (e == 0) {
       // There cannot be an OSR stack map if there are no stack maps.
       return StackMap();
     }
     // Walk over all stack maps. If two consecutive stack maps are identical, then we
     // have found a stack map suitable for OSR.
+    const StackMapEncoding& stack_map_encoding = encoding.stack_map_encoding;
     for (size_t i = 0; i < e - 1; ++i) {
       StackMap stack_map = GetStackMapAt(i, encoding);
-      if (stack_map.GetDexPc(encoding) == dex_pc) {
+      if (stack_map.GetDexPc(stack_map_encoding) == dex_pc) {
         StackMap other = GetStackMapAt(i + 1, encoding);
-        if (other.GetDexPc(encoding) == dex_pc &&
-            other.GetNativePcOffset(encoding) == stack_map.GetNativePcOffset(encoding)) {
-          DCHECK_EQ(other.GetDexRegisterMapOffset(encoding),
-                    stack_map.GetDexRegisterMapOffset(encoding));
-          DCHECK(!stack_map.HasInlineInfo(encoding));
+        if (other.GetDexPc(stack_map_encoding) == dex_pc &&
+            other.GetNativePcOffset(stack_map_encoding) ==
+                stack_map.GetNativePcOffset(stack_map_encoding)) {
+          DCHECK_EQ(other.GetDexRegisterMapOffset(stack_map_encoding),
+                    stack_map.GetDexRegisterMapOffset(stack_map_encoding));
+          DCHECK(!stack_map.HasInlineInfo(stack_map_encoding));
           if (i < e - 2) {
             // Make sure there are not three identical stack maps following each other.
-            DCHECK_NE(stack_map.GetNativePcOffset(encoding),
-                      GetStackMapAt(i + 2, encoding).GetNativePcOffset(encoding));
+            DCHECK_NE(stack_map.GetNativePcOffset(stack_map_encoding),
+                      GetStackMapAt(i + 2, encoding).GetNativePcOffset(stack_map_encoding));
           }
           return stack_map;
         }
@@ -1202,13 +1226,13 @@
   }
 
   StackMap GetStackMapForNativePcOffset(uint32_t native_pc_offset,
-                                        const StackMapEncoding& encoding) const {
+                                        const CodeInfoEncoding& encoding) const {
     // TODO: Safepoint stack maps are sorted by native_pc_offset but catch stack
     //       maps are not. If we knew that the method does not have try/catch,
     //       we could do binary search.
-    for (size_t i = 0, e = GetNumberOfStackMaps(); i < e; ++i) {
+    for (size_t i = 0, e = GetNumberOfStackMaps(encoding); i < e; ++i) {
       StackMap stack_map = GetStackMapAt(i, encoding);
-      if (stack_map.GetNativePcOffset(encoding) == native_pc_offset) {
+      if (stack_map.GetNativePcOffset(encoding.stack_map_encoding) == native_pc_offset) {
         return stack_map;
       }
     }
@@ -1226,38 +1250,16 @@
             bool dump_stack_maps) const;
 
  private:
-  static constexpr int kOverallSizeOffset = 0;
-  static constexpr int kEncodingInfoOffset = ELEMENT_BYTE_OFFSET_AFTER(OverallSize);
-  static constexpr int kNumberOfLocationCatalogEntriesOffset =
-      ELEMENT_BYTE_OFFSET_AFTER(EncodingInfo);
-  static constexpr int kNumberOfStackMapsOffset =
-      ELEMENT_BYTE_OFFSET_AFTER(NumberOfLocationCatalogEntries);
-  static constexpr int kStackMaskSizeOffset = ELEMENT_BYTE_OFFSET_AFTER(NumberOfStackMaps);
-  static constexpr int kFixedSize = ELEMENT_BYTE_OFFSET_AFTER(StackMaskSize);
-
-  static constexpr int kHasInlineInfoBitOffset = kEncodingInfoOffset * kBitsPerByte;
-  static constexpr int kInlineInfoBitOffset = ELEMENT_BIT_OFFSET_AFTER(HasInlineInfo);
-  static constexpr int kDexRegisterMapBitOffset = ELEMENT_BIT_OFFSET_AFTER(InlineInfo);
-  static constexpr int kDexPcBitOffset = ELEMENT_BIT_OFFSET_AFTER(DexRegisterMap);
-  static constexpr int kNativePcBitOffset = ELEMENT_BIT_OFFSET_AFTER(DexPc);
-  static constexpr int kRegisterMaskBitOffset = ELEMENT_BIT_OFFSET_AFTER(NativePc);
-
-  static constexpr int kEncodingInfoPastTheEndBitOffset = ELEMENT_BIT_OFFSET_AFTER(RegisterMask);
-  static constexpr int kEncodingInfoOverallBitSize =
-      kEncodingInfoPastTheEndBitOffset - kHasInlineInfoBitOffset;
-
-  static_assert(kEncodingInfoOverallBitSize <= (sizeof(EncodingInfoType) * kBitsPerByte),
-                "art::CodeInfo::EncodingInfoType is too short to hold all encoding info elements.");
-
-  MemoryRegion GetStackMaps(const StackMapEncoding& encoding) const {
+  MemoryRegion GetStackMaps(const CodeInfoEncoding& encoding) const {
     return region_.size() == 0
         ? MemoryRegion()
-        : region_.Subregion(GetStackMapsOffset(), GetStackMapsSize(encoding));
+        : region_.Subregion(GetStackMapsOffset(encoding), GetStackMapsSize(encoding));
   }
 
   // Compute the size of the Dex register map associated to the stack map at
   // `dex_register_map_offset_in_code_info`.
-  size_t ComputeDexRegisterMapSizeOf(uint32_t dex_register_map_offset_in_code_info,
+  size_t ComputeDexRegisterMapSizeOf(const CodeInfoEncoding& encoding,
+                                     uint32_t dex_register_map_offset_in_code_info,
                                      uint16_t number_of_dex_registers) const {
     // Offset where the actual mapping data starts within art::DexRegisterMap.
     size_t location_mapping_data_offset_in_dex_register_map =
@@ -1270,7 +1272,7 @@
     size_t number_of_live_dex_registers =
         dex_register_map_without_locations.GetNumberOfLiveDexRegisters(number_of_dex_registers);
     size_t location_mapping_data_size_in_bits =
-        DexRegisterMap::SingleEntrySizeInBits(GetNumberOfLocationCatalogEntries())
+        DexRegisterMap::SingleEntrySizeInBits(GetNumberOfLocationCatalogEntries(encoding))
         * number_of_live_dex_registers;
     size_t location_mapping_data_size_in_bytes =
         RoundUp(location_mapping_data_size_in_bits, kBitsPerByte) / kBitsPerByte;
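
A note on the header format above: ULEB128 stores 7 payload bits per byte and uses the high bit to flag that another byte follows, so the small counts that dominate CodeInfo headers usually cost a single byte each. A minimal, self-contained sketch of the encode/decode pair follows; the EncodeUnsignedLeb128/DecodeUnsignedLeb128 helpers used by CodeInfoEncoding are assumed to behave like this, but these stand-ins are illustrative rather than the ART implementations.

#include <cstdint>
#include <vector>

// Append `value` to `dest`, 7 bits per byte, high bit set while more follow.
static void EncodeUleb128(std::vector<uint8_t>* dest, uint32_t value) {
  while (value >= 0x80u) {
    dest->push_back(static_cast<uint8_t>(value) | 0x80u);
    value >>= 7;
  }
  dest->push_back(static_cast<uint8_t>(value));
}

// Decode one value and advance *ptr past its bytes.
static uint32_t DecodeUleb128(const uint8_t** ptr) {
  uint32_t result = 0;
  int shift = 0;
  uint8_t byte;
  do {
    byte = *(*ptr)++;
    result |= static_cast<uint32_t>(byte & 0x7fu) << shift;
    shift += 7;
  } while ((byte & 0x80u) != 0);
  return result;
}

Encoding 300, for instance, yields the two bytes 0xac 0x02, which is why replacing the fixed uint32_t header fields shrinks most headers.
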
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index f5d20bd..d98f82a 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -19,7 +19,7 @@
 
 #include "thread.h"
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include <bionic_tls.h>  // Access to our own TLS slot.
 #endif
 
@@ -45,7 +45,7 @@
   if (!is_started_) {
     return nullptr;
   } else {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     void* thread = __get_tls()[TLS_SLOT_ART_THREAD_SELF];
 #else
     void* thread = pthread_getspecific(Thread::pthread_key_self_);
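
For context on the macro swap above: on Android the thread pointer lives in a reserved bionic TLS slot that takes a couple of loads to read, while other targets fall back to a pthread key. A hedged sketch of the portable fallback half of that pattern, with illustrative names rather than ART's:

#include <pthread.h>

static pthread_key_t g_self_key;
static pthread_once_t g_self_once = PTHREAD_ONCE_INIT;

static void CreateSelfKey() { pthread_key_create(&g_self_key, nullptr); }

// Analog of attaching a thread: remember `thread` in thread-local storage.
void SetCurrentThread(void* thread) {
  pthread_once(&g_self_once, CreateSelfKey);
  pthread_setspecific(g_self_key, thread);
}

// Analog of the #else branch above; returns nullptr for unattached threads.
void* GetCurrentThread() {
  pthread_once(&g_self_once, CreateSelfKey);
  return pthread_getspecific(g_self_key);
}

The dedicated slot exists because pthread_getspecific is an out-of-line call on every lookup, which matters for something as hot as Thread::Current().
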
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 42b5a4a..fb24828 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -42,7 +42,6 @@
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
-#include "gc_map.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/allocator/rosalloc.h"
@@ -70,10 +69,8 @@
 #include "thread_list.h"
 #include "thread-inl.h"
 #include "utils.h"
-#include "verifier/dex_gc_map.h"
 #include "verifier/method_verifier.h"
 #include "verify_object-inl.h"
-#include "vmap_table.h"
 #include "well_known_classes.h"
 #include "interpreter/interpreter.h"
 
@@ -91,8 +88,10 @@
 pthread_key_t Thread::pthread_key_self_;
 ConditionVariable* Thread::resume_cond_ = nullptr;
 const size_t Thread::kStackOverflowImplicitCheckSize = GetStackOverflowReservedBytes(kRuntimeISA);
-// Enabled for b/27493510. TODO: disable when fixed.
-static constexpr bool kVerifyImageObjectsMarked = true;
+bool (*Thread::is_sensitive_thread_hook_)() = nullptr;
+Thread* Thread::jit_sensitive_thread_ = nullptr;
+
+static constexpr bool kVerifyImageObjectsMarked = kIsDebugBuild;
 
 // For implicit overflow checks we reserve an extra piece of memory at the bottom
 // of the stack (lowest memory).  The higher portion of the memory
@@ -706,7 +705,7 @@
   InitTid();
   interpreter::InitInterpreterTls(this);
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   __get_tls()[TLS_SLOT_ART_THREAD_SELF] = this;
 #else
   CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, this), "attach self");
@@ -741,7 +740,7 @@
   {
     MutexLock mu(nullptr, *Locks::runtime_shutdown_lock_);
     if (runtime->IsShuttingDownLocked()) {
-      LOG(ERROR) << "Thread attaching while runtime is shutting down: " << thread_name;
+      LOG(WARNING) << "Thread attaching while runtime is shutting down: " << thread_name;
       return nullptr;
     } else {
       Runtime::Current()->StartThreadBirth();
@@ -1544,7 +1543,7 @@
     LOG(WARNING) << "Native thread exiting without having called DetachCurrentThread (maybe it's "
         "going to use a pthread_key_create destructor?): " << *self;
     CHECK(is_started_);
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
     __get_tls()[TLS_SLOT_ART_THREAD_SELF] = self;
 #else
     CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, self), "reattach self");
@@ -2394,7 +2393,7 @@
   std::string str(ss.str());
   // log to stderr for debugging command line processes
   std::cerr << str;
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   // log to logcat for debugging frameworks processes
   LOG(INFO) << str;
 #endif
@@ -2767,83 +2766,36 @@
     // Process register map (which native and runtime methods don't have)
     if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) {
       const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-      if (method_header->IsOptimized()) {
-        auto* vreg_base = reinterpret_cast<StackReference<mirror::Object>*>(
-            reinterpret_cast<uintptr_t>(cur_quick_frame));
-        uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
-        CodeInfo code_info = method_header->GetOptimizedCodeInfo();
-        StackMapEncoding encoding = code_info.ExtractEncoding();
-        StackMap map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
-        DCHECK(map.IsValid());
-        MemoryRegion mask = map.GetStackMask(encoding);
-        // Visit stack entries that hold pointers.
-        for (size_t i = 0; i < mask.size_in_bits(); ++i) {
-          if (mask.LoadBit(i)) {
-            auto* ref_addr = vreg_base + i;
-            mirror::Object* ref = ref_addr->AsMirrorPtr();
-            if (ref != nullptr) {
-              mirror::Object* new_ref = ref;
-              visitor_(&new_ref, -1, this);
-              if (ref != new_ref) {
-                ref_addr->Assign(new_ref);
-              }
+      DCHECK(method_header->IsOptimized());
+      auto* vreg_base = reinterpret_cast<StackReference<mirror::Object>*>(
+          reinterpret_cast<uintptr_t>(cur_quick_frame));
+      uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
+      CodeInfo code_info = method_header->GetOptimizedCodeInfo();
+      CodeInfoEncoding encoding = code_info.ExtractEncoding();
+      StackMap map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
+      DCHECK(map.IsValid());
+      // Visit stack entries that hold pointers.
+      size_t number_of_bits = map.GetNumberOfStackMaskBits(encoding.stack_map_encoding);
+      for (size_t i = 0; i < number_of_bits; ++i) {
+        if (map.GetStackMaskBit(encoding.stack_map_encoding, i)) {
+          auto* ref_addr = vreg_base + i;
+          mirror::Object* ref = ref_addr->AsMirrorPtr();
+          if (ref != nullptr) {
+            mirror::Object* new_ref = ref;
+            visitor_(&new_ref, -1, this);
+            if (ref != new_ref) {
+              ref_addr->Assign(new_ref);
             }
           }
         }
-        // Visit callee-save registers that hold pointers.
-        uint32_t register_mask = map.GetRegisterMask(encoding);
-        for (size_t i = 0; i < BitSizeOf<uint32_t>(); ++i) {
-          if (register_mask & (1 << i)) {
-            mirror::Object** ref_addr = reinterpret_cast<mirror::Object**>(GetGPRAddress(i));
-            if (*ref_addr != nullptr) {
-              visitor_(ref_addr, -1, this);
-            }
-          }
-        }
-      } else {
-        const uint8_t* native_gc_map = method_header->GetNativeGcMap();
-        CHECK(native_gc_map != nullptr) << PrettyMethod(m);
-        const DexFile::CodeItem* code_item = m->GetCodeItem();
-        // Can't be null or how would we compile its instructions?
-        DCHECK(code_item != nullptr) << PrettyMethod(m);
-        NativePcOffsetToReferenceMap map(native_gc_map);
-        size_t num_regs = map.RegWidth() * 8;
-        if (num_regs > 0) {
-          uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
-          const uint8_t* reg_bitmap = map.FindBitMap(native_pc_offset);
-          DCHECK(reg_bitmap != nullptr);
-          const VmapTable vmap_table(method_header->GetVmapTable());
-          QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
-          // For all dex registers in the bitmap
-          DCHECK(cur_quick_frame != nullptr);
-          for (size_t reg = 0; reg < num_regs; ++reg) {
-            // Does this register hold a reference?
-            if (TestBitmap(reg, reg_bitmap)) {
-              uint32_t vmap_offset;
-              if (vmap_table.IsInContext(reg, kReferenceVReg, &vmap_offset)) {
-                int vmap_reg = vmap_table.ComputeRegister(frame_info.CoreSpillMask(), vmap_offset,
-                                                          kReferenceVReg);
-                // This is sound as spilled GPRs will be word sized (ie 32 or 64bit).
-                mirror::Object** ref_addr =
-                    reinterpret_cast<mirror::Object**>(GetGPRAddress(vmap_reg));
-                if (*ref_addr != nullptr) {
-                  visitor_(ref_addr, reg, this);
-                }
-              } else {
-                StackReference<mirror::Object>* ref_addr =
-                    reinterpret_cast<StackReference<mirror::Object>*>(GetVRegAddrFromQuickCode(
-                        cur_quick_frame, code_item, frame_info.CoreSpillMask(),
-                        frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), reg));
-                mirror::Object* ref = ref_addr->AsMirrorPtr();
-                if (ref != nullptr) {
-                  mirror::Object* new_ref = ref;
-                  visitor_(&new_ref, reg, this);
-                  if (ref != new_ref) {
-                    ref_addr->Assign(new_ref);
-                  }
-                }
-              }
-            }
+      }
+      // Visit callee-save registers that hold pointers.
+      uint32_t register_mask = map.GetRegisterMask(encoding.stack_map_encoding);
+      for (size_t i = 0; i < BitSizeOf<uint32_t>(); ++i) {
+        if (register_mask & (1 << i)) {
+          mirror::Object** ref_addr = reinterpret_cast<mirror::Object**>(GetGPRAddress(i));
+          if (*ref_addr != nullptr) {
+            visitor_(ref_addr, -1, this);
           }
         }
       }
@@ -3059,7 +3011,6 @@
   return count;
 }
 
-
 void Thread::DeoptimizeWithDeoptimizationException(JValue* result) {
   DCHECK_EQ(GetException(), Thread::GetDeoptimizationException());
   ClearException();
@@ -3080,4 +3031,11 @@
   interpreter::EnterInterpreterFromDeoptimize(this, shadow_frame, from_code, result);
 }
 
+void Thread::SetException(mirror::Throwable* new_exception) {
+  CHECK(new_exception != nullptr);
+  // TODO: DCHECK(!IsExceptionPending());
+  tlsPtr_.exception = new_exception;
+  // LOG(ERROR) << new_exception->Dump();
+}
+
 }  // namespace art
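
One observation on the register-mask loop in the rewritten visitor: it probes all 32 bit positions even when only a few callee-save registers are live. The usual alternative jumps directly between set bits; a standalone sketch of that idiom (a generic bit trick, not the ART code):

#include <cstdint>

// Calls visit(i) for every set bit i in `mask`, lowest index first.
template <typename Visitor>
void ForEachSetBit(uint32_t mask, Visitor&& visit) {
  while (mask != 0) {
    int i = __builtin_ctz(mask);  // Index of the lowest set bit (GCC/Clang).
    visit(i);
    mask &= mask - 1;             // Clear that bit.
  }
}

With two or three live registers this runs two or three iterations instead of 32; for GC root visiting the difference is likely in the noise, which would explain keeping the straightforward loop.
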
diff --git a/runtime/thread.h b/runtime/thread.h
index 3123c71..582a0cd 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -312,6 +312,7 @@
    */
   static int GetNativePriority();
 
+  // Guaranteed to be non-zero.
   uint32_t GetThreadId() const {
     return tls32_.thin_lock_thread_id;
   }
@@ -362,12 +363,7 @@
   void AssertNoPendingException() const;
   void AssertNoPendingExceptionForNewException(const char* msg) const;
 
-  void SetException(mirror::Throwable* new_exception)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    CHECK(new_exception != nullptr);
-    // TODO: DCHECK(!IsExceptionPending());
-    tlsPtr_.exception = new_exception;
-  }
+  void SetException(mirror::Throwable* new_exception) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ClearException() SHARED_REQUIRES(Locks::mutator_lock_) {
     tlsPtr_.exception = nullptr;
@@ -1097,6 +1093,19 @@
     return debug_disallow_read_barrier_;
   }
 
+  // Returns true if the current thread is the jit sensitive thread.
+  bool IsJitSensitiveThread() const {
+    return this == jit_sensitive_thread_;
+  }
+
+  // Returns true if StrictMode events are traced for the current thread.
+  static bool IsSensitiveThread() {
+    if (is_sensitive_thread_hook_ != nullptr) {
+      return (*is_sensitive_thread_hook_)();
+    }
+    return false;
+  }
+
  private:
   explicit Thread(bool daemon);
   ~Thread() REQUIRES(!Locks::mutator_lock_, !Locks::thread_suspend_count_lock_);
@@ -1172,6 +1181,20 @@
   ALWAYS_INLINE void PassActiveSuspendBarriers()
       REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_);
 
+  // Registers the current thread as the jit sensitive thread. Should be called just once.
+  static void SetJitSensitiveThread() {
+    if (jit_sensitive_thread_ == nullptr) {
+      jit_sensitive_thread_ = Thread::Current();
+    } else {
+      LOG(WARNING) << "Attempt to set the sensitive thread twice. Tid:"
+          << Thread::Current()->GetTid();
+    }
+  }
+
+  static void SetSensitiveThreadHook(bool (*is_sensitive_thread_hook)()) {
+    is_sensitive_thread_hook_ = is_sensitive_thread_hook;
+  }
+
   // 32 bits of atomically changed state and flags. Keeping as 32 bits allows an atomic CAS to
   // change from being Suspended to Runnable without a suspend request occurring.
   union PACKED(4) StateAndFlags {
@@ -1214,6 +1237,12 @@
   // their suspend count is > 0.
   static ConditionVariable* resume_cond_ GUARDED_BY(Locks::thread_suspend_count_lock_);
 
+  // Hook passed by the framework; it returns true when StrictMode events
+  // are traced for the current thread.
+  static bool (*is_sensitive_thread_hook_)();
+  // Stores the jit sensitive thread (which for now is the UI thread).
+  static Thread* jit_sensitive_thread_;
+
   /***********************************************************************************************/
   // Thread local storage. Fields are grouped by size to enable 32 <-> 64 searching to account for
   // pointer size differences. To encourage shorter encoding, more frequently used values appear
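
The hook plumbing added here reduces to: install a predicate once, then query it statically, defaulting to false while nothing is installed. A standalone analog of that pattern with illustrative names (not the ART types), showing the intended call order:

#include <cstdio>

static bool (*g_is_sensitive_hook)() = nullptr;

// Mirrors SetSensitiveThreadHook: the framework installs the predicate once.
void SetSensitiveHook(bool (*hook)()) { g_is_sensitive_hook = hook; }

// Mirrors IsSensitiveThread: answers false until a hook is installed.
bool IsSensitive() {
  return g_is_sensitive_hook != nullptr && g_is_sensitive_hook();
}

static bool AlwaysSensitive() { return true; }

int main() {
  std::printf("%d\n", IsSensitive());  // 0: no hook installed yet.
  SetSensitiveHook(&AlwaysSensitive);
  std::printf("%d\n", IsSensitive());  // 1: the hook now answers.
  return 0;
}
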
diff --git a/runtime/thread_linux.cc b/runtime/thread_linux.cc
index 9563b99..b922d94 100644
--- a/runtime/thread_linux.cc
+++ b/runtime/thread_linux.cc
@@ -44,7 +44,7 @@
 
 void Thread::SetUpAlternateSignalStack() {
   // Create and set an alternate signal stack.
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   LOG(FATAL) << "Invalid use of alternate signal stack on Android";
 #endif
   stack_t ss;
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index a9ce056..da21479 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -923,12 +923,9 @@
   }
 }
 
-Thread* ThreadList::FindThreadByThreadId(uint32_t thin_lock_id) {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, *Locks::thread_list_lock_);
+Thread* ThreadList::FindThreadByThreadId(uint32_t thread_id) {
   for (const auto& thread : list_) {
-    if (thread->GetThreadId() == thin_lock_id) {
-      CHECK(thread == self || thread->IsSuspended());
+    if (thread->GetThreadId() == thread_id) {
       return thread;
     }
   }
@@ -1280,7 +1277,7 @@
 
   // Clear the TLS data, so that the underlying native thread is recognizably detached.
   // (It may wish to reattach later.)
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   __get_tls()[TLS_SLOT_ART_THREAD_SELF] = nullptr;
 #else
   CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, nullptr), "detach self");
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index f97ecd3..df81ad1 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -89,8 +89,8 @@
                !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
 
-  // Find an already suspended thread (or self) by its id.
-  Thread* FindThreadByThreadId(uint32_t thin_lock_id);
+  // Find an existing thread (or self) by its thread id (not tid).
+  Thread* FindThreadByThreadId(uint32_t thread_id) REQUIRES(Locks::thread_list_lock_);
 
   // Run a checkpoint on threads, running threads are not suspended but run the checkpoint inside
   // of the suspend check. Returns how many checkpoints are expected to run, including for
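
Note what the REQUIRES annotation moves around: FindThreadByThreadId used to take thread_list_lock_ itself and CHECK that the result was suspended; both duties now fall to the caller. A hedged sketch of a call site under the new contract, assuming a ThreadList* thread_list and a uint32_t thread_id are in scope (a fragment against ART internals, not a complete program):

Thread* self = Thread::Current();
MutexLock mu(self, *Locks::thread_list_lock_);  // Satisfies REQUIRES(...).
Thread* thread = thread_list->FindThreadByThreadId(thread_id);
if (thread != nullptr) {
  // Use `thread` only while `mu` is held (or after suspending it);
  // the pointer may dangle once thread_list_lock_ is released.
}
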
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index 2fba805..b14f340 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -61,7 +61,7 @@
 void ThreadPoolWorker::SetPthreadPriority(int priority) {
   CHECK_GE(priority, PRIO_MIN);
   CHECK_LE(priority, PRIO_MAX);
-#if defined(__ANDROID__)
+#if defined(ART_TARGET_ANDROID)
   int result = setpriority(PRIO_PROCESS, pthread_gettid_np(pthread_), priority);
   if (result != 0) {
     PLOG(ERROR) << "Failed to setpriority to :" << priority;
diff --git a/runtime/utf.cc b/runtime/utf.cc
index a2d6363..5e9fdf7 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -178,6 +178,23 @@
   return static_cast<int32_t>(hash);
 }
 
+int32_t ComputeUtf16HashFromModifiedUtf8(const char* utf8, size_t utf16_length) {
+  uint32_t hash = 0;
+  while (utf16_length != 0u) {
+    const uint32_t pair = GetUtf16FromUtf8(&utf8);
+    const uint16_t first = GetLeadingUtf16Char(pair);
+    hash = hash * 31 + first;
+    --utf16_length;
+    const uint16_t second = GetTrailingUtf16Char(pair);
+    if (second != 0) {
+      hash = hash * 31 + second;
+      DCHECK_NE(utf16_length, 0u);
+      --utf16_length;
+    }
+  }
+  return static_cast<int32_t>(hash);
+}
+
 uint32_t ComputeModifiedUtf8Hash(const char* chars) {
   uint32_t hash = 0;
   while (*chars != '\0') {
diff --git a/runtime/utf.h b/runtime/utf.h
index 4abd605..27d2fd5 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -83,6 +83,7 @@
 int32_t ComputeUtf16Hash(mirror::CharArray* chars, int32_t offset, size_t char_count)
     SHARED_REQUIRES(Locks::mutator_lock_);
 int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count);
+int32_t ComputeUtf16HashFromModifiedUtf8(const char* utf8, size_t utf16_length);
 
 // Compute a hash code of a modified UTF-8 string. Not the standard java hash since it returns a
 // uint32_t and hashes individual chars instead of codepoint words.
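
Both overloads implement Java's String.hashCode recurrence, hash = hash * 31 + code_unit, over UTF-16 code units; the new one simply decodes Modified UTF-8 as it walks, so a string need not be inflated to UTF-16 just to be hashed. A quick standalone check of the recurrence on plain ASCII (one code unit per byte):

#include <cstddef>
#include <cstdint>
#include <cstdio>

int32_t HashUtf16(const uint16_t* chars, size_t n) {
  uint32_t hash = 0;
  for (size_t i = 0; i < n; ++i) {
    hash = hash * 31 + chars[i];  // Same step as ComputeUtf16Hash above.
  }
  return static_cast<int32_t>(hash);
}

int main() {
  const uint16_t hi[] = { 'H', 'i' };
  // 'H' * 31 + 'i' = 72 * 31 + 105 = 2337, matching Java's "Hi".hashCode().
  std::printf("%d\n", HashUtf16(hi, 2));
  return 0;
}
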
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 472a85c..6a50b8e 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1459,6 +1459,14 @@
   return stat(filename.c_str(), &buffer) == 0;
 }
 
+bool FileExistsAndNotEmpty(const std::string& filename) {
+  struct stat buffer;
+  if (stat(filename.c_str(), &buffer) != 0) {
+    return false;
+  }
+  return buffer.st_size > 0;
+}
+
 std::string PrettyDescriptor(Primitive::Type type) {
   return PrettyDescriptor(Primitive::Descriptor(type));
 }
diff --git a/runtime/utils.h b/runtime/utils.h
index 83ac0b8..c1e88a4 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -296,6 +296,7 @@
 
 // Returns true if the file exists.
 bool FileExists(const std::string& filename);
+bool FileExistsAndNotEmpty(const std::string& filename);
 
 class VoidFunctor {
  public:
diff --git a/runtime/verifier/dex_gc_map.cc b/runtime/verifier/dex_gc_map.cc
deleted file mode 100644
index c435f9f..0000000
--- a/runtime/verifier/dex_gc_map.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "verifier/dex_gc_map.h"
-
-#include "base/logging.h"
-
-namespace art {
-namespace verifier {
-
-const uint8_t* DexPcToReferenceMap::FindBitMap(uint16_t dex_pc, bool error_if_not_present) const {
-  size_t num_entries = NumEntries();
-  // Do linear or binary search?
-  static const size_t kSearchThreshold = 8;
-  if (num_entries < kSearchThreshold) {
-    for (size_t i = 0; i < num_entries; i++)  {
-      if (GetDexPc(i) == dex_pc) {
-        return GetBitMap(i);
-      }
-    }
-  } else {
-    int lo = 0;
-    int hi = num_entries -1;
-    while (hi >= lo) {
-      int mid = (hi + lo) / 2;
-      int mid_pc = GetDexPc(mid);
-      if (dex_pc > mid_pc) {
-        lo = mid + 1;
-      } else if (dex_pc < mid_pc) {
-        hi = mid - 1;
-      } else {
-        return GetBitMap(mid);
-      }
-    }
-  }
-  if (error_if_not_present) {
-    LOG(ERROR) << "Didn't find reference bit map for dex_pc " << dex_pc;
-  }
-  return nullptr;
-}
-
-}  // namespace verifier
-}  // namespace art
diff --git a/runtime/verifier/dex_gc_map.h b/runtime/verifier/dex_gc_map.h
deleted file mode 100644
index 03a7821..0000000
--- a/runtime/verifier/dex_gc_map.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_VERIFIER_DEX_GC_MAP_H_
-#define ART_RUNTIME_VERIFIER_DEX_GC_MAP_H_
-
-#include <stdint.h>
-
-#include "base/logging.h"
-#include "base/macros.h"
-
-namespace art {
-namespace verifier {
-
-/*
- * Format enumeration for RegisterMap data area.
- */
-enum RegisterMapFormat {
-  kRegMapFormatUnknown = 0,
-  kRegMapFormatNone = 1,       // Indicates no map data follows.
-  kRegMapFormatCompact8 = 2,   // Compact layout, 8-bit addresses.
-  kRegMapFormatCompact16 = 3,  // Compact layout, 16-bit addresses.
-};
-
-// Lightweight wrapper for Dex PC to reference bit maps.
-class DexPcToReferenceMap {
- public:
-  explicit DexPcToReferenceMap(const uint8_t* data) : data_(data) {
-    CHECK(data_ != nullptr);
-  }
-
-  // The total size of the reference bit map including header.
-  size_t RawSize() const {
-    return EntryWidth() * NumEntries() + 4u /* header */;
-  }
-
-  // The number of entries in the table
-  size_t NumEntries() const {
-    return GetData()[2] | (GetData()[3] << 8);
-  }
-
-  // Get the Dex PC at the given index
-  uint16_t GetDexPc(size_t index) const {
-    size_t entry_offset = index * EntryWidth();
-    if (DexPcWidth() == 1) {
-      return Table()[entry_offset];
-    } else {
-      return Table()[entry_offset] | (Table()[entry_offset + 1] << 8);
-    }
-  }
-
-  // Return address of bitmap encoding what are live references
-  const uint8_t* GetBitMap(size_t index) const {
-    size_t entry_offset = index * EntryWidth();
-    return &Table()[entry_offset + DexPcWidth()];
-  }
-
-  // Find the bitmap associated with the given dex pc
-  const uint8_t* FindBitMap(uint16_t dex_pc, bool error_if_not_present = true) const;
-
-  // The number of bytes used to encode registers
-  size_t RegWidth() const {
-    return GetData()[1] | ((GetData()[0] & ~kRegMapFormatMask) << kRegMapFormatShift);
-  }
-
- private:
-  // Table of num_entries * (dex pc, bitmap)
-  const uint8_t* Table() const {
-    return GetData() + 4;
-  }
-
-  // The format of the table of the PCs for the table
-  RegisterMapFormat Format() const {
-    return static_cast<RegisterMapFormat>(GetData()[0] & kRegMapFormatMask);
-  }
-
-  // Number of bytes used to encode a dex pc
-  size_t DexPcWidth() const {
-    RegisterMapFormat format = Format();
-    switch (format) {
-      case kRegMapFormatCompact8:
-        return 1;
-      case kRegMapFormatCompact16:
-        return 2;
-      default:
-        LOG(FATAL) << "Invalid format " << static_cast<int>(format);
-        return -1;
-    }
-  }
-
-  // The width of an entry in the table
-  size_t EntryWidth() const {
-    return DexPcWidth() + RegWidth();
-  }
-
-  const uint8_t* GetData() const {
-    return data_;
-  }
-
-  static const int kRegMapFormatShift = 5;
-  static const uint8_t kRegMapFormatMask = 0x7;
-
-  const uint8_t* const data_;  // The header and table data
-};
-
-}  // namespace verifier
-}  // namespace art
-
-#endif  // ART_RUNTIME_VERIFIER_DEX_GC_MAP_H_
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 537d9c9..2b96328 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -47,7 +47,6 @@
 #include "scoped_thread_state_change.h"
 #include "utils.h"
 #include "handle_scope-inl.h"
-#include "verifier/dex_gc_map.h"
 
 namespace art {
 namespace verifier {
@@ -123,7 +122,7 @@
                                                         mirror::Class* klass,
                                                         CompilerCallbacks* callbacks,
                                                         bool allow_soft_failures,
-                                                        bool log_hard_failures,
+                                                        LogSeverity log_level,
                                                         std::string* error) {
   if (klass->IsVerified()) {
     return kNoFailure;
@@ -162,7 +161,7 @@
                      class_def,
                      callbacks,
                      allow_soft_failures,
-                     log_hard_failures,
+                     log_level,
                      error);
 }
 
@@ -196,7 +195,7 @@
                                                           Handle<mirror::ClassLoader> class_loader,
                                                           CompilerCallbacks* callbacks,
                                                           bool allow_soft_failures,
-                                                          bool log_hard_failures,
+                                                          LogSeverity log_level,
                                                           bool need_precise_constants,
                                                           std::string* error_string) {
   DCHECK(it != nullptr);
@@ -237,7 +236,7 @@
                                                       it->GetMethodAccessFlags(),
                                                       callbacks,
                                                       allow_soft_failures,
-                                                      log_hard_failures,
+                                                      log_level,
                                                       need_precise_constants,
                                                       &hard_failure_msg);
     if (result.kind == kHardFailure) {
@@ -267,7 +266,7 @@
                                                         const DexFile::ClassDef* class_def,
                                                         CompilerCallbacks* callbacks,
                                                         bool allow_soft_failures,
-                                                        bool log_hard_failures,
+                                                        LogSeverity log_level,
                                                         std::string* error) {
   DCHECK(class_def != nullptr);
   ScopedTrace trace(__FUNCTION__);
@@ -300,7 +299,7 @@
                                                           class_loader,
                                                           callbacks,
                                                           allow_soft_failures,
-                                                          log_hard_failures,
+                                                          log_level,
                                                           false /* need precise constants */,
                                                           error);
   // Virtual methods.
@@ -313,7 +312,7 @@
                                                            class_loader,
                                                            callbacks,
                                                            allow_soft_failures,
-                                                           log_hard_failures,
+                                                           log_level,
                                                            false /* need precise constants */,
                                                            error);
 
@@ -361,15 +360,26 @@
                                                          uint32_t method_access_flags,
                                                          CompilerCallbacks* callbacks,
                                                          bool allow_soft_failures,
-                                                         bool log_hard_failures,
+                                                         LogSeverity log_level,
                                                          bool need_precise_constants,
                                                          std::string* hard_failure_msg) {
   MethodVerifier::FailureData result;
   uint64_t start_ns = kTimeVerifyMethod ? NanoTime() : 0;
 
-  MethodVerifier verifier(self, dex_file, dex_cache, class_loader, class_def, code_item,
-                          method_idx, method, method_access_flags, true, allow_soft_failures,
-                          need_precise_constants, true);
+  MethodVerifier verifier(self,
+                          dex_file,
+                          dex_cache,
+                          class_loader,
+                          class_def,
+                          code_item,
+                          method_idx,
+                          method,
+                          method_access_flags,
+                          true /* can_load_classes */,
+                          allow_soft_failures,
+                          need_precise_constants,
+                          false /* verify to dump */,
+                          true /* allow_thread_suspension */);
   if (verifier.Verify()) {
     // Verification completed, however failures may be pending that didn't cause the verification
     // to hard fail.
@@ -386,6 +396,18 @@
                                                     << PrettyMethod(method_idx, *dex_file) << "\n");
       }
       result.kind = kSoftFailure;
+      if (method != nullptr &&
+          !CanCompilerHandleVerificationFailure(verifier.encountered_failure_types_)) {
+        method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+      }
+    }
+    if (method != nullptr) {
+      if (verifier.HasInstructionThatWillThrow()) {
+        method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+      }
+      if ((verifier.encountered_failure_types_ & VerifyError::VERIFY_ERROR_LOCKING) != 0) {
+        method->SetAccessFlags(method->GetAccessFlags() | kAccMustCountLocks);
+      }
     }
   } else {
     // Bad method data.
@@ -397,9 +419,12 @@
       result.kind = kSoftFailure;
     } else {
       CHECK(verifier.have_pending_hard_failure_);
-      if (VLOG_IS_ON(verifier) || log_hard_failures) {
-        verifier.DumpFailures(LOG(INFO) << "Verification error in "
-                                        << PrettyMethod(method_idx, *dex_file) << "\n");
+      if (VLOG_IS_ON(verifier)) {
+        log_level = LogSeverity::VERBOSE;
+      }
+      if (log_level > LogSeverity::VERBOSE) {
+        verifier.DumpFailures(LOG(log_level) << "Verification error in "
+                                             << PrettyMethod(method_idx, *dex_file) << "\n");
       }
       if (hard_failure_msg != nullptr) {
         CHECK(!verifier.failure_messages_.empty());
@@ -441,9 +466,20 @@
                                                     const DexFile::CodeItem* code_item,
                                                     ArtMethod* method,
                                                     uint32_t method_access_flags) {
-  MethodVerifier* verifier = new MethodVerifier(self, dex_file, dex_cache, class_loader,
-                                                class_def, code_item, dex_method_idx, method,
-                                                method_access_flags, true, true, true, true);
+  MethodVerifier* verifier = new MethodVerifier(self,
+                                                dex_file,
+                                                dex_cache,
+                                                class_loader,
+                                                class_def,
+                                                code_item,
+                                                dex_method_idx,
+                                                method,
+                                                method_access_flags,
+                                                true /* can_load_classes */,
+                                                true /* allow_soft_failures */,
+                                                true /* need_precise_constants */,
+                                                true /* verify_to_dump */,
+                                                true /* allow_thread_suspension */);
   verifier->Verify();
   verifier->DumpFailures(vios->Stream());
   vios->Stream() << verifier->info_messages_.str();
@@ -520,9 +556,20 @@
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
-  MethodVerifier verifier(hs.Self(), m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(),
-                          m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(),
-                          false, true, false, false);
+  MethodVerifier verifier(hs.Self(),
+                          m->GetDexFile(),
+                          dex_cache,
+                          class_loader,
+                          &m->GetClassDef(),
+                          m->GetCodeItem(),
+                          m->GetDexMethodIndex(),
+                          m,
+                          m->GetAccessFlags(),
+                          false /* can_load_classes */,
+                          true  /* allow_soft_failures */,
+                          false /* need_precise_constants */,
+                          false /* verify_to_dump */,
+                          false /* allow_thread_suspension */);
   verifier.interesting_dex_pc_ = dex_pc;
   verifier.monitor_enter_dex_pcs_ = monitor_enter_dex_pcs;
   verifier.FindLocksAtDexPc();
@@ -564,9 +611,20 @@
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
-  MethodVerifier verifier(hs.Self(), m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(),
-                          m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), true,
-                          true, false, true);
+  MethodVerifier verifier(hs.Self(),
+                          m->GetDexFile(),
+                          dex_cache,
+                          class_loader,
+                          &m->GetClassDef(),
+                          m->GetCodeItem(),
+                          m->GetDexMethodIndex(),
+                          m,
+                          m->GetAccessFlags(),
+                          true  /* can_load_classes */,
+                          true  /* allow_soft_failures */,
+                          false /* need_precise_constants */,
+                          false /* verify_to_dump */,
+                          true  /* allow_thread_suspension */);
   return verifier.FindAccessedFieldAtDexPc(dex_pc);
 }
 
@@ -593,9 +651,20 @@
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
-  MethodVerifier verifier(hs.Self(), m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(),
-                          m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), true,
-                          true, false, true);
+  MethodVerifier verifier(hs.Self(),
+                          m->GetDexFile(),
+                          dex_cache,
+                          class_loader,
+                          &m->GetClassDef(),
+                          m->GetCodeItem(),
+                          m->GetDexMethodIndex(),
+                          m,
+                          m->GetAccessFlags(),
+                          true  /* can_load_classes */,
+                          true  /* allow_soft_failures */,
+                          false /* need_precise_constants */,
+                          false /* verify_to_dump */,
+                          true  /* allow_thread_suspension */);
   return verifier.FindInvokedMethodAtDexPc(dex_pc);
 }
 
@@ -667,13 +736,15 @@
 
   // If there aren't any instructions, make sure that's expected, then exit successfully.
   if (code_item_ == nullptr) {
+    // Only native or abstract methods may not have code.
+    if ((method_access_flags_ & (kAccNative | kAccAbstract)) == 0) {
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "zero-length code in concrete non-native method";
+      return false;
+    }
+
     // This should have been rejected by the dex file verifier. Only do in debug build.
+    // Note: the dex file verifier also rejects the case above, starting with dex version 37.
     if (kIsDebugBuild) {
-      // Only native or abstract methods may not have code.
-      if ((method_access_flags_ & (kAccNative | kAccAbstract)) == 0) {
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "zero-length code in concrete non-native method";
-        return false;
-      }
       if ((method_access_flags_ & kAccAbstract) != 0) {
         // Abstract methods are not allowed to have the following flags.
         static constexpr uint32_t kForbidden =
@@ -731,9 +802,16 @@
         } else if (method_access_flags_ & kAccFinal) {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have final methods";
           return false;
-        } else if (!(method_access_flags_ & kAccPublic)) {
-          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have non-public members";
-          return false;
+        } else {
+          uint32_t access_flag_options = kAccPublic;
+          if (dex_file_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+            access_flag_options |= kAccPrivate;
+          }
+          if (!(method_access_flags_ & access_flag_options)) {
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+                << "interfaces may not have protected or package-private members";
+            return false;
+          }
         }
       }
     }
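
Two behavioral points in the hunk above. First, the zero-length-code check is hoisted out of the kIsDebugBuild block, so a concrete non-native method without code now hard-fails in every build, not just debug ones. Second, once the dex file is version 37 or newer (DexFile::kDefaultMethodsVersion), interfaces may carry private members alongside public ones, which default methods require. The gate in isolation (a sketch; dex_version and method_access_flags stand in for the member fields used above):

    uint32_t allowed = kAccPublic;
    if (dex_version >= 37 /* DexFile::kDefaultMethodsVersion */) {
      allowed |= kAccPrivate;  // private interface methods arrive with default methods
    }
    if ((method_access_flags & allowed) == 0) {
      // protected or package-private: hard verification failure
    }
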
@@ -2425,6 +2503,10 @@
         if (!array_type.IsArrayTypes()) {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid fill-array-data with array type "
                                             << array_type;
+        } else if (array_type.IsUnresolvedTypes()) {
+          // If it's an unresolved array type, it must be non-primitive.
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid fill-array-data for array of type "
+                                            << array_type;
         } else {
           const RegType& component_type = reg_types_.GetComponentType(array_type, GetClassLoader());
           DCHECK(!component_type.IsConflict());
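
Rejecting fill-array-data on an unresolved array type is safe because that instruction only ever targets primitive arrays, and primitive array descriptors always resolve; an array type that is still unresolved must therefore denote a reference array. The descriptor-shape argument, mirroring the DCHECK added to reg_type.cc further down (descriptor stands for the type's dex descriptor string):

    // A primitive array such as "[I" always resolves, so an unresolved array
    // descriptor must begin with "[L" or "[[" -- i.e. a reference array.
    bool is_reference_array =
        descriptor[0] == '[' && (descriptor[1] == 'L' || descriptor[1] == '[');
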
@@ -3731,9 +3813,12 @@
   // Note: this check must be after the initializer check, as those are required to fail a class,
   //       while this check implies an IncompatibleClassChangeError.
   if (klass->IsInterface()) {
-    // methods called on interfaces should be invoke-interface, invoke-super, or invoke-static.
+    // methods called on interfaces should be invoke-interface, invoke-super, invoke-direct (if
+    // dex file version is 37 or greater), or invoke-static.
     if (method_type != METHOD_INTERFACE &&
         method_type != METHOD_STATIC &&
+        ((dex_file_->GetVersion() < DexFile::kDefaultMethodsVersion) ||
+         method_type != METHOD_DIRECT) &&
         method_type != METHOD_SUPER) {
       Fail(VERIFY_ERROR_CLASS_CHANGE)
           << "non-interface method " << PrettyMethod(dex_method_idx, *dex_file_)
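
The compound condition is easiest to read as an allow-list: on an interface class, invoke-interface, invoke-static, and invoke-super are always acceptable, and invoke-direct becomes acceptable from dex version 37 on (it is how private interface methods are called). Equivalently (a sketch):

    bool direct_ok = dex_file_->GetVersion() >= DexFile::kDefaultMethodsVersion &&
                     method_type == METHOD_DIRECT;
    bool reject = method_type != METHOD_INTERFACE &&
                  method_type != METHOD_STATIC &&
                  method_type != METHOD_SUPER &&
                  !direct_ok;
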
@@ -4016,8 +4101,8 @@
                                     << " to super " << PrettyMethod(res_method);
         return nullptr;
       }
-      mirror::Class* super_klass = super.GetClass();
-      if (res_method->GetMethodIndex() >= super_klass->GetVTableLength()) {
+      if (!reference_class->IsAssignableFrom(GetDeclaringClass().GetClass()) ||
+          (res_method->GetMethodIndex() >= super.GetClass()->GetVTableLength())) {
         Fail(VERIFY_ERROR_NO_METHOD) << "invalid invoke-super from "
                                     << PrettyMethod(dex_method_idx_, *dex_file_)
                                     << " to super " << super
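
The rewritten guard adds a structural precondition to the existing bounds check: an invoke-super must name a class the verifying class actually derives from, rather than merely supplying an in-range vtable index. Condensed from the hunk above:

    bool invalid_super =
        !reference_class->IsAssignableFrom(GetDeclaringClass().GetClass()) ||
        res_method->GetMethodIndex() >= super.GetClass()->GetVTableLength();
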
@@ -4209,6 +4294,7 @@
       const RegType& precise_type = reg_types_.FromUninitialized(res_type);
       work_line_->SetRegisterType<LockOp::kClear>(this, inst->VRegA_22c(), precise_type);
     } else {
+      DCHECK(!res_type.IsUnresolvedMergedReference());
       // Verify each register. If "arg_count" is bad, VerifyRegisterType() will run off the end of
       // the list and fail. It's legal, if silly, for arg_count to be zero.
       const RegType& expected_type = reg_types_.GetComponentType(res_type, GetClassLoader());
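
The new DCHECK holds because res_type for filled-new-array is produced by resolving the instruction's single type index, and an UnresolvedMergedType can only arise from joining register lines at control-flow merges, never from resolving one dex type. A sketch of the origin (ResolveClassAndCheckAccess is the verifier's existing resolution helper):

    // filled-new-array carries one type_idx; resolving a single index can
    // yield an unresolved type, but never an unresolved *merged* type.
    const RegType& res_type = ResolveClassAndCheckAccess(inst->VRegB_35c());
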
@@ -4253,6 +4339,19 @@
       }
     } else if (!array_type.IsArrayTypes()) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aget";
+    } else if (array_type.IsUnresolvedMergedReference()) {
+      // Unresolved array types must be reference array types.
+      if (is_primitive) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "reference array type " << array_type
+                    << " source for category 1 aget";
+      } else {
+        Fail(VERIFY_ERROR_NO_CLASS) << "cannot verify aget for " << array_type
+            << " because of missing class";
+        // Approximate with java.lang.Object[].
+        work_line_->SetRegisterType<LockOp::kClear>(this,
+                                                    inst->VRegA_23x(),
+                                                    reg_types_.JavaLangObject(false));
+      }
     } else {
       /* verify the class */
       const RegType& component_type = reg_types_.GetComponentType(array_type, GetClassLoader());
@@ -4363,6 +4462,15 @@
       work_line_->VerifyRegisterType(this, inst->VRegA_23x(), *modified_reg_type);
     } else if (!array_type.IsArrayTypes()) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aput";
+    } else if (array_type.IsUnresolvedMergedReference()) {
+      // Unresolved array types must be reference array types.
+      if (is_primitive) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "put insn has type '" << insn_type
+                                          << "' but unresolved type '" << array_type << "'";
+      } else {
+        Fail(VERIFY_ERROR_NO_CLASS) << "cannot verify aput for " << array_type
+                                    << " because of missing class";
+      }
     } else {
       const RegType& component_type = reg_types_.GetComponentType(array_type, GetClassLoader());
       const uint32_t vregA = inst->VRegA_23x();
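
Both array-access hunks handle an UnresolvedMergedType array operand the same way: a primitive-typed access is a hard failure, since an unresolved array type can only denote a reference array, while a reference-typed access is the soft VERIFY_ERROR_NO_CLASS (the component class may simply be absent at verification time). aget additionally has a destination register to keep typed, and approximating it as imprecise java.lang.Object is sound because every element of any reference array is assignable to Object:

    if (is_primitive) {
      // e.g. aget on an int source: impossible against a reference array -> hard failure
    } else {
      // soft failure; for aget, also approximate the destination register:
      work_line_->SetRegisterType<LockOp::kClear>(this, inst->VRegA_23x(),
                                                  reg_types_.JavaLangObject(false /* precise */));
    }
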
@@ -4481,8 +4589,18 @@
       // Trying to access C1.field1 using reference of type C2, which is neither C1 or a sub-class
       // of C1. For resolution to occur the declared class of the field must be compatible with
       // obj_type, we've discovered this wasn't so, so report the field didn't exist.
-      Fail(VERIFY_ERROR_NO_FIELD) << "cannot access instance field " << PrettyField(field)
-                                  << " from object of type " << obj_type;
+      VerifyError type;
+      bool is_aot = Runtime::Current()->IsAotCompiler();
+      if (is_aot && (field_klass.IsUnresolvedTypes() || obj_type.IsUnresolvedTypes())) {
+        // Compiler & unresolved types involved, retry at runtime.
+        type = VerifyError::VERIFY_ERROR_NO_CLASS;
+      } else {
+        // Classes known (resolved; and thus assignability check is precise), or we are at runtime
+        // and still missing classes. This is a hard failure.
+        type = VerifyError::VERIFY_ERROR_BAD_CLASS_HARD;
+      }
+      Fail(type) << "cannot access instance field " << PrettyField(field)
+                 << " from object of type " << obj_type;
       return nullptr;
     } else {
       return field;
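
The field-access hunk keys the failure severity on where verification is running. During AOT compilation an unresolved type may only mean the dexopt classpath is incomplete, so the soft VERIFY_ERROR_NO_CLASS defers the question to runtime; at runtime, or with both classes resolved, the assignability answer is final and the failure is hard. Condensed (involves_unresolved_types stands in for the two IsUnresolvedTypes() calls above):

    VerifyError type =
        (Runtime::Current()->IsAotCompiler() && involves_unresolved_types)
            ? VERIFY_ERROR_NO_CLASS          // retry at runtime
            : VERIFY_ERROR_BAD_CLASS_HARD;   // precise answer available: reject
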
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 6d8e1ab..2592a21 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -43,7 +43,6 @@
 
 namespace verifier {
 
-class DexPcToReferenceMap;
 class MethodVerifier;
 class RegisterLine;
 using RegisterLineArenaUniquePtr = std::unique_ptr<RegisterLine, RegisterLineArenaDelete>;
@@ -143,12 +142,20 @@
     kHardFailure,
   };
 
-  /* Verify a class. Returns "kNoFailure" on success. */
+  static bool CanCompilerHandleVerificationFailure(uint32_t encountered_failure_types) {
+    constexpr uint32_t unresolved_mask = verifier::VerifyError::VERIFY_ERROR_NO_CLASS
+        | verifier::VerifyError::VERIFY_ERROR_ACCESS_CLASS
+        | verifier::VerifyError::VERIFY_ERROR_ACCESS_FIELD
+        | verifier::VerifyError::VERIFY_ERROR_ACCESS_METHOD;
+    return (encountered_failure_types & (~unresolved_mask)) == 0;
+  }
+
+  // Verify a class. Returns "kNoFailure" on success.
   static FailureKind VerifyClass(Thread* self,
                                  mirror::Class* klass,
                                  CompilerCallbacks* callbacks,
                                  bool allow_soft_failures,
-                                 bool log_hard_failures,
+                                 LogSeverity log_level,
                                  std::string* error)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static FailureKind VerifyClass(Thread* self,
@@ -158,7 +165,7 @@
                                  const DexFile::ClassDef* class_def,
                                  CompilerCallbacks* callbacks,
                                  bool allow_soft_failures,
-                                 bool log_hard_failures,
+                                 LogSeverity log_level,
                                  std::string* error)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
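
CanCompilerHandleVerificationFailure lets the compiler distinguish failure sets containing only unresolved classes or access violations, all of which compiled code can re-check at runtime, from anything worse. A hypothetical call site (the accessor name here is an assumption for illustration):

    uint32_t failures = verifier->GetEncounteredFailureTypes();  // assumed accessor
    if (MethodVerifier::CanCompilerHandleVerificationFailure(failures)) {
      // Only NO_CLASS / ACCESS_* bits set: keep the compiled code and rely on
      // runtime checks, instead of punting the method to the interpreter.
    }
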
@@ -220,35 +227,6 @@
     return can_load_classes_;
   }
 
-  MethodVerifier(Thread* self,
-                 const DexFile* dex_file,
-                 Handle<mirror::DexCache> dex_cache,
-                 Handle<mirror::ClassLoader> class_loader,
-                 const DexFile::ClassDef* class_def,
-                 const DexFile::CodeItem* code_item,
-                 uint32_t method_idx,
-                 ArtMethod* method,
-                 uint32_t access_flags,
-                 bool can_load_classes,
-                 bool allow_soft_failures,
-                 bool need_precise_constants,
-                 bool allow_thread_suspension)
-          SHARED_REQUIRES(Locks::mutator_lock_)
-      : MethodVerifier(self,
-                       dex_file,
-                       dex_cache,
-                       class_loader,
-                       class_def,
-                       code_item,
-                       method_idx,
-                       method,
-                       access_flags,
-                       can_load_classes,
-                       allow_soft_failures,
-                       need_precise_constants,
-                       false,
-                       allow_thread_suspension) {}
-
   ~MethodVerifier();
 
   // Run verification on the method. Returns true if verification completes and false if the input
@@ -304,20 +282,6 @@
   }
 
  private:
-  void UninstantiableError(const char* descriptor);
-  static bool IsInstantiableOrPrimitive(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Is the method being verified a constructor? See the comment on the field.
-  bool IsConstructor() const {
-    return is_constructor_;
-  }
-
-  // Is the method verified static?
-  bool IsStatic() const {
-    return (method_access_flags_ & kAccStatic) != 0;
-  }
-
-  // Private constructor for dumping.
   MethodVerifier(Thread* self,
                  const DexFile* dex_file,
                  Handle<mirror::DexCache> dex_cache,
@@ -334,6 +298,19 @@
                  bool allow_thread_suspension)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void UninstantiableError(const char* descriptor);
+  static bool IsInstantiableOrPrimitive(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Is the method being verified a constructor? See the comment on the field.
+  bool IsConstructor() const {
+    return is_constructor_;
+  }
+
+  // Is the method verified static?
+  bool IsStatic() const {
+    return (method_access_flags_ & kAccStatic) != 0;
+  }
+
   // Adds the given string to the beginning of the last failure message.
   void PrependToLastFailMessage(std::string);
 
@@ -362,7 +339,7 @@
                                    Handle<mirror::ClassLoader> class_loader,
                                    CompilerCallbacks* callbacks,
                                    bool allow_soft_failures,
-                                   bool log_hard_failures,
+                                   LogSeverity log_level,
                                    bool need_precise_constants,
                                    std::string* error_string)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -388,7 +365,7 @@
                                   uint32_t method_access_flags,
                                   CompilerCallbacks* callbacks,
                                   bool allow_soft_failures,
-                                  bool log_hard_failures,
+                                  LogSeverity log_level,
                                   bool need_precise_constants,
                                   std::string* hard_failure_msg)
       SHARED_REQUIRES(Locks::mutator_lock_);
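
Replacing the bool log_hard_failures parameter with a LogSeverity gives callers a dial rather than a switch: a batch verifier can log hard failures at WARNING while a debugging path escalates to ERROR or FATAL. The migrated test below shows the shape of an updated call:

    MethodVerifier::VerifyClass(self, klass,
                                nullptr /* callbacks */,
                                true /* allow_soft_failures */,
                                LogSeverity::WARNING,  // was: bool log_hard_failures
                                &error_msg);
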
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index 946f842..b036313 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -37,8 +37,13 @@
 
     // Verify the class
     std::string error_msg;
-    ASSERT_TRUE(MethodVerifier::VerifyClass(self, klass, nullptr, true, true, &error_msg)
-                    == MethodVerifier::kNoFailure) << error_msg;
+    MethodVerifier::FailureKind failure = MethodVerifier::VerifyClass(self,
+                                                                      klass,
+                                                                      nullptr,
+                                                                      true,
+                                                                      LogSeverity::WARNING,
+                                                                      &error_msg);
+    ASSERT_TRUE(failure == MethodVerifier::kNoFailure) << error_msg;
   }
 
   void VerifyDexFile(const DexFile& dex)
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 0894f5d..308c2aa 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -517,9 +517,21 @@
   }
 }
 
+bool RegType::IsJavaLangObject() const SHARED_REQUIRES(Locks::mutator_lock_) {
+  return IsReference() && GetClass()->IsObjectClass();
+}
+
 bool RegType::IsObjectArrayTypes() const SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (IsUnresolvedTypes() && !IsUnresolvedMergedReference() && !IsUnresolvedSuperClass()) {
-    // Primitive arrays will always resolve
+  if (IsUnresolvedTypes()) {
+    DCHECK(!IsUnresolvedMergedReference());
+
+    if (IsUnresolvedSuperClass()) {
+      // Cannot be an array, as the superclass of arrays is java.lang.Object (which cannot be
+      // unresolved).
+      return false;
+    }
+
+    // Primitive arrays will always resolve.
     DCHECK(descriptor_[1] == 'L' || descriptor_[1] == '[');
     return descriptor_[0] == '[';
   } else if (HasClass()) {
@@ -530,12 +542,15 @@
   }
 }
 
-bool RegType::IsJavaLangObject() const SHARED_REQUIRES(Locks::mutator_lock_) {
-  return IsReference() && GetClass()->IsObjectClass();
-}
-
 bool RegType::IsArrayTypes() const SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (IsUnresolvedTypes() && !IsUnresolvedMergedReference() && !IsUnresolvedSuperClass()) {
+  if (IsUnresolvedTypes()) {
+    DCHECK(!IsUnresolvedMergedReference());
+
+    if (IsUnresolvedSuperClass()) {
+      // Cannot be an array, as the superclass of arrays is java.lang.Object (which cannot be
+      // unresolved).
+      return false;
+    }
     return descriptor_[0] == '[';
   } else if (HasClass()) {
     return GetClass()->IsArrayClass();
@@ -793,11 +808,50 @@
   }
 }
 void UnresolvedMergedType::CheckInvariants() const {
+  CHECK(reg_type_cache_ != nullptr);
+
   // Unresolved merged types: merged types should be defined.
   CHECK(descriptor_.empty()) << *this;
   CHECK(klass_.IsNull()) << *this;
+
+  CHECK(!resolved_part_.IsConflict());
   CHECK(resolved_part_.IsReferenceTypes());
   CHECK(!resolved_part_.IsUnresolvedTypes());
+
+  CHECK(resolved_part_.IsZero() ||
+        !(resolved_part_.IsArrayTypes() && !resolved_part_.IsObjectArrayTypes()));
+
+  CHECK_GT(unresolved_types_.NumSetBits(), 0U);
+  bool unresolved_is_array =
+      reg_type_cache_->GetFromId(unresolved_types_.GetHighestBitSet()).IsArrayTypes();
+  for (uint32_t idx : unresolved_types_.Indexes()) {
+    const RegType& t = reg_type_cache_->GetFromId(idx);
+    CHECK_EQ(unresolved_is_array, t.IsArrayTypes());
+  }
+
+  if (!resolved_part_.IsZero()) {
+    CHECK_EQ(resolved_part_.IsArrayTypes(), unresolved_is_array);
+  }
+}
+
+bool UnresolvedMergedType::IsArrayTypes() const {
+  // For a merge to be an array, both the resolved and the unresolved part need to be object
+  // arrays.
+  // (Note: we encode a missing resolved part [which doesn't need to be an array] as zero.)
+
+  if (!resolved_part_.IsZero() && !resolved_part_.IsArrayTypes()) {
+    return false;
+  }
+
+  // It is enough to check just one of the merged types. Otherwise the merge should have been
+  // collapsed (checked in CheckInvariants on construction).
+  uint32_t idx = unresolved_types_.GetHighestBitSet();
+  const RegType& unresolved = reg_type_cache_->GetFromId(idx);
+  return unresolved.IsArrayTypes();
+}
+bool UnresolvedMergedType::IsObjectArrayTypes() const {
+  // Same as IsArrayTypes, as primitive arrays are always resolved.
+  return IsArrayTypes();
 }
 
 void UnresolvedReferenceType::CheckInvariants() const {
@@ -824,6 +878,14 @@
     return false;
   }
 
+  if (IsUnresolvedMergedReference() || src.IsUnresolvedMergedReference()) {
+    // An unresolved array type means that it's an array of some reference type. Reference arrays
+    // can never be assigned to primitive-type arrays, and vice versa. So it is a soft error if
+    // both arrays are reference arrays, otherwise a hard error.
+    *soft_error = IsObjectArrayTypes() && src.IsObjectArrayTypes();
+    return false;
+  }
+
   const RegType& cmp1 = reg_types.GetComponentType(*this, class_loader.Get());
   const RegType& cmp2 = reg_types.GetComponentType(src, class_loader.Get());
 
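
The early-out in the array-assignability check encodes why the two unresolved cases differ in severity: if both sides are reference arrays, the assignment might legitimately succeed once the missing classes resolve, so the failure is soft; if either side is a primitive array, no amount of resolution can make it assignable to or from a reference array, so it is hard. Either way, assignability cannot be proven here:

    *soft_error = IsObjectArrayTypes() && src.IsObjectArrayTypes();
    return false;
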
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 7c7981e..4837490 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -172,8 +172,8 @@
   }
   virtual bool HasClassVirtual() const { return false; }
   bool IsJavaLangObject() const SHARED_REQUIRES(Locks::mutator_lock_);
-  bool IsArrayTypes() const SHARED_REQUIRES(Locks::mutator_lock_);
-  bool IsObjectArrayTypes() const SHARED_REQUIRES(Locks::mutator_lock_);
+  virtual bool IsArrayTypes() const SHARED_REQUIRES(Locks::mutator_lock_);
+  virtual bool IsObjectArrayTypes() const SHARED_REQUIRES(Locks::mutator_lock_);
   Primitive::Type GetPrimitiveType() const;
   bool IsJavaLangObjectArray() const
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -905,6 +905,9 @@
 
   bool IsUnresolvedTypes() const OVERRIDE { return true; }
 
+  bool IsArrayTypes() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+  bool IsObjectArrayTypes() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+
   std::string Dump() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index b171b75..71c2a90 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -347,29 +347,39 @@
                        kDefaultArenaBitVectorBytes * kBitsPerByte,  // Allocate at least 8 bytes.
                        true);                                       // Is expandable.
   const RegType* left_resolved;
+  bool left_unresolved_is_array;
   if (left.IsUnresolvedMergedReference()) {
-    const UnresolvedMergedType* left_merge = down_cast<const UnresolvedMergedType*>(&left);
-    types.Copy(&left_merge->GetUnresolvedTypes());
-    left_resolved = &left_merge->GetResolvedPart();
+    const UnresolvedMergedType& left_merge = *down_cast<const UnresolvedMergedType*>(&left);
+
+    types.Copy(&left_merge.GetUnresolvedTypes());
+    left_resolved = &left_merge.GetResolvedPart();
+    left_unresolved_is_array = left.IsArrayTypes();
   } else if (left.IsUnresolvedTypes()) {
     types.ClearAllBits();
     types.SetBit(left.GetId());
     left_resolved = &Zero();
+    left_unresolved_is_array = left.IsArrayTypes();
   } else {
     types.ClearAllBits();
     left_resolved = &left;
+    left_unresolved_is_array = false;
   }
 
   const RegType* right_resolved;
+  bool right_unresolved_is_array;
   if (right.IsUnresolvedMergedReference()) {
-    const UnresolvedMergedType* right_merge = down_cast<const UnresolvedMergedType*>(&right);
-    types.Union(&right_merge->GetUnresolvedTypes());
-    right_resolved = &right_merge->GetResolvedPart();
+    const UnresolvedMergedType& right_merge = *down_cast<const UnresolvedMergedType*>(&right);
+
+    types.Union(&right_merge.GetUnresolvedTypes());
+    right_resolved = &right_merge.GetResolvedPart();
+    right_unresolved_is_array = right.IsArrayTypes();
   } else if (right.IsUnresolvedTypes()) {
     types.SetBit(right.GetId());
     right_resolved = &Zero();
+    right_unresolved_is_array = right.IsArrayTypes();
   } else {
     right_resolved = &right;
+    right_unresolved_is_array = false;
   }
 
   // Merge the resolved parts. Left and right might be equal, so use SafeMerge.
@@ -379,6 +389,23 @@
     return Conflict();
   }
 
+  bool resolved_merged_is_array = resolved_parts_merged.IsArrayTypes();
+  if (left_unresolved_is_array || right_unresolved_is_array || resolved_merged_is_array) {
+    // Arrays involved, see if we need to merge to Object.
+
+    // Is the resolved part a primitive array?
+    if (resolved_merged_is_array && !resolved_parts_merged.IsObjectArrayTypes()) {
+      return JavaLangObject(false /* precise */);
+    }
+
+    // Is any part not an array (but exists)?
+    if ((!left_unresolved_is_array && left_resolved != &left) ||
+        (!right_unresolved_is_array && right_resolved != &right) ||
+        !resolved_merged_is_array) {
+      return JavaLangObject(false /* precise */);
+    }
+  }
+
   // Check if entry already exists.
   for (size_t i = primitive_count_; i < entries_.size(); i++) {
     const RegType* cur_entry = entries_[i];
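
The "merge to Object" block implements the rule that arrays only merge with arrays of compatible kind; anything else collapses to the common supertype java.lang.Object. Worked examples under that rule (the class names are assumptions):

    // merge(int[],            unresolved Foo[])  -> java.lang.Object    (primitive vs reference array)
    // merge(java.lang.Thread, unresolved Bar[])  -> java.lang.Object    (non-array vs array)
    // merge(String[],         unresolved Baz[])  -> UnresolvedMergedType (both reference arrays)
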
@@ -584,6 +611,7 @@
   if (!array.IsArrayTypes()) {
     return Conflict();
   } else if (array.IsUnresolvedTypes()) {
+    DCHECK(!array.IsUnresolvedMergedReference());  // Caller must make sure not to ask for this.
     const std::string descriptor(array.GetDescriptor().as_string());
     return FromDescriptor(loader, descriptor.c_str() + 1, false);
   } else {
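
The DCHECK records a caller contract: an UnresolvedMergedType has no single descriptor, so component-type queries on it must be filtered out beforehand, exactly as the aget/aput hunks above now do. For an ordinary unresolved array, the component descriptor is simply the array descriptor with its leading '[' stripped:

    // "[[Ljava/lang/String;" -> "[Ljava/lang/String;"
    // "[LFoo;"               -> "LFoo;"
    return FromDescriptor(loader, descriptor.c_str() + 1, false /* precise */);
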
diff --git a/runtime/verifier/register_line-inl.h b/runtime/verifier/register_line-inl.h
index 29d87c4..d2f3485 100644
--- a/runtime/verifier/register_line-inl.h
+++ b/runtime/verifier/register_line-inl.h
@@ -127,17 +127,6 @@
   }
 }
 
-inline size_t RegisterLine::GetMaxNonZeroReferenceReg(MethodVerifier* verifier,
-                                                      size_t max_ref_reg) const {
-  size_t i = static_cast<int>(max_ref_reg) < 0 ? 0 : max_ref_reg;
-  for (; i < num_regs_; i++) {
-    if (GetRegisterType(verifier, i).IsNonZeroReferenceTypes()) {
-      max_ref_reg = i;
-    }
-  }
-  return max_ref_reg;
-}
-
 inline bool RegisterLine::VerifyRegisterType(MethodVerifier* verifier, uint32_t vsrc,
                                              const RegType& check_type) {
   // Verify the src register type against the check type refining the type of the register
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index 82c371d..71aa94e 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -543,24 +543,5 @@
   return changed;
 }
 
-void RegisterLine::WriteReferenceBitMap(MethodVerifier* verifier,
-                                        std::vector<uint8_t>* data, size_t max_bytes) {
-  for (size_t i = 0; i < num_regs_; i += 8) {
-    uint8_t val = 0;
-    for (size_t j = 0; j < 8 && (i + j) < num_regs_; j++) {
-      // Note: we write 1 for a Reference but not for Null
-      if (GetRegisterType(verifier, i + j).IsNonZeroReferenceTypes()) {
-        val |= 1 << j;
-      }
-    }
-    if ((i / 8) >= max_bytes) {
-      DCHECK_EQ(0, val);
-      continue;
-    }
-    DCHECK_LT(i / 8, max_bytes) << "val=" << static_cast<uint32_t>(val);
-    data->push_back(val);
-  }
-}
-
 }  // namespace verifier
 }  // namespace art
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index 15ae202..56846c1 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -346,11 +346,6 @@
   bool MergeRegisters(MethodVerifier* verifier, const RegisterLine* incoming_line)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  size_t GetMaxNonZeroReferenceReg(MethodVerifier* verifier, size_t max_ref_reg) const;
-
-  // Write a bit at each register location that holds a reference.
-  void WriteReferenceBitMap(MethodVerifier* verifier, std::vector<uint8_t>* data, size_t max_bytes);
-
   size_t GetMonitorEnterCount() const {
     return monitors_.size();
   }
diff --git a/runtime/vmap_table.h b/runtime/vmap_table.h
deleted file mode 100644
index db9e1ea..0000000
--- a/runtime/vmap_table.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_VMAP_TABLE_H_
-#define ART_RUNTIME_VMAP_TABLE_H_
-
-#include "base/logging.h"
-#include "leb128.h"
-#include "stack.h"
-
-namespace art {
-
-class VmapTable {
- public:
-  // For efficient encoding of special values, entries are adjusted by 2.
-  static constexpr uint16_t kEntryAdjustment = 2u;
-  static constexpr uint16_t kAdjustedFpMarker = static_cast<uint16_t>(0xffffu + kEntryAdjustment);
-
-  explicit VmapTable(const uint8_t* table) : table_(table) {
-  }
-
-  // Look up nth entry, not called from performance critical code.
-  uint16_t operator[](size_t n) const {
-    const uint8_t* table = table_;
-    size_t size = DecodeUnsignedLeb128(&table);
-    CHECK_LT(n, size);
-    uint16_t adjusted_entry = DecodeUnsignedLeb128(&table);
-    for (size_t i = 0; i < n; ++i) {
-      adjusted_entry = DecodeUnsignedLeb128(&table);
-    }
-    return adjusted_entry - kEntryAdjustment;
-  }
-
-  size_t Size() const {
-    const uint8_t* table = table_;
-    return DecodeUnsignedLeb128(&table);
-  }
-
-  // Is the dex register 'vreg' in the context or on the stack? Should not be called when the
-  // 'kind' is unknown or constant.
-  bool IsInContext(size_t vreg, VRegKind kind, uint32_t* vmap_offset) const {
-    DCHECK(kind == kReferenceVReg || kind == kIntVReg || kind == kFloatVReg ||
-           kind == kLongLoVReg || kind == kLongHiVReg || kind == kDoubleLoVReg ||
-           kind == kDoubleHiVReg || kind == kImpreciseConstant);
-    *vmap_offset = 0xEBAD0FF5;
-    // TODO: take advantage of the registers being ordered
-    // TODO: we treat kImpreciseConstant as an integer below, need to ensure that such values
-    //       are never promoted to floating point registers.
-    bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-    bool in_floats = false;
-    const uint8_t* table = table_;
-    uint16_t adjusted_vreg = vreg + kEntryAdjustment;
-    size_t end = DecodeUnsignedLeb128(&table);
-    bool high_reg = (kind == kLongHiVReg) || (kind == kDoubleHiVReg);
-    bool target64 = (kRuntimeISA == kArm64) || (kRuntimeISA == kX86_64) || (kRuntimeISA == kMips64);
-    if (target64 && high_reg) {
-      // Wide promoted registers are associated with the sreg of the low portion.
-      adjusted_vreg--;
-    }
-    for (size_t i = 0; i < end; ++i) {
-      // Stop if we find what we are looking for.
-      uint16_t adjusted_entry = DecodeUnsignedLeb128(&table);
-      if ((adjusted_entry == adjusted_vreg) && (in_floats == is_float)) {
-        *vmap_offset = i;
-        return true;
-      }
-      // 0xffff is the marker for LR (return PC on x86), following it are spilled float registers.
-      if (adjusted_entry == kAdjustedFpMarker) {
-        in_floats = true;
-      }
-    }
-    return false;
-  }
-
-  // Compute the register number that corresponds to the entry in the vmap (vmap_offset, computed
-  // by IsInContext above). If the kind is floating point then the result will be a floating point
-  // register number, otherwise it will be an integer register number.
-  uint32_t ComputeRegister(uint32_t spill_mask, uint32_t vmap_offset, VRegKind kind) const {
-    // Compute the register we need to load from the context.
-    DCHECK(kind == kReferenceVReg || kind == kIntVReg || kind == kFloatVReg ||
-           kind == kLongLoVReg || kind == kLongHiVReg || kind == kDoubleLoVReg ||
-           kind == kDoubleHiVReg || kind == kImpreciseConstant);
-    // TODO: we treat kImpreciseConstant as an integer below, need to ensure that such values
-    //       are never promoted to floating point registers.
-    bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-    uint32_t matches = 0;
-    if (UNLIKELY(is_float)) {
-      const uint8_t* table = table_;
-      DecodeUnsignedLeb128(&table);  // Skip size.
-      while (DecodeUnsignedLeb128(&table) != kAdjustedFpMarker) {
-        matches++;
-      }
-      matches++;
-    }
-    CHECK_LT(vmap_offset - matches, static_cast<uint32_t>(POPCOUNT(spill_mask)));
-    uint32_t spill_shifts = 0;
-    while (matches != (vmap_offset + 1)) {
-      DCHECK_NE(spill_mask, 0u);
-      matches += spill_mask & 1;  // Add 1 if the low bit is set
-      spill_mask >>= 1;
-      spill_shifts++;
-    }
-    spill_shifts--;  // wind back one as we want the last match
-    return spill_shifts;
-  }
-
- private:
-  const uint8_t* const table_;
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_VMAP_TABLE_H_
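
Deleting vmap_table.h appears to retire the last of the Quick-era method metadata touched by this change; the optimizing compiler's stack maps carry the equivalent register-location information. For reference, the table was a ULEB128-encoded list [size, entry_0, entry_1, ...] with entries biased by kEntryAdjustment, and the removed operator[] amounted to (a sketch using ART's real DecodeUnsignedLeb128):

    const uint8_t* p = table;
    size_t size = DecodeUnsignedLeb128(&p);  // element count
    CHECK_LT(n, size);
    uint32_t adjusted = 0;
    for (size_t i = 0; i <= n; ++i) {
      adjusted = DecodeUnsignedLeb128(&p);   // entries are stored back-to-back
    }
    uint16_t vreg = adjusted - kEntryAdjustment;  // undo the +2 bias
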
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index b76555b..f29301d 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include <android/log.h>
 #else
 #include <stdarg.h>
@@ -103,7 +103,7 @@
   va_list ap;
   va_start(ap, format);
   vsnprintf(buf, sizeof(buf), format, ap);
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   __android_log_write(ANDROID_LOG_ERROR, "libsigchain", buf);
 #else
   std::cout << buf << "\n";
diff --git a/sigchainlib/sigchain_dummy.cc b/sigchainlib/sigchain_dummy.cc
index dfe0c6f..aa3c360 100644
--- a/sigchainlib/sigchain_dummy.cc
+++ b/sigchainlib/sigchain_dummy.cc
@@ -17,7 +17,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #include <android/log.h>
 #else
 #include <stdarg.h>
@@ -38,7 +38,7 @@
   va_list ap;
   va_start(ap, format);
   vsnprintf(buf, sizeof(buf), format, ap);
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   __android_log_write(ANDROID_LOG_ERROR, "libsigchain", buf);
 #else
   std::cout << buf << "\n";
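
Both sigchainlib files now gate the Android logger on ART_TARGET_ANDROID, a macro the ART build defines itself, instead of the toolchain-provided __ANDROID__, so the selected branch tracks the declared build target rather than the compiler in use. Illustrative compile lines (flags and file names are assumptions):

    // clang++ -DART_TARGET_ANDROID ... sigchain.cc  -> __android_log_write branch
    // clang++ ...                      sigchain.cc  -> std::cout branch
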
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index 2bdf8d1..8619ff7 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -14,23 +14,22 @@
  * limitations under the License.
  */
 
-#include <assert.h>
 #include <iostream>
 #include <pthread.h>
 #include <stdio.h>
 #include <vector>
 
+#include "art_method-inl.h"
+#include "base/logging.h"
 #include "jni.h"
 
-#if defined(NDEBUG)
-#error test code compiled without NDEBUG
-#endif
+namespace art {
 
 static JavaVM* jvm = nullptr;
 
 extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void*) {
-  assert(vm != nullptr);
-  assert(jvm == nullptr);
+  CHECK(vm != nullptr);
+  CHECK(jvm == nullptr);
   jvm = vm;
   std::cout << "JNI_OnLoad called" << std::endl;
   return JNI_VERSION_1_6;
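
The wholesale assert to CHECK* conversion that follows (and the removal of the NDEBUG #error guard) exists because assert compiles to nothing under -DNDEBUG, so the test could previously only be built in debug configurations. ART's CHECK macros from base/logging.h are evaluated in every build flavor. A minimal contrast:

    #include <cassert>
    #include "base/logging.h"

    void Probe(void* p) {
      assert(p != nullptr);  // stripped entirely when NDEBUG is defined
      CHECK(p != nullptr);   // always evaluated; logs and aborts on failure
      CHECK_EQ(2 + 2, 4);    // equality variant used throughout the hunks below
    }
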
@@ -39,24 +38,24 @@
 extern "C" JNIEXPORT void JNI_OnUnload(JavaVM*, void*) {
   // std::cout since LOG(INFO) adds extra stuff like pid.
   std::cout << "JNI_OnUnload called" << std::endl;
-  // Clear jvm for assert in test 004-JniTest.
+  // Clear jvm for CHECK in test 004-JniTest.
   jvm = nullptr;
 }
 
 static void* AttachHelper(void* arg) {
-  assert(jvm != nullptr);
+  CHECK(jvm != nullptr);
 
   JNIEnv* env = nullptr;
   JavaVMAttachArgs args = { JNI_VERSION_1_6, __FUNCTION__, nullptr };
   int attach_result = jvm->AttachCurrentThread(&env, &args);
-  assert(attach_result == 0);
+  CHECK_EQ(attach_result, 0);
 
   typedef void (*Fn)(JNIEnv*);
   Fn fn = reinterpret_cast<Fn>(arg);
   fn(env);
 
   int detach_result = jvm->DetachCurrentThread();
-  assert(detach_result == 0);
+  CHECK_EQ(detach_result, 0);
   return nullptr;
 }
 
@@ -64,19 +63,19 @@
   pthread_t pthread;
   int pthread_create_result = pthread_create(&pthread, nullptr, AttachHelper,
                                              reinterpret_cast<void*>(fn));
-  assert(pthread_create_result == 0);
+  CHECK_EQ(pthread_create_result, 0);
   int pthread_join_result = pthread_join(pthread, nullptr);
-  assert(pthread_join_result == 0);
+  CHECK_EQ(pthread_join_result, 0);
 }
 
 static void testFindClassOnAttachedNativeThread(JNIEnv* env) {
   jclass clazz = env->FindClass("Main");
-  assert(clazz != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(clazz != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jobjectArray array = env->NewObjectArray(0, clazz, nullptr);
-  assert(array != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(array != nullptr);
+  CHECK(!env->ExceptionCheck());
 }
 
 // http://b/10994325
@@ -86,12 +85,12 @@
 
 static void testFindFieldOnAttachedNativeThread(JNIEnv* env) {
   jclass clazz = env->FindClass("Main");
-  assert(clazz != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(clazz != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jfieldID field = env->GetStaticFieldID(clazz, "testFindFieldOnAttachedNativeThreadField", "Z");
-  assert(field != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(field != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   env->SetStaticBooleanField(clazz, field, JNI_TRUE);
 }
@@ -103,38 +102,38 @@
 
 static void testReflectFieldGetFromAttachedNativeThread(JNIEnv* env) {
   jclass clazz = env->FindClass("Main");
-  assert(clazz != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(clazz != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jclass class_clazz = env->FindClass("java/lang/Class");
-  assert(class_clazz != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(class_clazz != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jmethodID getFieldMetodId = env->GetMethodID(class_clazz, "getField",
                                                "(Ljava/lang/String;)Ljava/lang/reflect/Field;");
-  assert(getFieldMetodId != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(getFieldMetodId != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jstring field_name = env->NewStringUTF("testReflectFieldGetFromAttachedNativeThreadField");
-  assert(field_name != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(field_name != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jobject field = env->CallObjectMethod(clazz, getFieldMetodId, field_name);
-  assert(field != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(field != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jclass field_clazz = env->FindClass("java/lang/reflect/Field");
-  assert(field_clazz != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(field_clazz != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jmethodID getBooleanMetodId = env->GetMethodID(field_clazz, "getBoolean",
                                                  "(Ljava/lang/Object;)Z");
-  assert(getBooleanMetodId != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(getBooleanMetodId != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   jboolean value = env->CallBooleanMethod(field, getBooleanMetodId, /* ignored */ clazz);
-  assert(value == false);
-  assert(!env->ExceptionCheck());
+  CHECK(value == false);
+  CHECK(!env->ExceptionCheck());
 }
 
 // http://b/15539150
@@ -148,22 +147,22 @@
 extern "C" JNIEXPORT void JNICALL Java_Main_testCallStaticVoidMethodOnSubClassNative(JNIEnv* env,
                                                                                      jclass) {
   jclass super_class = env->FindClass("Main$testCallStaticVoidMethodOnSubClass_SuperClass");
-  assert(super_class != nullptr);
+  CHECK(super_class != nullptr);
 
   jmethodID execute = env->GetStaticMethodID(super_class, "execute", "()V");
-  assert(execute != nullptr);
+  CHECK(execute != nullptr);
 
   jclass sub_class = env->FindClass("Main$testCallStaticVoidMethodOnSubClass_SubClass");
-  assert(sub_class != nullptr);
+  CHECK(sub_class != nullptr);
 
   env->CallStaticVoidMethod(sub_class, execute);
 }
 
 extern "C" JNIEXPORT jobject JNICALL Java_Main_testGetMirandaMethodNative(JNIEnv* env, jclass) {
   jclass abstract_class = env->FindClass("Main$testGetMirandaMethod_MirandaAbstract");
-  assert(abstract_class != nullptr);
+  CHECK(abstract_class != nullptr);
   jmethodID miranda_method = env->GetMethodID(abstract_class, "inInterface", "()Z");
-  assert(miranda_method != nullptr);
+  CHECK(miranda_method != nullptr);
   return env->ToReflectedMethod(abstract_class, miranda_method, JNI_FALSE);
 }
 
@@ -171,11 +170,11 @@
 extern "C" void JNICALL Java_Main_testZeroLengthByteBuffers(JNIEnv* env, jclass) {
   std::vector<uint8_t> buffer(1);
   jobject byte_buffer = env->NewDirectByteBuffer(&buffer[0], 0);
-  assert(byte_buffer != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(byte_buffer != nullptr);
+  CHECK(!env->ExceptionCheck());
 
-  assert(env->GetDirectBufferAddress(byte_buffer) == &buffer[0]);
-  assert(env->GetDirectBufferCapacity(byte_buffer) == 0);
+  CHECK_EQ(env->GetDirectBufferAddress(byte_buffer), &buffer[0]);
+  CHECK_EQ(env->GetDirectBufferCapacity(byte_buffer), 0);
 }
 
 constexpr size_t kByteReturnSize = 7;
@@ -185,18 +184,18 @@
                                               jbyte b3, jbyte b4, jbyte b5, jbyte b6,
                                               jbyte b7, jbyte b8, jbyte b9, jbyte b10) {
   // We use b1 to drive the output.
-  assert(b2 == 2);
-  assert(b3 == -3);
-  assert(b4 == 4);
-  assert(b5 == -5);
-  assert(b6 == 6);
-  assert(b7 == -7);
-  assert(b8 == 8);
-  assert(b9 == -9);
-  assert(b10 == 10);
+  CHECK_EQ(b2, 2);
+  CHECK_EQ(b3, -3);
+  CHECK_EQ(b4, 4);
+  CHECK_EQ(b5, -5);
+  CHECK_EQ(b6, 6);
+  CHECK_EQ(b7, -7);
+  CHECK_EQ(b8, 8);
+  CHECK_EQ(b9, -9);
+  CHECK_EQ(b10, 10);
 
-  assert(0 <= b1);
-  assert(b1 < static_cast<jbyte>(kByteReturnSize));
+  CHECK_LE(0, b1);
+  CHECK_LT(b1, static_cast<jbyte>(kByteReturnSize));
 
   return byte_returns[b1];
 }
@@ -210,18 +209,18 @@
                                                 jshort s3, jshort s4, jshort s5, jshort s6,
                                                 jshort s7, jshort s8, jshort s9, jshort s10) {
   // We use s1 to drive the output.
-  assert(s2 == 2);
-  assert(s3 == -3);
-  assert(s4 == 4);
-  assert(s5 == -5);
-  assert(s6 == 6);
-  assert(s7 == -7);
-  assert(s8 == 8);
-  assert(s9 == -9);
-  assert(s10 == 10);
+  CHECK_EQ(s2, 2);
+  CHECK_EQ(s3, -3);
+  CHECK_EQ(s4, 4);
+  CHECK_EQ(s5, -5);
+  CHECK_EQ(s6, 6);
+  CHECK_EQ(s7, -7);
+  CHECK_EQ(s8, 8);
+  CHECK_EQ(s9, -9);
+  CHECK_EQ(s10, 10);
 
-  assert(0 <= s1);
-  assert(s1 < static_cast<jshort>(kShortReturnSize));
+  CHECK_LE(0, s1);
+  CHECK_LT(s1, static_cast<jshort>(kShortReturnSize));
 
   return short_returns[s1];
 }
@@ -231,17 +230,17 @@
                                                     jboolean b5, jboolean b6, jboolean b7,
                                                     jboolean b8, jboolean b9, jboolean b10) {
   // We use b1 to drive the output.
-  assert(b2 == JNI_TRUE);
-  assert(b3 == JNI_FALSE);
-  assert(b4 == JNI_TRUE);
-  assert(b5 == JNI_FALSE);
-  assert(b6 == JNI_TRUE);
-  assert(b7 == JNI_FALSE);
-  assert(b8 == JNI_TRUE);
-  assert(b9 == JNI_FALSE);
-  assert(b10 == JNI_TRUE);
+  CHECK_EQ(b2, JNI_TRUE);
+  CHECK_EQ(b3, JNI_FALSE);
+  CHECK_EQ(b4, JNI_TRUE);
+  CHECK_EQ(b5, JNI_FALSE);
+  CHECK_EQ(b6, JNI_TRUE);
+  CHECK_EQ(b7, JNI_FALSE);
+  CHECK_EQ(b8, JNI_TRUE);
+  CHECK_EQ(b9, JNI_FALSE);
+  CHECK_EQ(b10, JNI_TRUE);
 
-  assert(b1 == JNI_TRUE || b1 == JNI_FALSE);
+  CHECK(b1 == JNI_TRUE || b1 == JNI_FALSE);
   return b1;
 }
 
@@ -252,17 +251,17 @@
                                               jchar c3, jchar c4, jchar c5, jchar c6, jchar c7,
                                               jchar c8, jchar c9, jchar c10) {
   // We use c1 to drive the output.
-  assert(c2 == 'a');
-  assert(c3 == 'b');
-  assert(c4 == 'c');
-  assert(c5 == '0');
-  assert(c6 == '1');
-  assert(c7 == '2');
-  assert(c8 == 1234);
-  assert(c9 == 2345);
-  assert(c10 == 3456);
+  CHECK_EQ(c2, 'a');
+  CHECK_EQ(c3, 'b');
+  CHECK_EQ(c4, 'c');
+  CHECK_EQ(c5, '0');
+  CHECK_EQ(c6, '1');
+  CHECK_EQ(c7, '2');
+  CHECK_EQ(c8, 1234);
+  CHECK_EQ(c9, 2345);
+  CHECK_EQ(c10, 3456);
 
-  assert(c1 < static_cast<jchar>(kCharReturnSize));
+  CHECK_LT(c1, static_cast<jchar>(kCharReturnSize));
 
   return char_returns[c1];
 }
@@ -281,39 +280,39 @@
   // Test direct call.
   {
     jclass vmstack_clazz = env->FindClass("dalvik/system/VMStack");
-    assert(vmstack_clazz != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(vmstack_clazz != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     jmethodID getCallingClassLoaderMethodId = env->GetStaticMethodID(vmstack_clazz,
                                                                      "getCallingClassLoader",
                                                                      "()Ljava/lang/ClassLoader;");
-    assert(getCallingClassLoaderMethodId != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(getCallingClassLoaderMethodId != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     jobject class_loader = env->CallStaticObjectMethod(vmstack_clazz,
                                                        getCallingClassLoaderMethodId);
-    assert(class_loader == nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(class_loader == nullptr);
+    CHECK(!env->ExceptionCheck());
   }
 
   // Test one-level call. Use System.loadLibrary().
   {
     jclass system_clazz = env->FindClass("java/lang/System");
-    assert(system_clazz != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(system_clazz != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     jmethodID loadLibraryMethodId = env->GetStaticMethodID(system_clazz, "loadLibrary",
                                                            "(Ljava/lang/String;)V");
-    assert(loadLibraryMethodId != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(loadLibraryMethodId != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     // Create a string object.
     jobject library_string = env->NewStringUTF("non_existing_library");
-    assert(library_string != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(library_string != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     env->CallStaticVoidMethod(system_clazz, loadLibraryMethodId, library_string);
-    assert(env->ExceptionCheck());
+    CHECK(env->ExceptionCheck());
 
     // We expect UnsatisfiedLinkError.
     jthrowable thrown = env->ExceptionOccurred();
@@ -321,7 +320,7 @@
 
     jclass unsatisfied_link_error_clazz = env->FindClass("java/lang/UnsatisfiedLinkError");
     jclass thrown_class = env->GetObjectClass(thrown);
-    assert(env->IsSameObject(unsatisfied_link_error_clazz, thrown_class));
+    CHECK(env->IsSameObject(unsatisfied_link_error_clazz, thrown_class));
   }
 }
 
@@ -333,31 +332,31 @@
 
 static void testShallowGetStackClass2(JNIEnv* env) {
   jclass vmstack_clazz = env->FindClass("dalvik/system/VMStack");
-  assert(vmstack_clazz != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(vmstack_clazz != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   // Test direct call.
   {
     jmethodID getStackClass2MethodId = env->GetStaticMethodID(vmstack_clazz, "getStackClass2",
                                                               "()Ljava/lang/Class;");
-    assert(getStackClass2MethodId != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(getStackClass2MethodId != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     jobject caller_class = env->CallStaticObjectMethod(vmstack_clazz, getStackClass2MethodId);
-    assert(caller_class == nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(caller_class == nullptr);
+    CHECK(!env->ExceptionCheck());
   }
 
   // Test one-level call. Use VMStack.getStackClass1().
   {
     jmethodID getStackClass1MethodId = env->GetStaticMethodID(vmstack_clazz, "getStackClass1",
                                                               "()Ljava/lang/Class;");
-    assert(getStackClass1MethodId != nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(getStackClass1MethodId != nullptr);
+    CHECK(!env->ExceptionCheck());
 
     jobject caller_class = env->CallStaticObjectMethod(vmstack_clazz, getStackClass1MethodId);
-    assert(caller_class == nullptr);
-    assert(!env->ExceptionCheck());
+    CHECK(caller_class == nullptr);
+    CHECK(!env->ExceptionCheck());
   }
 
   // For better testing we would need to compile against libcore and have a two-deep stack
@@ -416,8 +415,8 @@
       env_->ExceptionDescribe();
       env_->FatalError(__FUNCTION__);
     }
-    assert(!env_->ExceptionCheck());
-    assert(c != nullptr);
+    CHECK(!env_->ExceptionCheck());
+    CHECK(c != nullptr);
     return c;
   }
 
@@ -429,7 +428,7 @@
       env_->ExceptionDescribe();
       env_->FatalError(__FUNCTION__);
     }
-    assert(m != nullptr);
+    CHECK(m != nullptr);
     return m;
   }
 
@@ -439,7 +438,7 @@
       env_->ExceptionDescribe();
       env_->FatalError(__FUNCTION__);
     }
-    assert(o != nullptr);
+    CHECK(o != nullptr);
     return o;
   }
 
@@ -467,7 +466,7 @@
       env_->ExceptionDescribe();
       env_->FatalError(__FUNCTION__);
     }
-    assert(m != nullptr);
+    CHECK(m != nullptr);
     return m;
   }
 
@@ -508,21 +507,21 @@
     jobject sub_super = CallConstructor(sub_, super_constructor_);
     jobject sub_sub = CallConstructor(sub_, sub_constructor_);
 
-    assert(env_->IsInstanceOf(super_super, super_));
-    assert(!env_->IsInstanceOf(super_super, sub_));
+    CHECK(env_->IsInstanceOf(super_super, super_));
+    CHECK(!env_->IsInstanceOf(super_super, sub_));
 
     // Note that even though we called (and ran) the subclass
     // constructor, we are not the subclass.
-    assert(env_->IsInstanceOf(super_sub, super_));
-    assert(!env_->IsInstanceOf(super_sub, sub_));
+    CHECK(env_->IsInstanceOf(super_sub, super_));
+    CHECK(!env_->IsInstanceOf(super_sub, sub_));
 
     // Note that even though we called the superclass constructor, we
     // are still the subclass.
-    assert(env_->IsInstanceOf(sub_super, super_));
-    assert(env_->IsInstanceOf(sub_super, sub_));
+    CHECK(env_->IsInstanceOf(sub_super, super_));
+    CHECK(env_->IsInstanceOf(sub_super, sub_));
 
-    assert(env_->IsInstanceOf(sub_sub, super_));
-    assert(env_->IsInstanceOf(sub_sub, sub_));
+    CHECK(env_->IsInstanceOf(sub_sub, super_));
+    CHECK(env_->IsInstanceOf(sub_sub, sub_));
   }
 
   void TestnonstaticCallNonvirtualMethod(bool super_object, bool super_class, bool super_method, const char* test_case) {
@@ -542,8 +541,8 @@
     CallMethod(o, c, m, true, test_case);
     jboolean super_field = GetBooleanField(o, super_field_);
     jboolean sub_field = GetBooleanField(o, sub_field_);
-    assert(super_field == super_method);
-    assert(sub_field != super_method);
+    CHECK_EQ(super_field, super_method);
+    CHECK_NE(sub_field, super_method);
   }
 
   void TestnonstaticCallNonvirtualMethod() {
@@ -565,20 +564,20 @@
 
 extern "C" JNIEXPORT void JNICALL Java_Main_testNewStringObject(JNIEnv* env, jclass) {
   jclass c = env->FindClass("java/lang/String");
-  assert(c != nullptr);
+  CHECK(c != nullptr);
 
   jmethodID mid1 = env->GetMethodID(c, "<init>", "()V");
-  assert(mid1 != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(mid1 != nullptr);
+  CHECK(!env->ExceptionCheck());
   jmethodID mid2 = env->GetMethodID(c, "<init>", "([B)V");
-  assert(mid2 != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(mid2 != nullptr);
+  CHECK(!env->ExceptionCheck());
   jmethodID mid3 = env->GetMethodID(c, "<init>", "([C)V");
-  assert(mid3 != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(mid3 != nullptr);
+  CHECK(!env->ExceptionCheck());
   jmethodID mid4 = env->GetMethodID(c, "<init>", "(Ljava/lang/String;)V");
-  assert(mid4 != nullptr);
-  assert(!env->ExceptionCheck());
+  CHECK(mid4 != nullptr);
+  CHECK(!env->ExceptionCheck());
 
   const char* test_array = "Test";
   int byte_array_length = strlen(test_array);
@@ -587,22 +586,22 @@
 
   // Test NewObject
   jstring s = reinterpret_cast<jstring>(env->NewObject(c, mid2, byte_array));
-  assert(s != nullptr);
-  assert(env->GetStringLength(s) == byte_array_length);
-  assert(env->GetStringUTFLength(s) == byte_array_length);
+  CHECK(s != nullptr);
+  CHECK_EQ(env->GetStringLength(s), byte_array_length);
+  CHECK_EQ(env->GetStringUTFLength(s), byte_array_length);
   const char* chars = env->GetStringUTFChars(s, nullptr);
-  assert(strcmp(test_array, chars) == 0);
+  CHECK_EQ(strcmp(test_array, chars), 0);
   env->ReleaseStringUTFChars(s, chars);
 
   // Test AllocObject and Call(Nonvirtual)VoidMethod
   jstring s1 = reinterpret_cast<jstring>(env->AllocObject(c));
-  assert(s1 != nullptr);
+  CHECK(s1 != nullptr);
   jstring s2 = reinterpret_cast<jstring>(env->AllocObject(c));
-  assert(s2 != nullptr);
+  CHECK(s2 != nullptr);
   jstring s3 = reinterpret_cast<jstring>(env->AllocObject(c));
-  assert(s3 != nullptr);
+  CHECK(s3 != nullptr);
   jstring s4 = reinterpret_cast<jstring>(env->AllocObject(c));
-  assert(s4 != nullptr);
+  CHECK(s4 != nullptr);
 
   jcharArray char_array = env->NewCharArray(5);
   jstring string_arg = env->NewStringUTF("helloworld");
@@ -621,18 +620,18 @@
 
   // Test with global and weak global references
   jstring s5 = reinterpret_cast<jstring>(env->AllocObject(c));
-  assert(s5 != nullptr);
+  CHECK(s5 != nullptr);
   s5 = reinterpret_cast<jstring>(env->NewGlobalRef(s5));
   jstring s6 = reinterpret_cast<jstring>(env->AllocObject(c));
-  assert(s6 != nullptr);
+  CHECK(s6 != nullptr);
   s6 = reinterpret_cast<jstring>(env->NewWeakGlobalRef(s6));
 
   env->CallVoidMethod(s5, mid1);
   env->CallNonvirtualVoidMethod(s6, c, mid2, byte_array);
-  assert(env->GetStringLength(s5) == 0);
-  assert(env->GetStringLength(s6) == byte_array_length);
+  CHECK_EQ(env->GetStringLength(s5), 0);
+  CHECK_EQ(env->GetStringLength(s6), byte_array_length);
   const char* chars6 = env->GetStringUTFChars(s6, nullptr);
-  assert(strcmp(test_array, chars6) == 0);
+  CHECK_EQ(strcmp(test_array, chars6), 0);
   env->ReleaseStringUTFChars(s6, chars6);
 }
 
@@ -664,8 +663,8 @@
  public:
   explicit JniCallDefaultMethodsTest(JNIEnv* env)
       : env_(env), concrete_class_(env_->FindClass("ConcreteClass")) {
-    assert(!env_->ExceptionCheck());
-    assert(concrete_class_ != nullptr);
+    CHECK(!env_->ExceptionCheck());
+    CHECK(concrete_class_ != nullptr);
   }
 
   void Test() {
@@ -688,14 +687,14 @@
   void TestCalls(const char* declaring_class, std::vector<const char*> methods) {
     jmethodID new_method = env_->GetMethodID(concrete_class_, "<init>", "()V");
     jobject obj = env_->NewObject(concrete_class_, new_method);
-    assert(!env_->ExceptionCheck());
-    assert(obj != nullptr);
+    CHECK(!env_->ExceptionCheck());
+    CHECK(obj != nullptr);
     jclass decl_class = env_->FindClass(declaring_class);
-    assert(!env_->ExceptionCheck());
-    assert(decl_class != nullptr);
+    CHECK(!env_->ExceptionCheck());
+    CHECK(decl_class != nullptr);
     for (const char* method : methods) {
       jmethodID method_id = env_->GetMethodID(decl_class, method, "()V");
-      assert(!env_->ExceptionCheck());
+      CHECK(!env_->ExceptionCheck());
       printf("Calling method %s->%s on object of type ConcreteClass\n", declaring_class, method);
       env_->CallVoidMethod(obj, method_id);
       if (env_->ExceptionCheck()) {
@@ -704,10 +703,10 @@
         jmethodID to_string = env_->GetMethodID(
             env_->FindClass("java/lang/Object"), "toString", "()Ljava/lang/String;");
         jstring exception_string = (jstring) env_->CallObjectMethod(thrown, to_string);
-        assert(!env_->ExceptionCheck());
+        CHECK(!env_->ExceptionCheck());
         const char* exception_string_utf8 = env_->GetStringUTFChars(exception_string, nullptr);
-        assert(!env_->ExceptionCheck());
-        assert(exception_string_utf8 != nullptr);
+        CHECK(!env_->ExceptionCheck());
+        CHECK(exception_string_utf8 != nullptr);
         printf("EXCEPTION OCCURRED: %s\n", exception_string_utf8);
         env_->ReleaseStringUTFChars(exception_string, exception_string_utf8);
       }
@@ -724,12 +723,12 @@
 
 static void InvokeSpecificMethod(JNIEnv* env, jobject obj, const char* method) {
   jclass lambda_class = env->FindClass("LambdaInterface");
-  assert(!env->ExceptionCheck());
-  assert(lambda_class != nullptr);
+  CHECK(!env->ExceptionCheck());
+  CHECK(lambda_class != nullptr);
   jmethodID method_id = env->GetMethodID(lambda_class, method, "()V");
-  assert(!env->ExceptionCheck());
+  CHECK(!env->ExceptionCheck());
   env->CallVoidMethod(obj, method_id);
-  assert(!env->ExceptionCheck());
+  CHECK(!env->ExceptionCheck());
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_testInvokeLambdaDefaultMethod(
@@ -740,3 +739,6 @@
 extern "C" JNIEXPORT void JNICALL Java_Main_testInvokeLambdaMethod(JNIEnv* e, jclass, jobject l) {
   InvokeSpecificMethod(e, l, "sayHi");
 }
+
+}  // namespace art
+
diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
index 2d26fa1..5304590 100644
--- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "art_method-inl.h"
 #include "check_reference_map_visitor.h"
 #include "jni.h"
 
@@ -49,13 +50,7 @@
     if (m_name.compare("f") == 0) {
       CHECK_REGS_CONTAIN_REFS(0x03U, true, 8);  // v8: this
       CHECK_REGS_CONTAIN_REFS(0x06U, true, 8, 1);  // v8: this, v1: x
-      if (!GetCurrentOatQuickMethodHeader()->IsOptimized()) {
-        CHECK_REGS_CONTAIN_REFS(0x08U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
-      }
       CHECK_REGS_CONTAIN_REFS(0x0cU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
-      if (!GetCurrentOatQuickMethodHeader()->IsOptimized()) {
-        CHECK_REGS_CONTAIN_REFS(0x0eU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
-      }
       CHECK_REGS_CONTAIN_REFS(0x10U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
       // v2 is added because of the instruction at DexPC 0024. Object merges with 0 is Object. See:
       //   0024: move-object v3, v2
@@ -68,15 +63,6 @@
       CHECK_REGS_CONTAIN_REFS(0x14U, false, 2);  // v2: y
       // Note that v0: ex can be eliminated because it's a dead merge of two different exceptions.
       CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
-      if (!GetCurrentOatQuickMethodHeader()->IsOptimized()) {
-        // v8: this, v4: x[1], v2: y, v1: x (dead v0: ex)
-        CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 4, 2, 1);
-        // v8: this, v4: x[1], v2: y, v1: x (dead v0: ex)
-        CHECK_REGS_CONTAIN_REFS(0x1eU, true, 8, 4, 2, 1);
-        // v4 is removed from the root set because there is a "merge" operation.
-        // See 0016: if-nez v2, 0020.
-        CHECK_REGS_CONTAIN_REFS(0x20U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
-      }
       CHECK_REGS_CONTAIN_REFS(0x22U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
 
       if (!GetCurrentOatQuickMethodHeader()->IsOptimized()) {
diff --git a/test/004-StackWalk/stack_walk_jni.cc b/test/004-StackWalk/stack_walk_jni.cc
index 51bb68f..420224d 100644
--- a/test/004-StackWalk/stack_walk_jni.cc
+++ b/test/004-StackWalk/stack_walk_jni.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "art_method-inl.h"
 #include "check_reference_map_visitor.h"
 #include "jni.h"
 
diff --git a/test/005-annotations/expected.txt b/test/005-annotations/expected.txt
index 3d9fd8b..ee5b0c7 100644
--- a/test/005-annotations/expected.txt
+++ b/test/005-annotations/expected.txt
@@ -93,6 +93,7 @@
     --> nombre is 'fubar'
 
 SimplyNoted.get(AnnoSimpleType) = @android.test.anno.AnnoSimpleType()
+SimplyNoted.get(AnnoSimpleTypeInvis) = null
 SubNoted.get(AnnoSimpleType) = @android.test.anno.AnnoSimpleType()
 
 Package annotations:
diff --git a/test/005-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java
index bc89f16..d36d43e 100644
--- a/test/005-annotations/src/android/test/anno/TestAnnotations.java
+++ b/test/005-annotations/src/android/test/anno/TestAnnotations.java
@@ -185,6 +185,9 @@
         // this is expected to be non-null
         Annotation anno = SimplyNoted.class.getAnnotation(AnnoSimpleType.class);
         System.out.println("SimplyNoted.get(AnnoSimpleType) = " + anno);
+        // this is expected to be null
+        anno = SimplyNoted.class.getAnnotation(AnnoSimpleTypeInvis.class);
+        System.out.println("SimplyNoted.get(AnnoSimpleTypeInvis) = " + anno);
         // this is non-null if the @Inherited tag is present
         anno = SubNoted.class.getAnnotation(AnnoSimpleType.class);
         System.out.println("SubNoted.get(AnnoSimpleType) = " + anno);
diff --git a/test/031-class-attributes/expected.txt b/test/031-class-attributes/expected.txt
index 72656ae..de99872 100644
--- a/test/031-class-attributes/expected.txt
+++ b/test/031-class-attributes/expected.txt
@@ -84,7 +84,7 @@
   enclosingCon: null
   enclosingMeth: null
   modifiers: 1
-  package: package otherpackage, Unknown, version 0.0
+  package: package otherpackage
   declaredClasses: [0]
   member classes: [0]
   isAnnotation: false
diff --git a/test/031-class-attributes/src/ClassAttrs.java b/test/031-class-attributes/src/ClassAttrs.java
index c2e41c5..38bd525 100644
--- a/test/031-class-attributes/src/ClassAttrs.java
+++ b/test/031-class-attributes/src/ClassAttrs.java
@@ -1,6 +1,7 @@
 import otherpackage.OtherPackageClass;
 
 import java.io.Serializable;
+import java.lang.reflect.AbstractMethod;
 import java.lang.reflect.AccessibleObject;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Field;
@@ -221,8 +222,11 @@
     public static String getSignatureAttribute(Object obj) {
         Method method;
         try {
-            Class c = Class.forName("libcore.reflect.AnnotationAccess");
-            method = c.getDeclaredMethod("getSignature", java.lang.reflect.AnnotatedElement.class);
+            Class c = obj.getClass();
+            if (c == Method.class || c == Constructor.class) {
+              c = AbstractMethod.class;
+            }
+            method = c.getDeclaredMethod("getSignatureAttribute");
             method.setAccessible(true);
         } catch (Exception ex) {
             ex.printStackTrace();
@@ -230,7 +234,7 @@
         }
 
         try {
-            return (String) method.invoke(null, obj);
+            return (String) method.invoke(obj);
         } catch (IllegalAccessException ex) {
             throw new RuntimeException(ex);
         } catch (InvocationTargetException ex) {
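The invoke() change follows from how Method.invoke treats its first parameter: for a static method it is an ignored receiver slot (conventionally null) and the target travels in the argument list, while for an instance method it is the receiver itself. A small sketch with hypothetical names:

    import java.lang.reflect.Method;

    class InvokeDemo {
        static String describe(Object obj) { return "static: " + obj; }
        String describeSelf() { return "instance: " + this; }

        public static void main(String[] args) throws Exception {
            InvokeDemo target = new InvokeDemo();

            // Static method: receiver slot is null, target passed as an argument.
            Method m1 = InvokeDemo.class.getDeclaredMethod("describe", Object.class);
            System.out.println(m1.invoke(null, target));

            // Instance method: the target is the receiver, no extra argument.
            Method m2 = InvokeDemo.class.getDeclaredMethod("describeSelf");
            System.out.println(m2.invoke(target));
        }
    }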
diff --git a/test/044-proxy/expected.txt b/test/044-proxy/expected.txt
index be7023e..2a5f0b9 100644
--- a/test/044-proxy/expected.txt
+++ b/test/044-proxy/expected.txt
@@ -95,3 +95,5 @@
 5.8
 JNI_OnLoad called
 callback
+Found constructor.
+Found constructors with 0 exceptions
diff --git a/test/044-proxy/src/ConstructorProxy.java b/test/044-proxy/src/ConstructorProxy.java
new file mode 100644
index 0000000..95d150c
--- /dev/null
+++ b/test/044-proxy/src/ConstructorProxy.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
+
+/**
+ * Tests proxies when used with constructor methods.
+ */
+class ConstructorProxy implements InvocationHandler {
+  public static void main() {
+    try {
+      new ConstructorProxy().runTest();
+    } catch (Exception e) {
+      System.out.println("Unexpected failure occured");
+      e.printStackTrace();
+    }
+  }
+
+  public void runTest() throws Exception {
+    Class<?> proxyClass = Proxy.getProxyClass(
+            getClass().getClassLoader(),
+            new Class<?>[] { Runnable.class }
+    );
+    Constructor<?> constructor = proxyClass.getConstructor(InvocationHandler.class);
+    System.out.println("Found constructor.");
+    // We used to crash when asking for the exception types of the constructor, because the
+    // runtime was not using the non-proxy ArtMethod.
+    Object[] exceptions = constructor.getExceptionTypes();
+    System.out.println("Found constructors with " + exceptions.length + " exceptions");
+  }
+
+  @Override
+  public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
+    return args[0];
+  }
+}
+
diff --git a/test/044-proxy/src/Main.java b/test/044-proxy/src/Main.java
index 1f23b95..9dadb7c 100644
--- a/test/044-proxy/src/Main.java
+++ b/test/044-proxy/src/Main.java
@@ -31,6 +31,7 @@
         NarrowingTest.main(null);
         FloatSelect.main(null);
         NativeProxy.main(args);
+        ConstructorProxy.main();
     }
 
     // The following code maps from the actual proxy class names (eg $Proxy2) to their test output
diff --git a/test/051-thread/thread_test.cc b/test/051-thread/thread_test.cc
index 4215207..079ad40 100644
--- a/test/051-thread/thread_test.cc
+++ b/test/051-thread/thread_test.cc
@@ -28,7 +28,7 @@
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_supportsThreadPriorities(
     JNIEnv* env ATTRIBUTE_UNUSED,
     jclass clazz ATTRIBUTE_UNUSED) {
-#if defined(__ANDROID__)
+#if defined(ART_TARGET_ANDROID)
   return JNI_TRUE;
 #else
   return JNI_FALSE;
diff --git a/test/068-classloader/expected.txt b/test/068-classloader/expected.txt
index 8725799..ae937e0 100644
--- a/test/068-classloader/expected.txt
+++ b/test/068-classloader/expected.txt
@@ -13,3 +13,4 @@
 Got LinkageError on IDI (early)
 class Main
 Got expected ClassNotFoundException
+Loaded class into null class loader
diff --git a/test/068-classloader/src/Main.java b/test/068-classloader/src/Main.java
index 361e293..b2d843b 100644
--- a/test/068-classloader/src/Main.java
+++ b/test/068-classloader/src/Main.java
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+
 /**
  * Class loader test.
  */
@@ -62,6 +65,28 @@
         testSeparation();
 
         testClassForName();
+
+        testNullClassLoader();
+    }
+
+    static void testNullClassLoader() {
+        try {
+            /* this is the "alternate" DEX/Jar file */
+            String DEX_FILE = System.getenv("DEX_LOCATION") + "/068-classloader-ex.jar";
+            /* on Dalvik, this is a DexFile; otherwise, it's null */
+            Class mDexClass = Class.forName("dalvik.system.DexFile");
+            Constructor ctor = mDexClass.getConstructor(new Class[] {String.class});
+            Object mDexFile = ctor.newInstance(DEX_FILE);
+            Method meth = mDexClass.getMethod("loadClass",
+                    new Class[] { String.class, ClassLoader.class });
+            Object klass = meth.invoke(mDexFile, "Mutator", null);
+            if (klass == null) {
+                throw new AssertionError("loadClass with null class loader failed");
+            }
+        } catch (Exception e) {
+            System.out.println(e);
+        }
+        System.out.println("Loaded class into null class loader");
     }
 
     static void testSeparation() {
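A null ClassLoader in these reflective APIs means the boot class loader, not "no loader", which is the behavior the new testNullClassLoader() exercises through DexFile.loadClass. The same convention shows up with plain Class.forName, as in this sketch:

    class NullLoaderDemo {
        public static void main(String[] args) throws Exception {
            // A null loader argument means "use the boot class loader",
            // so core library classes resolve normally.
            Class<?> c = Class.forName("java.lang.String", false, null);
            // Boot classes report a null defining loader.
            System.out.println(c + " loaded by " + c.getClassLoader());
        }
    }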
diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java
index f41ff2a..50bbe51 100644
--- a/test/098-ddmc/src/Main.java
+++ b/test/098-ddmc/src/Main.java
@@ -44,7 +44,12 @@
         System.out.println("Confirm when we overflow, we don't roll over to zero. b/17392248");
         final int overflowAllocations = 64 * 1024;  // Won't fit in unsigned 16-bit value.
         for (int i = 0; i < overflowAllocations; i++) {
-            new Object();
+            new Object() {
+                // Add a finalizer so that the allocation won't be eliminated.
+                public void finalize() {
+                    System.out.print("");
+                }
+            };
         }
         Allocations after = new Allocations(DdmVmInternal.getRecentAllocations());
         System.out.println("before < overflowAllocations=" + (before.numberOfEntries < overflowAllocations));
diff --git a/test/099-vmdebug/src/Main.java b/test/099-vmdebug/src/Main.java
index 1be5765..8068721 100644
--- a/test/099-vmdebug/src/Main.java
+++ b/test/099-vmdebug/src/Main.java
@@ -133,7 +133,7 @@
             System.out.println("Got null string");
             return;
         }
-        long n = Long.valueOf(s);
+        long n = Long.parseLong(s);
         if (n < 0) {
             System.out.println("Got negative number " + n);
         }
@@ -157,8 +157,8 @@
                 System.out.println("Got bad bucket " + bucket);
                 continue;
             }
-            long key = Long.valueOf(kv[0]);
-            long value = Long.valueOf(kv[1]);
+            long key = Long.parseLong(kv[0]);
+            long value = Long.parseLong(kv[1]);
             if (key < 0 || value < 0) {
                 System.out.println("Got negative key or value " + bucket);
                 continue;
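Long.parseLong returns a primitive long directly, while Long.valueOf parses and boxes a java.lang.Long that is immediately unboxed on assignment, so the substitution above removes a pointless allocation. For example:

    class ParseDemo {
        public static void main(String[] args) {
            String s = "12345";
            long a = Long.parseLong(s);  // returns a primitive, no boxing
            long b = Long.valueOf(s);    // allocates a Long, then auto-unboxes
            System.out.println(a + " " + b);
        }
    }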
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index e4988c9..d878e69 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -32,8 +32,8 @@
 62 (class java.lang.Long)
 14 (class java.lang.Short)
 [java.lang.String(int,int,char[]), public java.lang.String(), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder)]
-[private final int java.lang.String.count, private int java.lang.String.hash, private static final java.io.ObjectStreamField[] java.lang.String.serialPersistentFields, private static final long java.lang.String.serialVersionUID, private static int java.lang.String.HASHING_SEED, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
-[int java.lang.String.hash32(), native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), private static int java.lang.String.getHashingSeed(), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int)]
+[private final int java.lang.String.count, private int java.lang.String.hash, private static final java.io.ObjectStreamField[] java.lang.String.serialPersistentFields, private static final long java.lang.String.serialVersionUID, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
+[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
 0
diff --git a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
index 54879fb..c9110a9 100644
--- a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
+++ b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <dlfcn.h>
 #include <iostream>
 
 #include "base/casts.h"
@@ -45,6 +46,10 @@
   self->SetTopOfShadowStack(nullptr);
   JavaVM* vm = down_cast<JNIEnvExt*>(env)->vm;
   vm->DetachCurrentThread();
+  // Open ourselves again to make sure the native library does not get unloaded from
+  // underneath us due to DestroyJavaVM. b/28406866
+  void* handle = dlopen(kIsDebugBuild ? "libarttestd.so" : "libarttest.so", RTLD_NOW);
+  CHECK(handle != nullptr);
   vm->DestroyJavaVM();
   vm_was_shutdown.store(true);
   // Give threads some time to get stuck in ExceptionCheck.
diff --git a/test/141-class-unload/jni_unload.cc b/test/141-class-unload/jni_unload.cc
index d913efe..bbbb0a6 100644
--- a/test/141-class-unload/jni_unload.cc
+++ b/test/141-class-unload/jni_unload.cc
@@ -19,7 +19,6 @@
 #include <iostream>
 
 #include "jit/jit.h"
-#include "jit/jit_instrumentation.h"
 #include "runtime.h"
 #include "thread-inl.h"
 
@@ -29,7 +28,7 @@
 extern "C" JNIEXPORT void JNICALL Java_IntHolder_waitForCompilation(JNIEnv*, jclass) {
   jit::Jit* jit = Runtime::Current()->GetJit();
   if (jit != nullptr) {
-    jit->GetInstrumentationCache()->WaitForCompilationToFinish(Thread::Current());
+    jit->WaitForCompilationToFinish(Thread::Current());
   }
 }
 
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 15683b0..17a6049 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -23,6 +23,7 @@
 
 public class Main {
     static final String DEX_FILE = System.getenv("DEX_LOCATION") + "/141-class-unload-ex.jar";
+    static final String LIBRARY_SEARCH_PATH = System.getProperty("java.library.path");
     static String nativeLibraryName;
 
     public static void main(String[] args) throws Exception {
@@ -32,7 +33,7 @@
             throw new AssertionError("Couldn't find path class loader class");
         }
         Constructor constructor =
-            pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
+            pathClassLoader.getDeclaredConstructor(String.class, String.class, ClassLoader.class);
         try {
             testUnloadClass(constructor);
             testUnloadLoader(constructor);
@@ -49,7 +50,7 @@
             // Test that the oat files are unloaded.
             testOatFilesUnloaded(getPid());
         } catch (Exception e) {
-            System.out.println(e);
+            e.printStackTrace();
         }
     }
 
@@ -118,7 +119,7 @@
     private static void testNoUnloadInvoke(Constructor constructor) throws Exception {
         WeakReference<ClassLoader> loader =
             new WeakReference((ClassLoader) constructor.newInstance(
-                DEX_FILE, ClassLoader.getSystemClassLoader()));
+                DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader()));
         WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
         intHolder.get().getDeclaredMethod("runGC").invoke(intHolder.get());
         boolean isNull = loader.get() == null;
@@ -128,7 +129,7 @@
     private static void testNoUnloadInstance(Constructor constructor) throws Exception {
         WeakReference<ClassLoader> loader =
             new WeakReference((ClassLoader) constructor.newInstance(
-                DEX_FILE, ClassLoader.getSystemClassLoader()));
+                DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader()));
         WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
         Object o = intHolder.get().newInstance();
         Runtime.getRuntime().gc();
@@ -138,7 +139,7 @@
 
     private static WeakReference<Class> setUpUnloadClass(Constructor constructor) throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
-            DEX_FILE, ClassLoader.getSystemClassLoader());
+            DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
         Class intHolder = loader.loadClass("IntHolder");
         Method getValue = intHolder.getDeclaredMethod("getValue");
         Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE);
@@ -155,7 +156,7 @@
                                                                 boolean waitForCompilation)
         throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
-            DEX_FILE, ClassLoader.getSystemClassLoader());
+            DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
         Class intHolder = loader.loadClass("IntHolder");
         Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE);
         setValue.invoke(intHolder, 2);
@@ -177,7 +178,7 @@
     private static WeakReference<ClassLoader> setUpLoadLibrary(Constructor constructor)
         throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
-            DEX_FILE, ClassLoader.getSystemClassLoader());
+            DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
         Class intHolder = loader.loadClass("IntHolder");
         Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class);
         loadLibrary.invoke(intHolder, nativeLibraryName);
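These tests detect unloading by keeping only a WeakReference to the class loader and observing whether GC clears it. A standalone sketch of that pattern (collection is not guaranteed by any single System.gc() call, so real tests retry or rely on runtime hooks; URLClassLoader stands in for PathClassLoader on the host):

    import java.lang.ref.WeakReference;
    import java.net.URL;
    import java.net.URLClassLoader;

    class UnloadDemo {
        public static void main(String[] args) {
            WeakReference<ClassLoader> ref =
                    new WeakReference<>(new URLClassLoader(new URL[0]));
            // No strong references remain, so the loader is eligible for collection.
            System.gc();
            System.out.println(ref.get() == null
                    ? "loader was collected"
                    : "loader still reachable (gc is only a hint)");
        }
    }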
diff --git a/test/146-bad-interface/build b/test/146-bad-interface/build
new file mode 100755
index 0000000..0dd8573
--- /dev/null
+++ b/test/146-bad-interface/build
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Don't do anything with jvm
+  # Hard-wired use of experimental jack.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental default-methods
diff --git a/test/146-bad-interface/expected.txt b/test/146-bad-interface/expected.txt
new file mode 100644
index 0000000..3441966
--- /dev/null
+++ b/test/146-bad-interface/expected.txt
@@ -0,0 +1 @@
+running invoke
diff --git a/test/146-bad-interface/info.txt b/test/146-bad-interface/info.txt
new file mode 100644
index 0000000..38f188e
--- /dev/null
+++ b/test/146-bad-interface/info.txt
@@ -0,0 +1 @@
+Check whether a duplicate class can invoke-interface on an unresolved method.
diff --git a/test/146-bad-interface/run b/test/146-bad-interface/run
new file mode 100755
index 0000000..ceef6b8
--- /dev/null
+++ b/test/146-bad-interface/run
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We want to run as no-dex-file-fallback to confirm that even though the -ex file has a symbolic
+# reference to A, there's no class-def, so we don't detect a collision.
+exec ${RUN} --secondary "${@}"
diff --git a/test/146-bad-interface/smali/invoke_inf.smali b/test/146-bad-interface/smali/invoke_inf.smali
new file mode 100644
index 0000000..c5101e0
--- /dev/null
+++ b/test/146-bad-interface/smali/invoke_inf.smali
@@ -0,0 +1,24 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeInf;
+.super Ljava/lang/Object;
+
+.method public static doInvoke(LIface;)V
+.locals 0
+    invoke-interface {p0}, LIface;->invoke()V
+    return-void
+.end method
+
diff --git a/test/146-bad-interface/src-ex/A.java b/test/146-bad-interface/src-ex/A.java
new file mode 100644
index 0000000..a30a5f2
--- /dev/null
+++ b/test/146-bad-interface/src-ex/A.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A implements Iface {
+}
diff --git a/test/146-bad-interface/src-ex/Iface.java b/test/146-bad-interface/src-ex/Iface.java
new file mode 100644
index 0000000..921e25c
--- /dev/null
+++ b/test/146-bad-interface/src-ex/Iface.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Iface {
+  public default void aPadding() {}
+  public default void bPadding() {}
+  public default void cPadding() {}
+  public default void dPadding() {}
+  public default void invoke() {
+    System.out.println("running invoke");
+  }
+  public default void wPadding() {}
+  public default void xPadding() {}
+  public default void yPadding() {}
+  public default void zPadding() {}
+}
diff --git a/test/146-bad-interface/src/Main.java b/test/146-bad-interface/src/Main.java
new file mode 100644
index 0000000..5534bb4
--- /dev/null
+++ b/test/146-bad-interface/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import dalvik.system.PathClassLoader;
+
+/**
+ * Structural hazard test.
+ */
+public class Main {
+  static final String DEX_LOCATION = System.getenv("DEX_LOCATION");
+  static final String DEX_FILES =
+      DEX_LOCATION + "/146-bad-interface-ex.jar" + ":" +
+      DEX_LOCATION + "/146-bad-interface.jar";
+  public static void main(String[] args) {
+    try {
+      PathClassLoader p = new PathClassLoader(DEX_FILES, Main.class.getClassLoader());
+      Class<?> c = Class.forName("A", true, p);
+      Object o = c.newInstance();
+      Class<?> runner = Class.forName("InvokeInf", true, p);
+      Class<?> arg = Class.forName("Iface", true, p);
+      Method r = runner.getDeclaredMethod("doInvoke", arg);
+      r.invoke(null, o);
+    } catch (Throwable t) {
+      System.out.println("Error occurred");
+      System.out.println(t);
+      t.printStackTrace();
+    }
+  }
+}
diff --git a/test/147-stripped-dex-fallback/expected.txt b/test/147-stripped-dex-fallback/expected.txt
new file mode 100644
index 0000000..af5626b
--- /dev/null
+++ b/test/147-stripped-dex-fallback/expected.txt
@@ -0,0 +1 @@
+Hello, world!
diff --git a/test/147-stripped-dex-fallback/info.txt b/test/147-stripped-dex-fallback/info.txt
new file mode 100644
index 0000000..72a2ca8
--- /dev/null
+++ b/test/147-stripped-dex-fallback/info.txt
@@ -0,0 +1,2 @@
+Verify that we fall back to running out of the dex code in the oat file if there
+is no image and the original dex code has been stripped.
diff --git a/test/147-stripped-dex-fallback/run b/test/147-stripped-dex-fallback/run
new file mode 100755
index 0000000..e594010
--- /dev/null
+++ b/test/147-stripped-dex-fallback/run
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Ensure the flags include prebuild.
+flags="$@"
+if [[ "${flags}" == *--no-prebuild* ]] ; then
+  echo "Test 147-stripped-dex-fallback is not intended to run in no-prebuild mode."
+  exit 1
+fi
+
+${RUN} ${flags} --strip-dex --no-dex2oat
diff --git a/test/147-stripped-dex-fallback/src/Main.java b/test/147-stripped-dex-fallback/src/Main.java
new file mode 100644
index 0000000..1ef6289
--- /dev/null
+++ b/test/147-stripped-dex-fallback/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println("Hello, world!");
+  }
+}
diff --git a/test/370-dex-v37/build b/test/370-dex-v37/build
new file mode 100755
index 0000000..f472428
--- /dev/null
+++ b/test/370-dex-v37/build
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+./default-build "$@"
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Change the generated dex file to have a v37 magic number if it is version 35
+  if test -f classes.dex && head -c 7 classes.dex | grep -q 035; then
+    # Place the ASCII value '037' into the classes.dex file starting at byte 4.
+    printf '037' | dd status=none conv=notrunc of=classes.dex bs=1 seek=4 count=3
+    rm -f $TEST_NAME.jar
+    zip $TEST_NAME.jar classes.dex
+  fi
+fi
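The dd trick works because a dex file's eight-byte magic is the ASCII string "dex\n" followed by three version digits and a NUL (e.g. "dex\n035\0"), so overwriting bytes 4-6 rewrites the version in place. A small sketch that reads the version back out (the classes.dex path is assumed):

    import java.io.FileInputStream;
    import java.io.IOException;

    class DexVersion {
        public static void main(String[] args) throws IOException {
            try (FileInputStream in = new FileInputStream("classes.dex")) {
                byte[] magic = new byte[8];
                if (in.read(magic) != 8) {
                    throw new IOException("truncated dex header");
                }
                // Bytes 4..6 hold the ASCII version digits, e.g. "035" or "037".
                String version = new String(magic, 4, 3, "US-ASCII");
                System.out.println("dex version: " + version);
            }
        }
    }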
diff --git a/test/370-dex-v37/expected.txt b/test/370-dex-v37/expected.txt
new file mode 100644
index 0000000..af5626b
--- /dev/null
+++ b/test/370-dex-v37/expected.txt
@@ -0,0 +1 @@
+Hello, world!
diff --git a/test/370-dex-v37/info.txt b/test/370-dex-v37/info.txt
new file mode 100644
index 0000000..5ca9c76
--- /dev/null
+++ b/test/370-dex-v37/info.txt
@@ -0,0 +1 @@
+Print "Hello, World!" with a version 37 dex file.
diff --git a/test/370-dex-v37/src/Main.java b/test/370-dex-v37/src/Main.java
new file mode 100644
index 0000000..1ef6289
--- /dev/null
+++ b/test/370-dex-v37/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println("Hello, world!");
+  }
+}
diff --git a/test/444-checker-nce/src/Main.java b/test/444-checker-nce/src/Main.java
index c96b18c..ddc2f77 100644
--- a/test/444-checker-nce/src/Main.java
+++ b/test/444-checker-nce/src/Main.java
@@ -28,10 +28,6 @@
   }
 
   /// CHECK-START: Main Main.thisTest() builder (after)
-  /// CHECK:         NullCheck
-  /// CHECK:         InvokeStaticOrDirect
-
-  /// CHECK-START: Main Main.thisTest() instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   /// CHECK:         InvokeStaticOrDirect
   public Main thisTest() {
@@ -40,12 +36,10 @@
 
   /// CHECK-START: Main Main.newInstanceRemoveTest() builder (after)
   /// CHECK:         NewInstance
-  /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
-  /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
 
-  /// CHECK-START: Main Main.newInstanceRemoveTest() instruction_simplifier (after)
+  /// CHECK-START: Main Main.newInstanceRemoveTest() builder (after)
   /// CHECK-NOT:     NullCheck
   public Main newInstanceRemoveTest() {
     Main m = new Main();
@@ -54,13 +48,10 @@
 
   /// CHECK-START: Main Main.newArrayRemoveTest() builder (after)
   /// CHECK:         NewArray
-  /// CHECK:         NullCheck
   /// CHECK:         ArrayGet
 
-  /// CHECK-START: Main Main.newArrayRemoveTest() instruction_simplifier (after)
-  /// CHECK:         NewArray
+  /// CHECK-START: Main Main.newArrayRemoveTest() builder (after)
   /// CHECK-NOT:     NullCheck
-  /// CHECK:         ArrayGet
   public Main newArrayRemoveTest() {
     Main[] ms = new Main[1];
     return ms[0];
@@ -179,9 +170,6 @@
   }
 
   /// CHECK-START: Main Main.scopeRemoveTest(int, Main) builder (after)
-  /// CHECK:         NullCheck
-
-  /// CHECK-START: Main Main.scopeRemoveTest(int, Main) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main scopeRemoveTest(int count, Main a) {
     Main m = null;
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 66e1d92..41771b5 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -927,6 +927,32 @@
     }
   }
 
+  /// CHECK-START: void Main.nonzeroLength(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.nonzeroLength(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  public static void nonzeroLength(int[] a) {
+    if (a.length != 0) {
+      a[0] = 112;
+    }
+  }
+
+  /// CHECK-START: void Main.knownLength(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.knownLength(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  public static void knownLength(int[] a) {
+    if (a.length == 2) {
+      a[0] = -1;
+      a[1] = -2;
+    }
+  }
+
   static int[][] mA;
 
   /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (before)
@@ -1586,6 +1612,26 @@
       }
     }
 
+    nonzeroLength(array);
+    if (array[0] != 112) {
+      System.out.println("nonzero length failed!");
+    }
+
+    knownLength(array);
+    if (array[0] != 112 || array[1] != 1) {
+      System.out.println("nonzero length failed!");
+    }
+    array = new int[2];
+    knownLength(array);
+    if (array[0] != -1 || array[1] != -2) {
+      System.out.println("nonzero length failed!");
+    }
+
+    // Zero length array does not break.
+    array = new int[0];
+    nonzeroLength(array);
+    knownLength(array);
+
     mA = new int[4][4];
     for (int i = 0; i < 4; i++) {
       for (int j = 0; j < 4; j++) {
diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java
index ea6df62..8de6318 100644
--- a/test/496-checker-inlining-and-class-loader/src/Main.java
+++ b/test/496-checker-inlining-and-class-loader/src/Main.java
@@ -109,7 +109,8 @@
                 /* Load and initialize System */
   /// CHECK-NEXT: LoadClass gen_clinit_check:true
   /// CHECK-NEXT: StaticFieldGet
-  /// CHECK-NEXT: LoadString
+  // There may be HArmDexCacheArraysBase or HX86ComputeBaseMethodAddress here.
+  /// CHECK:      LoadString
   /// CHECK-NEXT: NullCheck
   /// CHECK-NEXT: InvokeVirtual
   public static void bar() {
diff --git a/test/510-checker-try-catch/smali/Builder.smali b/test/510-checker-try-catch/smali/Builder.smali
index 8ec840d..733a1dd 100644
--- a/test/510-checker-try-catch/smali/Builder.smali
+++ b/test/510-checker-try-catch/smali/Builder.smali
@@ -21,11 +21,11 @@
 
 ## CHECK-START: int Builder.testMultipleTryCatch(int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnterTry1:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
-## CHECK:  <<Minus3:i\d+>>  IntConstant -3
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BEnterTry1:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK-DAG:  <<Minus3:i\d+>>  IntConstant -3
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>"
@@ -236,10 +236,10 @@
 
 ## CHECK-START: int Builder.testMultipleExits(int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnterTry:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BEnterTry:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BTry:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry>>"
@@ -312,10 +312,10 @@
 
 ## CHECK-START: int Builder.testSharedBoundary(int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnter1:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BEnter1:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter1>>"
@@ -403,10 +403,10 @@
 
 ## CHECK-START: int Builder.testSharedBoundary_Reverse(int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BGoto:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BGoto:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BGoto>>"
 ## CHECK:  successors       "<<BEnter2:B\d+>>"
@@ -504,9 +504,9 @@
 
 ## CHECK-START: int Builder.testNestedTry(int, int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter1:B\d+>>"
@@ -728,8 +728,17 @@
 ## CHECK:  successors       "<<BReturn:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  name             "<<BReturn>>"
+## CHECK:  predecessors     "<<BOutside>>" "<<BCatch:B\d+>>"
+## CHECK:  successors       "<<BExit:B\d+>>"
+## CHECK:  Return
+
+## CHECK:  name             "<<BExit>>"
+## CHECK:  Exit
+
+## CHECK:  name             "<<BCatch>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
 ## CHECK:  Goto
 
@@ -757,10 +766,6 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
-## CHECK:  name             "<<BReturn>>"
-## CHECK:  predecessors     "<<BCatch>>" "<<BOutside>>"
-## CHECK:  Return
-
 ## CHECK:  name             "<<BSplit1>>"
 ## CHECK:  predecessors     "<<BPSwitch0>>"
 ## CHECK:  successors       "<<BEnterTry2>>"
@@ -838,7 +843,15 @@
 ## CHECK:  successors       "<<BReturn:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  name             "<<BReturn>>"
+## CHECK:  predecessors     "<<BOutside>>" "<<BCatch:B\d+>>"
+## CHECK:  successors       "<<BExit:B\d+>>"
+## CHECK:  Return
+
+## CHECK:  name             "<<BExit>>"
+## CHECK:  Exit
+
+## CHECK:  name             "<<BCatch>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
@@ -868,10 +881,6 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
-## CHECK:  name             "<<BReturn>>"
-## CHECK:  predecessors     "<<BCatch>>" "<<BOutside>>"
-## CHECK:  Return
-
 ## CHECK:  name             "<<BSplit1>>"
 ## CHECK:  predecessors     "<<BPSwitch0>>"
 ## CHECK:  successors       "<<BTry2>>"
@@ -968,12 +977,12 @@
 ## CHECK-START: int Builder.testCatchLoop(int, int, int) builder (after)
 
 ## CHECK:  name             "B0"
-## CHECK:  successors       "<<BSplit2:B\d+>>"
+## CHECK:  successors       "<<BSplit:B\d+>>"
 
-## CHECK:  name             "<<BCatch:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry:B\d+>>" "<<BExitTry:B\d+>>"
-## CHECK:  successors       "<<BSplit1:B\d+>>"
-## CHECK:  flags            "catch_block"
+## CHECK:  name             "<<BTry:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry:B\d+>>"
+## CHECK:  successors       "<<BExitTry:B\d+>>"
+## CHECK:  Div
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry>>"
@@ -984,13 +993,14 @@
 ## CHECK:  predecessors     "<<BReturn>>"
 ## CHECK:  Exit
 
-## CHECK:  name             "<<BTry:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry>>"
-## CHECK:  successors       "<<BExitTry>>"
-## CHECK:  Div
+## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry>>" "<<BExitTry>>"
+## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  flags            "catch_block"
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry>>"
-## CHECK:  predecessors     "<<BSplit1>>"
+## CHECK:  predecessors     "<<BSplit>>" "<<BCatch>>"
 ## CHECK:  successors       "<<BTry>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
@@ -1001,14 +1011,9 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
-## CHECK:  name             "<<BSplit1>>"
-## CHECK:  predecessors     "<<BSplit2>>" "<<BCatch>>"
-## CHECK:  successors       "<<BEnterTry>>"
-## CHECK:  Goto
-
-## CHECK:  name             "<<BSplit2>>"
+## CHECK:  name             "<<BSplit>>"
 ## CHECK:  predecessors     "B0"
-## CHECK:  successors       "<<BSplit1>>"
+## CHECK:  successors       "<<BEnterTry>>"
 ## CHECK:  Goto
 
 .method public static testCatchLoop(III)I
@@ -1037,23 +1042,25 @@
 ## CHECK:  successors       "<<BExitTry1:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BCatch:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2:B\d+>>" "<<BExitTry1>>" "<<BExitTry2:B\d+>>"
-## CHECK:  successors       "<<BSplit1:B\d+>>"
-## CHECK:  flags            "catch_block"
+## CHECK:  name             "<<BTry2:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry2:B\d+>>"
+## CHECK:  successors       "<<BExitTry2:B\d+>>"
+## CHECK:  Div
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry2>>"
 ## CHECK:  successors       "<<BExit:B\d+>>"
+## CHECK:  Return
 
 ## CHECK:  name             "<<BExit>>"
 ## CHECK:  predecessors     "<<BReturn>>"
 ## CHECK:  Exit
 
-## CHECK:  name             "<<BTry2:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry2>>"
-## CHECK:  successors       "<<BExitTry2>>"
-## CHECK:  Div
+## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  flags            "catch_block"
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "B0"
@@ -1062,14 +1069,14 @@
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BSplit1>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BCatch>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BSplit2:B\d+>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
@@ -1079,14 +1086,9 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
-## CHECK:  name             "<<BSplit1>>"
-## CHECK:  predecessors     "<<BSplit2>>" "<<BCatch>>"
-## CHECK:  successors       "<<BEnterTry2>>"
-## CHECK:  Goto
-
-## CHECK:  name             "<<BSplit2>>"
+## CHECK:  name             "<<BSplit>>"
 ## CHECK:  predecessors     "<<BExitTry1>>"
-## CHECK:  successors       "<<BSplit1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
 ## CHECK:  Goto
 
 .method public static testHandlerEdge1(III)I
@@ -1109,17 +1111,17 @@
 ## CHECK-START: int Builder.testHandlerEdge2(int, int, int) builder (after)
 
 ## CHECK:  name             "B0"
-## CHECK:  successors       "<<BSplit4:B\d+>>"
-
-## CHECK:  name             "<<BCatch1:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry2:B\d+>>" "<<BExitTry2:B\d+>>"
 ## CHECK:  successors       "<<BSplit1:B\d+>>"
-## CHECK:  flags            "catch_block"
 
-## CHECK:  name             "<<BCatch2:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry1:B\d+>>" "<<BExitTry1:B\d+>>"
-## CHECK:  successors       "<<BSplit2:B\d+>>"
-## CHECK:  flags            "catch_block"
+## CHECK:  name             "<<BTry1:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry1:B\d+>>"
+## CHECK:  successors       "<<BExitTry1:B\d+>>"
+## CHECK:  Div
+
+## CHECK:  name             "<<BTry2:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry2:B\d+>>"
+## CHECK:  successors       "<<BExitTry2:B\d+>>"
+## CHECK:  Div
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry2>>"
@@ -1129,31 +1131,31 @@
 ## CHECK:  name             "<<BExit>>"
 ## CHECK:  Exit
 
-## CHECK:  name             "<<BTry1:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry1>>"
-## CHECK:  successors       "<<BExitTry1>>"
-## CHECK:  Div
+## CHECK:  name             "<<BCatch2:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BExitTry1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  flags            "catch_block"
 
-## CHECK:  name             "<<BTry2:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry2>>"
-## CHECK:  successors       "<<BExitTry2>>"
-## CHECK:  Div
+## CHECK:  name             "<<BCatch1:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry2>>" "<<BExitTry2>>"
+## CHECK:  successors       "<<BEnterTry1>>"
+## CHECK:  flags            "catch_block"
 
 ## CHECK:  name             "<<BEnterTry1>>"
-## CHECK:  predecessors     "<<BSplit1>>"
+## CHECK:  predecessors     "<<BSplit1>>" "<<BCatch1>>"
 ## CHECK:  successors       "<<BTry1>>"
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BSplit2>>"
+## CHECK:  predecessors     "<<BSplit2:B\d+>>" "<<BCatch2>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch1>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BSplit3:B\d+>>"
+## CHECK:  successors       "<<BSplit2>>"
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:exit
 
@@ -1164,23 +1166,13 @@
 ## CHECK:  TryBoundary      kind:exit
 
 ## CHECK:  name             "<<BSplit1>>"
-## CHECK:  predecessors     "<<BSplit4>>" "<<BCatch1>>"
+## CHECK:  predecessors     "B0"
 ## CHECK:  successors       "<<BEnterTry1>>"
 ## CHECK:  Goto
 
 ## CHECK:  name             "<<BSplit2>>"
-## CHECK:  predecessors     "<<BCatch2>>" "<<BSplit3>>"
-## CHECK:  successors       "<<BEnterTry2>>"
-## CHECK:  Goto
-
-## CHECK:  name             "<<BSplit3>>"
 ## CHECK:  predecessors     "<<BExitTry1>>"
-## CHECK:  successors       "<<BSplit2>>"
-## CHECK:  Goto
-
-## CHECK:  name             "<<BSplit4>>"
-## CHECK:  predecessors     "B0"
-## CHECK:  successors       "<<BSplit1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
 ## CHECK:  Goto
 
 .method public static testHandlerEdge2(III)I
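# A hedged reading of the graph pinned down by the checks above (the test is
# smali; this control flow has no direct Java equivalent): two try regions
# whose catch handlers feed into each other's entries, so exceptional flow
# loops through the handlers.
#
#   B0 -> BSplit1 -> BEnterTry1 -> BTry1 (Div) -> BExitTry1 -> BSplit2
#      -> BEnterTry2 -> BTry2 (Div) -> BExitTry2 -> BReturn -> BExit
#   BCatch2 (handler of try 1) -> BEnterTry2
#   BCatch1 (handler of try 2) -> BEnterTry1   (loop through the handlers)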
@@ -1365,7 +1357,10 @@
     .catchall {:try_start .. :try_end} :catch_all
 .end method
 
-## CHECK-START: int Builder.testSynchronized(java.lang.Object) builder (after)
+# Test that a throw-catch loop on monitor-exit is eliminated.
+# Note that we do not test this until after DCE, which merges trivially split blocks.
+
+## CHECK-START: int Builder.testSynchronized(java.lang.Object) dead_code_elimination (after)
 ## CHECK:      flags "catch_block"
 ## CHECK-NOT:  end_block
 ## CHECK:      MonitorOperation kind:exit
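# For context, a minimal Java sketch of the pattern this check targets (the
# real test is smali and its body differs): a synchronized block is lowered
# to monitor-enter/monitor-exit plus a catch-all handler that re-executes
# monitor-exit and rethrows. Because that handler covers its own
# monitor-exit, it forms the throw-catch loop that should be gone after
# dead_code_elimination.
#
#   public static int testSynchronized(Object o) {
#     synchronized (o) {   // monitor-enter; catch-all handler redoes monitor-exit
#       return 42;         // placeholder body, invented for the sketch
#     }                    // monitor-exit, covered by its own handler
#   }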
diff --git a/test/510-checker-try-catch/smali/SsaBuilder.smali b/test/510-checker-try-catch/smali/SsaBuilder.smali
index 1fd5fb2..bfc0b20 100644
--- a/test/510-checker-try-catch/smali/SsaBuilder.smali
+++ b/test/510-checker-try-catch/smali/SsaBuilder.smali
@@ -25,23 +25,24 @@
 ## CHECK-NEXT: from_bci
 ## CHECK-NEXT: to_bci
 ## CHECK-NEXT: predecessors
-## CHECK-NEXT: successors       "<<BExtracted:B\d+>>"
+## CHECK-NEXT: successors       "<<BAdd:B\d+>>"
 
-## CHECK:      name             "<<BCatch:B\d+>>"
+## CHECK:      name             "<<BAdd>>"
+## CHECK-NEXT: from_bci
+## CHECK-NEXT: to_bci
+## CHECK-NEXT: predecessors     "B1" "<<BCatch:B\d+>>"
+## CHECK-NEXT: successors
+## CHECK-NEXT: xhandlers
+## CHECK-NOT:  end_block
+## CHECK:      Add
+
+## CHECK:      name             "<<BCatch>>"
 ## CHECK-NEXT: from_bci
 ## CHECK-NEXT: to_bci
 ## CHECK-NEXT: predecessors
-## CHECK-NEXT: successors       "<<BExtracted>>"
+## CHECK-NEXT: successors       "<<BAdd>>"
 ## CHECK-NEXT: xhandlers
 ## CHECK-NEXT: flags            "catch_block"
-## CHECK-NOT:  Add
-
-## CHECK:      name             "<<BExtracted>>"
-## CHECK-NEXT: from_bci
-## CHECK-NEXT: to_bci
-## CHECK-NEXT: predecessors     "B1" "<<BCatch>>"
-## CHECK-NOT:  flags            "catch_block"
-## CHECK:      Add
 
 .method public static testSimplifyCatchBlock(III)I
     .registers 4
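# A hedged Java approximation of the shape these checks describe (the actual
# test is smali): the trailing Add is not part of the catch block; it lives
# in a separate block (<<BAdd>>) reached from both the normal path (B1) and
# the handler (<<BCatch>>).
#
#   public static int testSimplifyCatchBlock(int a, int b, int c) {
#     int res;
#     try {
#       res = a / b;     // hypothetical faulting instruction
#     } catch (Throwable t) {
#       res = c;         // handler falls through to the shared block
#     }
#     return res + 1;    // the shared Add block ("BAdd")
#   }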
diff --git a/test/525-checker-arrays-and-fields/info.txt b/test/525-checker-arrays-and-fields/info.txt
deleted file mode 100644
index 3e16abf..0000000
--- a/test/525-checker-arrays-and-fields/info.txt
+++ /dev/null
@@ -1 +0,0 @@
-Test on (in)variant static and instance field and array references in loops.
diff --git a/test/525-checker-arrays-and-fields/src/Main.java b/test/525-checker-arrays-and-fields/src/Main.java
deleted file mode 100644
index a635a51..0000000
--- a/test/525-checker-arrays-and-fields/src/Main.java
+++ /dev/null
@@ -1,1099 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// Test on (in)variant static and instance field and array references in loops.
-//
-public class Main {
-
-  private static Object anObject = new Object();
-  private static Object anotherObject = new Object();
-
-  //
-  // Static fields.
-  //
-
-  private static boolean sZ;
-  private static byte sB;
-  private static char sC;
-  private static short sS;
-  private static int sI;
-  private static long sJ;
-  private static float sF;
-  private static double sD;
-  private static Object sL;
-
-  //
-  // Static arrays.
-  //
-
-  private static boolean[] sArrZ;
-  private static byte[] sArrB;
-  private static char[] sArrC;
-  private static short[] sArrS;
-  private static int[] sArrI;
-  private static long[] sArrJ;
-  private static float[] sArrF;
-  private static double[] sArrD;
-  private static Object[] sArrL;
-
-  //
-  // Instance fields.
-  //
-
-  private boolean mZ;
-  private byte mB;
-  private char mC;
-  private short mS;
-  private int mI;
-  private long mJ;
-  private float mF;
-  private double mD;
-  private Object mL;
-
-  //
-  // Instance arrays.
-  //
-
-  private boolean[] mArrZ;
-  private byte[] mArrB;
-  private char[] mArrC;
-  private short[] mArrS;
-  private int[] mArrI;
-  private long[] mArrJ;
-  private float[] mArrF;
-  private double[] mArrD;
-  private Object[] mArrL;
-
-  //
-  // Loops on static arrays with invariant static field references.
-  // The checker is used to ensure hoisting occurred.
-  //
-
-  /// CHECK-START: void Main.SInvLoopZ() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopZ() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopZ() {
-    for (int i = 0; i < sArrZ.length; i++) {
-      sArrZ[i] = sZ;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopB() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopB() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopB() {
-    for (int i = 0; i < sArrB.length; i++) {
-      sArrB[i] = sB;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopC() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopC() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopC() {
-    for (int i = 0; i < sArrC.length; i++) {
-      sArrC[i] = sC;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopS() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopS() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopS() {
-    for (int i = 0; i < sArrS.length; i++) {
-      sArrS[i] = sS;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopI() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopI() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopI() {
-    for (int i = 0; i < sArrI.length; i++) {
-      sArrI[i] = sI;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopJ() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopJ() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopJ() {
-    for (int i = 0; i < sArrJ.length; i++) {
-      sArrJ[i] = sJ;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopF() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopF() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopF() {
-    for (int i = 0; i < sArrF.length; i++) {
-      sArrF[i] = sF;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopD() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopD() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopD() {
-    for (int i = 0; i < sArrD.length; i++) {
-      sArrD[i] = sD;
-    }
-  }
-
-  /// CHECK-START: void Main.SInvLoopL() licm (before)
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SInvLoopL() licm (after)
-  /// CHECK-DAG: StaticFieldGet loop:none
-  /// CHECK-DAG: StaticFieldGet loop:none
-
-  private static void SInvLoopL() {
-    for (int i = 0; i < sArrL.length; i++) {
-      sArrL[i] = sL;
-    }
-  }
-
-  //
-  // Loops on static arrays with variant static field references.
-  // Incorrect hoisting is detected by incorrect outcome.
-  //
-
-  private static void SVarLoopZ() {
-    for (int i = 0; i < sArrZ.length; i++) {
-      sArrZ[i] = sZ;
-      if (i == 10)
-        sZ = !sZ;
-    }
-  }
-
-  private static void SVarLoopB() {
-    for (int i = 0; i < sArrB.length; i++) {
-      sArrB[i] = sB;
-      if (i == 10)
-        sB++;
-    }
-  }
-
-  private static void SVarLoopC() {
-    for (int i = 0; i < sArrC.length; i++) {
-      sArrC[i] = sC;
-      if (i == 10)
-        sC++;
-    }
-  }
-
-  private static void SVarLoopS() {
-    for (int i = 0; i < sArrS.length; i++) {
-      sArrS[i] = sS;
-      if (i == 10)
-        sS++;
-    }
-  }
-
-  private static void SVarLoopI() {
-    for (int i = 0; i < sArrI.length; i++) {
-      sArrI[i] = sI;
-      if (i == 10)
-        sI++;
-    }
-  }
-
-  private static void SVarLoopJ() {
-    for (int i = 0; i < sArrJ.length; i++) {
-      sArrJ[i] = sJ;
-      if (i == 10)
-        sJ++;
-    }
-  }
-
-  private static void SVarLoopF() {
-    for (int i = 0; i < sArrF.length; i++) {
-      sArrF[i] = sF;
-      if (i == 10)
-        sF++;
-    }
-  }
-
-  private static void SVarLoopD() {
-    for (int i = 0; i < sArrD.length; i++) {
-      sArrD[i] = sD;
-      if (i == 10)
-        sD++;
-    }
-  }
-
-  private static void SVarLoopL() {
-    for (int i = 0; i < sArrL.length; i++) {
-      sArrL[i] = sL;
-      if (i == 10)
-        sL = anotherObject;
-    }
-  }
-
-  //
-  // Loops on static arrays with a cross-over reference.
-  // Incorrect hoisting is detected by incorrect outcome.
-  // In addition, the checker is used to detect no hoisting.
-  //
-
-  /// CHECK-START: void Main.SCrossOverLoopZ() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopZ() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopZ() {
-    for (int i = 0; i < sArrZ.length; i++) {
-      sArrZ[i] = !sArrZ[20];
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopB() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopB() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopB() {
-    for (int i = 0; i < sArrB.length; i++) {
-      sArrB[i] = (byte)(sArrB[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopC() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopC() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopC() {
-    for (int i = 0; i < sArrC.length; i++) {
-      sArrC[i] = (char)(sArrC[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopS() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopS() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopS() {
-    for (int i = 0; i < sArrS.length; i++) {
-      sArrS[i] = (short)(sArrS[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopI() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopI() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopI() {
-    for (int i = 0; i < sArrI.length; i++) {
-      sArrI[i] = sArrI[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopJ() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopJ() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopJ() {
-    for (int i = 0; i < sArrJ.length; i++) {
-      sArrJ[i] = sArrJ[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopF() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopF() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopF() {
-    for (int i = 0; i < sArrF.length; i++) {
-      sArrF[i] = sArrF[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopD() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopD() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopD() {
-    for (int i = 0; i < sArrD.length; i++) {
-      sArrD[i] = sArrD[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.SCrossOverLoopL() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.SCrossOverLoopL() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private static void SCrossOverLoopL() {
-    for (int i = 0; i < sArrL.length; i++) {
-      sArrL[i] = (sArrL[20] == anObject) ? anotherObject : anObject;
-    }
-  }
-
-  //
-  // Loops on instance arrays with invariant instance field references.
-  // The checker is used to ensure hoisting occurred.
-  //
-
-  /// CHECK-START: void Main.InvLoopZ() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopZ() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopZ() {
-    for (int i = 0; i < mArrZ.length; i++) {
-      mArrZ[i] = mZ;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopB() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopB() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopB() {
-    for (int i = 0; i < mArrB.length; i++) {
-      mArrB[i] = mB;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopC() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopC() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopC() {
-    for (int i = 0; i < mArrC.length; i++) {
-      mArrC[i] = mC;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopS() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopS() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopS() {
-    for (int i = 0; i < mArrS.length; i++) {
-      mArrS[i] = mS;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopI() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopI() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopI() {
-    for (int i = 0; i < mArrI.length; i++) {
-      mArrI[i] = mI;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopJ() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopJ() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopJ() {
-    for (int i = 0; i < mArrJ.length; i++) {
-      mArrJ[i] = mJ;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopF() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopF() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopF() {
-    for (int i = 0; i < mArrF.length; i++) {
-      mArrF[i] = mF;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopD() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopD() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopD() {
-    for (int i = 0; i < mArrD.length; i++) {
-      mArrD[i] = mD;
-    }
-  }
-
-  /// CHECK-START: void Main.InvLoopL() licm (before)
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.InvLoopL() licm (after)
-  /// CHECK-DAG: InstanceFieldGet loop:none
-  /// CHECK-DAG: InstanceFieldGet loop:none
-
-  private void InvLoopL() {
-    for (int i = 0; i < mArrL.length; i++) {
-      mArrL[i] = mL;
-    }
-  }
-
-  //
-  // Loops on instance arrays with variant instance field references.
-  // Incorrect hoisting is detected by incorrect outcome.
-  //
-
-  private void VarLoopZ() {
-    for (int i = 0; i < mArrZ.length; i++) {
-      mArrZ[i] = mZ;
-      if (i == 10)
-        mZ = !mZ;
-    }
-  }
-
-  private void VarLoopB() {
-    for (int i = 0; i < mArrB.length; i++) {
-      mArrB[i] = mB;
-      if (i == 10)
-        mB++;
-    }
-  }
-
-  private void VarLoopC() {
-    for (int i = 0; i < mArrC.length; i++) {
-      mArrC[i] = mC;
-      if (i == 10)
-        mC++;
-    }
-  }
-
-  private void VarLoopS() {
-    for (int i = 0; i < mArrS.length; i++) {
-      mArrS[i] = mS;
-      if (i == 10)
-        mS++;
-    }
-  }
-
-  private void VarLoopI() {
-    for (int i = 0; i < mArrI.length; i++) {
-      mArrI[i] = mI;
-      if (i == 10)
-        mI++;
-    }
-  }
-
-  private void VarLoopJ() {
-    for (int i = 0; i < mArrJ.length; i++) {
-      mArrJ[i] = mJ;
-      if (i == 10)
-        mJ++;
-    }
-  }
-
-  private void VarLoopF() {
-    for (int i = 0; i < mArrF.length; i++) {
-      mArrF[i] = mF;
-      if (i == 10)
-        mF++;
-    }
-  }
-
-  private void VarLoopD() {
-    for (int i = 0; i < mArrD.length; i++) {
-      mArrD[i] = mD;
-      if (i == 10)
-        mD++;
-    }
-  }
-
-  private void VarLoopL() {
-    for (int i = 0; i < mArrL.length; i++) {
-      mArrL[i] = mL;
-      if (i == 10)
-        mL = anotherObject;
-    }
-  }
-
-  //
-  // Loops on instance arrays with a cross-over reference.
-  // Incorrect hoisting is detected by incorrect outcome.
-  // In addition, the checker is used to detect no hoisting.
-  //
-
-  /// CHECK-START: void Main.CrossOverLoopZ() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopZ() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopZ() {
-    for (int i = 0; i < mArrZ.length; i++) {
-      mArrZ[i] = !mArrZ[20];
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopB() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopB() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopB() {
-    for (int i = 0; i < mArrB.length; i++) {
-      mArrB[i] = (byte)(mArrB[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopC() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopC() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopC() {
-    for (int i = 0; i < mArrC.length; i++) {
-      mArrC[i] = (char)(mArrC[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopS() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopS() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopS() {
-    for (int i = 0; i < mArrS.length; i++) {
-      mArrS[i] = (short)(mArrS[20] + 2);
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopI() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopI() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopI() {
-    for (int i = 0; i < mArrI.length; i++) {
-      mArrI[i] = mArrI[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopJ() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopJ() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopJ() {
-    for (int i = 0; i < mArrJ.length; i++) {
-      mArrJ[i] = mArrJ[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopF() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopF() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopF() {
-    for (int i = 0; i < mArrF.length; i++) {
-      mArrF[i] = mArrF[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopD() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopD() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopD() {
-    for (int i = 0; i < mArrD.length; i++) {
-      mArrD[i] = mArrD[20] + 2;
-    }
-  }
-
-  /// CHECK-START: void Main.CrossOverLoopL() licm (before)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  /// CHECK-START: void Main.CrossOverLoopL() licm (after)
-  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
-  /// CHECK-DAG: ArraySet loop:{{B\d+}}
-
-  private void CrossOverLoopL() {
-    for (int i = 0; i < mArrL.length; i++) {
-      mArrL[i] = (mArrL[20] == anObject) ? anotherObject : anObject;
-    }
-  }
-
-  //
-  // Driver and testers.
-  //
-
-  public static void main(String[] args) {
-    DoStaticTests();
-    new Main().DoInstanceTests();
-  }
-
-  private static void DoStaticTests() {
-    // Type Z.
-    sZ = true;
-    sArrZ = new boolean[100];
-    SInvLoopZ();
-    for (int i = 0; i < sArrZ.length; i++) {
-      expectEquals(true, sArrZ[i]);
-    }
-    SVarLoopZ();
-    for (int i = 0; i < sArrZ.length; i++) {
-      expectEquals(i <= 10, sArrZ[i]);
-    }
-    SCrossOverLoopZ();
-    for (int i = 0; i < sArrZ.length; i++) {
-      expectEquals(i <= 20, sArrZ[i]);
-    }
-    // Type B.
-    sB = 1;
-    sArrB = new byte[100];
-    SInvLoopB();
-    for (int i = 0; i < sArrB.length; i++) {
-      expectEquals(1, sArrB[i]);
-    }
-    SVarLoopB();
-    for (int i = 0; i < sArrB.length; i++) {
-      expectEquals(i <= 10 ? 1 : 2, sArrB[i]);
-    }
-    SCrossOverLoopB();
-    for (int i = 0; i < sArrB.length; i++) {
-      expectEquals(i <= 20 ? 4 : 6, sArrB[i]);
-    }
-    // Type C.
-    sC = 2;
-    sArrC = new char[100];
-    SInvLoopC();
-    for (int i = 0; i < sArrC.length; i++) {
-      expectEquals(2, sArrC[i]);
-    }
-    SVarLoopC();
-    for (int i = 0; i < sArrC.length; i++) {
-      expectEquals(i <= 10 ? 2 : 3, sArrC[i]);
-    }
-    SCrossOverLoopC();
-    for (int i = 0; i < sArrC.length; i++) {
-      expectEquals(i <= 20 ? 5 : 7, sArrC[i]);
-    }
-    // Type S.
-    sS = 3;
-    sArrS = new short[100];
-    SInvLoopS();
-    for (int i = 0; i < sArrS.length; i++) {
-      expectEquals(3, sArrS[i]);
-    }
-    SVarLoopS();
-    for (int i = 0; i < sArrS.length; i++) {
-      expectEquals(i <= 10 ? 3 : 4, sArrS[i]);
-    }
-    SCrossOverLoopS();
-    for (int i = 0; i < sArrS.length; i++) {
-      expectEquals(i <= 20 ? 6 : 8, sArrS[i]);
-    }
-    // Type I.
-    sI = 4;
-    sArrI = new int[100];
-    SInvLoopI();
-    for (int i = 0; i < sArrI.length; i++) {
-      expectEquals(4, sArrI[i]);
-    }
-    SVarLoopI();
-    for (int i = 0; i < sArrI.length; i++) {
-      expectEquals(i <= 10 ? 4 : 5, sArrI[i]);
-    }
-    SCrossOverLoopI();
-    for (int i = 0; i < sArrI.length; i++) {
-      expectEquals(i <= 20 ? 7 : 9, sArrI[i]);
-    }
-    // Type J.
-    sJ = 5;
-    sArrJ = new long[100];
-    SInvLoopJ();
-    for (int i = 0; i < sArrJ.length; i++) {
-      expectEquals(5, sArrJ[i]);
-    }
-    SVarLoopJ();
-    for (int i = 0; i < sArrJ.length; i++) {
-      expectEquals(i <= 10 ? 5 : 6, sArrJ[i]);
-    }
-    SCrossOverLoopJ();
-    for (int i = 0; i < sArrJ.length; i++) {
-      expectEquals(i <= 20 ? 8 : 10, sArrJ[i]);
-    }
-    // Type F.
-    sF = 6.0f;
-    sArrF = new float[100];
-    SInvLoopF();
-    for (int i = 0; i < sArrF.length; i++) {
-      expectEquals(6, sArrF[i]);
-    }
-    SVarLoopF();
-    for (int i = 0; i < sArrF.length; i++) {
-      expectEquals(i <= 10 ? 6 : 7, sArrF[i]);
-    }
-    SCrossOverLoopF();
-    for (int i = 0; i < sArrF.length; i++) {
-      expectEquals(i <= 20 ? 9 : 11, sArrF[i]);
-    }
-    // Type D.
-    sD = 7.0;
-    sArrD = new double[100];
-    SInvLoopD();
-    for (int i = 0; i < sArrD.length; i++) {
-      expectEquals(7.0, sArrD[i]);
-    }
-    SVarLoopD();
-    for (int i = 0; i < sArrD.length; i++) {
-      expectEquals(i <= 10 ? 7 : 8, sArrD[i]);
-    }
-    SCrossOverLoopD();
-    for (int i = 0; i < sArrD.length; i++) {
-      expectEquals(i <= 20 ? 10 : 12, sArrD[i]);
-    }
-    // Type L.
-    sL = anObject;
-    sArrL = new Object[100];
-    SInvLoopL();
-    for (int i = 0; i < sArrL.length; i++) {
-      expectEquals(anObject, sArrL[i]);
-    }
-    SVarLoopL();
-    for (int i = 0; i < sArrL.length; i++) {
-      expectEquals(i <= 10 ? anObject : anotherObject, sArrL[i]);
-    }
-    SCrossOverLoopL();
-    for (int i = 0; i < sArrL.length; i++) {
-      expectEquals(i <= 20 ? anObject : anotherObject, sArrL[i]);
-    }
-  }
-
-  private void DoInstanceTests() {
-    // Type Z.
-    mZ = true;
-    mArrZ = new boolean[100];
-    InvLoopZ();
-    for (int i = 0; i < mArrZ.length; i++) {
-      expectEquals(true, mArrZ[i]);
-    }
-    VarLoopZ();
-    for (int i = 0; i < mArrZ.length; i++) {
-      expectEquals(i <= 10, mArrZ[i]);
-    }
-    CrossOverLoopZ();
-    for (int i = 0; i < mArrZ.length; i++) {
-      expectEquals(i <= 20, mArrZ[i]);
-    }
-    // Type B.
-    mB = 1;
-    mArrB = new byte[100];
-    InvLoopB();
-    for (int i = 0; i < mArrB.length; i++) {
-      expectEquals(1, mArrB[i]);
-    }
-    VarLoopB();
-    for (int i = 0; i < mArrB.length; i++) {
-      expectEquals(i <= 10 ? 1 : 2, mArrB[i]);
-    }
-    CrossOverLoopB();
-    for (int i = 0; i < mArrB.length; i++) {
-      expectEquals(i <= 20 ? 4 : 6, mArrB[i]);
-    }
-    // Type C.
-    mC = 2;
-    mArrC = new char[100];
-    InvLoopC();
-    for (int i = 0; i < mArrC.length; i++) {
-      expectEquals(2, mArrC[i]);
-    }
-    VarLoopC();
-    for (int i = 0; i < mArrC.length; i++) {
-      expectEquals(i <= 10 ? 2 : 3, mArrC[i]);
-    }
-    CrossOverLoopC();
-    for (int i = 0; i < mArrC.length; i++) {
-      expectEquals(i <= 20 ? 5 : 7, mArrC[i]);
-    }
-    // Type S.
-    mS = 3;
-    mArrS = new short[100];
-    InvLoopS();
-    for (int i = 0; i < mArrS.length; i++) {
-      expectEquals(3, mArrS[i]);
-    }
-    VarLoopS();
-    for (int i = 0; i < mArrS.length; i++) {
-      expectEquals(i <= 10 ? 3 : 4, mArrS[i]);
-    }
-    CrossOverLoopS();
-    for (int i = 0; i < mArrS.length; i++) {
-      expectEquals(i <= 20 ? 6 : 8, mArrS[i]);
-    }
-    // Type I.
-    mI = 4;
-    mArrI = new int[100];
-    InvLoopI();
-    for (int i = 0; i < mArrI.length; i++) {
-      expectEquals(4, mArrI[i]);
-    }
-    VarLoopI();
-    for (int i = 0; i < mArrI.length; i++) {
-      expectEquals(i <= 10 ? 4 : 5, mArrI[i]);
-    }
-    CrossOverLoopI();
-    for (int i = 0; i < mArrI.length; i++) {
-      expectEquals(i <= 20 ? 7 : 9, mArrI[i]);
-    }
-    // Type J.
-    mJ = 5;
-    mArrJ = new long[100];
-    InvLoopJ();
-    for (int i = 0; i < mArrJ.length; i++) {
-      expectEquals(5, mArrJ[i]);
-    }
-    VarLoopJ();
-    for (int i = 0; i < mArrJ.length; i++) {
-      expectEquals(i <= 10 ? 5 : 6, mArrJ[i]);
-    }
-    CrossOverLoopJ();
-    for (int i = 0; i < mArrJ.length; i++) {
-      expectEquals(i <= 20 ? 8 : 10, mArrJ[i]);
-    }
-    // Type F.
-    mF = 6.0f;
-    mArrF = new float[100];
-    InvLoopF();
-    for (int i = 0; i < mArrF.length; i++) {
-      expectEquals(6, mArrF[i]);
-    }
-    VarLoopF();
-    for (int i = 0; i < mArrF.length; i++) {
-      expectEquals(i <= 10 ? 6 : 7, mArrF[i]);
-    }
-    CrossOverLoopF();
-    for (int i = 0; i < mArrF.length; i++) {
-      expectEquals(i <= 20 ? 9 : 11, mArrF[i]);
-    }
-    // Type D.
-    mD = 7.0;
-    mArrD = new double[100];
-    InvLoopD();
-    for (int i = 0; i < mArrD.length; i++) {
-      expectEquals(7.0, mArrD[i]);
-    }
-    VarLoopD();
-    for (int i = 0; i < mArrD.length; i++) {
-      expectEquals(i <= 10 ? 7 : 8, mArrD[i]);
-    }
-    CrossOverLoopD();
-    for (int i = 0; i < mArrD.length; i++) {
-      expectEquals(i <= 20 ? 10 : 12, mArrD[i]);
-    }
-    // Type L.
-    mL = anObject;
-    mArrL = new Object[100];
-    InvLoopL();
-    for (int i = 0; i < mArrL.length; i++) {
-      expectEquals(anObject, mArrL[i]);
-    }
-    VarLoopL();
-    for (int i = 0; i < mArrL.length; i++) {
-      expectEquals(i <= 10 ? anObject : anotherObject, mArrL[i]);
-    }
-    CrossOverLoopL();
-    for (int i = 0; i < mArrL.length; i++) {
-      expectEquals(i <= 20 ? anObject : anotherObject, mArrL[i]);
-    }
-  }
-
-  private static void expectEquals(boolean expected, boolean result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(byte expected, byte result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(char expected, char result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(short expected, short result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(int expected, int result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(long expected, long result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(float expected, float result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(double expected, double result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  private static void expectEquals(Object expected, Object result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-}
diff --git a/test/525-checker-arrays-fields1/expected.txt b/test/525-checker-arrays-fields1/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/525-checker-arrays-fields1/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/525-checker-arrays-fields1/info.txt b/test/525-checker-arrays-fields1/info.txt
new file mode 100644
index 0000000..7d0a088
--- /dev/null
+++ b/test/525-checker-arrays-fields1/info.txt
@@ -0,0 +1 @@
+Test on (in)variant static field and array references in loops.
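As a hedged illustration of what the licm checks in the new test verify
(class and method names below are invented for the sketch): the two
StaticFieldGet instructions, one loading the array and one loading the
stored value, move from inside the loop (loop:B<n>) to before it
(loop:none), as if the source had been rewritten by hand:

    class LicmSketch {                       // hypothetical, for illustration
      static boolean sZ;
      static boolean[] sArrZ;

      static void before() {
        for (int i = 0; i < sArrZ.length; i++) {
          sArrZ[i] = sZ;                     // both static field loads in the loop
        }
      }

      static void after() {                  // effective result of hoisting
        boolean[] arr = sArrZ;               // StaticFieldGet, now loop:none
        boolean val = sZ;                    // StaticFieldGet, now loop:none
        for (int i = 0; i < arr.length; i++) {
          arr[i] = val;
        }
      }
    }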
diff --git a/test/525-checker-arrays-fields1/src/Main.java b/test/525-checker-arrays-fields1/src/Main.java
new file mode 100644
index 0000000..ba0476a
--- /dev/null
+++ b/test/525-checker-arrays-fields1/src/Main.java
@@ -0,0 +1,711 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on (in)variant static field and array references in loops.
+//
+public class Main {
+
+  private static Object anObject = new Object();
+  private static Object anotherObject = new Object();
+
+  //
+  // Static fields.
+  //
+
+  private static boolean sZ;
+  private static byte sB;
+  private static char sC;
+  private static short sS;
+  private static int sI;
+  private static long sJ;
+  private static float sF;
+  private static double sD;
+  private static Object sL;
+
+  //
+  // Static arrays.
+  //
+
+  private static boolean[] sArrZ;
+  private static byte[] sArrB;
+  private static char[] sArrC;
+  private static short[] sArrS;
+  private static int[] sArrI;
+  private static long[] sArrJ;
+  private static float[] sArrF;
+  private static double[] sArrD;
+  private static Object[] sArrL;
+
+  //
+  // Loops on static arrays with invariant static field references.
+  // The checker is used to ensure hoisting occurred.
+  //
+
+  /// CHECK-START: void Main.InvLoopZ() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopZ() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopZ() {
+    for (int i = 0; i < sArrZ.length; i++) {
+      sArrZ[i] = sZ;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopB() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopB() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopB() {
+    for (int i = 0; i < sArrB.length; i++) {
+      sArrB[i] = sB;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopC() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopC() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopC() {
+    for (int i = 0; i < sArrC.length; i++) {
+      sArrC[i] = sC;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopS() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopS() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopS() {
+    for (int i = 0; i < sArrS.length; i++) {
+      sArrS[i] = sS;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopI() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopI() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopI() {
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = sI;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopJ() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopJ() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopJ() {
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = sJ;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopF() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopF() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopF() {
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sF;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopD() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopD() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopD() {
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sD;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopL() licm (before)
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: StaticFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopL() licm (after)
+  /// CHECK-DAG: StaticFieldGet loop:none
+  /// CHECK-DAG: StaticFieldGet loop:none
+
+  private static void InvLoopL() {
+    for (int i = 0; i < sArrL.length; i++) {
+      sArrL[i] = sL;
+    }
+  }
+
+  //
+  // Loops on static arrays with variant static field references.
+  // Incorrect hoisting is detected by incorrect outcome.
+  //
+
+  private static void VarLoopZ() {
+    for (int i = 0; i < sArrZ.length; i++) {
+      sArrZ[i] = sZ;
+      if (i == 10)
+        sZ = !sZ;
+    }
+  }
+
+  private static void VarLoopB() {
+    for (int i = 0; i < sArrB.length; i++) {
+      sArrB[i] = sB;
+      if (i == 10)
+        sB++;
+    }
+  }
+
+  private static void VarLoopC() {
+    for (int i = 0; i < sArrC.length; i++) {
+      sArrC[i] = sC;
+      if (i == 10)
+        sC++;
+    }
+  }
+
+  private static void VarLoopS() {
+    for (int i = 0; i < sArrS.length; i++) {
+      sArrS[i] = sS;
+      if (i == 10)
+        sS++;
+    }
+  }
+
+  private static void VarLoopI() {
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = sI;
+      if (i == 10)
+        sI++;
+    }
+  }
+
+  private static void VarLoopJ() {
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = sJ;
+      if (i == 10)
+        sJ++;
+    }
+  }
+
+  private static void VarLoopF() {
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sF;
+      if (i == 10)
+        sF++;
+    }
+  }
+
+  private static void VarLoopD() {
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sD;
+      if (i == 10)
+        sD++;
+    }
+  }
+
+  private static void VarLoopL() {
+    for (int i = 0; i < sArrL.length; i++) {
+      sArrL[i] = sL;
+      if (i == 10)
+        sL = anotherObject;
+    }
+  }
+
+  //
+  // Loops on static arrays with a cross-over reference.
+  // Incorrect hoisting is detected by incorrect outcome.
+  // In addition, the checker is used to detect no hoisting.
+  //
+
+  /// CHECK-START: void Main.CrossOverLoopZ() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopZ() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopZ() {
+    sArrZ[20] = false;
+    for (int i = 0; i < sArrZ.length; i++) {
+      sArrZ[i] = !sArrZ[20];
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopB() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopB() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopB() {
+    sArrB[20] = 11;
+    for (int i = 0; i < sArrB.length; i++) {
+      sArrB[i] = (byte)(sArrB[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopC() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopC() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopC() {
+    sArrC[20] = 11;
+    for (int i = 0; i < sArrC.length; i++) {
+      sArrC[i] = (char)(sArrC[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopS() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopS() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopS() {
+    sArrS[20] = 11;
+    for (int i = 0; i < sArrS.length; i++) {
+      sArrS[i] = (short)(sArrS[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopI() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopI() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopI() {
+    sArrI[20] = 11;
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = sArrI[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopJ() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopJ() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopJ() {
+    sArrJ[20] = 11;
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = sArrJ[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopF() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopF() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopF() {
+    sArrF[20] = 11;
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sArrF[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopD() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopD() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopD() {
+    sArrD[20] = 11;
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sArrD[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopL() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopL() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void CrossOverLoopL() {
+    sArrL[20] = anotherObject;
+    for (int i = 0; i < sArrL.length; i++) {
+      sArrL[i] = (sArrL[20] == anObject) ? anotherObject : anObject;
+    }
+  }
+
+  //
+  // False cross-over loops on static arrays with data types (I/F and J/D) that used
+  // to be aliased in an older version of the compiler. That aliasing has since been
+  // removed, which enables hoisting the invariant array reference.
+  //
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void FalseCrossOverLoop1() {
+    sArrF[20] = -1;
+    for (int i = 0; i < sArrI.length; i++) {
+      sArrI[i] = (int) sArrF[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void FalseCrossOverLoop2() {
+    sArrI[20] = -2;
+    for (int i = 0; i < sArrF.length; i++) {
+      sArrF[i] = sArrI[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void FalseCrossOverLoop3() {
+    sArrD[20] = -3;
+    for (int i = 0; i < sArrJ.length; i++) {
+      sArrJ[i] = (long) sArrD[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private static void FalseCrossOverLoop4() {
+    sArrJ[20] = -4;
+    for (int i = 0; i < sArrD.length; i++) {
+      sArrD[i] = sArrJ[20] - 2;
+    }
+  }
+
+  //
+  // Main driver and testers.
+  //
+
+  public static void main(String[] args) {
+    DoStaticTests();
+    System.out.println("passed");
+  }
+
+  private static void DoStaticTests() {
+    // Type Z.
+    sZ = true;
+    sArrZ = new boolean[100];
+    InvLoopZ();
+    for (int i = 0; i < sArrZ.length; i++) {
+      expectEquals(true, sArrZ[i]);
+    }
+    VarLoopZ();
+    for (int i = 0; i < sArrZ.length; i++) {
+      expectEquals(i <= 10, sArrZ[i]);
+    }
+    CrossOverLoopZ();
+    for (int i = 0; i < sArrZ.length; i++) {
+      expectEquals(i <= 20, sArrZ[i]);
+    }
+    // Type B.
+    sB = 1;
+    sArrB = new byte[100];
+    InvLoopB();
+    for (int i = 0; i < sArrB.length; i++) {
+      expectEquals(1, sArrB[i]);
+    }
+    VarLoopB();
+    for (int i = 0; i < sArrB.length; i++) {
+      expectEquals(i <= 10 ? 1 : 2, sArrB[i]);
+    }
+    CrossOverLoopB();
+    for (int i = 0; i < sArrB.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrB[i]);
+    }
+    // Type C.
+    sC = 2;
+    sArrC = new char[100];
+    InvLoopC();
+    for (int i = 0; i < sArrC.length; i++) {
+      expectEquals(2, sArrC[i]);
+    }
+    VarLoopC();
+    for (int i = 0; i < sArrC.length; i++) {
+      expectEquals(i <= 10 ? 2 : 3, sArrC[i]);
+    }
+    CrossOverLoopC();
+    for (int i = 0; i < sArrC.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrC[i]);
+    }
+    // Type S.
+    sS = 3;
+    sArrS = new short[100];
+    InvLoopS();
+    for (int i = 0; i < sArrS.length; i++) {
+      expectEquals(3, sArrS[i]);
+    }
+    VarLoopS();
+    for (int i = 0; i < sArrS.length; i++) {
+      expectEquals(i <= 10 ? 3 : 4, sArrS[i]);
+    }
+    CrossOverLoopS();
+    for (int i = 0; i < sArrS.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrS[i]);
+    }
+    // Type I.
+    sI = 4;
+    sArrI = new int[100];
+    InvLoopI();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(4, sArrI[i]);
+    }
+    VarLoopI();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(i <= 10 ? 4 : 5, sArrI[i]);
+    }
+    CrossOverLoopI();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrI[i]);
+    }
+    // Type J.
+    sJ = 5;
+    sArrJ = new long[100];
+    InvLoopJ();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(5, sArrJ[i]);
+    }
+    VarLoopJ();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(i <= 10 ? 5 : 6, sArrJ[i]);
+    }
+    CrossOverLoopJ();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrJ[i]);
+    }
+    // Type F.
+    sF = 6.0f;
+    sArrF = new float[100];
+    InvLoopF();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(6, sArrF[i]);
+    }
+    VarLoopF();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(i <= 10 ? 6 : 7, sArrF[i]);
+    }
+    CrossOverLoopF();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrF[i]);
+    }
+    // Type D.
+    sD = 7.0;
+    sArrD = new double[100];
+    InvLoopD();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(7.0, sArrD[i]);
+    }
+    VarLoopD();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(i <= 10 ? 7 : 8, sArrD[i]);
+    }
+    CrossOverLoopD();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(i <= 20 ? 13 : 15, sArrD[i]);
+    }
+    // Type L.
+    sL = anObject;
+    sArrL = new Object[100];
+    InvLoopL();
+    for (int i = 0; i < sArrL.length; i++) {
+      expectEquals(anObject, sArrL[i]);
+    }
+    VarLoopL();
+    for (int i = 0; i < sArrL.length; i++) {
+      expectEquals(i <= 10 ? anObject : anotherObject, sArrL[i]);
+    }
+    CrossOverLoopL();
+    for (int i = 0; i < sArrL.length; i++) {
+      expectEquals(i <= 20 ? anObject : anotherObject, sArrL[i]);
+    }
+    // False cross-over.
+    FalseCrossOverLoop1();
+    for (int i = 0; i < sArrI.length; i++) {
+      expectEquals(-3, sArrI[i]);
+    }
+    FalseCrossOverLoop2();
+    for (int i = 0; i < sArrF.length; i++) {
+      expectEquals(-4, sArrF[i]);
+    }
+    FalseCrossOverLoop3();
+    for (int i = 0; i < sArrJ.length; i++) {
+      expectEquals(-5, sArrJ[i]);
+    }
+    FalseCrossOverLoop4();
+    for (int i = 0; i < sArrD.length; i++) {
+      expectEquals(-6, sArrD[i]);
+    }
+  }
+
+  private static void expectEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(Object expected, Object result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
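The FalseCrossOverLoop methods above rest on a type-system fact worth
spelling out: an int[] and a float[] (or a long[] and a double[]) can never
be the same array object, so reads from one are invariant with respect to
writes into the other. A hedged sketch of the hoisting this enables (helper
name invented):

    static void falseCrossOverSketch(int[] dst, float[] src) {
      // dst and src cannot alias (different element types), so this ArrayGet
      // is loop-invariant and may be hoisted -- matching "ArrayGet loop:none"
      // in the checks -- unlike the true cross-over loops, where the loop
      // reads and writes the same array.
      float v = src[20];
      for (int i = 0; i < dst.length; i++) {
        dst[i] = (int) v - 2;
      }
    }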
diff --git a/test/525-checker-arrays-fields2/expected.txt b/test/525-checker-arrays-fields2/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/525-checker-arrays-fields2/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/525-checker-arrays-fields2/info.txt b/test/525-checker-arrays-fields2/info.txt
new file mode 100644
index 0000000..3464e54
--- /dev/null
+++ b/test/525-checker-arrays-fields2/info.txt
@@ -0,0 +1 @@
+Test on (in)variant instance field and array references in loops.
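This file mirrors the static test above with instance fields. One extra
condition, noted here as a hedged aside, is that hoisting an
InstanceFieldGet also requires the receiver to be safe to read before the
loop; that holds trivially in these methods because the receiver is `this`,
which is non-null and invariant. A minimal sketch (class name invented):

    class LicmInstanceSketch {               // hypothetical, for illustration
      boolean mZ;
      boolean[] mArrZ;

      void hoisted() {                       // effective result of licm
        boolean[] arr = mArrZ;               // InstanceFieldGet on `this`, loop:none
        boolean val = mZ;                    // InstanceFieldGet on `this`, loop:none
        for (int i = 0; i < arr.length; i++) {
          arr[i] = val;
        }
      }
    }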
diff --git a/test/525-checker-arrays-fields2/src/Main.java b/test/525-checker-arrays-fields2/src/Main.java
new file mode 100644
index 0000000..2aa40fc
--- /dev/null
+++ b/test/525-checker-arrays-fields2/src/Main.java
@@ -0,0 +1,711 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on (in)variant instance field and array references in loops.
+//
+public class Main {
+
+  private static Object anObject = new Object();
+  private static Object anotherObject = new Object();
+
+  //
+  // Instance fields.
+  //
+
+  private boolean mZ;
+  private byte mB;
+  private char mC;
+  private short mS;
+  private int mI;
+  private long mJ;
+  private float mF;
+  private double mD;
+  private Object mL;
+
+  //
+  // Instance arrays.
+  //
+
+  private boolean[] mArrZ;
+  private byte[] mArrB;
+  private char[] mArrC;
+  private short[] mArrS;
+  private int[] mArrI;
+  private long[] mArrJ;
+  private float[] mArrF;
+  private double[] mArrD;
+  private Object[] mArrL;
+
+  //
+  // Loops on instance arrays with invariant instance field references.
+  // The checker is used to ensure hoisting occurred.
+  //
+
+  /// CHECK-START: void Main.InvLoopZ() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopZ() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopZ() {
+    for (int i = 0; i < mArrZ.length; i++) {
+      mArrZ[i] = mZ;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopB() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopB() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopB() {
+    for (int i = 0; i < mArrB.length; i++) {
+      mArrB[i] = mB;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopC() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopC() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopC() {
+    for (int i = 0; i < mArrC.length; i++) {
+      mArrC[i] = mC;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopS() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopS() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopS() {
+    for (int i = 0; i < mArrS.length; i++) {
+      mArrS[i] = mS;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopI() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopI() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopI() {
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = mI;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopJ() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopJ() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopJ() {
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = mJ;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopF() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopF() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopF() {
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mF;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopD() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopD() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopD() {
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mD;
+    }
+  }
+
+  /// CHECK-START: void Main.InvLoopL() licm (before)
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+  /// CHECK-DAG: InstanceFieldGet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.InvLoopL() licm (after)
+  /// CHECK-DAG: InstanceFieldGet loop:none
+  /// CHECK-DAG: InstanceFieldGet loop:none
+
+  private void InvLoopL() {
+    for (int i = 0; i < mArrL.length; i++) {
+      mArrL[i] = mL;
+    }
+  }
+
+  //
+  // Loops on instance arrays with variant instance field references.
+  // Incorrect hoisting is detected by incorrect outcome.
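+  // E.g. mZ flips at i == 10 in VarLoopZ below, so hoisting the field read
+  // would store one stale value into every element and the expectEquals
+  // checks in DoInstanceTests would fail.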
+  //
+
+  private void VarLoopZ() {
+    for (int i = 0; i < mArrZ.length; i++) {
+      mArrZ[i] = mZ;
+      if (i == 10)
+        mZ = !mZ;
+    }
+  }
+
+  private void VarLoopB() {
+    for (int i = 0; i < mArrB.length; i++) {
+      mArrB[i] = mB;
+      if (i == 10)
+        mB++;
+    }
+  }
+
+  private void VarLoopC() {
+    for (int i = 0; i < mArrC.length; i++) {
+      mArrC[i] = mC;
+      if (i == 10)
+        mC++;
+    }
+  }
+
+  private void VarLoopS() {
+    for (int i = 0; i < mArrS.length; i++) {
+      mArrS[i] = mS;
+      if (i == 10)
+        mS++;
+    }
+  }
+
+  private void VarLoopI() {
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = mI;
+      if (i == 10)
+        mI++;
+    }
+  }
+
+  private void VarLoopJ() {
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = mJ;
+      if (i == 10)
+        mJ++;
+    }
+  }
+
+  private void VarLoopF() {
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mF;
+      if (i == 10)
+        mF++;
+    }
+  }
+
+  private void VarLoopD() {
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mD;
+      if (i == 10)
+        mD++;
+    }
+  }
+
+  private void VarLoopL() {
+    for (int i = 0; i < mArrL.length; i++) {
+      mArrL[i] = mL;
+      if (i == 10)
+        mL = anotherObject;
+    }
+  }
+
+  //
+  // Loops on instance arrays with a cross-over reference.
+  // Incorrect hoisting is detected by incorrect outcome.
+  // In addition, the checker is used to detect no hoisting.
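+  // The load mArr*[20] cannot be hoisted: once i reaches 20 the loop writes
+  // that very element, so the value read changes mid-loop (hence the
+  // i <= 20 pattern in the expected outcomes).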
+  //
+
+  /// CHECK-START: void Main.CrossOverLoopZ() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopZ() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopZ() {
+    mArrZ[20] = false;
+    for (int i = 0; i < mArrZ.length; i++) {
+      mArrZ[i] = !mArrZ[20];
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopB() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopB() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopB() {
+    mArrB[20] = 111;
+    for (int i = 0; i < mArrB.length; i++) {
+      mArrB[i] = (byte)(mArrB[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopC() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopC() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopC() {
+    mArrC[20] = 111;
+    for (int i = 0; i < mArrC.length; i++) {
+      mArrC[i] = (char)(mArrC[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopS() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopS() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopS() {
+    mArrS[20] = 111;
+    for (int i = 0; i < mArrS.length; i++) {
+      mArrS[i] = (short)(mArrS[20] + 2);
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopI() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopI() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopI() {
+    mArrI[20] = 111;
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = mArrI[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopJ() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopJ() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopJ() {
+    mArrJ[20] = 111;
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = mArrJ[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopF() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopF() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopF() {
+    mArrF[20] = 111;
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mArrF[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopD() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopD() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopD() {
+    mArrD[20] = 111;
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mArrD[20] + 2;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoopL() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.CrossOverLoopL() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void CrossOverLoopL() {
+    mArrL[20] = anotherObject;
+    for (int i = 0; i < mArrL.length; i++) {
+      mArrL[i] = (mArrL[20] == anObject) ? anotherObject : anObject;
+    }
+  }
+
+  //
+  // False cross-over loops on instance arrays with data types (I/F and J/D) that used
+  // to be aliased in an older version of the compiler. This alias has been removed,
+  // however, which enables hoisting the invariant array reference.
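+  // In Java an int[] and a float[] (or a long[] and a double[]) can never be
+  // the same object, so e.g. mArrF[20] stays invariant across a loop that
+  // only writes mArrI, and its ArrayGet may move to loop:none.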
+  //
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void FalseCrossOverLoop1() {
+    mArrF[20] = -1;
+    for (int i = 0; i < mArrI.length; i++) {
+      mArrI[i] = (int) mArrF[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void FalseCrossOverLoop2() {
+    mArrI[20] = -2;
+    for (int i = 0; i < mArrF.length; i++) {
+      mArrF[i] = mArrI[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void FalseCrossOverLoop3() {
+    mArrD[20] = -3;
+    for (int i = 0; i < mArrJ.length; i++) {
+      mArrJ[i] = (long) mArrD[20] - 2;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4() licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4() licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+
+  private void FalseCrossOverLoop4() {
+    mArrJ[20] = -4;
+    for (int i = 0; i < mArrD.length; i++) {
+      mArrD[i] = mArrJ[20] - 2;
+    }
+  }
+
+  //
+  // Main driver and testers.
+  //
+
+  public static void main(String[] args) {
+    new Main().DoInstanceTests();
+    System.out.println("passed");
+  }
+
+  private void DoInstanceTests() {
+    // Type Z.
+    mZ = true;
+    mArrZ = new boolean[100];
+    InvLoopZ();
+    for (int i = 0; i < mArrZ.length; i++) {
+      expectEquals(true, mArrZ[i]);
+    }
+    VarLoopZ();
+    for (int i = 0; i < mArrZ.length; i++) {
+      expectEquals(i <= 10, mArrZ[i]);
+    }
+    CrossOverLoopZ();
+    for (int i = 0; i < mArrZ.length; i++) {
+      expectEquals(i <= 20, mArrZ[i]);
+    }
+    // Type B.
+    mB = 1;
+    mArrB = new byte[100];
+    InvLoopB();
+    for (int i = 0; i < mArrB.length; i++) {
+      expectEquals(1, mArrB[i]);
+    }
+    VarLoopB();
+    for (int i = 0; i < mArrB.length; i++) {
+      expectEquals(i <= 10 ? 1 : 2, mArrB[i]);
+    }
+    CrossOverLoopB();
+    for (int i = 0; i < mArrB.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrB[i]);
+    }
+    // Type C.
+    mC = 2;
+    mArrC = new char[100];
+    InvLoopC();
+    for (int i = 0; i < mArrC.length; i++) {
+      expectEquals(2, mArrC[i]);
+    }
+    VarLoopC();
+    for (int i = 0; i < mArrC.length; i++) {
+      expectEquals(i <= 10 ? 2 : 3, mArrC[i]);
+    }
+    CrossOverLoopC();
+    for (int i = 0; i < mArrC.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrC[i]);
+    }
+    // Type S.
+    mS = 3;
+    mArrS = new short[100];
+    InvLoopS();
+    for (int i = 0; i < mArrS.length; i++) {
+      expectEquals(3, mArrS[i]);
+    }
+    VarLoopS();
+    for (int i = 0; i < mArrS.length; i++) {
+      expectEquals(i <= 10 ? 3 : 4, mArrS[i]);
+    }
+    CrossOverLoopS();
+    for (int i = 0; i < mArrS.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrS[i]);
+    }
+    // Type I.
+    mI = 4;
+    mArrI = new int[100];
+    InvLoopI();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(4, mArrI[i]);
+    }
+    VarLoopI();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(i <= 10 ? 4 : 5, mArrI[i]);
+    }
+    CrossOverLoopI();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrI[i]);
+    }
+    // Type J.
+    mJ = 5;
+    mArrJ = new long[100];
+    InvLoopJ();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(5, mArrJ[i]);
+    }
+    VarLoopJ();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(i <= 10 ? 5 : 6, mArrJ[i]);
+    }
+    CrossOverLoopJ();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrJ[i]);
+    }
+    // Type F.
+    mF = 6.0f;
+    mArrF = new float[100];
+    InvLoopF();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(6, mArrF[i]);
+    }
+    VarLoopF();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(i <= 10 ? 6 : 7, mArrF[i]);
+    }
+    CrossOverLoopF();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrF[i]);
+    }
+    // Type D.
+    mD = 7.0;
+    mArrD = new double[100];
+    InvLoopD();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(7.0, mArrD[i]);
+    }
+    VarLoopD();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(i <= 10 ? 7 : 8, mArrD[i]);
+    }
+    CrossOverLoopD();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(i <= 20 ? 113 : 115, mArrD[i]);
+    }
+    // Type L.
+    mL = anObject;
+    mArrL = new Object[100];
+    InvLoopL();
+    for (int i = 0; i < mArrL.length; i++) {
+      expectEquals(anObject, mArrL[i]);
+    }
+    VarLoopL();
+    for (int i = 0; i < mArrL.length; i++) {
+      expectEquals(i <= 10 ? anObject : anotherObject, mArrL[i]);
+    }
+    CrossOverLoopL();
+    for (int i = 0; i < mArrL.length; i++) {
+      expectEquals(i <= 20 ? anObject : anotherObject, mArrL[i]);
+    }
+    // False cross-over.
+    FalseCrossOverLoop1();
+    for (int i = 0; i < mArrI.length; i++) {
+      expectEquals(-3, mArrI[i]);
+    }
+    FalseCrossOverLoop2();
+    for (int i = 0; i < mArrF.length; i++) {
+      expectEquals(-4, mArrF[i]);
+    }
+    FalseCrossOverLoop3();
+    for (int i = 0; i < mArrJ.length; i++) {
+      expectEquals(-5, mArrJ[i]);
+    }
+    FalseCrossOverLoop4();
+    for (int i = 0; i < mArrD.length; i++) {
+      expectEquals(-6, mArrD[i]);
+    }
+  }
+
+  private static void expectEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(Object expected, Object result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/530-checker-loops2/src/Main.java b/test/530-checker-loops2/src/Main.java
index c644692..b12fbd6 100644
--- a/test/530-checker-loops2/src/Main.java
+++ b/test/530-checker-loops2/src/Main.java
@@ -710,8 +710,8 @@
         // making them a candidate for deoptimization based on constant indices.
         // Compiler should ensure the array loads are not subsequently hoisted
         // "above" the deoptimization "barrier" on the bounds.
-        a[0][i] = 1;
-        a[1][i] = 2;
+        a[1][i] = 1;
+        a[2][i] = 2;
         a[99][i] = 3;
       }
     }
@@ -1042,11 +1042,11 @@
     a = new int[100][10];
     expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
     for (int i = 0; i < 10; i++) {
-      expectEquals((i % 10) != 0 ? 1 : 0, a[0][i]);
-      expectEquals((i % 10) != 0 ? 2 : 0, a[1][i]);
+      expectEquals((i % 10) != 0 ? 1 : 0, a[1][i]);
+      expectEquals((i % 10) != 0 ? 2 : 0, a[2][i]);
       expectEquals((i % 10) != 0 ? 3 : 0, a[99][i]);
     }
-    a = new int[2][10];
+    a = new int[3][10];
     sResult = 0;
     try {
       expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
@@ -1054,8 +1054,8 @@
       sResult = 1;
     }
     expectEquals(1, sResult);
-    expectEquals(a[0][1], 1);
-    expectEquals(a[1][1], 2);
+    expectEquals(a[1][1], 1);
+    expectEquals(a[2][1], 2);
 
     // Dynamic BCE combined with constant indices of all types.
     boolean[] x1 = { true };
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 4d6ea06..89875d7 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -70,6 +70,10 @@
   }
 }
 
+interface Filter {
+  public boolean isValid(int i);
+}
+
 public class Main {
 
   /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (before)
@@ -78,7 +82,7 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (after)
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
 
@@ -124,7 +128,6 @@
   }
 
   /// CHECK-START: int Main.test3(TestClass) load_store_elimination (before)
-  /// CHECK: NewInstance
   /// CHECK: StaticFieldGet
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
@@ -137,7 +140,6 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test3(TestClass) load_store_elimination (after)
-  /// CHECK: NewInstance
   /// CHECK: StaticFieldGet
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
@@ -149,9 +151,6 @@
 
   // A new allocation (even non-singleton) shouldn't alias with pre-existing values.
   static int test3(TestClass obj) {
-    // Do an allocation here to avoid the HLoadClass and HClinitCheck
-    // at the second allocation.
-    new TestClass();
     TestClass obj1 = TestClass.sTestClassObj;
     TestClass obj2 = new TestClass();  // Cannot alias with obj or obj1 which pre-exist.
     obj.next = obj2;  // Make obj2 a non-singleton.
@@ -256,7 +255,7 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test8() load_store_elimination (after)
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK: InvokeVirtual
   /// CHECK-NOT: NullCheck
@@ -414,7 +413,7 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test16() load_store_elimination (after)
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
 
@@ -431,7 +430,7 @@
 
   /// CHECK-START: int Main.test17() load_store_elimination (after)
   /// CHECK: <<Const0:i\d+>> IntConstant 0
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
   /// CHECK: Return [<<Const0>>]
@@ -527,12 +526,12 @@
   /// CHECK: InstanceFieldGet
 
   /// CHECK-START: int Main.test22() load_store_elimination (after)
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
-  /// CHECK: NewInstance
+  /// CHECK-NOT: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
   /// CHECK-NOT: InstanceFieldGet
@@ -673,7 +672,7 @@
   /// CHECK: Select
 
   // Test that HSelect creates alias.
-  public static int $noinline$testHSelect(boolean b) {
+  static int $noinline$testHSelect(boolean b) {
     if (sFlag) {
       throw new Error();
     }
@@ -686,19 +685,51 @@
     return obj2.i;
   }
 
-  public static void assertIntEquals(int result, int expected) {
+  static int sumWithFilter(int[] array, Filter f) {
+    int sum = 0;
+    for (int i = 0; i < array.length; i++) {
+      if (f.isValid(array[i])) {
+        sum += array[i];
+      }
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.sumWithinRange(int[], int, int) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.sumWithinRange(int[], int, int) load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+
+  // A lambda-style allocation can be eliminated after inlining.
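+  // (A sketch of the pipeline, as reflected in the CHECK lines above: once the
+  // inliner substitutes the isValid() body, the captured low/high field loads
+  // are forwarded from their stores and the Filter allocation becomes dead.)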
+  static int sumWithinRange(int[] array, final int low, final int high) {
+    Filter filter = new Filter() {
+      public boolean isValid(int i) {
+        return (i >= low) && (i <= high);
+      }
+    };
+    return sumWithFilter(array, filter);
+  }
+
+  static void assertIntEquals(int result, int expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
 
-  public static void assertFloatEquals(float result, float expected) {
+  static void assertFloatEquals(float result, float expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
 
-  public static void assertDoubleEquals(double result, double expected) {
+  static void assertDoubleEquals(double result, double expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
@@ -746,6 +777,8 @@
     assertFloatEquals(test24(), 8.0f);
     testFinalizableByForcingGc();
     assertIntEquals($noinline$testHSelect(true), 0xdead);
+    int[] array = {2, 5, 9, -1, -3, 10, 8, 4};
+    assertIntEquals(sumWithinRange(array, 1, 5), 11);
   }
 
   static boolean sFlag;
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index be666e9..15a9504 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -16,9 +16,69 @@
 
 
 public class Main {
+  public static boolean doThrow = false;
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertBooleanEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   public static void main(String[] args) {
     stringEqualsSame();
     stringArgumentNotNull("Foo");
+
+    assertIntEquals(0, $opt$noinline$getStringLength(""));
+    assertIntEquals(3, $opt$noinline$getStringLength("abc"));
+    assertIntEquals(10, $opt$noinline$getStringLength("0123456789"));
+
+    assertBooleanEquals(true, $opt$noinline$isStringEmpty(""));
+    assertBooleanEquals(false, $opt$noinline$isStringEmpty("abc"));
+    assertBooleanEquals(false, $opt$noinline$isStringEmpty("0123456789"));
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:  <<Length:i\d+>>   InvokeVirtual intrinsic:StringLength
+  /// CHECK-DAG:                    Return [<<Length>>]
+
+  /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:                    Return [<<Length>>]
+
+  /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringLength
+
+  public static int $opt$noinline$getStringLength(String s) {
+    if (doThrow) { throw new Error(); }
+    return s.length();
+  }
+
+  /// CHECK-START: boolean Main.$opt$noinline$isStringEmpty(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:  <<IsEmpty:z\d+>>  InvokeVirtual intrinsic:StringIsEmpty
+  /// CHECK-DAG:                    Return [<<IsEmpty>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$isStringEmpty(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:  <<IsEmpty:z\d+>>  Equal [<<Length>>,<<Const0>>]
+  /// CHECK-DAG:                    Return [<<IsEmpty>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$isStringEmpty(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringIsEmpty
+
+  public static boolean $opt$noinline$isStringEmpty(String s) {
+    if (doThrow) { throw new Error(); }
+    return s.isEmpty();
   }
 
   /// CHECK-START: boolean Main.stringEqualsSame() instruction_simplifier (before)
diff --git a/test/543-checker-dce-trycatch/smali/TestCase.smali b/test/543-checker-dce-trycatch/smali/TestCase.smali
index 62511df..9f9916d 100644
--- a/test/543-checker-dce-trycatch/smali/TestCase.smali
+++ b/test/543-checker-dce-trycatch/smali/TestCase.smali
@@ -215,10 +215,10 @@
 ## CHECK-DAG:     <<Const0x10:i\d+>> IntConstant 16
 ## CHECK-DAG:     <<Const0x11:i\d+>> IntConstant 17
 ## CHECK-DAG:     <<Add:i\d+>>       Add [<<Arg0>>,<<Arg1>>]
-## CHECK-DAG:     <<Phi:i\d+>>       Phi [<<Add>>,<<Const0xf>>] reg:3 is_catch_phi:false
+## CHECK-DAG:     <<Select:i\d+>>    Select [<<Const0xf>>,<<Add>>,{{z\d+}}]
 ## CHECK-DAG:                        Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
 ## CHECK-DAG:                        Phi [<<Add>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
-## CHECK-DAG:                        Phi [<<Phi>>,<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
+## CHECK-DAG:                        Phi [<<Select>>,<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
 
 ## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination_final (after)
 ## CHECK-DAG:     <<Const0xb:i\d+>>  IntConstant 11
@@ -246,7 +246,6 @@
     add-int v2, p0, p1             # dead catch phi input, defined in the dead block (HInstruction)
     move v3, v2
     if-eqz v3, :define_phi
-    sput v3, LTestCase;->sField:I  # beat HSelect simplification (has side-effects, does not throw)
     const v3, 0xf
     :define_phi
     # v3 = Phi [Add, 0xf]          # dead catch phi input, defined in the dead block (HPhi)
diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java
index edb8a68..a4561b8 100644
--- a/test/551-checker-shifter-operand/src/Main.java
+++ b/test/551-checker-shifter-operand/src/Main.java
@@ -500,9 +500,9 @@
     assertIntEquals(a + $noinline$IntShl(b, 16),  a + (b << 16));
     assertIntEquals(a + $noinline$IntShl(b, 30),  a + (b << 30));
     assertIntEquals(a + $noinline$IntShl(b, 31),  a + (b << 31));
-    assertIntEquals(a + $noinline$IntShl(b, 32),  a + (b << 32));
-    assertIntEquals(a + $noinline$IntShl(b, 62),  a + (b << 62));
-    assertIntEquals(a + $noinline$IntShl(b, 63),  a + (b << 63));
+    assertIntEquals(a + $noinline$IntShl(b, 32),  a + (b << $opt$inline$IntConstant32()));
+    assertIntEquals(a + $noinline$IntShl(b, 62),  a + (b << $opt$inline$IntConstant62()));
+    assertIntEquals(a + $noinline$IntShl(b, 63),  a + (b << $opt$inline$IntConstant63()));
 
     assertIntEquals(a - $noinline$IntShr(b, 1),   a - (b >>  1));
     assertIntEquals(a - $noinline$IntShr(b, 6),   a - (b >>  6));
@@ -513,9 +513,9 @@
     assertIntEquals(a - $noinline$IntShr(b, 16),  a - (b >> 16));
     assertIntEquals(a - $noinline$IntShr(b, 30),  a - (b >> 30));
     assertIntEquals(a - $noinline$IntShr(b, 31),  a - (b >> 31));
-    assertIntEquals(a - $noinline$IntShr(b, 32),  a - (b >> 32));
-    assertIntEquals(a - $noinline$IntShr(b, 62),  a - (b >> 62));
-    assertIntEquals(a - $noinline$IntShr(b, 63),  a - (b >> 63));
+    assertIntEquals(a - $noinline$IntShr(b, 32),  a - (b >> $opt$inline$IntConstant32()));
+    assertIntEquals(a - $noinline$IntShr(b, 62),  a - (b >> $opt$inline$IntConstant62()));
+    assertIntEquals(a - $noinline$IntShr(b, 63),  a - (b >> $opt$inline$IntConstant63()));
 
     assertIntEquals(a ^ $noinline$IntUshr(b, 1),   a ^ (b >>>  1));
     assertIntEquals(a ^ $noinline$IntUshr(b, 6),   a ^ (b >>>  6));
@@ -526,11 +526,17 @@
     assertIntEquals(a ^ $noinline$IntUshr(b, 16),  a ^ (b >>> 16));
     assertIntEquals(a ^ $noinline$IntUshr(b, 30),  a ^ (b >>> 30));
     assertIntEquals(a ^ $noinline$IntUshr(b, 31),  a ^ (b >>> 31));
-    assertIntEquals(a ^ $noinline$IntUshr(b, 32),  a ^ (b >>> 32));
-    assertIntEquals(a ^ $noinline$IntUshr(b, 62),  a ^ (b >>> 62));
-    assertIntEquals(a ^ $noinline$IntUshr(b, 63),  a ^ (b >>> 63));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 32),  a ^ (b >>> $opt$inline$IntConstant32()));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 62),  a ^ (b >>> $opt$inline$IntConstant62()));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 63),  a ^ (b >>> $opt$inline$IntConstant63()));
   }
 
+  // Hiding constants outside the range [0, 32) used for int shifts from Jack.
+  // (Jack extracts only the low 5 bits.)
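+  // Per the JLS, an int shift distance is masked with & 0x1f at run time, so
+  // b << 32 behaves as b << 0 and b << 62 as b << 30; the helpers keep the
+  // raw constants visible to the ART compiler rather than pre-masked by Jack.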
+  public static int $opt$inline$IntConstant32() { return 32; }
+  public static int $opt$inline$IntConstant62() { return 62; }
+  public static int $opt$inline$IntConstant63() { return 63; }
+
 
   static long $noinline$LongShl(long b, long c) {
     if (doThrow) throw new Error();
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index d50edd8..3d985bf 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -22,6 +22,12 @@
     }
   }
 
+  public static void assertStringEquals(String expected, String result) {
+    if (expected != null ? !expected.equals(result) : result != null) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   public static boolean doThrow = false;
 
   private static int $noinline$foo(int x) {
@@ -185,6 +191,66 @@
     return x;
   }
 
+  /// CHECK-START: java.lang.String Main.$noinline$getBootImageString() sharpening (before)
+  /// CHECK:                LoadString load_kind:DexCacheViaMethod
+
+  /// CHECK-START-X86: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+
+  /// CHECK-START-X86_64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+
+  /// CHECK-START-ARM: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+
+  /// CHECK-START-ARM64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+
+  public static String $noinline$getBootImageString() {
+    // Prevent inlining to avoid the string comparison being optimized away.
+    if (doThrow) { throw new Error(); }
+    // Empty string is known to be in the boot image.
+    return "";
+  }
+
+  /// CHECK-START: java.lang.String Main.$noinline$getNonBootImageString() sharpening (before)
+  /// CHECK:                LoadString load_kind:DexCacheViaMethod
+
+  /// CHECK-START-X86: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK:                LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-X86: java.lang.String Main.$noinline$getNonBootImageString() pc_relative_fixups_x86 (after)
+  /// CHECK-DAG:            X86ComputeBaseMethodAddress
+  /// CHECK-DAG:            LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-X86_64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK:                LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-ARM: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK:                LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-ARM: java.lang.String Main.$noinline$getNonBootImageString() dex_cache_array_fixups_arm (after)
+  /// CHECK-DAG:            ArmDexCacheArraysBase
+  /// CHECK-DAG:            LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-ARM64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK:                LoadString load_kind:DexCachePcRelative
+
+  public static String $noinline$getNonBootImageString() {
+    // Prevent inlining to avoid the string comparison being optimized away.
+    if (doThrow) { throw new Error(); }
+    // This string is not in the boot image.
+    return "non-boot-image-string";
+  }
+
   public static void main(String[] args) {
     assertIntEquals(1, testSimple(1));
     assertIntEquals(1, testDiamond(false, 1));
@@ -194,5 +260,7 @@
     assertIntEquals(1, testLoopWithDiamond(null, false, 1));
     assertIntEquals(3, testLoopWithDiamond(new int[]{ 2 }, false, 1));
     assertIntEquals(-6, testLoopWithDiamond(new int[]{ 3, 4 }, true, 1));
+    assertStringEquals("", $noinline$getBootImageString());
+    assertStringEquals("non-boot-image-string", $noinline$getNonBootImageString());
   }
 }
diff --git a/test/557-checker-instruction-simplifier-ror/src/Main.java b/test/557-checker-instruction-simplifier-ror/src/Main.java
index 027f262..6d8b74d 100644
--- a/test/557-checker-instruction-simplifier-ror/src/Main.java
+++ b/test/557-checker-instruction-simplifier-ror/src/Main.java
@@ -175,7 +175,7 @@
 
   //  (i >>> #distance) | (i << #-distance)
 
-  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier_after_bce (before)
   /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
   /// CHECK:          <<Const2:i\d+>>       IntConstant 2
   /// CHECK:          <<ConstNeg2:i\d+>>    IntConstant -2
@@ -184,19 +184,23 @@
   /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
   /// CHECK:                                Return [<<Or>>]
 
-  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier_after_bce (after)
   /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
   /// CHECK:          <<Const2:i\d+>>       IntConstant 2
   /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
   /// CHECK:                                Return [<<Ror>>]
 
-  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier_after_bce (after)
   /// CHECK-NOT:      UShr
   /// CHECK-NOT:      Shl
   public static int ror_int_constant_c_negc(int value) {
-    return (value >>> 2) | (value << -2);
+    return (value >>> 2) | (value << $opt$inline$IntConstantM2());
   }
 
+  // Hiding constants outside the range [0, 32) used for int shifts from Jack.
+  // (Jack extracts only the low 5 bits.)
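+  // Since -2 & 0x1f == 30, (value >>> 2) | (value << -2) is the same as
+  // (value >>> 2) | (value << 30), i.e. a rotate right by 2.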
+  public static int $opt$inline$IntConstantM2() { return -2; }
+
   //  (j >>> #distance) | (j << #-distance)
 
   /// CHECK-START: long Main.ror_long_constant_c_negc(long) instruction_simplifier (before)
diff --git a/test/564-checker-bitcount/src/Main.java b/test/564-checker-bitcount/src/Main.java
index 2683b25..aad9689 100644
--- a/test/564-checker-bitcount/src/Main.java
+++ b/test/564-checker-bitcount/src/Main.java
@@ -20,67 +20,187 @@
   // CHECK-START-X86_64: int Main.bits32(int) disassembly (after)
   // CHECK-DAG: popcnt
 
-  /// CHECK-START: int Main.bits32(int) intrinsics_recognition (after)
-  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerBitCount
-  /// CHECK-DAG:                 Return [<<Result>>]
-  private static int bits32(int x) {
+
+  /// CHECK-START: int Main.$noinline$BitCountBoolean(boolean) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountBoolean(boolean x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x ? 1 : 0);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountByte(byte) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountByte(byte x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
     return Integer.bitCount(x);
   }
 
-  /// CHECK-START: int Main.bits64(long) intrinsics_recognition (after)
-  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:LongBitCount
-  /// CHECK-DAG:                 Return [<<Result>>]
-  private static int bits64(long x) {
+  /// CHECK-START: int Main.$noinline$BitCountShort(short) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountShort(short x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountChar(char) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountChar(char x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountInt(int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountInt(int x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountLong(long) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:LongBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountLong(long x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
     return Long.bitCount(x);
   }
 
+  public static void testBitCountBoolean() {
+    expectEqualsInt($noinline$BitCountBoolean(false), 0);
+    expectEqualsInt($noinline$BitCountBoolean(true), 1);
+  }
+
+  public static void testBitCountByte() {
+    // Number of bits in a 32-bit integer representing the sign
+    // extension of a byte value widened to an int.
+    int signExtensionSize = Integer.SIZE - Byte.SIZE;
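+    // For example, (byte) 0xF0 widens to 0xFFFFFFF0, whose bit count is
+    // 4 + signExtensionSize (= 4 + 24 = 28).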
+    // Sign bit position in a byte.
+    int signBit = Byte.SIZE - 1;
+
+    expectEqualsInt($noinline$BitCountByte((byte) 0x00), 0);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x01), 1);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x10), 1);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x11), 2);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x03), 2);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x70), 3);
+    expectEqualsInt($noinline$BitCountByte((byte) 0xF0), 4 + signExtensionSize);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x0F), 4);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x12), 2);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x9A), 4 + signExtensionSize);
+    expectEqualsInt($noinline$BitCountByte((byte) 0xFF), 8 + signExtensionSize);
+
+    for (int i = 0; i < Byte.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountByte((byte) (1 << i)),
+                      (i < signBit) ? 1 : 1 + signExtensionSize);
+    }
+  }
+
+  public static void testBitCountShort() {
+    // Number of bits in a 32-bit integer representing the sign
+    // extension of a short value widened to an int.
+    int signExtensionSize = Integer.SIZE - Short.SIZE;
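+    // For example, (short) 0x9ABC widens to 0xFFFF9ABC, whose bit count is
+    // 9 + signExtensionSize (= 9 + 16 = 25).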
+    // Sign bit position in a short.
+    int signBit = Short.SIZE - 1;
+
+    expectEqualsInt($noinline$BitCountShort((short) 0x0000), 0);
+    expectEqualsInt($noinline$BitCountShort((short) 0x0001), 1);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1000), 1);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1001), 2);
+    expectEqualsInt($noinline$BitCountShort((short) 0x0003), 2);
+    expectEqualsInt($noinline$BitCountShort((short) 0x7000), 3);
+    expectEqualsInt($noinline$BitCountShort((short) 0x0F00), 4);
+    expectEqualsInt($noinline$BitCountShort((short) 0x0011), 2);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1100), 2);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1111), 4);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1234), 5);
+    expectEqualsInt($noinline$BitCountShort((short) 0x9ABC), 9 + signExtensionSize);
+    expectEqualsInt($noinline$BitCountShort((short) 0xFFFF), 16 + signExtensionSize);
+
+    for (int i = 0; i < Short.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountShort((short) (1 << i)),
+                      (i < signBit) ? 1 : 1 + signExtensionSize);
+    }
+  }
+
+  public static void testBitCountChar() {
+    expectEqualsInt($noinline$BitCountChar((char) 0x0000), 0);
+    expectEqualsInt($noinline$BitCountChar((char) 0x0001), 1);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1000), 1);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1001), 2);
+    expectEqualsInt($noinline$BitCountChar((char) 0x0003), 2);
+    expectEqualsInt($noinline$BitCountChar((char) 0x7000), 3);
+    expectEqualsInt($noinline$BitCountChar((char) 0x0F00), 4);
+    expectEqualsInt($noinline$BitCountChar((char) 0x0011), 2);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1100), 2);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1111), 4);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1234), 5);
+    expectEqualsInt($noinline$BitCountChar((char) 0x9ABC), 9);
+    expectEqualsInt($noinline$BitCountChar((char) 0xFFFF), 16);
+
+    for (int i = 0; i < Character.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountChar((char) (1 << i)), 1);
+    }
+  }
+
+  public static void testBitCountInt() {
+    expectEqualsInt($noinline$BitCountInt(0x00000000), 0);
+    expectEqualsInt($noinline$BitCountInt(0x00000001), 1);
+    expectEqualsInt($noinline$BitCountInt(0x10000000), 1);
+    expectEqualsInt($noinline$BitCountInt(0x10000001), 2);
+    expectEqualsInt($noinline$BitCountInt(0x00000003), 2);
+    expectEqualsInt($noinline$BitCountInt(0x70000000), 3);
+    expectEqualsInt($noinline$BitCountInt(0x000F0000), 4);
+    expectEqualsInt($noinline$BitCountInt(0x00001111), 4);
+    expectEqualsInt($noinline$BitCountInt(0x11110000), 4);
+    expectEqualsInt($noinline$BitCountInt(0x11111111), 8);
+    expectEqualsInt($noinline$BitCountInt(0x12345678), 13);
+    expectEqualsInt($noinline$BitCountInt(0x9ABCDEF0), 19);
+    expectEqualsInt($noinline$BitCountInt(0xFFFFFFFF), 32);
+
+    for (int i = 0; i < Integer.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountInt(1 << i), 1);
+    }
+  }
+
+  public static void testBitCountLong() {
+    expectEqualsInt($noinline$BitCountLong(0x0000000000000000L), 0);
+    expectEqualsInt($noinline$BitCountLong(0x0000000000000001L), 1);
+    expectEqualsInt($noinline$BitCountLong(0x1000000000000000L), 1);
+    expectEqualsInt($noinline$BitCountLong(0x1000000000000001L), 2);
+    expectEqualsInt($noinline$BitCountLong(0x0000000000000003L), 2);
+    expectEqualsInt($noinline$BitCountLong(0x7000000000000000L), 3);
+    expectEqualsInt($noinline$BitCountLong(0x000F000000000000L), 4);
+    expectEqualsInt($noinline$BitCountLong(0x0000000011111111L), 8);
+    expectEqualsInt($noinline$BitCountLong(0x1111111100000000L), 8);
+    expectEqualsInt($noinline$BitCountLong(0x1111111111111111L), 16);
+    expectEqualsInt($noinline$BitCountLong(0x123456789ABCDEF1L), 33);
+    expectEqualsInt($noinline$BitCountLong(0xFFFFFFFFFFFFFFFFL), 64);
+
+    for (int i = 0; i < Long.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountLong(1L << i), 1);
+    }
+  }
+
   public static void main(String args[]) {
-    expectEquals32(bits32(0x00000000), 0);
-    expectEquals32(bits32(0x00000001), 1);
-    expectEquals32(bits32(0x10000000), 1);
-    expectEquals32(bits32(0x10000001), 2);
-    expectEquals32(bits32(0x00000003), 2);
-    expectEquals32(bits32(0x70000000), 3);
-    expectEquals32(bits32(0x000F0000), 4);
-    expectEquals32(bits32(0x00001111), 4);
-    expectEquals32(bits32(0x11110000), 4);
-    expectEquals32(bits32(0x11111111), 8);
-    expectEquals32(bits32(0x12345678), 13);
-    expectEquals32(bits32(0x9ABCDEF0), 19);
-    expectEquals32(bits32(0xFFFFFFFF), 32);
-
-    for (int i = 0; i < 32; i++) {
-      expectEquals32(bits32(1 << i), 1);
-    }
-
-    expectEquals64(bits64(0x0000000000000000L), 0);
-    expectEquals64(bits64(0x0000000000000001L), 1);
-    expectEquals64(bits64(0x1000000000000000L), 1);
-    expectEquals64(bits64(0x1000000000000001L), 2);
-    expectEquals64(bits64(0x0000000000000003L), 2);
-    expectEquals64(bits64(0x7000000000000000L), 3);
-    expectEquals64(bits64(0x000F000000000000L), 4);
-    expectEquals64(bits64(0x0000000011111111L), 8);
-    expectEquals64(bits64(0x1111111100000000L), 8);
-    expectEquals64(bits64(0x1111111111111111L), 16);
-    expectEquals64(bits64(0x123456789ABCDEF1L), 33);
-    expectEquals64(bits64(0xFFFFFFFFFFFFFFFFL), 64);
-
-    for (int i = 0; i < 64; i++) {
-      expectEquals64(bits64(1L << i), 1);
-    }
+    testBitCountBoolean();
+    testBitCountByte();
+    testBitCountShort();
+    testBitCountChar();
+    testBitCountInt();
+    testBitCountLong();
 
     System.out.println("passed");
   }
 
-  private static void expectEquals32(int expected, int result) {
+  private static void expectEqualsInt(int expected, int result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
-  private static void expectEquals64(long expected, long result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
+
+  private static boolean doThrow = false;
 }
diff --git a/test/566-polymorphic-inlining/polymorphic_inline.cc b/test/566-polymorphic-inlining/polymorphic_inline.cc
index b2934ed..7b2c6cb 100644
--- a/test/566-polymorphic-inlining/polymorphic_inline.cc
+++ b/test/566-polymorphic-inlining/polymorphic_inline.cc
@@ -43,7 +43,8 @@
   }
 
   CodeInfo info = header->GetOptimizedCodeInfo();
-  CHECK(info.HasInlineInfo());
+  CodeInfoEncoding encoding = info.ExtractEncoding();
+  CHECK(info.HasInlineInfo(encoding));
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_ensureJittedAndPolymorphicInline(JNIEnv*, jclass cls) {
@@ -59,6 +60,7 @@
 
   do_checks(cls, "testInvokeVirtual");
   do_checks(cls, "testInvokeInterface");
+  do_checks(cls, "$noinline$testInlineToSameTarget");
 }
 
 }  // namespace art
diff --git a/test/566-polymorphic-inlining/src/Main.java b/test/566-polymorphic-inlining/src/Main.java
index 7283e86..a59ce5b 100644
--- a/test/566-polymorphic-inlining/src/Main.java
+++ b/test/566-polymorphic-inlining/src/Main.java
@@ -25,6 +25,12 @@
     }
   }
 
+  public static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected  + ", got " + actual);
+    }
+  }
+
   public static void main(String[] args) throws Exception {
     System.loadLibrary(args[0]);
     Main[] mains = new Main[3];
@@ -41,6 +47,8 @@
       testInvokeVirtual(mains[1]);
       testInvokeInterface(itfs[0]);
       testInvokeInterface(itfs[1]);
+      $noinline$testInlineToSameTarget(mains[0]);
+      $noinline$testInlineToSameTarget(mains[1]);
     }
 
     ensureJittedAndPolymorphicInline();
@@ -56,6 +64,10 @@
     // This will trigger a deoptimization of the compiled code.
     assertEquals(OtherSubclass.class, testInvokeVirtual(mains[2]));
     assertEquals(OtherSubclass.class, testInvokeInterface(itfs[2]));
+
+    // Run this once to make sure we execute the JITted code.
+    $noinline$testInlineToSameTarget(mains[0]);
+    assertEquals(20001, counter);
   }
 
   public Class sameInvokeVirtual() {
@@ -76,9 +88,21 @@
     return m.sameInvokeVirtual();
   }
 
+  public static void $noinline$testInlineToSameTarget(Main m) {
+    if (doThrow) throw new Error("");
+    m.increment();
+  }
+
   public Object field = new Object();
 
   public static native void ensureJittedAndPolymorphicInline();
+
+  public void increment() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo
+    counter++;
+  }
+  public static int counter = 0;
+  public static boolean doThrow = false;
 }
 
 class Subclass extends Main {
diff --git a/test/567-checker-compare/src/Main.java b/test/567-checker-compare/src/Main.java
index f95ff1a..8587950 100644
--- a/test/567-checker-compare/src/Main.java
+++ b/test/567-checker-compare/src/Main.java
@@ -16,6 +16,32 @@
 
 public class Main {
 
+  public static boolean doThrow = false;
+
+  /// CHECK-START: void Main.$opt$noinline$testReplaceInputWithItself(int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<ArgX:i\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
+  /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
+  /// CHECK-DAG:     <<Cmp:i\d+>>    InvokeStaticOrDirect [<<ArgX>>,<<Zero>>,<<Method>>] intrinsic:IntegerCompare
+  /// CHECK-DAG:                     GreaterThanOrEqual [<<Cmp>>,<<Zero>>]
+
+  /// CHECK-START: void Main.$opt$noinline$testReplaceInputWithItself(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgX:i\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
+  /// CHECK-DAG:                     GreaterThanOrEqual [<<ArgX>>,<<Zero>>]
+
+  public static void $opt$noinline$testReplaceInputWithItself(int x) {
+    if (doThrow) { throw new Error(); }
+
+    // The instruction simplifier first replaces Integer.compare(x, 0) with Compare HIR
+    // and then merges the Compare into the GreaterThanOrEqual. This is a regression
+    // test to check that it is allowed to replace the second input of the
+    // GreaterThanOrEqual, i.e. <<Zero>>, with the very same instruction.
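+    // In HIR terms, GreaterThanOrEqual [Compare(x, 0), 0] folds into
+    // GreaterThanOrEqual [x, 0], so the second input is "replaced" by the
+    // IntConstant 0 it already was.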
+    if (Integer.compare(x, 0) < 0) {
+      System.out.println("OOOPS");
+    }
+  }
+
   /// CHECK-START: int Main.compareBooleans(boolean, boolean) intrinsics_recognition (after)
   /// CHECK-DAG:     <<Method:[ij]\d+>> CurrentMethod
   /// CHECK-DAG:     <<Zero:i\d+>>   IntConstant 0
@@ -890,6 +916,8 @@
 
 
   public static void main(String args[]) {
+    $opt$noinline$testReplaceInputWithItself(42);
+
     testCompareBooleans();
     testCompareBytes();
     testCompareShorts();
diff --git a/test/570-checker-osr/expected.txt b/test/570-checker-osr/expected.txt
index 25fb220..65447be 100644
--- a/test/570-checker-osr/expected.txt
+++ b/test/570-checker-osr/expected.txt
@@ -3,3 +3,4 @@
 200000
 300000
 400000
+b28210356 passed.
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
index 09e97ea..2fa5800 100644
--- a/test/570-checker-osr/osr.cc
+++ b/test/570-checker-osr/osr.cc
@@ -14,21 +14,23 @@
  * limitations under the License.
  */
 
-#include "art_method.h"
+#include "art_method-inl.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "jit/profiling_info.h"
 #include "oat_quick_method_header.h"
 #include "scoped_thread_state_change.h"
+#include "ScopedUtfChars.h"
 #include "stack_map.h"
 
 namespace art {
 
 class OsrVisitor : public StackVisitor {
  public:
-  explicit OsrVisitor(Thread* thread)
+  explicit OsrVisitor(Thread* thread, const char* method_name)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        method_name_(method_name),
         in_osr_method_(false),
         in_interpreter_(false) {}
 
@@ -36,13 +38,7 @@
     ArtMethod* m = GetMethod();
     std::string m_name(m->GetName());
 
-    if ((m_name.compare("$noinline$returnInt") == 0) ||
-        (m_name.compare("$noinline$returnFloat") == 0) ||
-        (m_name.compare("$noinline$returnDouble") == 0) ||
-        (m_name.compare("$noinline$returnLong") == 0) ||
-        (m_name.compare("$noinline$deopt") == 0) ||
-        (m_name.compare("$noinline$inlineCache") == 0) ||
-        (m_name.compare("$noinline$stackOverflow") == 0)) {
+    if (m_name.compare(method_name_) == 0) {
       const OatQuickMethodHeader* header =
           Runtime::Current()->GetJit()->GetCodeCache()->LookupOsrMethodHeader(m);
       if (header != nullptr && header == GetCurrentOatQuickMethodHeader()) {
@@ -55,74 +51,89 @@
     return true;
   }
 
+  const char* const method_name_;
   bool in_osr_method_;
   bool in_interpreter_;
 };
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_ensureInOsrCode(JNIEnv*, jclass) {
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInOsrCode(JNIEnv* env,
+                                                            jclass,
+                                                            jstring method_name) {
   jit::Jit* jit = Runtime::Current()->GetJit();
   if (jit == nullptr) {
     // Just return true for non-jit configurations to stop the infinite loop.
     return JNI_TRUE;
   }
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
   ScopedObjectAccess soa(Thread::Current());
-  OsrVisitor visitor(soa.Self());
+  OsrVisitor visitor(soa.Self(), chars.c_str());
   visitor.WalkStack();
   return visitor.in_osr_method_;
 }
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_ensureInInterpreter(JNIEnv*, jclass) {
-  if (!Runtime::Current()->UseJit()) {
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInInterpreter(JNIEnv* env,
+                                                                jclass,
+                                                                jstring method_name) {
+  if (!Runtime::Current()->UseJitCompilation()) {
     // The return value is irrelevant if we're not using JIT.
     return false;
   }
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
   ScopedObjectAccess soa(Thread::Current());
-  OsrVisitor visitor(soa.Self());
+  OsrVisitor visitor(soa.Self(), chars.c_str());
   visitor.WalkStack();
   return visitor.in_interpreter_;
 }
 
 class ProfilingInfoVisitor : public StackVisitor {
  public:
-  explicit ProfilingInfoVisitor(Thread* thread)
+  explicit ProfilingInfoVisitor(Thread* thread, const char* method_name)
       SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {}
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        method_name_(method_name) {}
 
   bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
     ArtMethod* m = GetMethod();
     std::string m_name(m->GetName());
 
-    if ((m_name.compare("$noinline$inlineCache") == 0) ||
-        (m_name.compare("$noinline$stackOverflow") == 0)) {
+    if (m_name.compare(method_name_) == 0) {
       ProfilingInfo::Create(Thread::Current(), m, /* retry_allocation */ true);
       return false;
     }
     return true;
   }
+
+  const char* const method_name_;
 };
 
-extern "C" JNIEXPORT void JNICALL Java_Main_ensureHasProfilingInfo(JNIEnv*, jclass) {
-  if (!Runtime::Current()->UseJit()) {
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureHasProfilingInfo(JNIEnv* env,
+                                                                   jclass,
+                                                                   jstring method_name) {
+  if (!Runtime::Current()->UseJitCompilation()) {
     return;
   }
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
   ScopedObjectAccess soa(Thread::Current());
-  ProfilingInfoVisitor visitor(soa.Self());
+  ProfilingInfoVisitor visitor(soa.Self(), chars.c_str());
   visitor.WalkStack();
 }
 
 class OsrCheckVisitor : public StackVisitor {
  public:
-  explicit OsrCheckVisitor(Thread* thread)
+  OsrCheckVisitor(Thread* thread, const char* method_name)
       SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {}
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        method_name_(method_name) {}
 
   bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
     ArtMethod* m = GetMethod();
     std::string m_name(m->GetName());
 
     jit::Jit* jit = Runtime::Current()->GetJit();
-    if ((m_name.compare("$noinline$inlineCache") == 0) ||
-        (m_name.compare("$noinline$stackOverflow") == 0)) {
+    if (m_name.compare(method_name_) == 0) {
       while (jit->GetCodeCache()->LookupOsrMethodHeader(m) == nullptr) {
         // Sleep to yield to the compiler thread.
         sleep(0);
@@ -133,14 +144,20 @@
     }
     return true;
   }
+
+  const char* const method_name_;
 };
 
-extern "C" JNIEXPORT void JNICALL Java_Main_ensureHasOsrCode(JNIEnv*, jclass) {
-  if (!Runtime::Current()->UseJit()) {
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureHasOsrCode(JNIEnv* env,
+                                                             jclass,
+                                                             jstring method_name) {
+  if (!Runtime::Current()->UseJitCompilation()) {
     return;
   }
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
   ScopedObjectAccess soa(Thread::Current());
-  OsrCheckVisitor visitor(soa.Self());
+  OsrCheckVisitor visitor(soa.Self(), chars.c_str());
   visitor.WalkStack();
 }
 
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
index 1142d49..15c232d 100644
--- a/test/570-checker-osr/src/Main.java
+++ b/test/570-checker-osr/src/Main.java
@@ -16,8 +16,28 @@
 
 public class Main {
   public static void main(String[] args) {
-    new SubMain();
     System.loadLibrary(args[0]);
+    Thread testThread = new Thread() {
+      public void run() {
+        performTest();
+      }
+    };
+    testThread.start();
+    try {
+      testThread.join(20 * 1000);  // 20s timeout.
+    } catch (InterruptedException ie) {
+      System.out.println("Interrupted.");
+      System.exit(1);
+    }
+    Thread.State state = testThread.getState();
+    if (state != Thread.State.TERMINATED) {
+      System.out.println("Test timed out, current state: " + state);
+      System.exit(1);
+    }
+  }
+
+  public static void performTest() {
+    new SubMain();
     if ($noinline$returnInt() != 53) {
       throw new Error("Unexpected return value");
     }
@@ -41,8 +61,23 @@
       throw new Error("Unexpected return value");
     }
 
+    $noinline$inlineCache2(new Main(), /* isSecondInvocation */ false);
+    if ($noinline$inlineCache2(new SubMain(), /* isSecondInvocation */ true) != SubMain.class) {
+      throw new Error("Unexpected return value");
+    }
+
+    // Test polymorphic inline cache to the same target (inlineCache3).
+    $noinline$inlineCache3(new Main(), /* isSecondInvocation */ false);
+    $noinline$inlineCache3(new SubMain(), /* isSecondInvocation */ false);
+    if ($noinline$inlineCache3(new SubMain(), /* isSecondInvocation */ true) != null) {
+      throw new Error("Unexpected return value");
+    }
+
     $noinline$stackOverflow(new Main(), /* isSecondInvocation */ false);
     $noinline$stackOverflow(new SubMain(), /* isSecondInvocation */ true);
+
+    $opt$noinline$testOsrInlineLoop(null);
+    System.out.println("b28210356 passed.");
   }
 
   public static int $noinline$returnInt() {
@@ -50,7 +85,7 @@
     int i = 0;
     for (; i < 100000; ++i) {
     }
-    while (!ensureInOsrCode()) {}
+    while (!isInOsrCode("$noinline$returnInt")) {}
     System.out.println(i);
     return 53;
   }
@@ -60,7 +95,7 @@
     int i = 0;
     for (; i < 200000; ++i) {
     }
-    while (!ensureInOsrCode()) {}
+    while (!isInOsrCode("$noinline$returnFloat")) {}
     System.out.println(i);
     return 42.2f;
   }
@@ -70,7 +105,7 @@
     int i = 0;
     for (; i < 300000; ++i) {
     }
-    while (!ensureInOsrCode()) {}
+    while (!isInOsrCode("$noinline$returnDouble")) {}
     System.out.println(i);
     return Double.longBitsToDouble(0xF000000000001111L);
   }
@@ -80,7 +115,7 @@
     int i = 0;
     for (; i < 400000; ++i) {
     }
-    while (!ensureInOsrCode()) {}
+    while (!isInOsrCode("$noinline$returnLong")) {}
     System.out.println(i);
     return 0xFFFF000000001111L;
   }
@@ -90,22 +125,22 @@
     int i = 0;
     for (; i < 100000; ++i) {
     }
-    while (!ensureInOsrCode()) {}
+    while (!isInOsrCode("$noinline$deopt")) {}
     DeoptimizationController.startDeoptimization();
   }
 
   public static Class $noinline$inlineCache(Main m, boolean isSecondInvocation) {
     // If we are running in non-JIT mode, or were unlucky enough to get this method
     // already JITted, just return the expected value.
-    if (!ensureInInterpreter()) {
+    if (!isInInterpreter("$noinline$inlineCache")) {
       return SubMain.class;
     }
 
-    ensureHasProfilingInfo();
+    ensureHasProfilingInfo("$noinline$inlineCache");
 
     // Ensure that we have OSR code to jump to.
     if (isSecondInvocation) {
-      ensureHasOsrCode();
+      ensureHasOsrCode("$noinline$inlineCache");
     }
 
     // This call will be optimized in the OSR compiled code
@@ -117,17 +152,83 @@
     // code we are jumping to will have wrongly optimized other as being a
     // 'Main'.
     if (isSecondInvocation) {
-      while (!ensureInOsrCode()) {}
+      while (!isInOsrCode("$noinline$inlineCache")) {}
     }
 
     // We used to wrongly optimize this call and assume 'other' was a 'Main'.
     return other.returnClass();
   }
 
+  public static Class $noinline$inlineCache2(Main m, boolean isSecondInvocation) {
+    // If we are running in non-JIT mode, or were unlucky enough to get this method
+    // already JITted, just return the expected value.
+    if (!isInInterpreter("$noinline$inlineCache2")) {
+      return SubMain.class;
+    }
+
+    ensureHasProfilingInfo("$noinline$inlineCache2");
+
+    // Ensure that we have OSR code to jump to.
+    if (isSecondInvocation) {
+      ensureHasOsrCode("$noinline$inlineCache2");
+    }
+
+    // This call will be optimized in the OSR compiled code
+    // to check and deoptimize if m is not of type 'Main'.
+    Main other = m.inlineCache2();
+
+    // Jump to OSR compiled code. The second run
+    // of this method will have 'm' as a SubMain, and the compiled
+    // code we are jumping to will have wrongly optimized other as being null.
+    if (isSecondInvocation) {
+      while (!isInOsrCode("$noinline$inlineCache2")) {}
+    }
+
+    // We used to wrongly optimize this code and assume 'other' was always null.
+    return (other == null) ? null : other.returnClass();
+  }
+
+  public static Class $noinline$inlineCache3(Main m, boolean isSecondInvocation) {
+    // If we are running in non-JIT mode, or were unlucky enough to get this method
+    // already JITted, just return the expected value.
+    if (!isInInterpreter("$noinline$inlineCache3")) {
+      return null;
+    }
+
+    ensureHasProfilingInfo("$noinline$inlineCache3");
+
+    // Ensure that we have OSR code to jump to.
+    if (isSecondInvocation) {
+      ensureHasOsrCode("$noinline$inlineCache3");
+    }
+
+    // This call will be optimized in the OSR compiled code
+    // to check and deoptimize if m is not of type 'Main'.
+    Main other = m.inlineCache3();
+
+    // Jump to OSR compiled code. The second run
+    // of this method will have 'm' as a SubMain, and the compiled
+    // code we are jumping to will have wrongly optimized other as being null.
+    if (isSecondInvocation) {
+      while (!isInOsrCode("$noinline$inlineCache3")) {}
+    }
+
+    // We used to wrongly optimize this code and assume 'other' was always null.
+    return (other == null) ? null : other.returnClass();
+  }
+
   public Main inlineCache() {
     return new Main();
   }
 
+  public Main inlineCache2() {
+    return null;
+  }
+
+  public Main inlineCache3() {
+    return null;
+  }
+
   public Class returnClass() {
     return Main.class;
   }
@@ -139,16 +240,16 @@
   public static void $noinline$stackOverflow(Main m, boolean isSecondInvocation) {
     // If we are running in non-JIT mode, or were unlucky enough to get this method
     // already JITted, just return the expected value.
-    if (!ensureInInterpreter()) {
+    if (!isInInterpreter("$noinline$stackOverflow")) {
       return;
     }
 
     // We need a ProfilingInfo object to populate the 'otherInlineCache' call.
-    ensureHasProfilingInfo();
+    ensureHasProfilingInfo("$noinline$stackOverflow");
 
     if (isSecondInvocation) {
       // Ensure we have an OSR code and we jump to it.
-      while (!ensureInOsrCode()) {}
+      while (!isInOsrCode("$noinline$stackOverflow")) {}
     }
 
     for (int i = 0; i < (isSecondInvocation ? 10000000 : 1); ++i) {
@@ -159,10 +260,46 @@
     }
   }
 
-  public static native boolean ensureInInterpreter();
-  public static native boolean ensureInOsrCode();
-  public static native void ensureHasProfilingInfo();
-  public static native void ensureHasOsrCode();
+  public static void $opt$noinline$testOsrInlineLoop(String[] args) {
+    // Regression test for inlining a method with a loop into a method without a loop in OSR mode.
+    if (doThrow) throw new Error();
+    assertIntEquals(12, $opt$inline$testRemoveSuspendCheck(12, 5));
+    // Since we cannot have a loop directly in this method, we need to force the OSR
+    // compilation from native code.
+    ensureHasProfilingInfo("$opt$noinline$testOsrInlineLoop");
+    ensureHasOsrCode("$opt$noinline$testOsrInlineLoop");
+  }
+
+  public static int $opt$inline$testRemoveSuspendCheck(int x, int y) {
+    // For this test we need an inlined loop and we need DCE to re-run loop
+    // analysis after inlining.
+    while (y > 0) {
+      while ($opt$inline$inlineFalse() || !$opt$inline$inlineTrue()) {
+        x++;
+      }
+      y--;
+    }
+    return x;
+  }
+
+  public static boolean $opt$inline$inlineTrue() {
+    return true;
+  }
+
+  public static boolean $opt$inline$inlineFalse() {
+    return false;
+  }
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static native boolean isInOsrCode(String methodName);
+  public static native boolean isInInterpreter(String methodName);
+  public static native void ensureHasProfilingInfo(String methodName);
+  public static native void ensureHasOsrCode(String methodName);
 
   public static boolean doThrow = false;
 }
@@ -176,6 +313,10 @@
     return new SubMain();
   }
 
+  public Main inlineCache2() {
+    return new SubMain();
+  }
+
   public void otherInlineCache() {
     return;
   }
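
The rewritten main() above wraps the test body in a watchdog thread so that a stuck OSR spin loop fails the run after 20 seconds instead of hanging it. A condensed, standalone sketch of that pattern (my restatement; the worker body is a placeholder):

public class WatchdogSketch {
  public static void main(String[] args) throws InterruptedException {
    Thread worker = new Thread(() -> {
      // test body would run here
    });
    worker.start();
    worker.join(20 * 1000);  // bound the wait to 20 seconds
    if (worker.getState() != Thread.State.TERMINATED) {
      System.out.println("Test timed out, current state: " + worker.getState());
      System.exit(1);
    }
  }
}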
diff --git a/test/572-checker-array-get-regression/src/Main.java b/test/572-checker-array-get-regression/src/Main.java
index b55be70..89b97ed 100644
--- a/test/572-checker-array-get-regression/src/Main.java
+++ b/test/572-checker-array-get-regression/src/Main.java
@@ -25,13 +25,11 @@
   /// CHECK-DAG:     <<Const2P19:i\d+>>    IntConstant 524288
   /// CHECK-DAG:     <<ConstM1:i\d+>>      IntConstant -1
   /// CHECK-DAG:     <<Array:l\d+>>        NewArray [<<Const2P19>>,<<Method>>]
-  /// CHECK-DAG:     <<NullCheck1:l\d+>>   NullCheck [<<Array>>]
-  /// CHECK-DAG:     <<Length1:i\d+>>      ArrayLength [<<NullCheck1>>]
+  /// CHECK-DAG:     <<Length1:i\d+>>      ArrayLength [<<Array>>]
   /// CHECK-DAG:     <<Index:i\d+>>        Add [<<Length1>>,<<ConstM1>>]
-  /// CHECK-DAG:     <<NullCheck2:l\d+>>   NullCheck [<<Array>>]
-  /// CHECK-DAG:     <<Length2:i\d+>>      ArrayLength [<<NullCheck2>>]
+  /// CHECK-DAG:     <<Length2:i\d+>>      ArrayLength [<<Array>>]
   /// CHECK-DAG:     <<BoundsCheck:i\d+>>  BoundsCheck [<<Index>>,<<Length2>>]
-  /// CHECK-DAG:     <<LastElement:l\d+>>  ArrayGet [<<NullCheck2>>,<<BoundsCheck>>]
+  /// CHECK-DAG:     <<LastElement:l\d+>>  ArrayGet [<<Array>>,<<BoundsCheck>>]
   /// CHECK-DAG:                           Return [<<LastElement>>]
 
 
diff --git a/test/586-checker-null-array-get/src/Main.java b/test/586-checker-null-array-get/src/Main.java
index 332cfb0..e0782bc 100644
--- a/test/586-checker-null-array-get/src/Main.java
+++ b/test/586-checker-null-array-get/src/Main.java
@@ -14,10 +14,20 @@
  * limitations under the License.
  */
 
+class Test1 {
+  int[] iarr;
+}
+
+class Test2 {
+  float[] farr;
+}
+
 public class Main {
   public static Object[] getObjectArray() { return null; }
   public static long[] getLongArray() { return null; }
   public static Object getNull() { return null; }
+  public static Test1 getNullTest1() { return null; }
+  public static Test2 getNullTest2() { return null; }
 
   public static void main(String[] args) {
     try {
@@ -26,13 +36,25 @@
     } catch (NullPointerException e) {
       // Expected.
     }
+    try {
+      bar();
+      throw new Error("Expected NullPointerException");
+    } catch (NullPointerException e) {
+      // Expected.
+    }
+    try {
+      test1();
+      throw new Error("Expected NullPointerException");
+    } catch (NullPointerException e) {
+      // Expected.
+    }
   }
 
   /// CHECK-START: void Main.foo() load_store_elimination (after)
-  /// CHECK-DAG: <<Null:l\d+>>  NullConstant
-  /// CHECK-DAG: <<Check:l\d+>> NullCheck [<<Null>>]
-  /// CHECK-DAG: <<Get1:j\d+>>  ArrayGet [<<Check>>,{{i\d+}}]
-  /// CHECK-DAG: <<Get2:l\d+>>  ArrayGet [<<Check>>,{{i\d+}}]
+  /// CHECK-DAG: <<Null:l\d+>>   NullConstant
+  /// CHECK-DAG: <<Check:l\d+>>  NullCheck [<<Null>>]
+  /// CHECK-DAG: <<Get1:j\d+>>   ArrayGet [<<Check>>,{{i\d+}}]
+  /// CHECK-DAG: <<Get2:l\d+>>   ArrayGet [<<Check>>,{{i\d+}}]
   public static void foo() {
     longField = getLongArray()[0];
     objectField = getObjectArray()[0];
@@ -56,7 +78,7 @@
     // elimination pass to add a HDeoptimize. Not having the bounds check let
     // the load store elimination think it could merge two ArrayGets with
     // different types.
-    String[] array = ((String[])getNull());
+    String[] array = (String[])getNull();
     objectField = array[0];
     objectField = array[1];
     objectField = array[2];
@@ -68,6 +90,23 @@
     longField = longArray[3];
   }
 
+  /// CHECK-START: float Main.test1() load_store_elimination (after)
+  /// CHECK-DAG: <<Null:l\d+>>       NullConstant
+  /// CHECK-DAG: <<Check1:l\d+>>     NullCheck [<<Null>>]
+  /// CHECK-DAG: <<FieldGet1:l\d+>>  InstanceFieldGet [<<Check1>>] field_name:Test1.iarr
+  /// CHECK-DAG: <<Check2:l\d+>>     NullCheck [<<FieldGet1>>]
+  /// CHECK-DAG: <<ArrayGet1:i\d+>>  ArrayGet [<<Check2>>,{{i\d+}}]
+  /// CHECK-DAG: <<ArrayGet2:f\d+>>  ArrayGet [<<Check2>>,{{i\d+}}]
+  /// CHECK-DAG:                     Return [<<ArrayGet2>>]
+  public static float test1() {
+    Test1 test1 = getNullTest1();
+    Test2 test2 = getNullTest2();
+    int[] iarr = test1.iarr;
+    float[] farr = test2.farr;
+    iarr[0] = iarr[1];
+    return farr[0];
+  }
+
   public static long longField;
   public static Object objectField;
 }
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/591-checker-regression-dead-loop/expected.txt
similarity index 100%
rename from test/525-checker-arrays-and-fields/expected.txt
rename to test/591-checker-regression-dead-loop/expected.txt
diff --git a/test/591-checker-regression-dead-loop/info.txt b/test/591-checker-regression-dead-loop/info.txt
new file mode 100644
index 0000000..f192b8d
--- /dev/null
+++ b/test/591-checker-regression-dead-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for Optimizing's dead block elimination, which used to remove
+dependencies in the wrong order.
\ No newline at end of file
diff --git a/test/591-checker-regression-dead-loop/src/Main.java b/test/591-checker-regression-dead-loop/src/Main.java
new file mode 100644
index 0000000..6d9fcf8
--- /dev/null
+++ b/test/591-checker-regression-dead-loop/src/Main.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+  private static boolean $inline$false() { return false; }
+
+  /// CHECK-START: void Main.main(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK-DAG:     <<Const0:i\d+>> IntConstant 0
+  /// CHECK-DAG:     <<Const1:i\d+>> IntConstant 1
+  /// CHECK-DAG:     <<Phi:i\d+>>    Phi [<<Const0>>,<<Add:i\d+>>] loop:{{B\d+}}
+  /// CHECK-DAG:                     InvokeVirtual [{{l\d+}},<<Phi>>] method_name:java.io.PrintStream.println
+  /// CHECK-DAG:     <<Add>>         Add [<<Phi>>,<<Const1>>]
+
+  public static void main(String[] args) {
+    if ($inline$false()) {
+      int x = 0;
+      while (true) {
+        System.out.println(x++);
+      }
+    }
+  }
+}
diff --git a/test/591-new-instance-string/expected.txt b/test/591-new-instance-string/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/591-new-instance-string/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/591-new-instance-string/info.txt b/test/591-new-instance-string/info.txt
new file mode 100644
index 0000000..16c7b1f
--- /dev/null
+++ b/test/591-new-instance-string/info.txt
@@ -0,0 +1 @@
+Regression test on new-instance that reaches multiple <init> calls.
diff --git a/test/591-new-instance-string/smali/new-instance.smali b/test/591-new-instance-string/smali/new-instance.smali
new file mode 100644
index 0000000..42559ca
--- /dev/null
+++ b/test/591-new-instance-string/smali/new-instance.smali
@@ -0,0 +1,29 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LNewInstance;
+.super Ljava/lang/Object;
+
+.method public static multipleInit(I)V
+    .registers 2
+    new-instance v0, Ljava/lang/String;
+    if-eqz v1, :Skip
+    invoke-direct {v0}, Ljava/lang/String;-><init>()V
+    goto :Done
+:Skip
+    invoke-direct {v0}, Ljava/lang/String;-><init>()V
+:Done
+    return-void
+.end method
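
Java source cannot express a single new-instance flowing into two different <init> call sites, which is why this case needs smali. The closest Java approximation, where each branch performs its own allocation and initialization, is sketched below (my reconstruction, not the actual test):

public class MultipleInitSketch {
  // In the smali above, ONE new-instance reaches BOTH invoke-direct <init>
  // calls; javac would instead pair each initialization with its own
  // allocation, as here.
  static void multipleInit(int flag) {
    String s = (flag == 0) ? new String() : new String();
  }

  public static void main(String[] args) {
    multipleInit(0);
    multipleInit(1);
    System.out.println("passed");
  }
}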
diff --git a/test/591-new-instance-string/src/Main.java b/test/591-new-instance-string/src/Main.java
new file mode 100644
index 0000000..bd59b95
--- /dev/null
+++ b/test/591-new-instance-string/src/Main.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.*;
+
+public class Main {
+
+  public static void main(String args[]) throws Throwable {
+    Class<?> c = Class.forName("NewInstance");
+    Method m = c.getMethod("multipleInit", int.class);
+    m.invoke(null, 0);
+    m.invoke(null, 1);
+    System.out.println("passed");
+  }
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/592-checker-regression-bool-input/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/592-checker-regression-bool-input/expected.txt
diff --git a/test/592-checker-regression-bool-input/info.txt b/test/592-checker-regression-bool-input/info.txt
new file mode 100644
index 0000000..8b97d9d
--- /dev/null
+++ b/test/592-checker-regression-bool-input/info.txt
@@ -0,0 +1,2 @@
+Regression test for Optimizing's GraphChecker, which used to verify the internal
+type of a boolean input.
\ No newline at end of file
diff --git a/test/592-checker-regression-bool-input/smali/TestCase.smali b/test/592-checker-regression-bool-input/smali/TestCase.smali
new file mode 100644
index 0000000..56c499d
--- /dev/null
+++ b/test/592-checker-regression-bool-input/smali/TestCase.smali
@@ -0,0 +1,42 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+## CHECK-START: boolean TestCase.testCase() load_store_elimination (after)
+## CHECK-DAG:     If [{{b\d+}}]
+
+.method public static testCase()Z
+    .registers 6
+
+    sget-boolean v0, LMain;->field0:Z
+    sget-boolean v1, LMain;->field1:Z
+    or-int v2, v0, v1
+    int-to-byte v2, v2
+    sput-boolean v2, LMain;->field2:Z
+
+    # LSE will replace this sget with the type conversion above...
+    sget-boolean v2, LMain;->field2:Z
+
+    # ... and generate an If with a byte-typed condition.
+    if-eqz v2, :else
+    const v0, 0x1
+    return v0
+
+    :else
+    const v0, 0x0
+    return v0
+.end method
diff --git a/test/592-checker-regression-bool-input/src/Main.java b/test/592-checker-regression-bool-input/src/Main.java
new file mode 100644
index 0000000..35ae59c
--- /dev/null
+++ b/test/592-checker-regression-bool-input/src/Main.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+  // Workaround for b/18051191.
+  class Inner {}
+
+  public static boolean field0;
+  public static boolean field1;
+  public static boolean field2;
+
+  public static void assertTrue(boolean result) {
+    if (!result) {
+      throw new Error("Expected true");
+    }
+  }
+
+  public static void assertFalse(boolean result) {
+    if (result) {
+      throw new Error("Expected false");
+    }
+  }
+
+  public static void main(String[] args) throws Throwable {
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod("testCase");
+
+    try {
+      field0 = true;
+      field1 = false;
+      assertTrue((Boolean) m.invoke(null, null));
+
+      field0 = true;
+      field1 = true;
+      assertTrue((Boolean) m.invoke(null, null));
+
+      field0 = false;
+      field1 = false;
+      assertFalse((Boolean) m.invoke(null, null));
+    } catch (Exception e) {
+      throw new Error(e);
+    }
+  }
+}
diff --git a/test/593-checker-boolean-to-integral-conv/expected.txt b/test/593-checker-boolean-to-integral-conv/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/593-checker-boolean-to-integral-conv/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/593-checker-boolean-to-integral-conv/info.txt b/test/593-checker-boolean-to-integral-conv/info.txt
new file mode 100644
index 0000000..2d883c7
--- /dev/null
+++ b/test/593-checker-boolean-to-integral-conv/info.txt
@@ -0,0 +1 @@
+Regression test for boolean to integral type conversions.
diff --git a/test/593-checker-boolean-to-integral-conv/src/Main.java b/test/593-checker-boolean-to-integral-conv/src/Main.java
new file mode 100644
index 0000000..ba65839
--- /dev/null
+++ b/test/593-checker-boolean-to-integral-conv/src/Main.java
@@ -0,0 +1,232 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String args[]) {
+    expectEqualsByte((byte)1, booleanToByte(true));
+    expectEqualsShort((short)1, booleanToShort(true));
+    expectEqualsChar((char)1, booleanToChar(true));
+    expectEqualsInt(1, booleanToInt(true));
+    expectEqualsLong(1L, booleanToLong(true));
+
+    expectEqualsInt(1, longToIntOfBoolean());
+
+    System.out.println("passed");
+  }
+
+  /// CHECK-START: byte Main.booleanToByte(boolean) builder (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Cond:z\d+>>          Equal [<<Arg>>,<<Zero>>]
+  /// CHECK-DAG:                            If [<<Cond>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>           Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<IToS:b\d+>>          TypeConversion [<<Phi>>]
+  /// CHECK-DAG:                            Return [<<IToS>>]
+
+  /// CHECK-START: byte Main.booleanToByte(boolean) select_generator (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Arg>>]
+  /// CHECK-DAG:     <<IToS:b\d+>>          TypeConversion [<<Sel>>]
+  /// CHECK-DAG:                            Return [<<IToS>>]
+
+  /// CHECK-START: byte Main.booleanToByte(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:                            Return [<<Arg>>]
+
+  static byte booleanToByte(boolean b) {
+    return (byte)(b ? 1 : 0);
+  }
+
+  /// CHECK-START: short Main.booleanToShort(boolean) builder (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Cond:z\d+>>          Equal [<<Arg>>,<<Zero>>]
+  /// CHECK-DAG:                            If [<<Cond>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>           Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<IToS:s\d+>>          TypeConversion [<<Phi>>]
+  /// CHECK-DAG:                            Return [<<IToS>>]
+
+  /// CHECK-START: short Main.booleanToShort(boolean) select_generator (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Arg>>]
+  /// CHECK-DAG:     <<IToS:s\d+>>          TypeConversion [<<Sel>>]
+  /// CHECK-DAG:                            Return [<<IToS>>]
+
+  /// CHECK-START: short Main.booleanToShort(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:                            Return [<<Arg>>]
+
+  static short booleanToShort(boolean b) {
+    return (short)(b ? 1 : 0);
+  }
+
+  /// CHECK-START: char Main.booleanToChar(boolean) builder (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Cond:z\d+>>          Equal [<<Arg>>,<<Zero>>]
+  /// CHECK-DAG:                            If [<<Cond>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>           Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<IToC:c\d+>>          TypeConversion [<<Phi>>]
+  /// CHECK-DAG:                            Return [<<IToC>>]
+
+  /// CHECK-START: char Main.booleanToChar(boolean) select_generator (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Arg>>]
+  /// CHECK-DAG:     <<IToC:c\d+>>          TypeConversion [<<Sel>>]
+  /// CHECK-DAG:                            Return [<<IToC>>]
+
+  /// CHECK-START: char Main.booleanToChar(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:                            Return [<<Arg>>]
+
+  static char booleanToChar(boolean b) {
+    return (char)(b ? 1 : 0);
+  }
+
+  /// CHECK-START: int Main.booleanToInt(boolean) builder (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Cond:z\d+>>          Equal [<<Arg>>,<<Zero>>]
+  /// CHECK-DAG:                            If [<<Cond>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>           Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:                            Return [<<Phi>>]
+
+  /// CHECK-START: int Main.booleanToInt(boolean) select_generator (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Arg>>]
+  /// CHECK-DAG:                            Return [<<Sel>>]
+
+  /// CHECK-START: int Main.booleanToInt(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:                            Return [<<Arg>>]
+
+  static int booleanToInt(boolean b) {
+    return b ? 1 : 0;
+  }
+
+  /// CHECK-START: long Main.booleanToLong(boolean) builder (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Cond:z\d+>>          Equal [<<Arg>>,<<Zero>>]
+  /// CHECK-DAG:                            If [<<Cond>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>           Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<IToJ:j\d+>>          TypeConversion [<<Phi>>]
+  /// CHECK-DAG:                            Return [<<IToJ>>]
+
+  /// CHECK-START: long Main.booleanToLong(boolean) select_generator (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Arg>>]
+  /// CHECK-DAG:     <<IToJ:j\d+>>          TypeConversion [<<Sel>>]
+  /// CHECK-DAG:                            Return [<<IToJ>>]
+
+  /// CHECK-START: long Main.booleanToLong(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK:         <<Arg:z\d+>>           ParameterValue
+  /// CHECK-DAG:     <<ZToJ:j\d+>>          TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                            Return [<<ZToJ>>]
+
+  static long booleanToLong(boolean b) {
+    return b ? 1 : 0;
+  }
+
+  /// CHECK-START: int Main.longToIntOfBoolean() builder (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>>     CurrentMethod
+  /// CHECK-DAG:     <<Sget:z\d+>>          StaticFieldGet
+  /// CHECK-DAG:     <<ZToJ:j\d+>>          InvokeStaticOrDirect [<<Sget>>,<<Method>>]
+  /// CHECK-DAG:     <<JToI:i\d+>>          TypeConversion [<<ZToJ>>]
+  /// CHECK-DAG:                            Return [<<JToI>>]
+
+  /// CHECK-START: int Main.longToIntOfBoolean() inliner (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>>     CurrentMethod
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sget:z\d+>>          StaticFieldGet
+  /// CHECK-DAG:                            If [<<Sget>>]
+  /// CHECK-DAG:     <<Phi:i\d+>>           Phi [<<One>>,<<Zero>>]
+  /// CHECK-DAG:     <<IToJ:j\d+>>          TypeConversion [<<Phi>>]
+  /// CHECK-DAG:     <<JToI:i\d+>>          TypeConversion [<<IToJ>>]
+  /// CHECK-DAG:                            Return [<<JToI>>]
+
+  /// CHECK-START: int Main.longToIntOfBoolean() select_generator (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>>     CurrentMethod
+  /// CHECK-DAG:     <<Zero:i\d+>>          IntConstant 0
+  /// CHECK-DAG:     <<One:i\d+>>           IntConstant 1
+  /// CHECK-DAG:     <<Sget:z\d+>>          StaticFieldGet
+  /// CHECK-DAG:     <<Sel:i\d+>>           Select [<<Zero>>,<<One>>,<<Sget>>]
+  /// CHECK-DAG:     <<IToJ:j\d+>>          TypeConversion [<<Sel>>]
+  /// CHECK-DAG:     <<JToI:i\d+>>          TypeConversion [<<IToJ>>]
+  /// CHECK-DAG:                            Return [<<JToI>>]
+
+  /// CHECK-START: int Main.longToIntOfBoolean() instruction_simplifier_after_bce (after)
+  /// CHECK-DAG:     <<Method:[ij]\d+>>     CurrentMethod
+  /// CHECK-DAG:     <<Sget:z\d+>>          StaticFieldGet
+  /// CHECK-DAG:                            Return [<<Sget>>]
+
+  static int longToIntOfBoolean() {
+    long l = booleanToLong(booleanField);
+    return (int) l;
+  }
+
+
+  private static void expectEqualsByte(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEqualsShort(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEqualsChar(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEqualsInt(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEqualsLong(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+
+  public static boolean booleanField = true;
+
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/593-checker-long-to-float-regression/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/593-checker-long-to-float-regression/expected.txt
diff --git a/test/593-checker-long-to-float-regression/info.txt b/test/593-checker-long-to-float-regression/info.txt
new file mode 100644
index 0000000..39402e9
--- /dev/null
+++ b/test/593-checker-long-to-float-regression/info.txt
@@ -0,0 +1,3 @@
+Regression test for x86_64's code generator, which had a bug in
+the long-to-float implementation that loaded a constant as a 64-bit double
+instead of a 32-bit float.
diff --git a/test/593-checker-long-to-float-regression/src/Main.java b/test/593-checker-long-to-float-regression/src/Main.java
new file mode 100644
index 0000000..9c07f3d
--- /dev/null
+++ b/test/593-checker-long-to-float-regression/src/Main.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  static boolean doThrow = false;
+  static long longValue;
+
+  public static void assertEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void main(String[] args) {
+    assertEquals(1.0F, $noinline$longToFloat());
+  }
+
+  /// CHECK-START: float Main.$noinline$longToFloat() register (after)
+  /// CHECK-DAG:     <<Const1:j\d+>>   LongConstant 1
+  /// CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const1>>]
+  /// CHECK-DAG:                       Return [<<Convert>>]
+
+  static float $noinline$longToFloat() {
+    if (doThrow) { throw new Error(); }
+    longValue = $inline$returnConst();
+    return (float) longValue;
+  }
+
+  static long $inline$returnConst() {
+    return 1L;
+  }
+}
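
Why the constant's width matters: for longs above 2^24 the float and double conversions round differently, so materializing the constant as a 64-bit double where a 32-bit float is expected can change the observed value. A small demonstration (my illustration, separate from the test):

public class LongToFloatWidth {
  public static void main(String[] args) {
    long v = (1L << 24) + 1;         // 16777217 is not representable as float
    System.out.println((float) v);   // 1.6777216E7 (rounded to a 24-bit mantissa)
    System.out.println((double) v);  // 1.6777217E7 (exact)
  }
}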
diff --git a/test/593-checker-shift-and-simplifier/expected.txt b/test/593-checker-shift-and-simplifier/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/593-checker-shift-and-simplifier/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/593-checker-shift-and-simplifier/info.txt b/test/593-checker-shift-and-simplifier/info.txt
new file mode 100644
index 0000000..2f4c7f5
--- /dev/null
+++ b/test/593-checker-shift-and-simplifier/info.txt
@@ -0,0 +1 @@
+Regression test for a pattern that caused double removal of an AND by the ARM64 simplifier.
diff --git a/test/593-checker-shift-and-simplifier/src/Main.java b/test/593-checker-shift-and-simplifier/src/Main.java
new file mode 100644
index 0000000..65e809a
--- /dev/null
+++ b/test/593-checker-shift-and-simplifier/src/Main.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  private static int[] a = { 10 };
+
+  // A very particular set of operations that caused a double removal by the
+  // ARM64 simplifier doing "forward" removals (b/27851582).
+
+  /// CHECK-START-ARM64: int Main.operations() instruction_simplifier_arm64 (before)
+  /// CHECK-DAG: <<Get:i\d+>> ArrayGet
+  /// CHECK-DAG: <<Not:i\d+>> Not [<<Get>>]
+  /// CHECK-DAG: <<Shl:i\d+>> Shl [<<Get>>,i{{\d+}}]
+  /// CHECK-DAG:              And [<<Not>>,<<Shl>>]
+  //
+  /// CHECK-START-ARM64: int Main.operations() instruction_simplifier_arm64 (after)
+  /// CHECK-DAG: <<Get:i\d+>> ArrayGet
+  /// CHECK-DAG: <<Not:i\d+>> Not [<<Get>>]
+  /// CHECK-DAG:              Arm64DataProcWithShifterOp [<<Not>>,<<Get>>] kind:And+LSL shift:2
+  private static int operations() {
+     int r = a[0];
+     int n = ~r;
+     int s = r << 2;
+     int a = s & n;
+     return a;
+  }
+
+  public static void main(String[] args) {
+    if (operations() != 32) {
+      System.out.println("failed");
+    } else {
+      System.out.println("passed");
+    }
+  }
+}
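
For reference, the value 32 checked in main() follows directly from the bit patterns involved; a worked version of the same computation (my arithmetic):

public class ShiftAndWorked {
  public static void main(String[] args) {
    int r = 10;                 // a[0]  -> ...0000_1010b
    int n = ~r;                 // -11   -> ...1111_0101b
    int s = r << 2;             // 40    -> ...0010_1000b
    System.out.println(s & n);  // 32    -> ...0010_0000b
  }
}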
diff --git a/test/594-checker-array-alias/expected.txt b/test/594-checker-array-alias/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/594-checker-array-alias/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/594-checker-array-alias/info.txt b/test/594-checker-array-alias/info.txt
new file mode 100644
index 0000000..57c6de5
--- /dev/null
+++ b/test/594-checker-array-alias/info.txt
@@ -0,0 +1 @@
+Tests on array parameters with and without aliasing.
diff --git a/test/594-checker-array-alias/src/Main.java b/test/594-checker-array-alias/src/Main.java
new file mode 100644
index 0000000..5ece2e2
--- /dev/null
+++ b/test/594-checker-array-alias/src/Main.java
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+
+//
+// Test on array parameters with or without potential aliasing.
+//
+public class Main {
+
+  //
+  // Cross-over tests on parameters with potential aliasing.
+  // The arrays a and b may point to the same memory, which (without
+  // further runtime tests) prevents hoisting the seemingly invariant
+  // array reference.
+  //
+
+  /// CHECK-START: void Main.CrossOverLoop1(int[], int[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.CrossOverLoop1(int[], int[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void CrossOverLoop1(int a[], int b[]) {
+    b[20] = 99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoop2(float[], float[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.CrossOverLoop2(float[], float[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void CrossOverLoop2(float a[], float b[]) {
+    b[20] = 99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoop3(long[], long[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.CrossOverLoop3(long[], long[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void CrossOverLoop3(long a[], long b[]) {
+    b[20] = 99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.CrossOverLoop4(double[], double[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.CrossOverLoop4(double[], double[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void CrossOverLoop4(double a[], double b[]) {
+    b[20] = 99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  //
+  // False cross-over on parameters. The parameters have the same width (which
+  // used to cause false type aliasing in an older version of the compiler),
+  // but since their types differ they cannot alias. Thus, the invariant array
+  // reference can be hoisted.
+  //
+
+  /// CHECK-START: void Main.FalseCrossOverLoop1(int[], float[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.FalseCrossOverLoop1(int[], float[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void FalseCrossOverLoop1(int a[], float b[]) {
+    b[20] = -99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = (int) b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop2(float[], int[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.FalseCrossOverLoop2(float[], int[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void FalseCrossOverLoop2(float a[], int b[]) {
+    b[20] = -99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop3(long[], double[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.FalseCrossOverLoop3(long[], double[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void FalseCrossOverLoop3(long a[], double b[]) {
+    b[20] = -99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = (long) b[20] - 7;
+    }
+  }
+
+  /// CHECK-START: void Main.FalseCrossOverLoop4(double[], long[]) licm (before)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:{{B\d+}}
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  //
+  /// CHECK-START: void Main.FalseCrossOverLoop4(double[], long[]) licm (after)
+  /// CHECK-DAG: ArraySet loop:none
+  /// CHECK-DAG: ArrayGet loop:none
+  /// CHECK-DAG: ArraySet loop:{{B\d+}}
+  private static void FalseCrossOverLoop4(double a[], long b[]) {
+    b[20] = -99;
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[20] - 7;
+    }
+  }
+
+  //
+  // Main driver and testers.
+  //
+
+  public static void main(String[] args) {
+    int[] aI = new int[100];
+    float[] aF = new float[100];
+    long[] aJ = new long[100];
+    double[] aD = new double[100];
+
+    // Type I.
+    CrossOverLoop1(aI, aI);
+    for (int i = 0; i < aI.length; i++) {
+      expectEquals(i <= 20 ? 92 : 85, aI[i]);
+    }
+    // Type F.
+    CrossOverLoop2(aF, aF);
+    for (int i = 0; i < aF.length; i++) {
+      expectEquals(i <= 20 ? 92 : 85, aF[i]);
+    }
+    // Type J.
+    CrossOverLoop3(aJ, aJ);
+    for (int i = 0; i < aJ.length; i++) {
+      expectEquals(i <= 20 ? 92 : 85, aJ[i]);
+    }
+    // Type D.
+    CrossOverLoop4(aD, aD);
+    for (int i = 0; i < aD.length; i++) {
+      expectEquals(i <= 20 ? 92 : 85, aD[i]);
+    }
+
+    // Type I vs F.
+    FalseCrossOverLoop1(aI, aF);
+    for (int i = 0; i < aI.length; i++) {
+      expectEquals(-106, aI[i]);
+    }
+    // Type F vs I.
+    FalseCrossOverLoop2(aF, aI);
+    for (int i = 0; i < aF.length; i++) {
+      expectEquals(-106, aF[i]);
+    }
+    // Type J vs D.
+    FalseCrossOverLoop3(aJ, aD);
+    for (int i = 0; i < aJ.length; i++) {
+      expectEquals(-106, aJ[i]);
+    }
+    // Type D vs J.
+    FalseCrossOverLoop4(aD, aJ);
+    for (int i = 0; i < aD.length; i++) {
+      expectEquals(-106, aD[i]);
+    }
+
+    // Real-world example where incorrect type assignment could introduce a bug.
+    // The library sorting algorithm is heavy on array reads and writes, and
+    // assigning the wrong J/D type to one of these would introduce errors.
+    for (int i = 0; i < aD.length; i++) {
+      aD[i] = aD.length - i - 1;
+    }
+    Arrays.sort(aD);
+    for (int i = 0; i < aD.length; i++) {
+      expectEquals((double) i, aD[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
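
The expected values in the cross-over checks come from the write-through at i == 20: when a and b are the same array, the store a[20] = 99 - 7 also updates b[20], so later iterations read 92 rather than 99. A minimal illustration of why the b[20] read cannot be hoisted (mine, not part of the test):

public class AliasSketch {
  public static void main(String[] args) {
    int[] shared = new int[100];
    shared[20] = 99;
    for (int i = 0; i < shared.length; i++) {
      shared[i] = shared[20] - 7;  // at i == 20 this store changes shared[20]
    }
    // shared[i] == 92 for i <= 20 and 85 afterwards; hoisting the read of
    // shared[20] out of the loop would wrongly yield 92 everywhere.
    System.out.println(shared[0] + " " + shared[20] + " " + shared[99]);  // 92 92 85
  }
}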
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/594-checker-irreducible-linorder/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/594-checker-irreducible-linorder/expected.txt
diff --git a/test/594-checker-irreducible-linorder/info.txt b/test/594-checker-irreducible-linorder/info.txt
new file mode 100644
index 0000000..a1783f8
--- /dev/null
+++ b/test/594-checker-irreducible-linorder/info.txt
@@ -0,0 +1,2 @@
+Regression test for a failing DCHECK in SSA liveness analysis in the presence
+of irreducible loops.
diff --git a/test/594-checker-irreducible-linorder/smali/IrreducibleLoop.smali b/test/594-checker-irreducible-linorder/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..ef53ee8
--- /dev/null
+++ b/test/594-checker-irreducible-linorder/smali/IrreducibleLoop.smali
@@ -0,0 +1,123 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+.super Ljava/lang/Object;
+
+# Test case where liveness analysis produces linear order where loop blocks are
+# not adjacent.
+
+## CHECK-START: int IrreducibleLoop.liveness(boolean, boolean, boolean, int) builder (after)
+## CHECK-DAG:     Add loop:none
+## CHECK-DAG:     Mul loop:<<Loop:B\d+>>
+## CHECK-DAG:     Not loop:<<Loop>>
+
+## CHECK-START: int IrreducibleLoop.liveness(boolean, boolean, boolean, int) liveness (after)
+## CHECK-DAG:     Add liveness:<<LPreEntry:\d+>>
+## CHECK-DAG:     Mul liveness:<<LHeader:\d+>>
+## CHECK-DAG:     Not liveness:<<LBackEdge:\d+>>
+## CHECK-EVAL:    (<<LHeader>> < <<LPreEntry>>) and (<<LPreEntry>> < <<LBackEdge>>)
+
+.method public static liveness(ZZZI)I
+   .registers 10
+   const/16 v0, 42
+
+   if-eqz p0, :header
+
+   :pre_entry
+   add-int/2addr p3, p3
+   invoke-static {v0}, Ljava/lang/System;->exit(I)V
+   goto :body1
+
+   # Trivially dead code to ensure linear order verification skips removed blocks (b/28252537).
+   :dead_code
+   nop
+   goto :dead_code
+
+   :header
+   mul-int/2addr p3, p3
+   if-eqz p1, :body2
+
+   :body1
+   goto :body_merge
+
+   :body2
+   invoke-static {v0}, Ljava/lang/System;->exit(I)V
+   goto :body_merge
+
+   :body_merge
+   if-eqz p2, :exit
+
+   :back_edge
+   not-int p3, p3
+   goto :header
+
+   :exit
+   return p3
+
+.end method
+
+## CHECK-START: int IrreducibleLoop.liveness2(boolean, boolean, boolean, int) builder (after)
+## CHECK-DAG:     Mul loop:<<Loop:B\d+>>
+## CHECK-DAG:     Not loop:<<Loop>>
+
+## CHECK-START: int IrreducibleLoop.liveness2(boolean, boolean, boolean, int) liveness (after)
+## CHECK-DAG:     Mul liveness:<<LPreEntry2:\d+>>
+## CHECK-DAG:     Not liveness:<<LBackEdge1:\d+>>
+## CHECK-EVAL:    <<LBackEdge1>> < <<LPreEntry2>>
+
+.method public liveness2(ZZZI)I
+    .registers 10
+
+    const v1, 1
+
+    :header1
+    if-eqz p0, :body1
+
+    :exit
+    return p3
+
+    :body1
+    # The test will generate an incorrect linear order when the following IF swaps
+    # its successors. To do that, load a boolean value and compare NotEqual to 1.
+    sget-boolean v2, LIrreducibleLoop;->f:Z
+    const v3, 1
+    if-ne v2, v3, :pre_header2
+
+    :pre_entry2
+    # This constant has a use in a phi in :back_edge2 and a back edge use in
+    # :back_edge1. Because the linear order is wrong, the back edge use has
+    # a lower liveness than the phi use.
+    const v0, 42
+    mul-int/2addr p3, p3
+    goto :back_edge2
+
+    :back_edge2
+    add-int/2addr p3, v0
+    add-int/2addr v0, v1
+    goto :header2
+
+    :header2
+    if-eqz p2, :back_edge2
+
+    :back_edge1
+    not-int p3, p3
+    goto :header1
+
+    :pre_header2
+    const v0, 42
+    goto :header2
+.end method
+
+.field public static f:Z
diff --git a/test/594-checker-irreducible-linorder/src/Main.java b/test/594-checker-irreducible-linorder/src/Main.java
new file mode 100644
index 0000000..38b2ab4
--- /dev/null
+++ b/test/594-checker-irreducible-linorder/src/Main.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {
+    // Nothing to run. This regression test merely makes sure the smali test
+    // case successfully compiles.
+  }
+}
diff --git a/test/594-invoke-super/expected.txt b/test/594-invoke-super/expected.txt
new file mode 100644
index 0000000..de26026
--- /dev/null
+++ b/test/594-invoke-super/expected.txt
@@ -0,0 +1,7 @@
+new A
+I am A's foo
+new B
+I am B's foo
+new A
+new B
+passed
diff --git a/test/594-invoke-super/info.txt b/test/594-invoke-super/info.txt
new file mode 100644
index 0000000..440d8b8
--- /dev/null
+++ b/test/594-invoke-super/info.txt
@@ -0,0 +1 @@
+Invoke-super on various references.
diff --git a/test/594-invoke-super/smali/invoke-super.smali b/test/594-invoke-super/smali/invoke-super.smali
new file mode 100644
index 0000000..6f787dd
--- /dev/null
+++ b/test/594-invoke-super/smali/invoke-super.smali
@@ -0,0 +1,31 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LZ;
+.super LA;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LA;-><init>()V
+    return-void
+.end method
+
+.method public foo()V
+.registers 3
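+    # Z extends A, but the invoke-super below names LY;->foo() on a Y instance.
+    # Y is not in Z's superclass chain, so this call is expected to throw
+    # NoSuchMethodError (checked via reflection in Main.java).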
+    new-instance v0, LY;
+    invoke-direct {v0}, LY;-><init>()V
+    invoke-super {v0}, LY;->foo()V
+    return-void
+.end method
diff --git a/test/594-invoke-super/src/Main.java b/test/594-invoke-super/src/Main.java
new file mode 100644
index 0000000..53f2bbf
--- /dev/null
+++ b/test/594-invoke-super/src/Main.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+//
+// Two classes A and B with method foo().
+//
+
+class A {
+  A() { System.out.println("new A"); }
+
+  public void foo() { System.out.println("I am A's foo"); }
+
+  // We previously used to invoke this method with a Y instance, due to the
+  // underspecified behavior of invoke-super.
+  public void bar() { System.out.println("I am A's bar"); }
+}
+
+class B {
+  B() { System.out.println("new B"); }
+
+  public void foo() { System.out.println("I am B's foo"); }
+}
+
+//
+// Two subclasses X and Y that call foo() on super.
+//
+
+class X extends A {
+  public void foo() { super.foo(); }
+}
+
+class Y extends B {
+  public void foo() { super.foo(); }
+}
+
+//
+// Driver class.
+//
+
+public class Main {
+
+  public static void main(String[] args) throws Exception {
+    // The normal stuff, X's super goes to A, Y's super goes to B.
+    new X().foo();
+    new Y().foo();
+
+    // And now it gets interesting.
+
+    // In bytecode, we define a class Z that is a subclass of A, and we call
+    // invoke-super on an instance of Y.
+    Class<?> z = Class.forName("Z");
+    Method m = z.getMethod("foo");
+    try {
+      m.invoke(z.newInstance());
+      throw new Error("Expected InvocationTargetException");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof NoSuchMethodError)) {
+        throw new Error("Expected NoSuchMethodError");
+      }
+    }
+
+    System.out.println("passed");
+  }
+}
diff --git a/test/594-load-string-regression/expected.txt b/test/594-load-string-regression/expected.txt
new file mode 100644
index 0000000..365b0e1
--- /dev/null
+++ b/test/594-load-string-regression/expected.txt
@@ -0,0 +1 @@
+String: ""
diff --git a/test/594-load-string-regression/info.txt b/test/594-load-string-regression/info.txt
new file mode 100644
index 0000000..6a07ace
--- /dev/null
+++ b/test/594-load-string-regression/info.txt
@@ -0,0 +1,2 @@
+Regression test for LoadString listing side effects when it doesn't have any
+and triggering a DCHECK() failure when merging ClinitCheck into NewInstance.
diff --git a/test/594-load-string-regression/src/Main.java b/test/594-load-string-regression/src/Main.java
new file mode 100644
index 0000000..0b9f7b5
--- /dev/null
+++ b/test/594-load-string-regression/src/Main.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  static boolean doThrow = false;
+
+  // Note: We're not doing checker tests as we cannot do them specifically for a non-PIC
+  // configuration. The check here would be "prepare_for_register_allocation (before)"
+  //     CHECK:         LoadClass
+  //     CHECK-NEXT:    ClinitCheck
+  //     CHECK-NEXT:    LoadString load_kind:BootImageAddress
+  //     CHECK-NEXT:    NewInstance
+  // and "prepare_for_register_allocation (after)"
+  //     CHECK:         LoadString
+  //     CHECK-NEXT:    NewInstance
+  // but the order of instructions for non-PIC mode is different.
+  public static int $noinline$test() {
+    if (doThrow) { throw new Error(); }
+
+    int r = 0x12345678;
+    do {
+      // LICM pulls the LoadClass and ClinitCheck out of the loop, leaves NewInstance in the loop.
+      Helper h = new Helper();
+      // For non-PIC mode, LICM pulls the boot image LoadString out of the loop.
+      // (For PIC mode, the LoadString can throw and will not be moved out of the loop.)
+      String s = "";  // Empty string is known to be in the boot image.
+      r = r ^ (r >> 5);
+      h.$noinline$printString(s);
+      // During DCE after inlining, the loop back-edge disappears and the pre-header is
+      // merged with the body, leaving consecutive LoadClass, ClinitCheck, LoadString
+      // and NewInstance in non-PIC mode. The prepare_for_register_allocation pass
+      // merges the LoadClass and ClinitCheck with the NewInstance and checks that
+      // there are no instructions with side effects in between. This check used to
+      // fail because LoadString was always listing SideEffects::CanTriggerGC() even
+      // when it doesn't really have any side effects, i.e. for direct references to
+      // boot image Strings or for Strings known to be in the dex cache.
+    } while ($inline$shouldContinue());
+    return r;
+  }
+
+  static boolean $inline$shouldContinue() {
+    return false;
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(0x12345678 ^ (0x12345678 >> 5), $noinline$test());
+  }
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
+
+class Helper {
+  static boolean doThrow = false;
+
+  public void $noinline$printString(String s) {
+    if (doThrow) { throw new Error(); }
+
+    System.out.println("String: \"" + s + "\"");
+  }
+}
diff --git a/test/595-error-class/expected.txt b/test/595-error-class/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/595-error-class/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/595-error-class/info.txt b/test/595-error-class/info.txt
new file mode 100644
index 0000000..a58b8b3
--- /dev/null
+++ b/test/595-error-class/info.txt
@@ -0,0 +1 @@
+Regression test for merging an array type with an error component type.
diff --git a/test/595-error-class/smali/error.smali b/test/595-error-class/smali/error.smali
new file mode 100644
index 0000000..925c34b
--- /dev/null
+++ b/test/595-error-class/smali/error.smali
@@ -0,0 +1,23 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public final LAnError;
+
+.super LSuperOfAnError;
+
+# Override a final method to put this class in the error state.
+.method public foo()V
+  .registers 1
+  return-void
+.end method
diff --git a/test/595-error-class/smali/merge.smali b/test/595-error-class/smali/merge.smali
new file mode 100644
index 0000000..2f8b415
--- /dev/null
+++ b/test/595-error-class/smali/merge.smali
@@ -0,0 +1,31 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LMerge;
+.super Ljava/lang/Object;
+
+# Method that selects between x = new Integer[] and x = new AnError[].
+# Reference type propagation should correctly see the error in the component type.
+.method public static select(Z)Ljava/lang/Object;
+    .registers 2
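+    # With 2 registers and a single boolean parameter, p0 aliases v1.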
+    const/16 v0, 10
+    if-eqz v1, :Skip
+    new-array v0, v0, [LAnError;
+    goto :Done
+:Skip
+    new-array v0, v0, [Ljava/lang/Integer;
+:Done
+    return-object v0
+.end method
diff --git a/test/595-error-class/smali/super.smali b/test/595-error-class/smali/super.smali
new file mode 100644
index 0000000..da7467d
--- /dev/null
+++ b/test/595-error-class/smali/super.smali
@@ -0,0 +1,22 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSuperOfAnError;
+
+.super Ljava/lang/Object;
+
+.method public final foo()V
+  .registers 1
+  return-void
+.end method
diff --git a/test/595-error-class/src/Main.java b/test/595-error-class/src/Main.java
new file mode 100644
index 0000000..655fa43
--- /dev/null
+++ b/test/595-error-class/src/Main.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.*;
+
+public class Main {
+
+  public static void main(String args[]) throws Throwable {
+    Class<?> c = Class.forName("Merge");
+    Method m = c.getMethod("select", boolean.class);
+    Object x = m.invoke(null, true);
+    if (x == null) {
+      throw new Error("Did not get array");
+    }
+    System.out.println("passed");
+  }
+}
diff --git a/test/595-profile-saving/expected.txt b/test/595-profile-saving/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/595-profile-saving/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/595-profile-saving/info.txt b/test/595-profile-saving/info.txt
new file mode 100644
index 0000000..5d318f5
--- /dev/null
+++ b/test/595-profile-saving/info.txt
@@ -0,0 +1 @@
+Check that profile recording works even when JIT compilation is not enabled.
diff --git a/test/595-profile-saving/profile-saving.cc b/test/595-profile-saving/profile-saving.cc
new file mode 100644
index 0000000..0d26f45
--- /dev/null
+++ b/test/595-profile-saving/profile-saving.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_file.h"
+
+#include "art_method-inl.h"
+#include "jit/offline_profiling_info.h"
+#include "jit/profile_saver.h"
+#include "jni.h"
+#include "method_reference.h"
+#include "mirror/class-inl.h"
+#include "oat_file_assistant.h"
+#include "oat_file_manager.h"
+#include "scoped_thread_state_change.h"
+#include "ScopedUtfChars.h"
+#include "thread.h"
+
+namespace art {
+namespace {
+
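+// Walks the current thread's stack looking for a method with the given name;
+// when found, eagerly allocates its ProfilingInfo and records its dex method
+// index so the Java side can look it up in the saved profile.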
+class CreateProfilingInfoVisitor : public StackVisitor {
+ public:
+  explicit CreateProfilingInfoVisitor(Thread* thread, const char* method_name)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        method_name_(method_name) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    if (m_name.compare(method_name_) == 0) {
+      ProfilingInfo::Create(Thread::Current(), m, /* retry_allocation */ true);
+      method_index_ = m->GetDexMethodIndex();
+      return false;
+    }
+    return true;
+  }
+
+  int method_index_ = -1;
+  const char* const method_name_;
+};
+
+extern "C" JNIEXPORT jint JNICALL Java_Main_ensureProfilingInfo(JNIEnv* env,
+                                                                jclass,
+                                                                jstring method_name) {
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+  ScopedObjectAccess soa(Thread::Current());
+  CreateProfilingInfoVisitor visitor(soa.Self(), chars.c_str());
+  visitor.WalkStack();
+  return visitor.method_index_;
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureProfileProcessing(JNIEnv*, jclass) {
+  ProfileSaver::ForceProcessProfiles();
+}
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_presentInProfile(
+      JNIEnv* env, jclass cls, jstring filename, jint method_index) {
+  ScopedUtfChars filename_chars(env, filename);
+  CHECK(filename_chars.c_str() != nullptr);
+  ScopedObjectAccess soa(Thread::Current());
+  const DexFile* dex_file = soa.Decode<mirror::Class*>(cls)->GetDexCache()->GetDexFile();
+  return ProfileSaver::HasSeenMethod(std::string(filename_chars.c_str()),
+                                     dex_file,
+                                     static_cast<uint16_t>(method_index));
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/595-profile-saving/run b/test/595-profile-saving/run
new file mode 100644
index 0000000..068ad03
--- /dev/null
+++ b/test/595-profile-saving/run
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use:
+# --compiler-filter=interpret-only to make sure that the test is not compiled AOT,
+#   and that it is not compiled when loaded (by PathClassLoader);
+# -Xjitsaveprofilinginfo to enable profile saving;
+# -Xusejit:false to disable the JIT and only test profiles.
+exec ${RUN} \
+  -Xcompiler-option --compiler-filter=interpret-only \
+  --runtime-option '-Xcompiler-option --compiler-filter=interpret-only' \
+  --runtime-option -Xjitsaveprofilinginfo \
+  --runtime-option -Xusejit:false \
+  "${@}"
diff --git a/test/595-profile-saving/src/Main.java b/test/595-profile-saving/src/Main.java
new file mode 100644
index 0000000..039503f
--- /dev/null
+++ b/test/595-profile-saving/src/Main.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+
+public class Main {
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+
+    File file = null;
+    try {
+      file = createTempFile();
+      // String codePath = getDexBaseLocation();
+      String codePath = System.getenv("DEX_LOCATION") + "/595-profile-saving.jar";
+      VMRuntime.registerAppInfo(file.getPath(),
+                                System.getenv("DEX_LOCATION"),
+                                new String[] {codePath},
+                                /* foreignProfileDir */ null);
+
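+      // Create a ProfilingInfo for the test method, force the profile saver to
+      // process pending profiles, then check that the method was recorded.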
+      int methodIdx = $opt$noinline$testProfile();
+      ensureProfileProcessing();
+      if (!presentInProfile(file.getPath(), methodIdx)) {
+        throw new RuntimeException("Method with index " + methodIdx + " not in the profile");
+      }
+    } finally {
+      if (file != null) {
+        file.delete();
+      }
+    }
+  }
+
+  public static int $opt$noinline$testProfile() {
+    if (doThrow) throw new Error();
+    // Make sure we have a profile info for this method without the need to loop.
+    return ensureProfilingInfo("$opt$noinline$testProfile");
+  }
+
+  // Return the dex method index.
+  public static native int ensureProfilingInfo(String methodName);
+  // Ensures the profile saver does its usual processing.
+  public static native void ensureProfileProcessing();
+  // Checks if the profile saver knows about the method.
+  public static native boolean presentInProfile(String profile, int methodIdx);
+
+  public static boolean doThrow = false;
+  private static final String TEMP_FILE_NAME_PREFIX = "dummy";
+  private static final String TEMP_FILE_NAME_SUFFIX = "-file";
+
+  static native String getProfileInfoDump(
+      String filename);
+
+  private static File createTempFile() throws Exception {
+    try {
+      return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+    } catch (IOException e) {
+      System.setProperty("java.io.tmpdir", "/data/local/tmp");
+      try {
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      } catch (IOException e2) {
+        System.setProperty("java.io.tmpdir", "/sdcard");
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      }
+    }
+  }
+
+  private static class VMRuntime {
+    private static final Method registerAppInfoMethod;
+    static {
+      try {
+        Class<? extends Object> c = Class.forName("dalvik.system.VMRuntime");
+        registerAppInfoMethod = c.getDeclaredMethod("registerAppInfo",
+            String.class, String.class, String[].class, String.class);
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public static void registerAppInfo(String profile, String appDir,
+                                       String[] codePaths, String foreignDir) throws Exception {
+      registerAppInfoMethod.invoke(null, profile, appDir, codePaths, foreignDir);
+    }
+  }
+}
diff --git a/test/596-app-images/app_images.cc b/test/596-app-images/app_images.cc
new file mode 100644
index 0000000..a5bbf5f
--- /dev/null
+++ b/test/596-app-images/app_images.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <pthread.h>
+#include <stdio.h>
+#include <vector>
+
+#include "gc/heap.h"
+#include "gc/space/image_space.h"
+#include "gc/space/space-inl.h"
+#include "image.h"
+#include "jni.h"
+#include "mirror/class.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+namespace {
+
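+// Returns true if any of the loaded image spaces is an app image, i.e. its
+// ImageHeader reports IsAppImage() (as opposed to the boot image).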
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_checkAppImageLoaded(JNIEnv*, jclass) {
+  ScopedObjectAccess soa(Thread::Current());
+  for (auto* space : Runtime::Current()->GetHeap()->GetContinuousSpaces()) {
+    if (space->IsImageSpace()) {
+      auto* image_space = space->AsImageSpace();
+      const auto& image_header = image_space->GetImageHeader();
+      if (image_header.IsAppImage()) {
+        return JNI_TRUE;
+      }
+    }
+  }
+  return JNI_FALSE;
+}
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_checkAppImageContains(JNIEnv*, jclass, jclass c) {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass_ptr = soa.Decode<mirror::Class*>(c);
+  for (auto* space : Runtime::Current()->GetHeap()->GetContinuousSpaces()) {
+    if (space->IsImageSpace()) {
+      auto* image_space = space->AsImageSpace();
+      const auto& image_header = image_space->GetImageHeader();
+      if (image_header.IsAppImage()) {
+        if (image_space->HasAddress(klass_ptr)) {
+          return JNI_TRUE;
+        }
+      }
+    }
+  }
+  return JNI_FALSE;
+}
+
+}  // namespace
+
+}  // namespace art
diff --git a/test/596-app-images/expected.txt b/test/596-app-images/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/596-app-images/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/596-app-images/info.txt b/test/596-app-images/info.txt
new file mode 100644
index 0000000..a3d5e7e
--- /dev/null
+++ b/test/596-app-images/info.txt
@@ -0,0 +1 @@
+Tests that app-images are loaded and used.
diff --git a/test/596-app-images/src/Main.java b/test/596-app-images/src/Main.java
new file mode 100644
index 0000000..75b31b8
--- /dev/null
+++ b/test/596-app-images/src/Main.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+  static class Inner {
+    public static int abc = 0;
+  }
+
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
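+    // Any output beyond the JNI_OnLoad line indicates failure: expected.txt
+    // contains only that line.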
+    if (!checkAppImageLoaded()) {
+      System.out.println("App image is not loaded!");
+    } else if (!checkAppImageContains(Inner.class)) {
+      System.out.println("App image does not contain Inner!");
+    }
+  }
+
+  public static native boolean checkAppImageLoaded();
+  public static native boolean checkAppImageContains(Class<?> klass);
+}
diff --git a/test/596-checker-dead-phi/expected.txt b/test/596-checker-dead-phi/expected.txt
new file mode 100644
index 0000000..d81cc07
--- /dev/null
+++ b/test/596-checker-dead-phi/expected.txt
@@ -0,0 +1 @@
+42
diff --git a/test/596-checker-dead-phi/info.txt b/test/596-checker-dead-phi/info.txt
new file mode 100644
index 0000000..7f7cf0f
--- /dev/null
+++ b/test/596-checker-dead-phi/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing where we used to replace a dead loop
+phi with its first incoming input.
diff --git a/test/596-checker-dead-phi/smali/IrreducibleLoop.smali b/test/596-checker-dead-phi/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..bab2ba9
--- /dev/null
+++ b/test/596-checker-dead-phi/smali/IrreducibleLoop.smali
@@ -0,0 +1,74 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+# Test case where liveness analysis produces a linear order in which loop
+# blocks are not adjacent. This revealed a bug in our SSA builder, where a dead
+# loop phi would be replaced by its incoming input during
+# SsaRedundantPhiElimination.
+
+# Check that the outer loop suspend check environment only has the parameter vreg.
+## CHECK-START: int IrreducibleLoop.liveness(int) builder (after)
+## CHECK-DAG:     <<Phi:i\d+>> Phi reg:4 loop:{{B\d+}} irreducible:false
+## CHECK-DAG:     SuspendCheck env:[[_,_,_,_,<<Phi>>]] loop:{{B\d+}} irreducible:false
+
+# Check that the linear order has non-adjacent loop blocks.
+## CHECK-START: int IrreducibleLoop.liveness(int) liveness (after)
+## CHECK-DAG:     Mul liveness:<<LPreEntry2:\d+>>
+## CHECK-DAG:     Add liveness:<<LBackEdge1:\d+>>
+## CHECK-EVAL:    <<LBackEdge1>> < <<LPreEntry2>>
+
+.method public static liveness(I)I
+    .registers 5
+
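+    # v1 holds a reference that becomes a dead loop phi: the wide constant at
+    # :back_edge1 below clobbers v1, leaving it undefined on the back edge.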
+    const-string v1, "MyString"
+
+    :header1
+    if-eqz p0, :body1
+
+    :exit
+    return p0
+
+    :body1
+    # The test will generate an incorrect linear order when the following IF swaps
+    # its successors. To do that, load a boolean value and compare NotEqual to 1.
+    sget-boolean v2, LIrreducibleLoop;->f:Z
+    const v3, 1
+    if-ne v2, v3, :pre_header2
+
+    :pre_entry2
+    # Add a marker on the irreducible loop entry.
+    mul-int/2addr p0, p0
+    goto :back_edge2
+
+    :back_edge2
+    goto :header2
+
+    :header2
+    if-eqz p0, :back_edge2
+
+    :back_edge1
+    # Add a marker on the outer loop back edge.
+    add-int/2addr p0, p0
+    # Set a wide register, to have v1 undefined at the back edge.
+    const-wide/16 v0, 0x1
+    goto :header1
+
+    :pre_header2
+    goto :header2
+.end method
+
+.field public static f:Z
diff --git a/test/596-checker-dead-phi/src/Main.java b/test/596-checker-dead-phi/src/Main.java
new file mode 100644
index 0000000..5a3fffc
--- /dev/null
+++ b/test/596-checker-dead-phi/src/Main.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    // Note that we don't actually enter the loops in the 'liveness'
+    // method, so this is just a sanity check that part of the code we
+    // generated for that method is correct.
+    Method m = c.getMethod("liveness", int.class);
+    Object[] arguments = { 42 };
+    System.out.println(m.invoke(null, arguments));
+  }
+}
diff --git a/test/597-deopt-new-string/deopt.cc b/test/597-deopt-new-string/deopt.cc
new file mode 100644
index 0000000..844a786
--- /dev/null
+++ b/test/597-deopt-new-string/deopt.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+#include "mirror/class-inl.h"
+#include "runtime.h"
+#include "thread_list.h"
+#include "thread_state.h"
+#include "gc/gc_cause.h"
+#include "gc/scoped_gc_critical_section.h"
+
+namespace art {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_deoptimizeAll(
+    JNIEnv* env,
+    jclass cls ATTRIBUTE_UNUSED) {
+  ScopedObjectAccess soa(env);
+  ScopedThreadSuspension sts(Thread::Current(), kWaitingForDeoptimization);
+  gc::ScopedGCCriticalSection gcs(Thread::Current(),
+                                  gc::kGcCauseInstrumentation,
+                                  gc::kCollectorTypeInstrumentation);
+  // We need to suspend mutator threads first.
+  ScopedSuspendAll ssa(__FUNCTION__);
+  static bool first = true;
+  if (first) {
+    // We need to enable deoptimization once in order to call DeoptimizeEverything().
+    Runtime::Current()->GetInstrumentation()->EnableDeoptimization();
+    first = false;
+  }
+  Runtime::Current()->GetInstrumentation()->DeoptimizeEverything("test");
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_undeoptimizeAll(
+    JNIEnv* env,
+    jclass cls ATTRIBUTE_UNUSED) {
+  ScopedObjectAccess soa(env);
+  ScopedThreadSuspension sts(Thread::Current(), kWaitingForDeoptimization);
+  gc::ScopedGCCriticalSection gcs(Thread::Current(),
+                                  gc::kGcCauseInstrumentation,
+                                  gc::kCollectorTypeInstrumentation);
+  // We need to suspend mutator threads first.
+  ScopedSuspendAll ssa(__FUNCTION__);
+  Runtime::Current()->GetInstrumentation()->UndeoptimizeEverything("test");
+}
+
+}  // namespace art
diff --git a/test/597-deopt-new-string/expected.txt b/test/597-deopt-new-string/expected.txt
new file mode 100644
index 0000000..f993efc
--- /dev/null
+++ b/test/597-deopt-new-string/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Finishing
diff --git a/test/597-deopt-new-string/info.txt b/test/597-deopt-new-string/info.txt
new file mode 100644
index 0000000..1bd1f79
--- /dev/null
+++ b/test/597-deopt-new-string/info.txt
@@ -0,0 +1 @@
+Regression test for b/28555675
diff --git a/test/597-deopt-new-string/run b/test/597-deopt-new-string/run
new file mode 100644
index 0000000..9776ab3
--- /dev/null
+++ b/test/597-deopt-new-string/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We want to run in debuggable mode, which keeps the call into StringFactory.newEmptyString().
+exec ${RUN} -Xcompiler-option --debuggable "${@}"
diff --git a/test/597-deopt-new-string/src/Main.java b/test/597-deopt-new-string/src/Main.java
new file mode 100644
index 0000000..1224e40
--- /dev/null
+++ b/test/597-deopt-new-string/src/Main.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main implements Runnable {
+    static final int numberOfThreads = 2;
+    static final int totalOperations = 40000;
+    static boolean sFlag = false;
+    static volatile boolean done = false;
+    int threadIndex;
+
+    public static native void deoptimizeAll();
+    public static native void undeoptimizeAll();
+
+    Main(int index) {
+        threadIndex = index;
+    }
+
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
+
+        final Thread[] threads = new Thread[numberOfThreads];
+        for (int t = 0; t < threads.length; t++) {
+            threads[t] = new Thread(new Main(t));
+            threads[t].start();
+        }
+        for (Thread t : threads) {
+            t.join();
+        }
+        System.out.println("Finishing");
+    }
+
+    public String $noinline$run0() {
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        char[] arr = {'a', 'b', 'c'};
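+        // On ART, this String constructor compiles into a StringFactory call;
+        // the test tries to deoptimize a thread while it is returning from
+        // such a call (see run() below).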
+        return new String(arr, 0, arr.length);
+    }
+
+    public void run() {
+        if (threadIndex == 0) {
+            // This thread keeps doing deoptimization of all threads.
+            // Hopefully that will trigger one deoptimization when returning from
+            // StringFactory.newEmptyString() in one of the other threads.
+            for (int i = 0; i < totalOperations; ++i) {
+                if (i % 50 == 0) {
+                    deoptimizeAll();
+                }
+                if (i % 50 == 25) {
+                    undeoptimizeAll();
+                }
+            }
+            done = true;
+        } else {
+            // This thread keeps doing new String() from a char array.
+            while (!done) {
+                $noinline$run0();
+            }
+        }
+    }
+}
diff --git a/test/525-checker-arrays-and-fields/expected.txt b/test/598-checker-irreducible-dominance/expected.txt
similarity index 100%
copy from test/525-checker-arrays-and-fields/expected.txt
copy to test/598-checker-irreducible-dominance/expected.txt
diff --git a/test/598-checker-irreducible-dominance/info.txt b/test/598-checker-irreducible-dominance/info.txt
new file mode 100644
index 0000000..8ca4e63
--- /dev/null
+++ b/test/598-checker-irreducible-dominance/info.txt
@@ -0,0 +1,2 @@
+Regression test for HGraphBuilder which would compute wrong dominance information
+in the presence of irreducible loops.
\ No newline at end of file
diff --git a/test/598-checker-irreducible-dominance/smali/IrreducibleLoop.smali b/test/598-checker-irreducible-dominance/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..4d8b515
--- /dev/null
+++ b/test/598-checker-irreducible-dominance/smali/IrreducibleLoop.smali
@@ -0,0 +1,52 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+.super Ljava/lang/Object;
+
+# Test case in which `inner_back_edge` is not dominated by `inner_header` and
+# causes `outer_back_edge` not to be dominated by `outer_header`. HGraphBuilder
+# did not do a fix-point iteration and would miss the path to `outer_back_edge`
+# through `inner_back_edge`, incorrectly labelling the outer loop non-irreducible.
+
+## CHECK-START: int IrreducibleLoop.dominance(int) builder (after)
+## CHECK:         Add irreducible:true
+
+.method public static dominance(I)I
+    .registers 2
+
+    if-eqz p0, :outer_header
+    goto :inner_back_edge
+
+    :outer_header
+    if-eqz p0, :inner_header
+
+    :outer_branch_exit
+    if-eqz p0, :outer_merge
+    return p0
+
+    :inner_header
+    goto :outer_merge
+
+    :inner_back_edge
+    goto :inner_header
+
+    :outer_merge
+    if-eqz p0, :inner_back_edge
+
+    :outer_back_edge
+    add-int/2addr p0, p0
+    goto :outer_header
+
+.end method
diff --git a/test/598-checker-irreducible-dominance/src/Main.java b/test/598-checker-irreducible-dominance/src/Main.java
new file mode 100644
index 0000000..38b2ab4
--- /dev/null
+++ b/test/598-checker-irreducible-dominance/src/Main.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {
+    // Nothing to run. This regression test merely makes sure the smali test
+    // case successfully compiles.
+  }
+}
diff --git a/test/599-checker-irreducible-loop/expected.txt b/test/599-checker-irreducible-loop/expected.txt
new file mode 100644
index 0000000..573541a
--- /dev/null
+++ b/test/599-checker-irreducible-loop/expected.txt
@@ -0,0 +1 @@
+0
diff --git a/test/599-checker-irreducible-loop/info.txt b/test/599-checker-irreducible-loop/info.txt
new file mode 100644
index 0000000..1e0dd02
--- /dev/null
+++ b/test/599-checker-irreducible-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing in the presence of
+an irreducible loop.
diff --git a/test/599-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/599-checker-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..5331fd6
--- /dev/null
+++ b/test/599-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,56 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+## CHECK-START: int IrreducibleLoop.test(int) GVN (before)
+## CHECK-DAG:                     LoadClass loop:none
+## CHECK-DAG:                     LoadClass loop:{{B\d+}} outer_loop:none
+
+## CHECK-START: int IrreducibleLoop.test(int) GVN (after)
+## CHECK-DAG:                     LoadClass loop:none
+## CHECK-DAG:                     LoadClass loop:{{B\d+}} outer_loop:none
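+# The (before) and (after) graphs must be identical: GVN may not merge the
+# LoadClass inside the irreducible loop with the one outside of it.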
+.method public static test(I)I
+   .registers 2
+
+   sget v0, LIrreducibleLoop;->field1:I
+   sput v0, LIrreducibleLoop;->field2:I
+
+   if-eqz p0, :loop_entry
+   goto :exit
+
+   :loop_entry
+   if-eqz p0, :irreducible_loop_entry
+   sget v0, LIrreducibleLoop;->field2:I
+   sput v0, LIrreducibleLoop;->field1:I
+   if-eqz v0, :exit
+   goto :irreducible_other_loop_entry
+
+   :irreducible_loop_entry
+   if-eqz p0, :loop_back_edge
+   :irreducible_other_loop_entry
+   if-eqz v0, :loop_back_edge
+   goto :irreducible_loop_entry
+
+   :loop_back_edge
+   goto :loop_entry
+
+   :exit
+   return v0
+.end method
+
+.field public static field1:I
+.field public static field2:I
diff --git a/test/599-checker-irreducible-loop/src/Main.java b/test/599-checker-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..b47721f
--- /dev/null
+++ b/test/599-checker-irreducible-loop/src/Main.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("test", int.class);
+    Object[] arguments = { 42 };
+    // Invoke the code just for sanity checking.
+    System.out.println(m.invoke(null, arguments));
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 8808a50..11150c2 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -60,4 +60,11 @@
 b/26594149 (8)
 b/27148248
 b/26965384
+b/27799205 (1)
+b/27799205 (2)
+b/27799205 (3)
+b/27799205 (4)
+b/27799205 (5)
+b/27799205 (6)
+b/28187158
 Done!
diff --git a/test/800-smali/smali/b_27799205_1.smali b/test/800-smali/smali/b_27799205_1.smali
new file mode 100644
index 0000000..92bfc80
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_1.smali
@@ -0,0 +1,37 @@
+.class public LB27799205_1;
+.super Ljava/lang/Object;
+
+# A class with an unresolved array type should not fail hard (unless it's a primitive-type access).
+
+.method public static run()V
+.registers 1
+       return-void
+.end method
+
+.method public static test([Ljava/lang/Object;[Ldo/not/resolve/K;Z)V
+.registers 6
+       # Make v0, v1 and v2 null. We'll use v0 as a merge of the inputs, v1 as null, and v2 as 0.
+       const v0, 0
+       const v1, 0
+       const v2, 0
+
+       # Conditional jump so we have a merge point.
+       if-eqz v5, :LabelSelectUnresolved
+
+:LabelSelectResolved
+       move-object v0, v3
+       goto :LabelMerged
+
+:LabelSelectUnresolved
+       move-object v0, v4
+       goto :LabelMerged
+
+:LabelMerged
+       # At this point, v0 will be the unresolved merge.
+
+       # Test aput-object: v0[v2] = v1.
+       aput-object v1, v0, v2
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_27799205_2.smali b/test/800-smali/smali/b_27799205_2.smali
new file mode 100644
index 0000000..e730b1e
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_2.smali
@@ -0,0 +1,37 @@
+.class public LB27799205_2;
+.super Ljava/lang/Object;
+
+# A class with an unresolved array type should not fail hard (unless it's a primitive-type access).
+
+.method public static run()V
+.registers 1
+       return-void
+.end method
+
+.method public static test([Ljava/lang/Object;[Ldo/not/resolve/K;Z)V
+.registers 6
+       # Make v0, v1 and v2 null. We'll use v0 as a merge of the inputs, v1 as null, and v2 as 0.
+       const v0, 0
+       const v1, 0
+       const v2, 0
+
+       # Conditional jump so we have a merge point.
+       if-eqz v5, :LabelSelectUnresolved
+
+:LabelSelectResolved
+       move-object v0, v3
+       goto :LabelMerged
+
+:LabelSelectUnresolved
+       move-object v0, v4
+       goto :LabelMerged
+
+:LabelMerged
+       # At this point, v0 will be the unresolved merge.
+
+       # Test primitive aput: v0[v2] = v1. Expected to fail verification.
+       aput v1, v0, v2
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_27799205_3.smali b/test/800-smali/smali/b_27799205_3.smali
new file mode 100644
index 0000000..1cb025e
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_3.smali
@@ -0,0 +1,39 @@
+.class public LB27799205_3;
+.super Ljava/lang/Object;
+
+# A class with an unresolved array type should not fail hard (unless it's a primitive-type access).
+# Make sure that merging is pro-active.
+
+.method public static run()V
+.registers 1
+       return-void
+.end method
+
+# Use some non-Object, non-array input (non-Object because the merge should be exactly Object).
+.method public static test(Ljava/lang/Integer;[Ldo/not/resolve/K;Z)V
+.registers 6
+       # Make v0, v1 and v2 null. We'll use v0 as a merge of the inputs, v1 as null, and v2 as 0.
+       const v0, 0
+       const v1, 0
+       const v2, 0
+
+       # Conditional jump so we have a merge point.
+       if-eqz v5, :LabelSelectUnresolved
+
+:LabelSelectResolved
+       move-object v0, v3
+       goto :LabelMerged
+
+:LabelSelectUnresolved
+       move-object v0, v4
+       goto :LabelMerged
+
+:LabelMerged
+       # At this point, v0 should be Object.
+
+       # Test aput-object: v0[v2] = v1. Should fail for v0 not being an array.
+       aput-object v1, v0, v2
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_27799205_4.smali b/test/800-smali/smali/b_27799205_4.smali
new file mode 100644
index 0000000..e42951a
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_4.smali
@@ -0,0 +1,39 @@
+.class public LB27799205_4;
+.super Ljava/lang/Object;
+
+# A class with an unresolved array type should not fail hard (unless it's a primitive-type access).
+# Make sure that merging is pro-active.
+
+.method public static run()V
+.registers 1
+       return-void
+.end method
+
+# Use some primitive-type array input.
+.method public static test([I[Ldo/not/resolve/K;Z)V
+.registers 6
+       # Make v0, v1 and v2 null. We'll use v0 as a merge of the inputs, v1 as null, and v2 as 0.
+       const v0, 0
+       const v1, 0
+       const v2, 0
+
+       # Conditional jump so we have a merge point.
+       if-eqz v5, :LabelSelectUnresolved
+
+:LabelSelectResolved
+       move-object v0, v3
+       goto :LabelMerged
+
+:LabelSelectUnresolved
+       move-object v0, v4
+       goto :LabelMerged
+
+:LabelMerged
+       # At this point, v0 should be Object.
+
+       # Test aput-object: v0[v2] = v1. Should fail for v0 not being an array.
+       aput-object v1, v0, v2
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_27799205_5.smali b/test/800-smali/smali/b_27799205_5.smali
new file mode 100644
index 0000000..6c7b183
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_5.smali
@@ -0,0 +1,39 @@
+.class public LB27799205_5;
+.super Ljava/lang/Object;
+
+# A class with an unresolved array type should not fail hard (unless it's a primitive-type access).
+# Make sure that merging is pro-active.
+
+.method public static run()V
+.registers 1
+       return-void
+.end method
+
+# Use some non-resolvable non-array type.
+.method public static test(Ldo/not/resolve/L;[Ldo/not/resolve/K;Z)V
+.registers 6
+       # Make v0, v1 and v2 null. We'll use v0 as a merge of the inputs, v1 as null, and v2 as 0.
+       const v0, 0
+       const v1, 0
+       const v2, 0
+
+       # Conditional jump so we have a merge point.
+       if-eqz v5, :LabelSelectUnresolved
+
+:LabelSelectResolved
+       move-object v0, v3
+       goto :LabelMerged
+
+:LabelSelectUnresolved
+       move-object v0, v4
+       goto :LabelMerged
+
+:LabelMerged
+       # At this point, v0 should be Object.
+
+       # Test aput-object: v0[v2] = v1. Should fail for v0 not being an array.
+       aput-object v1, v0, v2
+
+       return-void
+
+.end method
diff --git a/test/800-smali/smali/b_27799205_6.smali b/test/800-smali/smali/b_27799205_6.smali
new file mode 100644
index 0000000..d0154f7
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_6.smali
@@ -0,0 +1,24 @@
+.class public LB27799205_6;
+.super Ljava/lang/Object;
+
+# A class with an unresolved array type should not fail hard (unless it's a primitive-type access).
+# Make sure that non-merged types still work.
+
+.method public static run()V
+.registers 1
+       return-void
+.end method
+
+# Use some non-resolvable array type.
+.method public static test([Ldo/not/resolve/K;)Ldo/not/resolve/K;
+.registers 3
+       const v0, 0
+       const v1, 0
+       # v2 = p0
+
+       # v0 := v2[v1]
+       aget-object v0, v2, v1
+
+       return-object v0
+
+.end method
diff --git a/test/800-smali/smali/b_27799205_helper.smali b/test/800-smali/smali/b_27799205_helper.smali
new file mode 100644
index 0000000..e6d0985
--- /dev/null
+++ b/test/800-smali/smali/b_27799205_helper.smali
@@ -0,0 +1,47 @@
+.class public LB27799205Helper;
+.super Ljava/lang/Object;
+
+# Helper for B27799205. Reflection tries to resolve all types, which is bad for
+# intentionally unresolved types: it makes it harder to distinguish what kind
+# of error we got.
+
+.method public static run1()V
+.registers 1
+       invoke-static {}, LB27799205_1;->run()V
+
+       return-void
+.end method
+
+.method public static run2()V
+.registers 1
+       invoke-static {}, LB27799205_2;->run()V
+
+       return-void
+.end method
+
+.method public static run3()V
+.registers 1
+       invoke-static {}, LB27799205_3;->run()V
+
+       return-void
+.end method
+
+.method public static run4()V
+.registers 1
+       invoke-static {}, LB27799205_4;->run()V
+
+       return-void
+.end method
+
+.method public static run5()V
+.registers 1
+       invoke-static {}, LB27799205_5;->run()V
+
+       return-void
+.end method
+
+.method public static run6()V
+.registers 1
+       invoke-static {}, LB27799205_6;->run()V
+
+       return-void
+.end method
diff --git a/test/800-smali/smali/b_28187158.smali b/test/800-smali/smali/b_28187158.smali
new file mode 100644
index 0000000..14d5cec
--- /dev/null
+++ b/test/800-smali/smali/b_28187158.smali
@@ -0,0 +1,12 @@
+.class public LB28187158;
+
+# Regression test for iget with wrong classes.
+
+.super Ljava/lang/Object;
+
+.method public static run(Ljava/lang/Integer;)V
+   .registers 2
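+   # System.in is a static field of type Ljava/io/InputStream;, so this
+   # instance iget of a 32-bit value must be rejected by the verifier.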
+   iget v0, p0, Ljava/lang/System;->in:Ljava/io/InputStream;
+   return-void
+.end method
+
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 4e6de46..c883b7f 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -164,6 +164,18 @@
                 null));
         testCases.add(new TestCase("b/26965384", "B26965384", "run", null, new VerifyError(),
                 null));
+        testCases.add(new TestCase("b/27799205 (1)", "B27799205Helper", "run1", null, null, null));
+        testCases.add(new TestCase("b/27799205 (2)", "B27799205Helper", "run2", null,
+                new VerifyError(), null));
+        testCases.add(new TestCase("b/27799205 (3)", "B27799205Helper", "run3", null,
+                new VerifyError(), null));
+        testCases.add(new TestCase("b/27799205 (4)", "B27799205Helper", "run4", null,
+                new VerifyError(), null));
+        testCases.add(new TestCase("b/27799205 (5)", "B27799205Helper", "run5", null,
+                new VerifyError(), null));
+        testCases.add(new TestCase("b/27799205 (6)", "B27799205Helper", "run6", null, null, null));
+        testCases.add(new TestCase("b/28187158", "B28187158", "run", new Object[] { null} ,
+                new VerifyError(), null));
     }
 
     public void runTests() {
diff --git a/test/803-no-super/expected.txt b/test/803-no-super/expected.txt
new file mode 100644
index 0000000..5036991
--- /dev/null
+++ b/test/803-no-super/expected.txt
@@ -0,0 +1,2 @@
+java.lang.ClassNotFoundException: NoSuper1
+Done!
diff --git a/test/803-no-super/info.txt b/test/803-no-super/info.txt
new file mode 100644
index 0000000..0178a44
--- /dev/null
+++ b/test/803-no-super/info.txt
@@ -0,0 +1,3 @@
+Regression test that temp (erroneous) classes don't get conflict tables created.
+
+Obviously needs to run under Dalvik or ART.
diff --git a/test/803-no-super/smali/nosuper1.smali b/test/803-no-super/smali/nosuper1.smali
new file mode 100644
index 0000000..df2eaa5
--- /dev/null
+++ b/test/803-no-super/smali/nosuper1.smali
@@ -0,0 +1,3 @@
+.class public LNoSuper1;
+
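+# The superclass LNoClass is intentionally missing, so loading NoSuper1 fails
+# (expected.txt records the resulting ClassNotFoundException).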
+.super LNoClass;
diff --git a/test/803-no-super/src/Main.java b/test/803-no-super/src/Main.java
new file mode 100644
index 0000000..a07e042
--- /dev/null
+++ b/test/803-no-super/src/Main.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Attempt to load class with no superclass.
+ */
+public class Main {
+    public static void main(String[] args) throws Exception {
+        try {
+            Class<?> c = Class.forName("NoSuper1");
+        } catch (Exception e) {
+            System.out.println(e);
+        }
+        System.out.println("Done!");
+    }
+}
diff --git a/test/955-lambda-smali/build b/test/955-lambda-smali/build
new file mode 100755
index 0000000..14230c2
--- /dev/null
+++ b/test/955-lambda-smali/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
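+# default-methods is a 1.8 language feature; default-build maps it to the
+# matching jack and smali experimental flags (see etc/default-build).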
+./default-build "$@" --experimental default-methods
diff --git a/test/960-default-smali/expected.txt b/test/960-default-smali/expected.txt
index 7671eed..f3db93f 100644
--- a/test/960-default-smali/expected.txt
+++ b/test/960-default-smali/expected.txt
@@ -82,3 +82,19 @@
 J-interface   Greeter.SayHiTwice()='Hi Hi '
 J-virtual           J.SayHiTwice()='Hi Hi '
 End testing for type J
+Testing for type K
+K-interface       Foo.bar()='foobar'
+K-virtual           K.bar()='foobar'
+End testing for type K
+Testing for type L
+L-interface       Foo.bar()='foobar'
+L-virtual           K.bar()='foobar'
+L-virtual           L.bar()='foobar'
+End testing for type L
+Testing for type M
+M-interface       Foo.bar()='BAZ!'
+M-interface     Fooer.bar()='BAZ!'
+M-virtual           K.bar()='BAZ!'
+M-virtual           L.bar()='BAZ!'
+M-virtual           M.bar()='BAZ!'
+End testing for type M
diff --git a/test/960-default-smali/src/Foo.java b/test/960-default-smali/src/Foo.java
new file mode 100644
index 0000000..ed5b35f
--- /dev/null
+++ b/test/960-default-smali/src/Foo.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+interface Foo {
+  public default String bar() {
+    return "foobar";
+  }
+}
diff --git a/test/960-default-smali/src/Fooer.java b/test/960-default-smali/src/Fooer.java
new file mode 100644
index 0000000..d8a5f61
--- /dev/null
+++ b/test/960-default-smali/src/Fooer.java
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+interface Fooer extends Foo {
+  public String bar();
+}
diff --git a/test/960-default-smali/src/K.java b/test/960-default-smali/src/K.java
new file mode 100644
index 0000000..4426be7
--- /dev/null
+++ b/test/960-default-smali/src/K.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class K implements Foo { }
diff --git a/test/960-default-smali/src/L.java b/test/960-default-smali/src/L.java
new file mode 100644
index 0000000..c08ab72
--- /dev/null
+++ b/test/960-default-smali/src/L.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class L extends K { }
diff --git a/test/960-default-smali/src/M.java b/test/960-default-smali/src/M.java
new file mode 100644
index 0000000..affe7e9
--- /dev/null
+++ b/test/960-default-smali/src/M.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class M extends L implements Fooer {
+  public String bar() {
+    return "BAZ!";
+  }
+}
diff --git a/test/960-default-smali/src/classes.xml b/test/960-default-smali/src/classes.xml
index 0aa41f7..f3e50c5 100644
--- a/test/960-default-smali/src/classes.xml
+++ b/test/960-default-smali/src/classes.xml
@@ -81,6 +81,27 @@
       <implements> </implements>
       <methods> </methods>
     </class>
+
+    <class name="K" super="java/lang/Object">
+      <implements>
+        <item>Foo</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="L" super="K">
+      <implements> </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="M" super="L">
+      <implements>
+        <item>Fooer</item>
+      </implements>
+      <methods>
+        <method>bar</method>
+      </methods>
+    </class>
   </classes>
 
   <interfaces>
@@ -123,5 +144,22 @@
         <method type="abstract">GetPlace</method>
       </methods>
     </interface>
+
+    <interface name="Foo" super="java/lang/Object">
+      <implements>
+      </implements>
+      <methods>
+        <method type="default">bar</method>
+      </methods>
+    </interface>
+
+    <interface name="Fooer" super="java/lang/Object">
+      <implements>
+        <item>Foo</item>
+      </implements>
+      <methods>
+        <method type="abstract">bar</method>
+      </methods>
+    </interface>
   </interfaces>
 </data>
diff --git a/test/975-iface-private/build b/test/975-iface-private/build
new file mode 100755
index 0000000..14230c2
--- /dev/null
+++ b/test/975-iface-private/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+./default-build "$@" --experimental default-methods
diff --git a/test/975-iface-private/expected.txt b/test/975-iface-private/expected.txt
new file mode 100644
index 0000000..908a8f2
--- /dev/null
+++ b/test/975-iface-private/expected.txt
@@ -0,0 +1,4 @@
+Saying hi from class
+HELLO!
+Saying hi from interface
+HELLO!
diff --git a/test/975-iface-private/info.txt b/test/975-iface-private/info.txt
new file mode 100644
index 0000000..d5a8d3f
--- /dev/null
+++ b/test/975-iface-private/info.txt
@@ -0,0 +1,5 @@
+Smali-based tests for experimental interface private methods.
+
+This test cannot be run with --jvm.
+
+This test checks that synthetic private methods in interfaces work correctly.
diff --git a/test/975-iface-private/smali/Iface.smali b/test/975-iface-private/smali/Iface.smali
new file mode 100644
index 0000000..a9a44d1
--- /dev/null
+++ b/test/975-iface-private/smali/Iface.smali
@@ -0,0 +1,45 @@
+
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface {
+#   public default void sayHi() {
+#     System.out.println(getHiWords());
+#   }
+#
+#   // Synthetic method
+#   private String getHiWords() {
+#     return "HELLO!";
+#   }
+# }
+
+.class public abstract interface LIface;
+.super Ljava/lang/Object;
+
+.method public sayHi()V
+    .locals 2
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
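+    # getHiWords() is private, so it must be invoked with invoke-direct rather
+    # than invoke-interface.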
+    invoke-direct {p0}, LIface;->getHiWords()Ljava/lang/String;
+    move-result-object v1
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
+.method private synthetic getHiWords()Ljava/lang/String;
+    .locals 1
+    const-string v0, "HELLO!"
+    return-object v0
+.end method
diff --git a/test/975-iface-private/smali/Main.smali b/test/975-iface-private/smali/Main.smali
new file mode 100644
index 0000000..dbde203
--- /dev/null
+++ b/test/975-iface-private/smali/Main.smali
@@ -0,0 +1,71 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# class Main implements Iface {
+#   public static void main(String[] args) {
+#     Main m = new Main();
+#     sayHiMain(m);
+#     sayHiIface(m);
+#   }
+#   public static void sayHiMain(Main m) {
+#     System.out.println("Saying hi from class");
+#     m.sayHi();
+#   }
+#   public static void sayHiIface(Iface m) {
+#     System.out.println("Saying hi from interface");
+#     m.sayHi();
+#   }
+# }
+.class public LMain;
+.super Ljava/lang/Object;
+.implements LIface;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static main([Ljava/lang/String;)V
+    .locals 2
+    new-instance v0, LMain;
+    invoke-direct {v0}, LMain;-><init>()V
+
+    invoke-static {v0}, LMain;->sayHiMain(LMain;)V
+    invoke-static {v0}, LMain;->sayHiIface(LIface;)V
+
+    return-void
+.end method
+
+.method public static sayHiMain(LMain;)V
+    .locals 2
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v1, "Saying hi from class"
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-virtual {p0}, LMain;->sayHi()V
+    return-void
+.end method
+
+.method public static sayHiIface(LIface;)V
+    .locals 2
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v1, "Saying hi from interface"
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-interface {p0}, LIface;->sayHi()V
+    return-void
+.end method
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index e547c72..21f8141 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -41,7 +41,10 @@
   497-inlining-and-class-loader/clear_dex_cache.cc \
   543-env-long-ref/env_long_ref.cc \
   566-polymorphic-inlining/polymorphic_inline.cc \
-  570-checker-osr/osr.cc
+  570-checker-osr/osr.cc \
+  595-profile-saving/profile-saving.cc \
+  596-app-images/app_images.cc \
+  597-deopt-new-string/deopt.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
@@ -90,7 +93,12 @@
     include $(BUILD_SHARED_LIBRARY)
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
-    LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
+    LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
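+    # suffix is "d" for the debug variant (libarttestd); pick the matching
+    # debug or non-debug cflags.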
+    ifeq ($$(suffix),d)
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+    endif
     LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
     LOCAL_IS_HOST_MODULE := true
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 11a38cb..ee651b5 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -37,14 +37,10 @@
   $(DX) \
   $(HOST_OUT_EXECUTABLES)/jasmin \
   $(HOST_OUT_EXECUTABLES)/smali \
-  $(HOST_OUT_EXECUTABLES)/dexmerger
-TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES :=
+  $(HOST_OUT_EXECUTABLES)/dexmerger \
+  $(JACK)
 
-ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
-  TEST_ART_RUN_TEST_DEPENDENCIES += \
-    $(JACK)
-  TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES += setup-jack-server
-endif
+TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES := setup-jack-server
 
 ifeq ($(ART_TEST_DEBUG_GC),true)
   ART_TEST_WITH_STRACE := true
@@ -55,11 +51,6 @@
 define define-build-art-run-test
   dmart_target := $(art_run_tests_dir)/art-run-tests/$(1)/touch
   run_test_options = --build-only
-  ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
-    run_test_options += --build-with-jack
-  else
-    run_test_options += --build-with-javac-dx
-  endif
   ifeq ($(ART_TEST_QUIET),true)
     run_test_options += --quiet
   endif
@@ -244,8 +235,11 @@
         $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(ALL_ADDRESS_SIZES))
 endif
 
+# 147-stripped-dex-fallback isn't supported on device because --strip-dex
+# requires the zip command.
 # 569-checker-pattern-replacement tests behaviour present only on host.
 TEST_ART_BROKEN_TARGET_TESTS := \
+  147-stripped-dex-fallback \
   569-checker-pattern-replacement
 
 ifneq (,$(filter target,$(TARGET_TYPES)))
@@ -296,6 +290,7 @@
 # 529 and 555: b/27784033
 TEST_ART_BROKEN_NO_PREBUILD_TESTS := \
   117-nopatchoat \
+  147-stripped-dex-fallback \
   554-jit-profile-file \
   529-checker-unresolved \
   555-checker-regression-x86const
@@ -384,6 +379,7 @@
 # All these tests check that we have sane behavior if we don't have a patchoat or dex2oat.
 # Therefore we shouldn't run them in situations where we actually don't have these since they
 # explicitly test for them. These all also assume we have an image.
+# 147-stripped-dex-fallback is disabled because it requires --prebuild.
 # 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_FALLBACK_RUN_TESTS := \
   116-nodex2oat \
@@ -392,6 +388,7 @@
   119-noimage-patchoat \
   137-cfi \
   138-duplicate-classes-check2 \
+  147-stripped-dex-fallback \
   554-jit-profile-file
 
 # This test fails without an image.
@@ -566,6 +563,13 @@
 TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS :=
 TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
 
+TEST_ART_BROKEN_NPIC_RUN_TESTS := 596-app-images
+ifneq (,$(filter npictest,$(PICTEST_TYPES)))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      ${COMPILER_TYPES},$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(IMAGE_TYPES),npictest,$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_NPIC_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+endif
+
 # Tests that should fail in the heap poisoning configuration with the Optimizing compiler.
 # 055: Exceeds run time limits due to heap poisoning instrumentation (on ARM and ARM64 devices).
 TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS := \
@@ -670,11 +674,6 @@
   test_groups :=
   uc_host_or_target :=
   jack_classpath :=
-  ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
-    run_test_options += --build-with-jack
-  else
-    run_test_options += --build-with-javac-dx
-  endif
   ifeq ($(ART_TEST_WITH_STRACE),true)
     run_test_options += --strace
   endif
diff --git a/test/etc/default-build b/test/etc/default-build
index 3d84821..962ae38 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -69,10 +69,13 @@
 JACK_EXPERIMENTAL_ARGS["default-methods"]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24"
 JACK_EXPERIMENTAL_ARGS["lambdas"]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24"
 
+declare -A SMALI_EXPERIMENTAL_ARGS
+SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api-level 24"
+
 while true; do
   if [ "x$1" = "x--dx-option" ]; then
     shift
-    option="$1"
+    on="$1"
     DX_FLAGS="${DX_FLAGS} $option"
     shift
   elif [ "x$1" = "x--jvm" ]; then
@@ -110,6 +113,7 @@
 # Add args from the experimental mappings.
 for experiment in ${EXPERIMENTAL}; do
   JACK_ARGS="${JACK_ARGS} ${JACK_EXPERIMENTAL_ARGS[${experiment}]}"
+  SMALI_ARGS="${SMALI_ARGS} ${SMALI_EXPERIMENTAL_ARGS[${experiment}]}"
 done
 
 if [ -e classes.dex ]; then
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index d13d990..aa45d40 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -37,6 +37,7 @@
 PREBUILD="y"
 QUIET="n"
 RELOCATE="y"
+STRIP_DEX="n"
 SECONDARY_DEX=""
 TIME_OUT="gdb"  # "n" (disabled), "timeout" (use timeout), "gdb" (use gdb)
 # Value in seconds
@@ -118,6 +119,9 @@
     elif [ "x$1" = "x--prebuild" ]; then
         PREBUILD="y"
         shift
+    elif [ "x$1" = "x--strip-dex" ]; then
+        STRIP_DEX="y"
+        shift
     elif [ "x$1" = "x--host" ]; then
         HOST="y"
         ANDROID_ROOT="$ANDROID_HOST_OUT"
@@ -319,11 +323,14 @@
 if [ "$INTERPRETER" = "y" ]; then
     INT_OPTS="-Xint"
     if [ "$VERIFY" = "y" ] ; then
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=interpret-only"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only"
     elif [ "$VERIFY" = "s" ]; then
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-at-runtime"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-at-runtime"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:softfail"
     else # VERIFY = "n"
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:none"
     fi
@@ -332,18 +339,12 @@
 if [ "$JIT" = "y" ]; then
     INT_OPTS="-Xusejit:true"
     if [ "$VERIFY" = "y" ] ; then
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-at-runtime"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-at-runtime"
-      if [ "$PREBUILD" = "n" ]; then
-        # Make sure that if we have noprebuild we still JIT as DexClassLoader will
-        # try to compile the dex file.
-        INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-at-runtime"
-      fi
     else
+      INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none"
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:none"
-      if [ "$PREBUILD" = "n" ]; then
-        INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none"
-      fi
     fi
 fi
 
@@ -358,7 +359,7 @@
         # in 512 byte blocks and set it as the ulimit. This should be more than enough
         # room.
         if [ ! `uname` = "Darwin" ]; then  # TODO: Darwin doesn't support "du -B..."
-          ulimit -S $(du -c -B512 ${ANDROID_HOST_OUT}/framework | tail -1 | cut -f1) || exit 1
+          ulimit -S $(du -c -B512 ${ANDROID_HOST_OUT}/framework 2>/dev/null | tail -1 | cut -f1) || exit 1
         fi
     fi
 else
@@ -380,15 +381,18 @@
 
 dex2oat_cmdline="true"
 mkdir_cmdline="mkdir -p ${DEX_LOCATION}/dalvik-cache/$ISA"
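+# "true" is a no-op placeholder; it is replaced below when --strip-dex is given.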
+strip_cmdline="true"
 
-app_image="--app-image-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.art | cut -d/ -f 2- | sed "s:/:@:g")"
+# Pick a base that will force the app image to get relocated.
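+# 0x4000 is a deliberately unusable low address: the image cannot be mapped at
+# its requested base, so the runtime has to relocate it.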
+app_image="--base=0x4000 --app-image-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.art"
 
 if [ "$PREBUILD" = "y" ]; then
+  mkdir_cmdline="${mkdir_cmdline} && mkdir -p ${DEX_LOCATION}/oat/$ISA"
   dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/dex2oatd \
                       $COMPILE_FLAGS \
                       --boot-image=${BOOT_IMAGE} \
                       --dex-file=$DEX_LOCATION/$TEST_NAME.jar \
-                      --oat-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.dex | cut -d/ -f 2- | sed "s:/:@:g") \
+                      --oat-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.odex \
                       ${app_image} \
                       --instruction-set=$ISA"
   if [ "x$INSTRUCTION_SET_FEATURES" != "x" ] ; then
@@ -407,6 +411,10 @@
   fi
 fi
 
+if [ "$STRIP_DEX" = "y" ]; then
+  strip_cmdline="zip --quiet --delete $DEX_LOCATION/$TEST_NAME.jar classes.dex"
+fi
+
 DALVIKVM_ISA_FEATURES_ARGS=""
 if [ "x$INSTRUCTION_SET_FEATURES" != "x" ] ; then
   DALVIKVM_ISA_FEATURES_ARGS="-Xcompiler-option --instruction-set-features=${INSTRUCTION_SET_FEATURES}"
@@ -465,17 +473,21 @@
       LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBRARY_DIRECTORY
     fi
 
+    PUBLIC_LIBS=libart.so:libartd.so
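+    # Exported below as ANDROID_ADDITIONAL_PUBLIC_LIBRARIES so that the device
+    # linker treats the ART libraries as public and the test can load them.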
+
     # Create a script with the command. The command can get longer than the longest
     # allowed adb command and there is no way to get the exit status from an adb shell
     # command.
     cmdline="cd $DEX_LOCATION && \
              export ANDROID_DATA=$DEX_LOCATION && \
+             export ANDROID_ADDITIONAL_PUBLIC_LIBRARIES=$PUBLIC_LIBS && \
              export DEX_LOCATION=$DEX_LOCATION && \
              export ANDROID_ROOT=$ANDROID_ROOT && \
              $mkdir_cmdline && \
              export LD_LIBRARY_PATH=$LD_LIBRARY_PATH && \
              export PATH=$ANDROID_ROOT/bin:$PATH && \
              $dex2oat_cmdline && \
+             $strip_cmdline && \
              $dalvikvm_cmdline"
 
     cmdfile=$(tempfile -p "cmd-" -s "-$TEST_NAME")
@@ -546,13 +558,7 @@
     fi
 
     if [ "$DEV_MODE" = "y" ]; then
-      if [ "$PREBUILD" = "y" ]; then
-        echo "$mkdir_cmdline && $dex2oat_cmdline && $cmdline"
-      elif [ "$RELOCATE" = "y" ]; then
-        echo "$mkdir_cmdline && $cmdline"
-      else
-        echo $cmdline
-      fi
+      echo "$mkdir_cmdline && $dex2oat_cmdline && $strip_cmdline && $cmdline"
     fi
 
     cd $ANDROID_BUILD_TOP
@@ -560,6 +566,7 @@
     rm -rf ${DEX_LOCATION}/dalvik-cache/
     $mkdir_cmdline || exit 1
     $dex2oat_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
+    $strip_cmdline || { echo "Strip failed." >&2 ; exit 3; }
 
     # For running, we must turn off logging when dex2oat or patchoat are missing. Otherwise we use
     # the same defaults as for prebuilt: everything when --dev, otherwise errors and above only.
diff --git a/test/run-test b/test/run-test
index 01464cd..2710ea3 100755
--- a/test/run-test
+++ b/test/run-test
@@ -46,7 +46,7 @@
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
 export USE_JACK="true"
-export SMALI_ARGS="--experimental --api-level 23"
+export SMALI_ARGS="--experimental"
 
 # If dx was not set by the environment variable, assume it is in the path.
 if [ -z "$DX" ]; then
@@ -122,10 +122,12 @@
 have_dex2oat="yes"
 have_patchoat="yes"
 have_image="yes"
-image_suffix=""
 pic_image_suffix=""
 multi_image_suffix=""
 android_root="/system"
+# By default we will use optimizing.
+image_args=""
+image_suffix="-optimizing"
 
 while true; do
     if [ "x$1" = "x--host" ]; then
@@ -148,6 +150,7 @@
     elif [ "x$1" = "x--jvm" ]; then
         target_mode="no"
         runtime="jvm"
+        image_args=""
         prebuild_mode="no"
         NEED_DEX="false"
         USE_JACK="false"
@@ -190,6 +193,9 @@
         run_args="${run_args} --prebuild"
         prebuild_mode="yes"
         shift;
+    elif [ "x$1" = "x--strip-dex" ]; then
+        run_args="${run_args} --strip-dex"
+        shift;
     elif [ "x$1" = "x--debuggable" ]; then
         run_args="${run_args} -Xcompiler-option --debuggable"
         debuggable="yes"
@@ -241,22 +247,22 @@
         run_args="${run_args} --zygote"
         shift
     elif [ "x$1" = "x--interpreter" ]; then
-        run_args="${run_args} --interpreter --runtime-option -XOatFileManagerCompilerFilter:verify-at-runtime"
+        run_args="${run_args} --interpreter"
         image_suffix="-interpreter"
         shift
     elif [ "x$1" = "x--jit" ]; then
-        run_args="${run_args} --jit --runtime-option -XOatFileManagerCompilerFilter:verify-at-runtime"
+        image_args="--jit"
         image_suffix="-jit"
         shift
     elif [ "x$1" = "x--optimizing" ]; then
-        run_args="${run_args} -Xcompiler-option --compiler-backend=Optimizing"
+        image_args="-Xcompiler-option --compiler-backend=Optimizing"
         image_suffix="-optimizing"
         shift
     elif [ "x$1" = "x--no-verify" ]; then
-        run_args="${run_args} --no-verify --runtime-option -XOatFileManagerCompilerFilter:verify-none"
+        run_args="${run_args} --no-verify"
         shift
     elif [ "x$1" = "x--verify-soft-fail" ]; then
-        run_args="${run_args} --verify-soft-fail --runtime-option -XOatFileManagerCompilerFilter:verify-at-runtime"
+        image_args="--verify-soft-fail"
         image_suffix="-interp-ac"
         shift
     elif [ "x$1" = "x--no-optimize" ]; then
@@ -345,6 +351,7 @@
     fi
 done
 
+run_args="${run_args} ${image_args}"
 # Allocate file descriptor real_stderr and redirect it to the shell's error
 # output (fd 2).
 if [ ${BASH_VERSINFO[1]} -ge 4 ] && [ ${BASH_VERSINFO[2]} -ge 1 ]; then
@@ -449,7 +456,7 @@
     if [ "$target_mode" = "no" ]; then
         framework="${ANDROID_PRODUCT_OUT}/system/framework"
         bpath="${framework}/core-libart.jar:${framework}/core-oj.jar:${framework}/conscrypt.jar:${framework}/okhttp.jar:${framework}/bouncycastle.jar:${framework}/ext.jar"
-        run_args="${run_args} --boot -Xbootclasspath:${bpath}"
+        run_args="${run_args} --boot --runtime-option -Xbootclasspath:${bpath}"
     else
         true # defaults to using target BOOTCLASSPATH
     fi
@@ -464,7 +471,7 @@
         run_args="${run_args} --runtime-option -Djava.library.path=${ANDROID_HOST_OUT}/lib${suffix64}"
     else
         guess_target_arch_name
-        run_args="${run_args} --runtime-option -Djava.library.path=/data/art-test/${target_arch_name}"
+        run_args="${run_args} --runtime-option -Djava.library.path=/data/art-test/${target_arch_name}:/system/lib${suffix64}"
         run_args="${run_args} --boot /data/art-test/core${image_suffix}${pic_image_suffix}${multi_image_suffix}.art"
     fi
     if [ "$relocate" = "yes" ]; then
@@ -571,6 +578,7 @@
         echo "    --prebuild            Run dex2oat on the files before starting test. (default)"
         echo "    --no-prebuild         Do not run dex2oat on the files before starting"
         echo "                          the test."
+        echo "    --strip-dex           Strip the dex files before starting test."
         echo "    --relocate            Force the use of relocating in the test, making"
         echo "                          the image and oat files be relocated to a random"
         echo "                          address before running. (default)"
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index d9b26bc..0cd77ab 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -77,7 +77,12 @@
  * Instance.isRoot and Instance.getRootTypes.
 
 Release History:
- 0.4 Pending
+ 0.6 Pending
+
+ 0.5 Apr 19, 2016
+   Update perflib to perflib-25.0.0 to improve processing performance.
+
+ 0.4 Feb 23, 2016
    Annotate char[] objects with their string values.
    Show registered native allocations for heap dumps that support it.
 
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index 2adec6f..d088e8c 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -25,8 +25,8 @@
 import com.android.tools.perflib.heap.StackFrame;
 import com.android.tools.perflib.heap.StackTrace;
 import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
-import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
+import gnu.trove.TObjectProcedure;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -85,49 +85,59 @@
 
     ClassObj javaLangClass = mSnapshot.findClass("java.lang.Class");
     for (Heap heap : mHeaps) {
-      long total = 0;
-      for (Instance inst : Iterables.concat(heap.getClasses(), heap.getInstances())) {
-        Instance dominator = inst.getImmediateDominator();
-        if (dominator != null) {
-          total += inst.getSize();
+      // Use a single element array for the total to act as a reference to a
+      // long.
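+      // (Locals captured by the anonymous class below must be final, so a
+      // plain long could not be updated from inside execute().)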
+      final long[] total = new long[]{0};
+      TObjectProcedure<Instance> processInstance = new TObjectProcedure<Instance>() {
+        @Override
+        public boolean execute(Instance inst) {
+          Instance dominator = inst.getImmediateDominator();
+          if (dominator != null) {
+            total[0] += inst.getSize();
 
-          if (dominator == Snapshot.SENTINEL_ROOT) {
-            mRooted.add(inst);
-          }
+            if (dominator == Snapshot.SENTINEL_ROOT) {
+              mRooted.add(inst);
+            }
 
-          // Properly label the class of a class object.
-          if (inst instanceof ClassObj && javaLangClass != null && inst.getClassObj() == null) {
-              inst.setClassId(javaLangClass.getId());
-          }
+            // Properly label the class of a class object.
+            if (inst instanceof ClassObj && javaLangClass != null && inst.getClassObj() == null) {
+                inst.setClassId(javaLangClass.getId());
+            }
 
-          // Update dominated instances.
-          List<Instance> instances = mDominated.get(dominator);
-          if (instances == null) {
-            instances = new ArrayList<Instance>();
-            mDominated.put(dominator, instances);
-          }
-          instances.add(inst);
+            // Update dominated instances.
+            List<Instance> instances = mDominated.get(dominator);
+            if (instances == null) {
+              instances = new ArrayList<Instance>();
+              mDominated.put(dominator, instances);
+            }
+            instances.add(inst);
 
-          // Update sites.
-          List<StackFrame> path = Collections.emptyList();
-          StackTrace stack = getStack(inst);
-          int stackId = getStackTraceSerialNumber(stack);
-          if (stack != null) {
-            StackFrame[] frames = getStackFrames(stack);
-            if (frames != null && frames.length > 0) {
-              path = Lists.reverse(Arrays.asList(frames));
+            // Update sites.
+            List<StackFrame> path = Collections.emptyList();
+            StackTrace stack = getStack(inst);
+            int stackId = getStackTraceSerialNumber(stack);
+            if (stack != null) {
+              StackFrame[] frames = getStackFrames(stack);
+              if (frames != null && frames.length > 0) {
+                path = Lists.reverse(Arrays.asList(frames));
+              }
+            }
+            mRootSite.add(stackId, 0, path.iterator(), inst);
+
+            // Update native allocations.
+            InstanceUtils.NativeAllocation alloc = InstanceUtils.getNativeAllocation(inst);
+            if (alloc != null) {
+              mNativeAllocations.add(alloc);
             }
           }
-          mRootSite.add(stackId, 0, path.iterator(), inst);
-
-          // Update native allocations.
-          InstanceUtils.NativeAllocation alloc = InstanceUtils.getNativeAllocation(inst);
-          if (alloc != null) {
-            mNativeAllocations.add(alloc);
-          }
+          return true;
         }
+      };
+      for (Instance instance : heap.getClasses()) {
+        processInstance.execute(instance);
       }
-      mHeapSizes.put(heap, total);
+      heap.forEachInstance(processInstance);
+      mHeapSizes.put(heap, total[0]);
     }
 
     // Record the roots and their types.
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index d7b64e2..8defba2 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -244,8 +244,8 @@
 
     if (inst instanceof ArrayInstance) {
       ArrayInstance array = (ArrayInstance)inst;
-      if (array.getArrayType() == Type.BYTE && inst.getHardReferences().size() == 1) {
-        Instance ref = inst.getHardReferences().get(0);
+      if (array.getArrayType() == Type.BYTE && inst.getHardReverseReferences().size() == 1) {
+        Instance ref = inst.getHardReverseReferences().get(0);
         ClassObj clsref = ref.getClassObj();
         if (clsref != null && "android.graphics.Bitmap".equals(clsref.getClassName())) {
           return ref;
@@ -344,7 +344,7 @@
     }
 
     Instance referent = null;
-    for (Instance ref : inst.getHardReferences()) {
+    for (Instance ref : inst.getHardReverseReferences()) {
       if (isInstanceOfClass(ref, "sun.misc.Cleaner")) {
         referent = InstanceUtils.getReferent(ref);
         if (referent != null) {
diff --git a/tools/ahat/src/ObjectHandler.java b/tools/ahat/src/ObjectHandler.java
index 06023da..4df1be5 100644
--- a/tools/ahat/src/ObjectHandler.java
+++ b/tools/ahat/src/ObjectHandler.java
@@ -160,11 +160,11 @@
   private static void printReferences(
       Doc doc, Query query, AhatSnapshot snapshot, Instance inst) {
     doc.section("Objects with References to this Object");
-    if (inst.getHardReferences().isEmpty()) {
+    if (inst.getHardReverseReferences().isEmpty()) {
       doc.println(DocString.text("(none)"));
     } else {
       doc.table(new Column("Object"));
-      List<Instance> references = inst.getHardReferences();
+      List<Instance> references = inst.getHardReverseReferences();
       SubsetSelector<Instance> selector = new SubsetSelector(query, HARD_REFS_ID, references);
       for (Instance ref : selector.selected()) {
         doc.row(Value.render(snapshot, ref));
@@ -173,10 +173,10 @@
       selector.render(doc);
     }
 
-    if (inst.getSoftReferences() != null) {
+    if (inst.getSoftReverseReferences() != null) {
       doc.section("Objects with Soft References to this Object");
       doc.table(new Column("Object"));
-      List<Instance> references = inst.getSoftReferences();
+      List<Instance> references = inst.getSoftReverseReferences();
       SubsetSelector<Instance> selector = new SubsetSelector(query, SOFT_REFS_ID, references);
       for (Instance ref : selector.selected()) {
         doc.row(Value.render(snapshot, ref));
diff --git a/tools/ahat/test-dump/Main.java b/tools/ahat/test-dump/Main.java
index d61a98d..3936f29 100644
--- a/tools/ahat/test-dump/Main.java
+++ b/tools/ahat/test-dump/Main.java
@@ -50,7 +50,8 @@
         bigArray[i] = (byte)((i*i) & 0xFF);
       }
 
-      NativeAllocationRegistry registry = new NativeAllocationRegistry(0x12345, 42);
+      NativeAllocationRegistry registry = new NativeAllocationRegistry(
+          Main.class.getClassLoader(), 0x12345, 42);
       registry.registerNativeAllocation(anObject, 0xABCDABCD);
     }
   }
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 2eb52bc..304c2a9 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -46,9 +46,14 @@
 done
 
 if [[ $mode == "host" ]]; then
-  make_command="make $j_arg $showcommands build-art-host-tests $common_targets ${out_dir}/host/linux-x86/lib/libjavacoretests.so ${out_dir}/host/linux-x86/lib64/libjavacoretests.so"
+  make_command="make $j_arg $showcommands build-art-host-tests $common_targets"
+  make_command+=" ${out_dir}/host/linux-x86/lib/libjavacoretests.so "
+  make_command+=" ${out_dir}/host/linux-x86/lib64/libjavacoretests.so"
 elif [[ $mode == "target" ]]; then
-  make_command="make $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh ${out_dir}/host/linux-x86/bin/adb libstdc++"
+  make_command="make $j_arg $showcommands build-art-target-tests $common_targets"
+  make_command+=" libjavacrypto libjavacoretests linker toybox toolbox sh"
+  make_command+=" ${out_dir}/host/linux-x86/bin/adb libstdc++ "
+  make_command+=" ${out_dir}/target/product/${TARGET_PRODUCT}/system/etc/public.libraries.txt"
 fi
 
 echo "Executing $make_command"
diff --git a/tools/checker/match/file.py b/tools/checker/match/file.py
index 6ff19d5..520c4ae 100644
--- a/tools/checker/match/file.py
+++ b/tools/checker/match/file.py
@@ -172,8 +172,8 @@
     # match a check group against the first output group of the same name.
     c1Pass = c1File.findPass(testCase.name)
     if c1Pass is None:
-      Logger.fail("Test case \"{}\" not found in the CFG file".format(testCase.name),
-                  testCase.fileName, testCase.startLineNo)
+      Logger.fail("Test case not found in the CFG file",
+                  testCase.fileName, testCase.startLineNo, testCase.name)
 
     Logger.startTest(testCase.name)
     try:
diff --git a/tools/dmtracedump/tracedump.cc b/tools/dmtracedump/tracedump.cc
index f70e2c2..3afee6f 100644
--- a/tools/dmtracedump/tracedump.cc
+++ b/tools/dmtracedump/tracedump.cc
@@ -512,10 +512,10 @@
 void freeDataKeys(DataKeys* pKeys) {
   if (pKeys == nullptr) return;
 
-  free(pKeys->fileData);
-  free(pKeys->threads);
-  free(pKeys->methods);
-  free(pKeys);
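+  // These buffers are allocated with new/new[] (cf. parseKeys), so release
+  // them with delete/delete[] rather than free().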
+  delete[] pKeys->fileData;
+  delete[] pKeys->threads;
+  delete[] pKeys->methods;
+  delete pKeys;
 }
 
 /*
@@ -822,8 +822,8 @@
 DataKeys* parseKeys(FILE* fp, int32_t verbose) {
   int64_t offset;
   DataKeys* pKeys = new DataKeys();
-  memset(pKeys, 0, sizeof(DataKeys));
   if (pKeys == nullptr) return nullptr;
+  memset(pKeys, 0, sizeof(DataKeys));
 
   /*
    * We load the entire file into memory.  We do this, rather than memory-
@@ -865,9 +865,13 @@
     return nullptr;
   }
 
-  /* Reduce our allocation now that we know where the end of the key section is. */
-  pKeys->fileData = reinterpret_cast<char*>(realloc(pKeys->fileData, offset));
-  pKeys->fileLen = offset;
+  /*
+   * Although it is tempting to reduce our allocation now that we know where the
+   * end of the key section is, there is a pitfall. The method names and
+   * signatures in the method list contain pointers into the fileData area.
+   * Realloc or free will result in corruption.
+   */
+
   /* Leave fp pointing to the beginning of the data section. */
   fseek(fp, offset, SEEK_SET);
 
@@ -2607,7 +2611,7 @@
     if (gOptions.graphFileName != nullptr) {
       createInclusiveProfileGraphNew(dataKeys);
     }
-    free(methods);
+    delete[] methods;
   }
 
   freeDataKeys(dataKeys);
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 2533ce2..f25fb98 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -243,30 +243,6 @@
           "org.apache.harmony.tests.java.util.prefs.FilePreferencesImplTest#testPutGet"]
 },
 {
-  description: "libnativehelper_compat_libc++ loading issue",
-  result: EXEC_FAILED,
-  modes: [device],
-  names: ["dalvik.system.JniTest#testGetSuperclass",
-          "dalvik.system.JniTest#testPassingBooleans",
-          "dalvik.system.JniTest#testPassingBytes",
-          "dalvik.system.JniTest#testPassingChars",
-          "dalvik.system.JniTest#testPassingClass",
-          "dalvik.system.JniTest#testPassingDoubles",
-          "dalvik.system.JniTest#testPassingFloats",
-          "dalvik.system.JniTest#testPassingInts",
-          "dalvik.system.JniTest#testPassingLongs",
-          "dalvik.system.JniTest#testPassingObjectReferences",
-          "dalvik.system.JniTest#testPassingShorts",
-          "dalvik.system.JniTest#testPassingThis",
-          "libcore.util.NativeAllocationRegistryTest#testBadSize",
-          "libcore.util.NativeAllocationRegistryTest#testEarlyFree",
-          "libcore.util.NativeAllocationRegistryTest#testNativeAllocationAllocatorAndNoSharedRegistry",
-          "libcore.util.NativeAllocationRegistryTest#testNativeAllocationAllocatorAndSharedRegistry",
-          "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndNoSharedRegistry",
-          "libcore.util.NativeAllocationRegistryTest#testNativeAllocationNoAllocatorAndSharedRegistry",
-          "libcore.util.NativeAllocationRegistryTest#testNullArguments"]
-},
-{
   description: "Only work with --mode=activity",
   result: EXEC_FAILED,
   names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ]
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
index 75d1eff..95f0c2d 100644
--- a/tools/libcore_failures_concurrent_collector.txt
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -16,11 +16,5 @@
   names: ["jsr166.LinkedTransferQueueTest#testTransfer2",
           "jsr166.LinkedTransferQueueTest#testWaitingConsumer"],
   bug: 25883050
-},
-{
-  description: "libcore.java.lang.OldSystemTest#test_gc failure on armv8-concurrent-collector.",
-  result: EXEC_FAILED,
-  names: ["libcore.java.lang.OldSystemTest#test_gc"],
-  bug: 26155567
 }
 ]
diff --git a/tools/public.libraries.buildbot.txt b/tools/public.libraries.buildbot.txt
new file mode 100644
index 0000000..4b01796
--- /dev/null
+++ b/tools/public.libraries.buildbot.txt
@@ -0,0 +1,8 @@
+libart.so
+libartd.so
+libbacktrace.so
+libc.so
+libc++.so
+libdl.so
+libm.so
+libnativehelper.so
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 8422e20..b6a19b7 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -28,18 +28,6 @@
   exit 1
 fi
 
-if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then
-  # For the moment, skip JDWP tests when read barriers are enabled, as
-  # they sometimes exhibit a deadlock issue with the concurrent
-  # copying collector in the read barrier configuration, between the
-  # HeapTaskDeamon and the JDWP thread (b/25800335).
-  #
-  # TODO: Re-enable the JDWP tests when this deadlock issue is fixed.
-  echo "JDWP tests are temporarily disabled in the read barrier configuration because of"
-  echo "a deadlock issue (b/25800335)."
-  exit 0
-fi
-
 art="/data/local/tmp/system/bin/art"
 art_debugee="sh /data/local/tmp/system/bin/art"
 args=$@
@@ -114,9 +102,6 @@
   art_debugee="$art_debugee -verbose:jdwp"
 fi
 
-# Use Jack with "1.8" configuration.
-export JACK_VERSION=`basename prebuilts/sdk/tools/jacks/*ALPHA* | sed 's/^jack-//' | sed 's/.jar$//'`
-
 # Run the tests using vogar.
 vogar $vm_command \
       $vm_args \
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 45fb4b4d..00bb3c5 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -109,7 +109,6 @@
 vogar_args="$vogar_args --timeout 480"
 
 # Use Jack with "1.8" configuration.
-export JACK_VERSION=`basename prebuilts/sdk/tools/jacks/*ALPHA* | sed 's/^jack-//' | sed 's/.jar$//'`
 vogar_args="$vogar_args --toolchain jack --language JN"
 
 # Run the tests using vogar.