Merge "Launch ahat server before processing the heap dump."
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index bd13d16..a679ac2 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -158,6 +158,10 @@
 # Enable warning for unreachable break & return.
 art_clang_cflags += -Wunreachable-code-break -Wunreachable-code-return
 
+# Bug: http://b/29823425  Disable -Wconstant-conversion and
+# -Wundefined-var-template for Clang update to r271374
+art_clang_cflags += -Wno-constant-conversion -Wno-undefined-var-template
+
 # Enable missing-noreturn only on non-Mac. As lots of things are not implemented for Apple, it's
 # a pain.
 ifneq ($(HOST_OS),darwin)
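For context on the warnings silenced above: -Wconstant-conversion fires when a constant is implicitly narrowed to a smaller type. A generic illustration, not taken from the ART tree:

// Generic example of the kind of code -Wconstant-conversion flags; not ART code.
#include <cstdio>

int main() {
  char c = 300;  // clang warns: 300 does not fit in 'char', the stored value becomes 44
  printf("%d\n", c);
  return 0;
}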
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk
index b1644df..e213dc4 100644
--- a/build/Android.common_path.mk
+++ b/build/Android.common_path.mk
@@ -38,7 +38,7 @@
 ifneq ($(TMPDIR),)
 ART_HOST_TEST_DIR := $(TMPDIR)/test-art-$(shell echo $$PPID)
 else
-ART_HOST_TEST_DIR := /tmp/test-art-$(shell echo $$PPID)
+ART_HOST_TEST_DIR := /tmp/$(USER)/test-art-$(shell echo $$PPID)
 endif
 
 # core.oat location on the device.
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 74c3033..7f8fa8e 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -71,7 +71,7 @@
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
-ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS)
+ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS) Statics
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
 ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation
 ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives
@@ -636,7 +636,7 @@
   ifeq ($$(art_target_or_host),target)
     $$(eval $$(call set-target-local-clang-vars))
     $$(eval $$(call set-target-local-cflags-vars,debug))
-    LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixl
+    LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixl-arm64
     LOCAL_MODULE_PATH_32 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
@@ -680,7 +680,7 @@
     LOCAL_CLANG := $$(ART_HOST_CLANG)
     LOCAL_CFLAGS += $$(ART_HOST_CFLAGS) $$(ART_HOST_DEBUG_CFLAGS)
     LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS) $$(ART_HOST_DEBUG_ASFLAGS)
-    LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixl
+    LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixl-arm64
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -lpthread -ldl
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 9b4042c..f05648c 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -462,7 +462,7 @@
 struct XGcOption {
   // These defaults are used when the command line arguments for -Xgc:
   // are either omitted completely or partially.
-  gc::CollectorType collector_type_ =  kUseReadBarrier ?
+  gc::CollectorType collector_type_ = kUseReadBarrier ?
                                            // If RB is enabled (currently a build-time decision),
                                            // use CC as the default GC.
                                            gc::kCollectorTypeCC :
@@ -473,6 +473,7 @@
   bool verify_pre_gc_rosalloc_ = kIsDebugBuild;
   bool verify_pre_sweeping_rosalloc_ = false;
   bool verify_post_gc_rosalloc_ = false;
+  bool measure_ = kIsDebugBuild;
   bool gcstress_ = false;
 };
 
@@ -515,6 +516,8 @@
         xgc.gcstress_ = true;
       } else if (gc_option == "nogcstress") {
         xgc.gcstress_ = false;
+      } else if (gc_option == "measure") {
+        xgc.measure_ = true;
       } else if ((gc_option == "precise") ||
                  (gc_option == "noprecise") ||
                  (gc_option == "verifycardtable") ||
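A rough, standalone sketch of how the new "measure" token is consumed, assuming the surrounding parser splits the -Xgc: value on commas as the existing options do (simplified; not the cmdline_types.h code itself):

// Simplified sketch of -Xgc: option parsing; XGcOption and the comma-splitting
// loop are modeled on the change above, not copied from it.
#include <iostream>
#include <sstream>
#include <string>

struct XGcOption {
  bool measure_ = false;   // defaults to kIsDebugBuild in the real struct
  bool gcstress_ = false;
};

XGcOption ParseXGc(const std::string& spec) {  // e.g. "measure,gcstress"
  XGcOption xgc;
  std::stringstream ss(spec);
  std::string gc_option;
  while (std::getline(ss, gc_option, ',')) {
    if (gc_option == "measure") {
      xgc.measure_ = true;
    } else if (gc_option == "gcstress") {
      xgc.gcstress_ = true;
    } else if (gc_option == "nogcstress") {
      xgc.gcstress_ = false;
    }
  }
  return xgc;
}

int main() {
  std::cout << ParseXGc("measure").measure_ << std::endl;  // prints 1
}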
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 02c176c..e3f8a5c 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -68,6 +68,8 @@
 	optimizing/prepare_for_register_allocation.cc \
 	optimizing/reference_type_propagation.cc \
 	optimizing/register_allocator.cc \
+	optimizing/register_allocation_resolver.cc \
+	optimizing/register_allocator_linear_scan.cc \
 	optimizing/select_generator.cc \
 	optimizing/sharpening.cc \
 	optimizing/side_effects_analysis.cc \
@@ -75,6 +77,7 @@
 	optimizing/ssa_liveness_analysis.cc \
 	optimizing/ssa_phi_elimination.cc \
 	optimizing/stack_map_stream.cc \
+	optimizing/x86_memory_gen.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/assembler.cc \
 	utils/swap_space.cc \
@@ -280,15 +283,15 @@
   # Vixl assembly support for ARM64 targets.
   ifeq ($$(art_ndebug_or_debug),debug)
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_WHOLESTATIC_LIBRARIES += libvixl
+      LOCAL_WHOLESTATIC_LIBRARIES += libvixl-arm64
     else
-      LOCAL_SHARED_LIBRARIES += libvixl
+      LOCAL_SHARED_LIBRARIES += libvixl-arm64
     endif
   else
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_WHOLE_STATIC_LIBRARIES += libvixl
+      LOCAL_WHOLE_STATIC_LIBRARIES += libvixl-arm64
     else
-      LOCAL_SHARED_LIBRARIES += libvixl
+      LOCAL_SHARED_LIBRARIES += libvixl-arm64
     endif
   endif
 
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 672018b..18ebfeb 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -158,7 +158,7 @@
   }
 
   bool WriteElf(File* file,
-                ScopedFd&& zip_fd,
+                File&& zip_fd,
                 const char* location,
                 SafeMap<std::string, std::string>& key_value_store,
                 bool verify) {
@@ -444,7 +444,7 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(20U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(133 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(164 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
@@ -708,8 +708,8 @@
 
   {
     // Test using the AddZipDexFileSource() interface with the zip file handle.
-    ScopedFd zip_fd(dup(zip_file.GetFd()));
-    ASSERT_NE(-1, zip_fd.get());
+    File zip_fd(dup(zip_file.GetFd()), /* check_usage */ false);
+    ASSERT_NE(-1, zip_fd.Fd());
 
     ScratchFile oat_file;
     success = WriteElf(oat_file.GetFile(),
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index cdc7df1..b32199f 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -323,14 +323,14 @@
   DCHECK(write_state_ == WriteState::kAddingDexFileSources);
   uint32_t magic;
   std::string error_msg;
-  ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
-  if (fd.get() == -1) {
+  File fd = OpenAndReadMagic(filename, &magic, &error_msg);
+  if (fd.Fd() == -1) {
     PLOG(ERROR) << "Failed to read magic number from dex file: '" << filename << "'";
     return false;
   } else if (IsDexMagic(magic)) {
     // The file is open for reading, not writing, so it's OK to let the File destructor
     // close it without checking for explicit Close(), so pass checkUsage = false.
-    raw_dex_files_.emplace_back(new File(fd.release(), location, /* checkUsage */ false));
+    raw_dex_files_.emplace_back(new File(fd.Release(), location, /* checkUsage */ false));
     oat_dex_files_.emplace_back(location,
                                 DexFileSource(raw_dex_files_.back().get()),
                                 create_type_lookup_table);
@@ -346,12 +346,12 @@
 }
 
 // Add dex file source(s) from a zip file specified by a file handle.
-bool OatWriter::AddZippedDexFilesSource(ScopedFd&& zip_fd,
+bool OatWriter::AddZippedDexFilesSource(File&& zip_fd,
                                         const char* location,
                                         CreateTypeLookupTable create_type_lookup_table) {
   DCHECK(write_state_ == WriteState::kAddingDexFileSources);
   std::string error_msg;
-  zip_archives_.emplace_back(ZipArchive::OpenFromFd(zip_fd.release(), location, &error_msg));
+  zip_archives_.emplace_back(ZipArchive::OpenFromFd(zip_fd.Release(), location, &error_msg));
   ZipArchive* zip_archive = zip_archives_.back().get();
   if (zip_archive == nullptr) {
     LOG(ERROR) << "Failed to open zip from file descriptor for '" << location << "': "
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index cc81f39..decb7db 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -29,7 +29,6 @@
 #include "oat.h"
 #include "os.h"
 #include "safe_map.h"
-#include "ScopedFd.h"
 #include "utils/array_ref.h"
 
 namespace art {
@@ -132,7 +131,7 @@
       CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
   // Add dex file source(s) from a zip file specified by a file handle.
   bool AddZippedDexFilesSource(
-      ScopedFd&& zip_fd,
+      File&& zip_fd,
       const char* location,
       CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
   // Add dex file source from raw memory.
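The ScopedFd-to-File migration above follows one ownership pattern: dup the descriptor, wrap it with check_usage disabled, and let the callee take ownership via Release(). A self-contained sketch with a stand-in File class (the real art::File does more than this):

#include <fcntl.h>
#include <unistd.h>

// Stand-in for the File wrapper used above, reduced to the calls the diff relies on.
class File {
 public:
  File(int fd, bool check_usage) : fd_(fd) { (void)check_usage; }  // check_usage ignored in this sketch
  ~File() { if (fd_ != -1) close(fd_); }                // safe to auto-close when check_usage is false
  int Fd() const { return fd_; }
  int Release() { int fd = fd_; fd_ = -1; return fd; }  // caller takes ownership
 private:
  int fd_;
};

int main() {
  int raw = open("/dev/null", O_RDONLY);
  File zip_fd(dup(raw), /* check_usage */ false);  // mirrors the oat_test.cc change
  close(raw);
  if (zip_fd.Fd() == -1) return 1;
  int handed_off = zip_fd.Release();  // e.g. what AddZippedDexFilesSource passes on
  close(handed_off);
  return 0;
}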
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 1fc247f..8aefd9e 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -533,9 +533,6 @@
         first_index_bounds_check_map_(
             std::less<int>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
-        dynamic_bce_standby_(
-            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
-        record_dynamic_bce_standby_(true),
         early_exit_loop_(
             std::less<uint32_t>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
@@ -560,14 +557,6 @@
   }
 
   void Finish() {
-    // Retry dynamic bce candidates on standby that are still in the graph.
-    record_dynamic_bce_standby_ = false;
-    for (HBoundsCheck* bounds_check : dynamic_bce_standby_) {
-      if (bounds_check->IsInBlock()) {
-        TryDynamicBCE(bounds_check);
-      }
-    }
-
     // Preserve SSA structure which may have been broken by adding one or more
     // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()).
     InsertPhiNodes();
@@ -576,7 +565,6 @@
     early_exit_loop_.clear();
     taken_test_loop_.clear();
     finite_loop_.clear();
-    dynamic_bce_standby_.clear();
   }
 
  private:
@@ -832,7 +820,6 @@
            array_length->IsArrayLength() ||
            array_length->IsPhi());
     bool try_dynamic_bce = true;
-
     // Analyze index range.
     if (!index->IsIntConstant()) {
       // Non-constant index.
@@ -896,10 +883,20 @@
     // If static analysis fails, and OOB is not certain, try dynamic elimination.
     if (try_dynamic_bce) {
       // Try loop-based dynamic elimination.
-      if (TryDynamicBCE(bounds_check)) {
+      HLoopInformation* loop = bounds_check->GetBlock()->GetLoopInformation();
+      bool needs_finite_test = false;
+      bool needs_taken_test = false;
+      if (DynamicBCESeemsProfitable(loop, bounds_check->GetBlock()) &&
+          induction_range_.CanGenerateCode(
+              bounds_check, index, &needs_finite_test, &needs_taken_test) &&
+          CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
+          // Do this test last, since it may generate code.
+          CanHandleLength(loop, array_length, needs_taken_test)) {
+        TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        TransformLoopForDynamicBCE(loop, bounds_check);
         return;
       }
-      // Prepare dominator-based dynamic elimination.
+      // Otherwise, prepare dominator-based dynamic elimination.
       if (first_index_bounds_check_map_.find(array_length->GetId()) ==
           first_index_bounds_check_map_.end()) {
         // Remember the first bounds check against each array_length. That bounds check
@@ -1180,7 +1177,7 @@
     }
   }
 
-  // Perform dominator-based dynamic elimination on suitable set of bounds checks.
+  /** Performs dominator-based dynamic elimination on suitable set of bounds checks. */
   void AddCompareWithDeoptimization(HBasicBlock* block,
                                     HInstruction* array_length,
                                     HInstruction* base,
@@ -1190,6 +1187,12 @@
     // Construct deoptimization on single or double bounds on range [base-min_c,base+max_c],
     // for example either for a[0]..a[3] just 3 or for a[base-1]..a[base+3] both base-1
     // and base+3, since we made the assumption any in between value may occur too.
+    // In code, using unsigned comparisons:
+    // (1) constants only
+    //       if (max_c >= a.length) deoptimize;
+    // (2) general case
+    //       if (base-min_c >  base+max_c) deoptimize;
+    //       if (base+max_c >= a.length  ) deoptimize;
     static_assert(kMaxLengthForAddingDeoptimize < std::numeric_limits<int32_t>::max(),
                   "Incorrect max length may be subject to arithmetic wrap-around");
     HInstruction* upper = GetGraph()->GetIntConstant(max_c);
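As a hand-written illustration of the deopt guards described in the comment above (not compiler output; deoptimize() stands in for the HDeoptimize fallback, and the arithmetic is done in uint32_t so wrap-around is well defined, as in the generated unsigned comparisons):

#include <cstdint>
#include <cstdio>
#include <cstdlib>

void deoptimize() { puts("deopt"); exit(0); }

void GuardedRegion(const int32_t* a, uint32_t length, int32_t base,
                   int32_t min_c, int32_t max_c) {
  uint32_t lo = static_cast<uint32_t>(base) + static_cast<uint32_t>(min_c);  // base+min_c
  uint32_t hi = static_cast<uint32_t>(base) + static_cast<uint32_t>(max_c);  // base+max_c
  // (2) general case, unsigned comparisons:
  if (lo > hi) deoptimize();       // catches wrap-around / negative lower bound
  if (hi >= length) deoptimize();  // catches an upper bound past a.length
  // Accesses a[base+min_c] .. a[base+max_c] now need no per-access bounds checks.
  printf("%d %d\n", a[lo], a[hi]);
}

int main() {
  int32_t a[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  GuardedRegion(a, 8, 2, -1, 3);  // guards pass: indices 1..5 are in range
  GuardedRegion(a, 8, 6, 0, 3);   // 9 >= 8, so this call deoptimizes
  return 0;
}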
@@ -1208,7 +1211,7 @@
     has_dom_based_dynamic_bce_ = true;
   }
 
-  // Attempt dominator-based dynamic elimination on remaining candidates.
+  /** Attempts dominator-based dynamic elimination on remaining candidates. */
   void AddComparesWithDeoptimization(HBasicBlock* block) {
     for (const auto& entry : first_index_bounds_check_map_) {
       HBoundsCheck* bounds_check = entry.second;
@@ -1272,17 +1275,19 @@
           candidates.push_back(other_bounds_check);
         }
       }
-      // Perform dominator-based deoptimization if it seems profitable. Note that we reject cases
-      // where the distance min_c:max_c range gets close to the maximum possible array length,
-      // since those cases are likely to always deopt (such situations do not necessarily go
-      // OOB, though, since the programmer could rely on wrap-around from max to min).
+      // Perform dominator-based deoptimization if it seems profitable, where we eliminate
+      // bounds checks and replace these with deopt checks that guard against any possible
+      // OOB. Note that we reject cases where the distance min_c:max_c range gets close to
+      // the maximum possible array length, since those cases are likely to always deopt
+      // (such situations do not necessarily go OOB, though, since the array could be really
+      // large, or the programmer could rely on arithmetic wrap-around from max to min).
       size_t threshold = kThresholdForAddingDeoptimize + (base == nullptr ? 0 : 1);  // extra test?
       uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
       if (candidates.size() >= threshold &&
           (base != nullptr || min_c >= 0) &&  // reject certain OOB
            distance <= kMaxLengthForAddingDeoptimize) {  // reject likely/certain deopt
         AddCompareWithDeoptimization(block, array_length, base, min_c, max_c);
-        for (HInstruction* other_bounds_check : candidates) {
+        for (HBoundsCheck* other_bounds_check : candidates) {
           // Only replace if still in the graph. This avoids visiting the same
           // bounds check twice if it occurred multiple times in the use list.
           if (other_bounds_check->IsInBlock()) {
@@ -1328,45 +1333,127 @@
   }
 
   /**
-   * When the compiler fails to remove a bounds check statically, we try to remove the bounds
-   * check dynamically by adding runtime tests that trigger a deoptimization in case bounds
-   * will go out of range (we want to be rather certain of that given the slowdown of
-   * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding
-   * bounds checks and related null checks removed.
+   * Performs loop-based dynamic elimination on a bounds check. In order to minimize the
+   * number of eventually generated tests, related bounds checks with tests that can be
+   * combined with tests for the given bounds check are collected first.
    */
-  bool TryDynamicBCE(HBoundsCheck* instruction) {
-    HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation();
-    HInstruction* index = instruction->InputAt(0);
-    HInstruction* length = instruction->InputAt(1);
-    // If dynamic bounds check elimination seems profitable and is possible, then proceed.
-    bool needs_finite_test = false;
-    bool needs_taken_test = false;
-    if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) &&
-        induction_range_.CanGenerateCode(
-            instruction, index, &needs_finite_test, &needs_taken_test) &&
-        CanHandleInfiniteLoop(loop, instruction, index, needs_finite_test) &&
-        CanHandleLength(loop, length, needs_taken_test)) {  // do this test last (may code gen)
-      HInstruction* lower = nullptr;
-      HInstruction* upper = nullptr;
-      // Generate the following unsigned comparisons
-      //     if (lower > upper)   deoptimize;
-      //     if (upper >= length) deoptimize;
-      // or, for a non-induction index, just the unsigned comparison on its 'upper' value
-      //     if (upper >= length) deoptimize;
-      // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
-      // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests
-      // correctly guard against any possible OOB (including arithmetic wrap-around cases).
-      TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
-      HBasicBlock* block = GetPreHeader(loop, instruction);
-      induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
-      if (lower != nullptr) {
-        InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
+  void TransformLoopForDynamicBCE(HLoopInformation* loop, HBoundsCheck* bounds_check) {
+    HInstruction* index = bounds_check->InputAt(0);
+    HInstruction* array_length = bounds_check->InputAt(1);
+    DCHECK(loop->IsDefinedOutOfTheLoop(array_length));  // pre-checked
+    DCHECK(loop->DominatesAllBackEdges(bounds_check->GetBlock()));
+    // Collect all bounds checks in the same loop that are related as "a[base + constant]"
+    // for a base instruction (possibly absent) and various constants.
+    ValueBound value = ValueBound::AsValueBound(index);
+    HInstruction* base = value.GetInstruction();
+    int32_t min_c = base == nullptr ? 0 : value.GetConstant();
+    int32_t max_c = value.GetConstant();
+    ArenaVector<HBoundsCheck*> candidates(
+        GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+    ArenaVector<HBoundsCheck*> standby(
+        GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+    for (const HUseListNode<HInstruction*>& use : array_length->GetUses()) {
+      HInstruction* user = use.GetUser();
+      if (user->IsBoundsCheck() && loop == user->GetBlock()->GetLoopInformation()) {
+        HBoundsCheck* other_bounds_check = user->AsBoundsCheck();
+        HInstruction* other_index = other_bounds_check->InputAt(0);
+        HInstruction* other_array_length = other_bounds_check->InputAt(1);
+        ValueBound other_value = ValueBound::AsValueBound(other_index);
+        int32_t other_c = other_value.GetConstant();
+        if (array_length == other_array_length && base == other_value.GetInstruction()) {
+          // Does the current basic block dominate all back edges? If not,
+          // add this candidate later only if it falls into the range.
+          if (!loop->DominatesAllBackEdges(user->GetBlock())) {
+            standby.push_back(other_bounds_check);
+            continue;
+          }
+          min_c = std::min(min_c, other_c);
+          max_c = std::max(max_c, other_c);
+          candidates.push_back(other_bounds_check);
+        }
       }
-      InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
-      ReplaceInstruction(instruction, index);
-      return true;
     }
-    return false;
+    // Add standby candidates that fall in selected range.
+    for (HBoundsCheck* other_bounds_check : standby) {
+      HInstruction* other_index = other_bounds_check->InputAt(0);
+      int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+      if (min_c <= other_c && other_c <= max_c) {
+        candidates.push_back(other_bounds_check);
+      }
+    }
+    // Perform loop-based deoptimization if it seems profitable, where we eliminate bounds
+    // checks and replace these with deopt checks that guard against any possible OOB.
+    DCHECK_LT(0u, candidates.size());
+    uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
+    if ((base != nullptr || min_c >= 0) &&  // reject certain OOB
+        distance <= kMaxLengthForAddingDeoptimize) {  // reject likely/certain deopt
+      HBasicBlock* block = GetPreHeader(loop, bounds_check);
+      HInstruction* min_lower = nullptr;
+      HInstruction* min_upper = nullptr;
+      HInstruction* max_lower = nullptr;
+      HInstruction* max_upper = nullptr;
+      // Iterate over all bounds checks.
+      for (HBoundsCheck* other_bounds_check : candidates) {
+        // Only handle if still in the graph. This avoids visiting the same
+        // bounds check twice if it occurred multiple times in the use list.
+        if (other_bounds_check->IsInBlock()) {
+          HInstruction* other_index = other_bounds_check->InputAt(0);
+          int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+          // Generate code for either the maximum or minimum. Range analysis already was queried
+          // whether code generation on the original and, thus, related bounds check was possible.
+          // It handles either loop invariants (lower is not set) or unit strides.
+          if (other_c == max_c) {
+            induction_range_.GenerateRangeCode(
+                other_bounds_check, other_index, GetGraph(), block, &max_lower, &max_upper);
+          } else if (other_c == min_c && base != nullptr) {
+            induction_range_.GenerateRangeCode(
+                other_bounds_check, other_index, GetGraph(), block, &min_lower, &min_upper);
+          }
+          ReplaceInstruction(other_bounds_check, other_index);
+        }
+      }
+      // In code, using unsigned comparisons:
+      // (1) constants only
+      //       if (max_upper >= a.length ) deoptimize;
+      // (2) two symbolic invariants
+      //       if (min_upper >  max_upper) deoptimize;   unless min_c == max_c
+      //       if (max_upper >= a.length ) deoptimize;
+      // (3) general case, unit strides (where lower would exceed upper for arithmetic wrap-around)
+      //       if (min_lower >  max_lower) deoptimize;   unless min_c == max_c
+      //       if (max_lower >  max_upper) deoptimize;
+      //       if (max_upper >= a.length ) deoptimize;
+      if (base == nullptr) {
+        // Constants only.
+        DCHECK_GE(min_c, 0);
+        DCHECK(min_lower == nullptr && min_upper == nullptr &&
+               max_lower == nullptr && max_upper != nullptr);
+      } else if (max_lower == nullptr) {
+        // Two symbolic invariants.
+        if (min_c != max_c) {
+          DCHECK(min_lower == nullptr && min_upper != nullptr &&
+                 max_lower == nullptr && max_upper != nullptr);
+          InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_upper, max_upper));
+        } else {
+          DCHECK(min_lower == nullptr && min_upper == nullptr &&
+                 max_lower == nullptr && max_upper != nullptr);
+        }
+      } else {
+        // General case, unit strides.
+        if (min_c != max_c) {
+          DCHECK(min_lower != nullptr && min_upper != nullptr &&
+                 max_lower != nullptr && max_upper != nullptr);
+          InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_lower, max_lower));
+        } else {
+          DCHECK(min_lower == nullptr && min_upper == nullptr &&
+                 max_lower != nullptr && max_upper != nullptr);
+        }
+        InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(max_lower, max_upper));
+      }
+      InsertDeoptInLoop(
+          loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(max_upper, array_length));
+    } else {
+      // TODO: if rejected, avoid doing this again for subsequent instructions in this set?
+    }
   }
 
   /**
@@ -1474,8 +1561,7 @@
    * of the loop to use, dynamic bce in such cases is only allowed if other tests
    * ensure the loop is finite.
    */
-  bool CanHandleInfiniteLoop(
-      HLoopInformation* loop, HBoundsCheck* check, HInstruction* index, bool needs_infinite_test) {
+  bool CanHandleInfiniteLoop(HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) {
     if (needs_infinite_test) {
       // If we already forced the loop to be finite, allow directly.
       const uint32_t loop_id = loop->GetHeader()->GetBlockId();
@@ -1497,11 +1583,6 @@
           }
         }
       }
-      // If bounds check made it this far, it is worthwhile to check later if
-      // the loop was forced finite by another candidate.
-      if (record_dynamic_bce_standby_) {
-        dynamic_bce_standby_.push_back(check);
-      }
       return false;
     }
     return true;
@@ -1727,10 +1808,6 @@
   // in a block that checks an index against that HArrayLength.
   ArenaSafeMap<int, HBoundsCheck*> first_index_bounds_check_map_;
 
-  // Stand by list for dynamic bce.
-  ArenaVector<HBoundsCheck*> dynamic_bce_standby_;
-  bool record_dynamic_bce_standby_;
-
   // Early-exit loop bookkeeping.
   ArenaSafeMap<uint32_t, bool> early_exit_loop_;
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 4520f9b..3269dc6 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -291,7 +291,8 @@
   DCHECK(!block_order.empty());
   DCHECK(block_order[0] == GetGraph()->GetEntryBlock());
   ComputeSpillMask();
-  first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
+  first_register_slot_in_slow_path_ = RoundUp(
+      (number_of_out_slots + number_of_spill_slots) * kVRegSize, GetPreferredSlotsAlignment());
 
   if (number_of_spill_slots == 0
       && !HasAllocatedCalleeSaveRegisters()
@@ -302,8 +303,7 @@
     SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
   } else {
     SetFrameSize(RoundUp(
-        number_of_spill_slots * kVRegSize
-        + number_of_out_slots * kVRegSize
+        first_register_slot_in_slow_path_
         + maximum_number_of_live_core_registers * GetWordSize()
         + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize()
         + FrameEntrySpillSize(),
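To make the new slow-path slot alignment concrete, here is the arithmetic with made-up example values (the slot counts and the 8-byte alignment are assumptions; the default GetPreferredSlotsAlignment() of 1 leaves the value unchanged):

#include <cstddef>
#include <cstdio>

size_t RoundUp(size_t x, size_t n) { return ((x + n - 1) / n) * n; }  // n > 0

int main() {
  const size_t kVRegSize = 4;
  size_t number_of_out_slots = 3;        // assumed example value
  size_t number_of_spill_slots = 2;      // assumed example value
  size_t preferred_slots_alignment = 8;  // a backend overriding GetPreferredSlotsAlignment()
  size_t first_register_slot_in_slow_path =
      RoundUp((number_of_out_slots + number_of_spill_slots) * kVRegSize,
              preferred_slots_alignment);
  printf("%zu\n", first_register_slot_in_slow_path);  // (3+2)*4 = 20, rounded up to 24
  return 0;
}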
@@ -314,7 +314,8 @@
 void CodeGenerator::CreateCommonInvokeLocationSummary(
     HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor) {
   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
-  LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kCall);
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnMainOnly);
 
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
@@ -378,7 +379,7 @@
 
   ArenaAllocator* allocator = field_access->GetBlock()->GetGraph()->GetArena();
   LocationSummary* locations =
-      new (allocator) LocationSummary(field_access, LocationSummary::kCall);
+      new (allocator) LocationSummary(field_access, LocationSummary::kCallOnMainOnly);
 
   locations->AddTemp(calling_convention.GetFieldIndexLocation());
 
@@ -499,7 +500,7 @@
                                                    bool code_generator_supports_read_barrier) {
   ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena();
   LocationSummary::CallKind call_kind = cls->NeedsAccessCheck()
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) ||
           cls->CanCallRuntime())
             ? LocationSummary::kCallOnSlowPath
@@ -1177,19 +1178,19 @@
         << "instruction->DebugName()=" << instruction->DebugName()
         << " slow_path->GetDescription()=" << slow_path->GetDescription();
     DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
-           // When read barriers are enabled, some instructions use a
-           // slow path to emit a read barrier, which does not trigger
-           // GC, is not fatal, nor is emitted by HDeoptimize
-           // instructions.
+           // When (non-Baker) read barriers are enabled, some instructions
+           // use a slow path to emit a read barrier, which does not trigger
+           // GC.
            (kEmitCompilerReadBarrier &&
+            !kUseBakerReadBarrier &&
             (instruction->IsInstanceFieldGet() ||
              instruction->IsStaticFieldGet() ||
-             instruction->IsArraySet() ||
              instruction->IsArrayGet() ||
              instruction->IsLoadClass() ||
              instruction->IsLoadString() ||
              instruction->IsInstanceOf() ||
-             instruction->IsCheckCast())))
+             instruction->IsCheckCast() ||
+             (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))))
         << "instruction->DebugName()=" << instruction->DebugName()
         << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString()
         << " slow_path->GetDescription()=" << slow_path->GetDescription();
@@ -1203,6 +1204,27 @@
       << instruction->DebugName() << ((slow_path != nullptr) ? slow_path->GetDescription() : "");
 }
 
+void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
+                                                                SlowPathCode* slow_path) {
+  DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath())
+      << "instruction->DebugName()=" << instruction->DebugName()
+      << " slow_path->GetDescription()=" << slow_path->GetDescription();
+  // Only the Baker read barrier marking slow path used by certains
+  // Only the Baker read barrier marking slow path used by certain
+  // instructions is expected to invoke the runtime without recording
+  // PC-related information.
+  DCHECK(kUseBakerReadBarrier);
+  DCHECK(instruction->IsInstanceFieldGet() ||
+         instruction->IsStaticFieldGet() ||
+         instruction->IsArrayGet() ||
+         instruction->IsLoadClass() ||
+         instruction->IsLoadString() ||
+         instruction->IsInstanceOf() ||
+         instruction->IsCheckCast() ||
+         (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))
+      << "instruction->DebugName()=" << instruction->DebugName()
+      << " slow_path->GetDescription()=" << slow_path->GetDescription();
+}
+
 void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
   RegisterSet* live_registers = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 9364be3..2042ade 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -80,7 +80,11 @@
 
   virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
 
+  // Save live core and floating-point caller-save registers and
+  // update the stack mask in `locations` for registers holding object
+  // references.
   virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
+  // Restore live core and floating-point caller-save registers.
   virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
 
   bool IsCoreRegisterSaved(int reg) const {
@@ -211,6 +215,8 @@
                                 size_t maximum_number_of_live_fpu_registers,
                                 size_t number_of_out_slots,
                                 const ArenaVector<HBasicBlock*>& block_order);
+  // Backends can override this as necessary. For most, no special alignment is required.
+  virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }
 
   uint32_t GetFrameSize() const { return frame_size_; }
   void SetFrameSize(uint32_t size) { frame_size_ = size; }
@@ -350,6 +356,16 @@
   // accessing the String's `value` field in String intrinsics.
   static uint32_t GetArrayDataOffset(HArrayGet* array_get);
 
+  // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
+  template <size_t pointer_size>
+  static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
+    DCHECK_LT(reg, 32u);
+    // The ReadBarrierMarkRegX entry points are ordered by increasing
+    // register number in Thread::tls_Ptr_.quick_entrypoints.
+    return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
+        + pointer_size * reg;
+  }
+
   void EmitParallelMoves(Location from1,
                          Location to1,
                          Primitive::Type type1,
@@ -363,8 +379,14 @@
     return type == Primitive::kPrimNot && !value->IsNullConstant();
   }
 
+
+  // Performs checks pertaining to an InvokeRuntime call.

   void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);
 
+  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
+  static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
+                                                          SlowPathCode* slow_path);
+
   void AddAllocatedRegister(Location location) {
     allocated_registers_.Add(location);
   }
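The ReadBarrierMarkRegX offset computation above is just a base offset plus a register-scaled step; a standalone illustration with a hypothetical base offset (the real value comes from QUICK_ENTRYPOINT_OFFSET):

#include <cassert>
#include <cstdint>
#include <cstdio>

// reg00_offset is a made-up stand-in for
// QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value().
int32_t ReadBarrierMarkEntryPointOffset(size_t pointer_size, int32_t reg00_offset, size_t reg) {
  assert(reg < 32u);
  // Entry points are laid out by increasing register number, one pointer apart.
  return reg00_offset + static_cast<int32_t>(pointer_size * reg);
}

int main() {
  // 32-bit pointers, hypothetical Reg00 offset 0x200: register 5 -> 0x200 + 4*5 = 0x214.
  printf("0x%x\n", static_cast<unsigned>(ReadBarrierMarkEntryPointOffset(4u, 0x200, 5u)));
  return 0;
}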
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 690ecc3..124a61f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -316,7 +316,7 @@
                                  instruction_->GetDexPc(),
                                  this);
       CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
       arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
     } else {
       DCHECK(instruction_->IsCheckCast());
@@ -412,8 +412,8 @@
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathARM : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj)
-      : SlowPathCode(instruction), out_(out), obj_(obj) {
+  ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location obj)
+      : SlowPathCode(instruction), obj_(obj) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -421,9 +421,9 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Register reg_out = out_.AsRegister<Register>();
+    Register reg = obj_.AsRegister<Register>();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
@@ -431,30 +431,41 @@
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
-
-    InvokeRuntimeCallingConvention calling_convention;
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
-    arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
-    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
-    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
-    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
-
-    RestoreLiveRegisters(codegen, locations);
+    DCHECK_NE(reg, SP);
+    DCHECK_NE(reg, LR);
+    DCHECK_NE(reg, PC);
+    DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in R0):
+    //
+    //   R0 <- obj
+    //   R0 <- ReadBarrierMark(R0)
+    //   obj <- R0
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmWordSize>(reg);
+    // This runtime call does not require a stack map.
+    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ b(GetExitLabel());
   }
 
  private:
-  const Location out_;
   const Location obj_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
@@ -500,8 +511,7 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
 
@@ -1224,6 +1234,14 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorARM::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                           HInstruction* instruction,
+                                                           SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset);
+  __ blx(LR);
+}
+
 void InstructionCodeGeneratorARM::HandleGoto(HInstruction* got, HBasicBlock* successor) {
   DCHECK(!successor->IsExitBlock());
 
@@ -1917,7 +1935,7 @@
   __ LoadFromOffset(kLoadWord, temp, temp,
         mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex() % ImTable::kSize, kArmPointerSize));
+      invoke->GetImtIndex(), kArmPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   uint32_t entry_point =
@@ -2014,7 +2032,7 @@
       (((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
         && result_type == Primitive::kPrimLong)
        || (input_type == Primitive::kPrimLong && result_type == Primitive::kPrimFloat))
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -2833,13 +2851,13 @@
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   if (div->GetResultType() == Primitive::kPrimLong) {
     // pLdiv runtime call.
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   } else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) {
     // sdiv will be replaced by other instruction sequence.
   } else if (div->GetResultType() == Primitive::kPrimInt &&
              !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
     // pIdivmod runtime call.
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   }
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -2958,7 +2976,7 @@
   Primitive::Type type = rem->GetResultType();
 
   // Most remainders are implemented in the runtime.
-  LocationSummary::CallKind call_kind = LocationSummary::kCall;
+  LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
   if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) {
     // sdiv will be replaced by other instruction sequence.
     call_kind = LocationSummary::kNoCall;
@@ -3495,7 +3513,7 @@
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
   } else {
@@ -3528,7 +3546,7 @@
 
 void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(R0));
@@ -4266,6 +4284,122 @@
   codegen_->GenerateNullCheck(instruction);
 }
 
+static LoadOperandType GetLoadOperandType(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimNot:
+      return kLoadWord;
+    case Primitive::kPrimBoolean:
+      return kLoadUnsignedByte;
+    case Primitive::kPrimByte:
+      return kLoadSignedByte;
+    case Primitive::kPrimChar:
+      return kLoadUnsignedHalfword;
+    case Primitive::kPrimShort:
+      return kLoadSignedHalfword;
+    case Primitive::kPrimInt:
+      return kLoadWord;
+    case Primitive::kPrimLong:
+      return kLoadWordPair;
+    case Primitive::kPrimFloat:
+      return kLoadSWord;
+    case Primitive::kPrimDouble:
+      return kLoadDWord;
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+static StoreOperandType GetStoreOperandType(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimNot:
+      return kStoreWord;
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      return kStoreByte;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      return kStoreHalfword;
+    case Primitive::kPrimInt:
+      return kStoreWord;
+    case Primitive::kPrimLong:
+      return kStoreWordPair;
+    case Primitive::kPrimFloat:
+      return kStoreSWord;
+    case Primitive::kPrimDouble:
+      return kStoreDWord;
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void CodeGeneratorARM::LoadFromShiftedRegOffset(Primitive::Type type,
+                                                Location out_loc,
+                                                Register base,
+                                                Register reg_offset,
+                                                Condition cond) {
+  uint32_t shift_count = Primitive::ComponentSizeShift(type);
+  Address mem_address(base, reg_offset, Shift::LSL, shift_count);
+
+  switch (type) {
+    case Primitive::kPrimByte:
+      __ ldrsb(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimBoolean:
+      __ ldrb(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimShort:
+      __ ldrsh(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimChar:
+      __ ldrh(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+      __ ldr(out_loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void CodeGeneratorARM::StoreToShiftedRegOffset(Primitive::Type type,
+                                               Location loc,
+                                               Register base,
+                                               Register reg_offset,
+                                               Condition cond) {
+  uint32_t shift_count = Primitive::ComponentSizeShift(type);
+  Address mem_address(base, reg_offset, Shift::LSL, shift_count);
+
+  switch (type) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+      __ strb(loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+      __ strh(loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+      __ str(loc.AsRegister<Register>(), mem_address, cond);
+      break;
+    // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
   bool object_array_get_with_read_barrier =
       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
@@ -4300,70 +4434,40 @@
   Location index = locations->InAt(1);
   Location out_loc = locations->Out();
   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
-
   Primitive::Type type = instruction->GetType();
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+
   switch (type) {
-    case Primitive::kPrimBoolean: {
-      Register out = out_loc.AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
-      } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
-        __ LoadFromOffset(kLoadUnsignedByte, out, IP, data_offset);
-      }
-      break;
-    }
-
-    case Primitive::kPrimByte: {
-      Register out = out_loc.AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
-      } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
-        __ LoadFromOffset(kLoadSignedByte, out, IP, data_offset);
-      }
-      break;
-    }
-
-    case Primitive::kPrimShort: {
-      Register out = out_loc.AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
-      } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
-        __ LoadFromOffset(kLoadSignedHalfword, out, IP, data_offset);
-      }
-      break;
-    }
-
-    case Primitive::kPrimChar: {
-      Register out = out_loc.AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
-      } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
-        __ LoadFromOffset(kLoadUnsignedHalfword, out, IP, data_offset);
-      }
-      break;
-    }
-
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
     case Primitive::kPrimInt: {
-      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ LoadFromOffset(kLoadWord, out, obj, offset);
+        int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
+
+        LoadOperandType load_type = GetLoadOperandType(type);
+        __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-        __ LoadFromOffset(kLoadWord, out, IP, data_offset);
+        Register temp = IP;
+
+        if (has_intermediate_address) {
+          // We do not need to compute the intermediate address from the array: the
+          // input instruction has done it already. See the comment in
+          // `TryExtractArrayAccessAddress()`.
+          if (kIsDebugBuild) {
+            HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+            DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+          }
+          temp = obj;
+        } else {
+          __ add(temp, obj, ShifterOperand(data_offset));
+        }
+        codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
       }
       break;
     }
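The refactored path above boils down to folding the data offset into a base register once (or reusing the HIntermediateAddress result) and then addressing each element with an LSL-scaled register offset. In plain arithmetic, under assumed example values:

#include <cstdint>
#include <cstdio>

// Plain-arithmetic model of the addressing used above: the element address is
// base + (index << ComponentSizeShift(type)), i.e. an LSL-scaled register offset.
uint32_t ElementAddress(uint32_t array, uint32_t data_offset,
                        uint32_t index, uint32_t component_size_shift) {
  uint32_t base = array + data_offset;            // __ add(temp, obj, ShifterOperand(data_offset))
  return base + (index << component_size_shift);  // ldr/str with (temp, index, LSL, shift)
}

int main() {
  // int array (shift 2), assumed data offset 12, index 3: 1000 + 12 + 3*4 = 1024.
  printf("%u\n", ElementAddress(1000u, 12u, 3u, 2u));
  return 0;
}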
@@ -4392,8 +4496,22 @@
           // reference, if heap poisoning is enabled).
           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
         } else {
-          __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-          __ LoadFromOffset(kLoadWord, out, IP, data_offset);
+          Register temp = IP;
+
+          if (has_intermediate_address) {
+            // We do not need to compute the intermediate address from the array: the
+            // input instruction has done it already. See the comment in
+            // `TryExtractArrayAccessAddress()`.
+            if (kIsDebugBuild) {
+              HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+              DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+            }
+            temp = obj;
+          } else {
+            __ add(temp, obj, ShifterOperand(data_offset));
+          }
+          codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
+
           codegen_->MaybeRecordImplicitNullCheck(instruction);
           // If read barriers are enabled, emit read barriers other than
           // Baker's using a slow path (and also unpoison the loaded
@@ -4492,54 +4610,68 @@
   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  uint32_t data_offset =
+      mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
+  Location value_loc = locations->InAt(2);
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ StoreToOffset(kStoreByte, value, array, offset);
-      } else {
-        __ add(IP, array, ShifterOperand(index.AsRegister<Register>()));
-        __ StoreToOffset(kStoreByte, value, IP, data_offset);
-      }
-      break;
-    }
-
+    case Primitive::kPrimByte:
     case Primitive::kPrimShort:
-    case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ StoreToOffset(kStoreHalfword, value, array, offset);
+        int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t full_offset =
+            data_offset + (const_index << Primitive::ComponentSizeShift(value_type));
+        StoreOperandType store_type = GetStoreOperandType(value_type);
+        __ StoreToOffset(store_type, value_loc.AsRegister<Register>(), array, full_offset);
       } else {
-        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
-        __ StoreToOffset(kStoreHalfword, value, IP, data_offset);
+        Register temp = IP;
+
+        if (has_intermediate_address) {
+          // We do not need to compute the intermediate address from the array: the
+          // input instruction has done it already. See the comment in
+          // `TryExtractArrayAccessAddress()`.
+          if (kIsDebugBuild) {
+            HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+            DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == data_offset);
+          }
+          temp = array;
+        } else {
+          __ add(temp, array, ShifterOperand(data_offset));
+        }
+        codegen_->StoreToShiftedRegOffset(value_type,
+                                          value_loc,
+                                          temp,
+                                          index.AsRegister<Register>());
       }
       break;
     }
 
     case Primitive::kPrimNot: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Location value_loc = locations->InAt(2);
       Register value = value_loc.AsRegister<Register>();
-      Register source = value;
+      // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
+      // See the comment in instruction_simplifier_shared.cc.
+      DCHECK(!has_intermediate_address);
 
       if (instruction->InputAt(2)->IsNullConstant()) {
         // Just setting null.
         if (index.IsConstant()) {
           size_t offset =
               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          __ StoreToOffset(kStoreWord, source, array, offset);
+          __ StoreToOffset(kStoreWord, value, array, offset);
         } else {
           DCHECK(index.IsRegister()) << index;
-          __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-          __ StoreToOffset(kStoreWord, source, IP, data_offset);
+          __ add(IP, array, ShifterOperand(data_offset));
+          codegen_->StoreToShiftedRegOffset(value_type,
+                                            value_loc,
+                                            IP,
+                                            index.AsRegister<Register>());
         }
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
@@ -4568,8 +4700,11 @@
             __ StoreToOffset(kStoreWord, value, array, offset);
           } else {
             DCHECK(index.IsRegister()) << index;
-            __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-            __ StoreToOffset(kStoreWord, value, IP, data_offset);
+            __ add(IP, array, ShifterOperand(data_offset));
+            codegen_->StoreToShiftedRegOffset(value_type,
+                                              value_loc,
+                                              IP,
+                                              index.AsRegister<Register>());
           }
           codegen_->MaybeRecordImplicitNullCheck(instruction);
           __ b(&done);
@@ -4636,6 +4771,7 @@
         }
       }
 
+      Register source = value;
       if (kPoisonHeapReferences) {
         // Note that in the case where `value` is a null reference,
         // we do not enter this block, as a null reference does not
@@ -4652,8 +4788,12 @@
         __ StoreToOffset(kStoreWord, source, array, offset);
       } else {
         DCHECK(index.IsRegister()) << index;
-        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-        __ StoreToOffset(kStoreWord, source, IP, data_offset);
+
+        __ add(IP, array, ShifterOperand(data_offset));
+        codegen_->StoreToShiftedRegOffset(value_type,
+                                          Location::RegisterLocation(source),
+                                          IP,
+                                          index.AsRegister<Register>());
       }
 
       if (!may_need_runtime_call_for_type_check) {
@@ -4673,23 +4813,7 @@
       break;
     }
 
-    case Primitive::kPrimInt: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ StoreToOffset(kStoreWord, value, array, offset);
-      } else {
-        DCHECK(index.IsRegister()) << index;
-        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-        __ StoreToOffset(kStoreWord, value, IP, data_offset);
-      }
-      break;
-    }
-
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       Location value = locations->InAt(2);
       if (index.IsConstant()) {
         size_t offset =
@@ -4703,7 +4827,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       Location value = locations->InAt(2);
       DCHECK(value.IsFpuRegister());
       if (index.IsConstant()) {
@@ -4717,7 +4840,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       Location value = locations->InAt(2);
       DCHECK(value.IsFpuRegisterPair());
       if (index.IsConstant()) {
@@ -4758,6 +4880,37 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 }
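Aside (not part of the patch): the merged primitive cases above pick a store width from the element type and either fold a constant index into a single immediate offset or add the data offset once and index with a scaled register. A minimal standalone C++ sketch of that address arithmetic; the payload offset and the 16-bit element type are chosen arbitrarily for the demo:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      constexpr uint32_t data_offset = 12;  // assumed payload offset inside the array object
      constexpr int shift = 1;              // log2(sizeof(uint16_t)), i.e. kPrimChar/kPrimShort
      uint8_t array[64] = {};

      // Constant-index path: everything folds into one immediate offset
      // (StoreToOffset(store_type, value, array, full_offset)).
      uint32_t const_index = 3;
      uint32_t full_offset = data_offset + (const_index << shift);
      uint16_t v1 = 0xABCD;
      std::memcpy(array + full_offset, &v1, sizeof(v1));

      // Register-index path: one add to form the intermediate base, then a
      // scaled store (StoreToShiftedRegOffset ~ "strh value, [temp, index, lsl #1]").
      uint32_t index = 5;
      uint8_t* temp = array + data_offset;
      uint16_t v2 = 0x1234;
      std::memcpy(temp + (index << shift), &v2, sizeof(v2));

      printf("offsets: %u and %u\n", full_offset, data_offset + (index << shift));
      return 0;
    }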
 
+void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location out = locations->Out();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
+
+  if (second.IsRegister()) {
+    __ add(out.AsRegister<Register>(),
+           first.AsRegister<Register>(),
+           ShifterOperand(second.AsRegister<Register>()));
+  } else {
+    __ AddConstant(out.AsRegister<Register>(),
+                   first.AsRegister<Register>(),
+                   second.GetConstant()->AsIntConstant()->GetValue());
+  }
+}
+
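For context only: `TryExtractArrayAccessAddress()` (in instruction_simplifier_shared.cc) rewrites the array input of an ArrayGet/ArraySet into an HIntermediateAddress carrying the data offset, so the single `add base, array, #data_offset` emitted by VisitIntermediateAddress above can be reused across accesses. A rough before/after sketch in plain C++; the function names are invented for illustration:

    #include <cstdint>

    // Before simplification: every access re-adds the data offset.
    uint32_t sum_naive(const uint8_t* array, uint32_t data_offset, int n) {
      uint32_t s = 0;
      for (int i = 0; i < n; ++i) {
        s += array[data_offset + i];
      }
      return s;
    }

    // After: the intermediate address (array + data_offset) is formed once and
    // each access only applies the index, which is what the shifted-register
    // loads/stores above consume.
    uint32_t sum_with_intermediate_address(const uint8_t* array, uint32_t data_offset, int n) {
      const uint8_t* base = array + data_offset;  // the HIntermediateAddress value
      uint32_t s = 0;
      for (int i = 0; i < n; ++i) {
        s += base[i];
      }
      return s;
    }

    int main() {
      uint8_t buf[16] = {0, 0, 0, 0, 1, 2, 3, 4};
      return sum_naive(buf, 4, 4) == sum_with_intermediate_address(buf, 4, 4) ? 0 : 1;
    }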
 void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
       ? LocationSummary::kCallOnSlowPath
@@ -5449,7 +5602,7 @@
 
 void LocationsBuilderARM::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -5850,7 +6003,7 @@
 
 void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6174,7 +6327,7 @@
 
       // Slow path used to mark the GC root `root`.
       SlowPathCode* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root);
       codegen_->AddSlowPath(slow_path);
 
       // IP = Thread::Current()->GetIsGcMarking()
@@ -6277,21 +6430,12 @@
   // /* LockWord */ lock_word = LockWord(monitor)
   static_assert(sizeof(LockWord) == sizeof(int32_t),
                 "art::LockWord and int32_t have different sizes.");
-  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
-  __ Lsr(temp_reg, temp_reg, LockWord::kReadBarrierStateShift);
-  __ and_(temp_reg, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
-  static_assert(
-      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
-      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
 
-  // Introduce a dependency on the high bits of rb_state, which shall
-  // be all zeroes, to prevent load-load reordering, and without using
+  // Introduce a dependency on the lock_word including the rb_state,
+  // which shall prevent load-load reordering without using
   // a memory barrier (which would be more expensive).
-  // IP = rb_state & ~LockWord::kReadBarrierStateMask = 0
-  __ bic(IP, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
-  // obj is unchanged by this operation, but its value now depends on
-  // IP, which depends on temp_reg.
-  __ add(obj, obj, ShifterOperand(IP));
+  // obj is unchanged by this operation, but its value now depends on temp_reg.
+  __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
 
   // The actual reference load.
   if (index.IsValid()) {
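Aside, not from the patch: the `LSR #32` operand above makes the added value provably zero while still creating an address dependency on the lock-word load, which is what orders the two loads without a DMB. The snippet below only demonstrates the arithmetic identity (the shift is widened to 64 bits to stay well-defined in C++); the ordering effect itself is an ARM memory-model property that portable code cannot show:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t lock_word = 0xDEADBEEF;  // any loaded lock word
      uint32_t obj = 0x1000;            // any object address bits
      // Equivalent of "add obj, obj, lock_word, LSR #32": the shifted operand is 0.
      uint32_t dep = static_cast<uint32_t>(static_cast<uint64_t>(lock_word) >> 32);
      assert(dep == 0);
      assert(obj + dep == obj);  // obj is unchanged, but now data-depends on lock_word
      return 0;
    }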
@@ -6323,13 +6467,19 @@
 
   // Slow path used to mark the object `ref` when it is gray.
   SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref, ref);
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
   AddSlowPath(slow_path);
 
   // if (rb_state == ReadBarrier::gray_ptr_)
   //   ref = ReadBarrier::Mark(ref);
-  __ cmp(temp_reg, ShifterOperand(ReadBarrier::gray_ptr_));
-  __ b(slow_path->GetEntryLabel(), EQ);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit out of the lock word with LSRS
+  // which can be a 16-bit instruction, unlike the TST immediate.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
+  __ b(slow_path->GetEntryLabel(), CS);  // Carry flag is the last bit shifted out by LSRS.
   __ Bind(slow_path->GetExitLabel());
 }
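Aside: a single LSRS suffices because the carry flag receives the last bit shifted out, i.e. bit kReadBarrierStateShift of the lock word, which is exactly the low bit of the rb_state and is 1 only for gray. A standalone check of that equivalence; the shift value 28 is an assumption made only for this demo, not taken from LockWord:

    #include <cassert>
    #include <cstdint>

    int main() {
      constexpr int kReadBarrierStateShift = 28;  // demo value only
      constexpr uint32_t kWhite = 0, kGray = 1, kBlack = 2;
      const uint32_t states[] = {kWhite, kGray, kBlack};
      for (uint32_t rb_state : states) {
        uint32_t lock_word = (rb_state << kReadBarrierStateShift) | 0x0000ABCDu;
        // "Lsrs temp, lock_word, #(kReadBarrierStateShift + 1)" sets the carry to
        // the last bit shifted out, i.e. bit kReadBarrierStateShift:
        uint32_t carry = (lock_word >> kReadBarrierStateShift) & 1u;
        assert((carry == 1u) == (rb_state == kGray));
      }
      return 0;
    }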
 
@@ -6953,21 +7103,25 @@
 
 void InstructionCodeGeneratorARM::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kArmPointerSize).SizeValue();
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      method_offset);
   } else {
-    __ LoadFromOffset(kLoadWord, locations->Out().AsRegister<Register>(),
-        locations->InAt(0).AsRegister<Register>(),
-        mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kArmPointerSize));
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kArmPointerSize));
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->Out().AsRegister<Register>(),
+                      method_offset);
   }
-  __ LoadFromOffset(kLoadWord,
-                    locations->Out().AsRegister<Register>(),
-                    locations->InAt(0).AsRegister<Register>(),
-                    method_offset);
 }
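Aside: the restructuring above makes the two table kinds read symmetrically: a vtable lookup is a single class-relative load, while an IMT lookup first loads the table pointer and then the entry, so it needs two dependent loads. A toy C++ model of the two shapes; the struct layout is invented for illustration and is not ART's real Class layout:

    #include <cstddef>

    struct FakeMethod { int id; };

    struct FakeClass {
      FakeMethod* embedded_vtable[8];  // vtable entries embedded in the class
      FakeMethod** imt;                // pointer to a separately allocated IMT
    };

    // kVTable: one load at EmbeddedVTableEntryOffset(index).
    FakeMethod* VTableGet(const FakeClass* klass, size_t index) {
      return klass->embedded_vtable[index];
    }

    // kIMTable: load the IMT pointer at ImtPtrOffset, then the entry at
    // OffsetOfElement(index): two dependent loads, as emitted above.
    FakeMethod* ImtGet(const FakeClass* klass, size_t index) {
      FakeMethod** table = klass->imt;
      return table[index];
    }

    int main() {
      FakeMethod m{42};
      FakeMethod* imt_storage[4] = {nullptr, &m, nullptr, nullptr};
      FakeClass klass{};
      klass.embedded_vtable[2] = &m;
      klass.imt = imt_storage;
      return (VTableGet(&klass, 2) == &m && ImtGet(&klass, 1) == &m) ? 0 : 1;
    }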
 
 #undef __
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 477c4f1..05cb8d1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -21,9 +21,9 @@
 #include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
+#include "string_reference.h"
 #include "parallel_move_resolver.h"
 #include "utils/arm/assembler_thumb2.h"
-#include "utils/string_reference.h"
 #include "utils/type_reference.h"
 
 namespace art {
@@ -365,6 +365,24 @@
   // Helper method to move a 64bits value between two locations.
   void Move64(Location destination, Location source);
 
+  void LoadOrStoreToOffset(Primitive::Type type,
+                           Location loc,
+                           Register base,
+                           int32_t offset,
+                           bool is_load,
+                           Condition cond = AL);
+
+  void LoadFromShiftedRegOffset(Primitive::Type type,
+                                Location out_loc,
+                                Register base,
+                                Register reg_offset,
+                                Condition cond = AL);
+  void StoreToShiftedRegOffset(Primitive::Type type,
+                               Location out_loc,
+                               Register base,
+                               Register reg_offset,
+                               Condition cond = AL);
+
   // Generate code to invoke a runtime entry point.
   void InvokeRuntime(QuickEntrypointEnum entrypoint,
                      HInstruction* instruction,
@@ -376,6 +394,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   // Emit a write barrier.
   void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c8d33d5..efeef7b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -33,8 +33,7 @@
 #include "utils/assembler.h"
 #include "utils/stack_checks.h"
 
-
-using namespace vixl;   // NOLINT(build/namespaces)
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
 #ifdef __
 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
@@ -147,20 +146,20 @@
                                          codegen->GetNumberOfFloatingPointRegisters()));
 
   CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize,
-      register_set->GetCoreRegisters() & (~callee_saved_core_registers.list()));
+      register_set->GetCoreRegisters() & (~callee_saved_core_registers.GetList()));
   CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize,
-      register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.list()));
+      register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.GetList()));
 
   MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
 
   Register base = masm->StackPointer();
-  int64_t core_spill_size = core_list.TotalSizeInBytes();
-  int64_t fp_spill_size = fp_list.TotalSizeInBytes();
+  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
+  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
   int64_t reg_size = kXRegSizeInBytes;
   int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
   uint32_t ls_access_size = WhichPowerOf2(reg_size);
-  if (((core_list.Count() > 1) || (fp_list.Count() > 1)) &&
+  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
       !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
     // If the offset does not fit in the instruction's immediate field, use an alternate register
     // to compute the base address(float point registers spill base address).
@@ -411,7 +410,7 @@
     }
   }
 
-  vixl::Label* GetReturnLabel() {
+  vixl::aarch64::Label* GetReturnLabel() {
     DCHECK(successor_ == nullptr);
     return &return_label_;
   }
@@ -427,7 +426,7 @@
   HBasicBlock* const successor_;
 
   // If `successor_` is null, the label to branch to after the suspend check.
-  vixl::Label return_label_;
+  vixl::aarch64::Label return_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
 };
@@ -463,7 +462,7 @@
     if (instruction_->IsInstanceOf()) {
       arm64_codegen->InvokeRuntime(
           QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t,
                            const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
@@ -567,9 +566,9 @@
   __ Bind(&table_start_);
   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
   for (uint32_t i = 0; i < num_entries; i++) {
-    vixl::Label* target_label = codegen->GetLabelOf(successors[i]);
+    vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
     DCHECK(target_label->IsBound());
-    ptrdiff_t jump_offset = target_label->location() - table_start_.location();
+    ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
     Literal<int32_t> literal(jump_offset);
@@ -580,8 +579,8 @@
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj)
-      : SlowPathCodeARM64(instruction), out_(out), obj_(obj) {
+  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location obj)
+      : SlowPathCodeARM64(instruction), obj_(obj) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -589,9 +588,8 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Primitive::Type type = Primitive::kPrimNot;
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(obj_.reg()));
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
@@ -599,30 +597,41 @@
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
-
-    InvokeRuntimeCallingConvention calling_convention;
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
-    arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), obj_, type);
-    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
-                                 instruction_,
-                                 instruction_->GetDexPc(),
-                                 this);
-    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
-    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
-
-    RestoreLiveRegisters(codegen, locations);
+    DCHECK_NE(obj_.reg(), LR);
+    DCHECK_NE(obj_.reg(), WSP);
+    DCHECK_NE(obj_.reg(), WZR);
+    DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in W0):
+    //
+    //   W0 <- obj
+    //   W0 <- ReadBarrierMark(W0)
+    //   obj <- W0
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64WordSize>(obj_.reg());
+    // This runtime call does not require a stack map.
+    arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ B(GetExitLabel());
   }
 
  private:
-  const Location out_;
   const Location obj_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
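Aside on the "compact" slow path: the per-register ReadBarrierMarkRegX entrypoints are assumed here to sit in a table of pointer-sized slots hanging off the thread, so GetReadBarrierMarkEntryPointsOffset(reg) is plain offset arithmetic and the reference never has to be shuffled through W0. A toy model of that layout; the struct is invented for the demo and is not art::Thread:

    #include <cstddef>
    #include <cstdio>

    using MarkFn = void* (*)(void* ref);

    struct FakeThread {
      char unrelated_state[256];                 // whatever precedes the table
      MarkFn read_barrier_mark_entrypoints[32];  // one slot per core register
    };

    // Rough stand-in for GetReadBarrierMarkEntryPointsOffset<kArm64WordSize>(reg).
    size_t EntryPointOffsetForReg(int reg) {
      return offsetof(FakeThread, read_barrier_mark_entrypoints) + reg * sizeof(MarkFn);
    }

    int main() {
      for (int reg = 0; reg < 4; ++reg) {
        std::printf("ReadBarrierMarkReg%d -> thread offset %zu\n", reg, EntryPointOffsetForReg(reg));
      }
      return 0;
    }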
@@ -668,14 +677,12 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
-    // The read barrier instrumentation does not support the
-    // HArm64IntermediateAddress instruction yet.
+    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
     DCHECK(!(instruction_->IsArrayGet() &&
-             instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()));
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
 
@@ -790,8 +797,8 @@
 
  private:
   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
-    size_t ref = static_cast<int>(XRegisterFrom(ref_).code());
-    size_t obj = static_cast<int>(XRegisterFrom(obj_).code());
+    size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
+    size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
         return Register(VIXLRegCodeFromART(i), kXRegSize);
@@ -909,8 +916,8 @@
                     kNumberOfAllocatableRegisters,
                     kNumberOfAllocatableFPRegisters,
                     kNumberOfAllocatableRegisterPairs,
-                    callee_saved_core_registers.list(),
-                    callee_saved_fp_registers.list(),
+                    callee_saved_core_registers.GetList(),
+                    callee_saved_fp_registers.GetList(),
                     compiler_options,
                     stats),
       block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -1060,17 +1067,17 @@
   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
 }
 
-vixl::CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
+CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
-  return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
-                          core_spill_mask_);
+  return CPURegList(CPURegister::kRegister, kXRegSize,
+                    core_spill_mask_);
 }
 
-vixl::CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
+CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
                                          GetNumberOfFloatingPointRegisters()));
-  return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize,
-                          fpu_spill_mask_);
+  return CPURegList(CPURegister::kFPRegister, kDRegSize,
+                    fpu_spill_mask_);
 }
 
 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
@@ -1094,7 +1101,7 @@
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register card = temps.AcquireX();
   Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
-  vixl::Label done;
+  vixl::aarch64::Label done;
   if (value_can_be_null) {
     __ Cbz(value, &done);
   }
@@ -1119,12 +1126,12 @@
   CPURegList reserved_core_registers = vixl_reserved_core_registers;
   reserved_core_registers.Combine(runtime_reserved_core_registers);
   while (!reserved_core_registers.IsEmpty()) {
-    blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true;
+    blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
   }
 
   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
   while (!reserved_fp_registers.IsEmpty()) {
-    blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
+    blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
   }
 
   if (GetGraph()->IsDebuggable()) {
@@ -1133,7 +1140,7 @@
     // now, just block them.
     CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
     while (!reserved_fp_registers_debuggable.IsEmpty()) {
-      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().code()] = true;
+      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
     }
   }
 }
@@ -1344,7 +1351,7 @@
   DCHECK(!src.IsPostIndex());
 
   // TODO(vixl): Let the MacroAssembler handle MemOperand.
-  __ Add(temp_base, src.base(), OperandFromMemOperand(src));
+  __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
   MemOperand base = MemOperand(temp_base);
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -1436,7 +1443,7 @@
 
   // TODO(vixl): Let the MacroAssembler handle this.
   Operand op = OperandFromMemOperand(dst);
-  __ Add(temp_base, dst.base(), op);
+  __ Add(temp_base, dst.GetBaseRegister(), op);
   MemOperand base = MemOperand(temp_base);
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -1489,8 +1496,17 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                             HInstruction* instruction,
+                                                             SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  BlockPoolsScope block_pools(GetVIXLAssembler());
+  __ Ldr(lr, MemOperand(tr, entry_point_offset));
+  __ Blr(lr);
+}
+
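Aside: the helper above just loads a code pointer from a fixed thread offset and branches to it, deliberately skipping RecordPcInfo because such calls (the mark entrypoints, for example) never need a stack map. A self-contained C++ illustration of that call shape; the table and the stub are made up for the demo:

    #include <cstdint>
    #include <cstdio>

    using EntryPoint = void (*)();

    static void MarkStub() { std::puts("entrypoint called"); }

    int main() {
      // Stand-in for the per-thread entrypoint table reachable from `tr`.
      EntryPoint table[4] = {nullptr, &MarkStub, nullptr, nullptr};
      const char* thread_base = reinterpret_cast<const char*>(table);

      int32_t entry_point_offset = 1 * sizeof(EntryPoint);  // ~ Ldr(lr, MemOperand(tr, offset))
      EntryPoint target =
          *reinterpret_cast<const EntryPoint*>(thread_base + entry_point_offset);
      target();                                             // ~ Blr(lr); no PC info recorded
      return 0;
    }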
 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
-                                                                     vixl::Register class_reg) {
+                                                                     Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
@@ -1755,7 +1771,7 @@
         __ Sub(dst, lhs, rhs);
       } else if (instr->IsRor()) {
         if (rhs.IsImmediate()) {
-          uint32_t shift = rhs.immediate() & (lhs.SizeInBits() - 1);
+          uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
           __ Ror(dst, lhs, shift);
         } else {
           // Ensure shift distance is in the same size register as the result. If
@@ -1818,7 +1834,7 @@
       Register lhs = InputRegisterAt(instr, 0);
       Operand rhs = InputOperandAt(instr, 1);
       if (rhs.IsImmediate()) {
-        uint32_t shift_value = rhs.immediate() &
+        uint32_t shift_value = rhs.GetImmediate() &
             (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
         if (instr->IsShl()) {
           __ Lsl(dst, lhs, shift_value);
@@ -1828,7 +1844,7 @@
           __ Lsr(dst, lhs, shift_value);
         }
       } else {
-        Register rhs_reg = dst.IsX() ? rhs.reg().X() : rhs.reg().W();
+        Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
 
         if (instr->IsShl()) {
           __ Lsl(dst, lhs, rhs_reg);
@@ -1965,9 +1981,8 @@
   }
 }
 
-void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the
-  // HArm64IntermediateAddress instruction yet.
+void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
   DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -1976,10 +1991,9 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
-    HArm64IntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the
-  // HArm64IntermediateAddress instruction yet.
+void InstructionCodeGeneratorARM64::VisitIntermediateAddress(
+    HIntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
   DCHECK(!kEmitCompilerReadBarrier);
   __ Add(OutputRegister(instruction),
          InputRegisterAt(instruction, 0),
@@ -2014,13 +2028,14 @@
   if (instr->GetType() == Primitive::kPrimLong &&
       codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
     MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
-    vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize;
+    vixl::aarch64::Instruction* prev =
+        masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
     if (prev->IsLoadOrStore()) {
       // Make sure we emit only exactly one nop.
-      vixl::CodeBufferCheckScope scope(masm,
-                                       vixl::kInstructionSize,
-                                       vixl::CodeBufferCheckScope::kCheck,
-                                       vixl::CodeBufferCheckScope::kExactSize);
+      vixl::aarch64::CodeBufferCheckScope scope(masm,
+                                                kInstructionSize,
+                                                vixl::aarch64::CodeBufferCheckScope::kCheck,
+                                                vixl::aarch64::CodeBufferCheckScope::kExactSize);
       __ nop();
     }
   }
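Aside: the exact-size scope above pins the emitted length so the workaround inserts exactly one nop between a load/store and the following 64-bit multiply-accumulate (Cortex-A53 erratum 835769). A toy model of that decision, with made-up instruction tags:

    #include <cstdio>
    #include <vector>

    enum class Insn { kLoadStore, kNop, kMadd, kOther };

    // Before emitting a 64-bit madd, look at the previously emitted instruction
    // and pad with exactly one nop if it was a load/store.
    void EmitMaddWithA53Workaround(std::vector<Insn>* code) {
      if (!code->empty() && code->back() == Insn::kLoadStore) {
        code->push_back(Insn::kNop);
      }
      code->push_back(Insn::kMadd);
    }

    int main() {
      std::vector<Insn> code = {Insn::kOther, Insn::kLoadStore};
      EmitMaddWithA53Workaround(&code);
      std::printf("emitted %zu instructions\n", code.size());  // 4: other, ldr/str, nop, madd
      return 0;
    }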
@@ -2078,9 +2093,8 @@
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object ArrayGet with Baker's read barrier case.
     Register temp = temps.AcquireW();
-    // The read barrier instrumentation does not support the
-    // HArm64IntermediateAddress instruction yet.
-    DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
+    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
     // Note that a potential implicit null check is handled in the
     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
     codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -2093,15 +2107,15 @@
       source = HeapOperand(obj, offset);
     } else {
       Register temp = temps.AcquireSameSizeAs(obj);
-      if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+      if (instruction->GetArray()->IsIntermediateAddress()) {
         // The read barrier instrumentation does not support the
-        // HArm64IntermediateAddress instruction yet.
+        // HIntermediateAddress instruction yet.
         DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
-        // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+        // `TryExtractArrayAccessAddress()`.
         if (kIsDebugBuild) {
-          HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+          HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
           DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
         }
         temp = obj;
@@ -2185,15 +2199,15 @@
     } else {
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
-      if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+      if (instruction->GetArray()->IsIntermediateAddress()) {
         // The read barrier instrumentation does not support the
-        // HArm64IntermediateAddress instruction yet.
+        // HIntermediateAddress instruction yet.
         DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
-        // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+        // `TryExtractArrayAccessAddress()`.
         if (kIsDebugBuild) {
-          HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+          HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
           DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
         }
         temp = array;
@@ -2209,8 +2223,8 @@
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   } else {
     DCHECK(needs_write_barrier);
-    DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
-    vixl::Label done;
+    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
+    vixl::aarch64::Label done;
     SlowPathCodeARM64* slow_path = nullptr;
     {
       // We use a block to end the scratch scope before the write barrier, thus
@@ -2235,7 +2249,7 @@
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
-          vixl::Label non_zero;
+          vixl::aarch64::Label non_zero;
           __ Cbnz(Register(value), &non_zero);
           if (!index.IsConstant()) {
             __ Add(temp, array, offset);
@@ -2289,7 +2303,7 @@
           __ Cmp(temp, temp2);
 
           if (instruction->StaticTypeOfArrayIsObjectArray()) {
-            vixl::Label do_put;
+            vixl::aarch64::Label do_put;
             __ B(eq, &do_put);
             // If heap poisoning is enabled, the `temp` reference has
             // not been unpoisoned yet; unpoison it now.
@@ -2822,11 +2836,11 @@
 
 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
                                                           size_t condition_input_index,
-                                                          vixl::Label* true_target,
-                                                          vixl::Label* false_target) {
+                                                          vixl::aarch64::Label* true_target,
+                                                          vixl::aarch64::Label* false_target) {
   // FP branching requires both targets to be explicit. If either of the targets
   // is nullptr (fallthrough) use and bind `fallthrough_target` instead.
-  vixl::Label fallthrough_target;
+  vixl::aarch64::Label fallthrough_target;
   HInstruction* cond = instruction->InputAt(condition_input_index);
 
   if (true_target == nullptr && false_target == nullptr) {
@@ -2884,7 +2898,7 @@
       Operand rhs = InputOperandAt(condition, 1);
 
       Condition arm64_cond;
-      vixl::Label* non_fallthrough_target;
+      vixl::aarch64::Label* non_fallthrough_target;
       if (true_target == nullptr) {
         arm64_cond = ARM64Condition(condition->GetOppositeCondition());
         non_fallthrough_target = false_target;
@@ -2894,7 +2908,7 @@
       }
 
       if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
-          rhs.IsImmediate() && (rhs.immediate() == 0)) {
+          rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
         switch (arm64_cond) {
           case eq:
             __ Cbz(lhs, non_fallthrough_target);
@@ -2943,10 +2957,14 @@
 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
-  vixl::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
-      nullptr : codegen_->GetLabelOf(true_successor);
-  vixl::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
-      nullptr : codegen_->GetLabelOf(false_successor);
+  vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
+  if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
+    true_target = nullptr;
+  }
+  vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
+  if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
+    false_target = nullptr;
+  }
   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
@@ -3130,7 +3148,7 @@
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  vixl::Label done, zero;
+  vixl::aarch64::Label done, zero;
   SlowPathCodeARM64* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
@@ -3155,7 +3173,7 @@
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      vixl::Label loop, success;
+      vixl::aarch64::Label loop, success;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
       GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
@@ -3172,7 +3190,7 @@
 
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
-      vixl::Label loop, success;
+      vixl::aarch64::Label loop, success;
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
@@ -3191,7 +3209,7 @@
 
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
-      vixl::Label exact_check;
+      vixl::aarch64::Label exact_check;
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
@@ -3328,7 +3346,7 @@
                                                           is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(type_check_slow_path);
 
-  vixl::Label done;
+  vixl::aarch64::Label done;
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
     __ Cbz(obj, &done);
@@ -3350,7 +3368,7 @@
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      vixl::Label loop, compare_classes;
+      vixl::aarch64::Label loop, compare_classes;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
@@ -3377,7 +3395,7 @@
 
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
-      vixl::Label loop;
+      vixl::aarch64::Label loop;
       __ Bind(&loop);
       __ Cmp(temp, cls);
       __ B(eq, &done);
@@ -3402,7 +3420,7 @@
 
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
-      vixl::Label check_non_primitive_component_type;
+      vixl::aarch64::Label check_non_primitive_component_type;
       __ Cmp(temp, cls);
       __ B(eq, &done);
 
@@ -3538,7 +3556,7 @@
   __ Ldr(temp,
       MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex() % ImTable::kSize, kArm64PointerSize));
+      invoke->GetImtIndex(), kArm64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
@@ -3628,17 +3646,17 @@
       // Add ADRP with its PC-relative DexCache access patch.
       const DexFile& dex_file = *invoke->GetTargetMethod().dex_file;
       uint32_t element_offset = invoke->GetDexCacheArrayOffset();
-      vixl::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
+      vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(adrp_label);
         __ adrp(XRegisterFrom(temp), /* offset placeholder */ 0);
       }
       // Add LDR with its PC-relative DexCache access patch.
-      vixl::Label* ldr_label =
+      vixl::aarch64::Label* ldr_label =
           NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(ldr_label);
         __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), /* offset placeholder */ 0));
       }
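Aside: each ADRP/LDR (or ADRP/ADD) pair above is emitted with placeholder immediates and patched at link time; the split is the usual AArch64 one of a PC-relative 4 KiB page delta plus the low 12 bits. A quick arithmetic check of that split with made-up addresses:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t pc     = 0x0000000071002468ull;  // address of the ADRP (made up)
      const uint64_t target = 0x0000000071345abcull;  // e.g. a dex-cache array slot (made up)

      uint64_t page_delta = (target & ~0xFFFull) - (pc & ~0xFFFull);  // ADRP immediate
      uint64_t low12      = target & 0xFFFull;                        // LDR/ADD immediate

      uint64_t rebuilt = ((pc & ~0xFFFull) + page_delta) | low12;
      std::printf("rebuilt == target: %s\n", rebuilt == target ? "yes" : "no");
      return 0;
    }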
@@ -3675,8 +3693,8 @@
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
       relative_call_patches_.emplace_back(invoke->GetTargetMethod());
-      vixl::Label* label = &relative_call_patches_.back().label;
-      vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+      vixl::aarch64::Label* label = &relative_call_patches_.back().label;
+      SingleEmissionCheckScope guard(GetVIXLAssembler());
       __ Bind(label);
       __ bl(0);  // Branch and link to itself. This will be overriden at link time.
       break;
@@ -3735,58 +3753,64 @@
   __ Blr(lr);
 }
 
-vixl::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                          uint32_t string_index,
-                                                          vixl::Label* adrp_label) {
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
+    const DexFile& dex_file,
+    uint32_t string_index,
+    vixl::aarch64::Label* adrp_label) {
   return NewPcRelativePatch(dex_file, string_index, adrp_label, &pc_relative_string_patches_);
 }
 
-vixl::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(const DexFile& dex_file,
-                                                        uint32_t type_index,
-                                                        vixl::Label* adrp_label) {
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
+    const DexFile& dex_file,
+    uint32_t type_index,
+    vixl::aarch64::Label* adrp_label) {
   return NewPcRelativePatch(dex_file, type_index, adrp_label, &pc_relative_type_patches_);
 }
 
-vixl::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
-                                                                 uint32_t element_offset,
-                                                                 vixl::Label* adrp_label) {
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
+    const DexFile& dex_file,
+    uint32_t element_offset,
+    vixl::aarch64::Label* adrp_label) {
   return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
 }
 
-vixl::Label* CodeGeneratorARM64::NewPcRelativePatch(const DexFile& dex_file,
-                                                    uint32_t offset_or_index,
-                                                    vixl::Label* adrp_label,
-                                                    ArenaDeque<PcRelativePatchInfo>* patches) {
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
+    const DexFile& dex_file,
+    uint32_t offset_or_index,
+    vixl::aarch64::Label* adrp_label,
+    ArenaDeque<PcRelativePatchInfo>* patches) {
   // Add a patch entry and return the label.
   patches->emplace_back(dex_file, offset_or_index);
   PcRelativePatchInfo* info = &patches->back();
-  vixl::Label* label = &info->label;
+  vixl::aarch64::Label* label = &info->label;
   // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
   info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
   return label;
 }
 
-vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
     const DexFile& dex_file, uint32_t string_index) {
   return boot_image_string_patches_.GetOrCreate(
       StringReference(&dex_file, string_index),
       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
 }
 
-vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
     const DexFile& dex_file, uint32_t type_index) {
   return boot_image_type_patches_.GetOrCreate(
       TypeReference(&dex_file, type_index),
       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
 }
 
-vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(uint64_t address) {
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
+    uint64_t address) {
   bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
   Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddressLiteral(uint64_t address) {
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddressLiteral(
+    uint64_t address) {
   return DeduplicateUint64Literal(address);
 }
 
@@ -3805,76 +3829,76 @@
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
-    vixl::Literal<uint64_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::MethodPatch(literal->offset(),
+    vixl::aarch64::Literal<uint64_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal->GetOffset(),
                                                        target_method.dex_file,
                                                        target_method.dex_method_index));
   }
   for (const auto& entry : call_patches_) {
     const MethodReference& target_method = entry.first;
-    vixl::Literal<uint64_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::CodePatch(literal->offset(),
+    vixl::aarch64::Literal<uint64_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::CodePatch(literal->GetOffset(),
                                                      target_method.dex_file,
                                                      target_method.dex_method_index));
   }
-  for (const MethodPatchInfo<vixl::Label>& info : relative_call_patches_) {
-    linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location(),
+  for (const MethodPatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) {
+    linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.GetLocation(),
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
   for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
-    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location(),
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
                                                               &info.target_dex_file,
-                                                              info.pc_insn_label->location(),
+                                                              info.pc_insn_label->GetLocation(),
                                                               info.offset_or_index));
   }
   for (const auto& entry : boot_image_string_patches_) {
     const StringReference& target_string = entry.first;
-    vixl::Literal<uint32_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::StringPatch(literal->offset(),
+    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(),
                                                        target_string.dex_file,
                                                        target_string.string_index));
   }
   for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
-    linker_patches->push_back(LinkerPatch::RelativeStringPatch(info.label.location(),
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(info.label.GetLocation(),
                                                                &info.target_dex_file,
-                                                               info.pc_insn_label->location(),
+                                                               info.pc_insn_label->GetLocation(),
                                                                info.offset_or_index));
   }
   for (const auto& entry : boot_image_type_patches_) {
     const TypeReference& target_type = entry.first;
-    vixl::Literal<uint32_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::TypePatch(literal->offset(),
+    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::TypePatch(literal->GetOffset(),
                                                      target_type.dex_file,
                                                      target_type.type_index));
   }
   for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
-    linker_patches->push_back(LinkerPatch::RelativeTypePatch(info.label.location(),
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(info.label.GetLocation(),
                                                              &info.target_dex_file,
-                                                             info.pc_insn_label->location(),
+                                                             info.pc_insn_label->GetLocation(),
                                                              info.offset_or_index));
   }
   for (const auto& entry : boot_image_address_patches_) {
     DCHECK(GetCompilerOptions().GetIncludePatchInformation());
-    vixl::Literal<uint32_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal->offset()));
+    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal->GetOffset()));
   }
 }
 
-vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
                                                                       Uint32ToLiteralMap* map) {
   return map->GetOrCreate(
       value,
       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
   return uint64_literals_.GetOrCreate(
       value,
       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
     MethodReference target_method,
     MethodToLiteralMap* map) {
   return map->GetOrCreate(
@@ -3882,12 +3906,12 @@
       [this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); });
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodAddressLiteral(
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodAddressLiteral(
     MethodReference target_method) {
   return DeduplicateMethodLiteral(target_method, &method_patches_);
 }
 
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
     MethodReference target_method) {
   return DeduplicateMethodLiteral(target_method, &call_patches_);
 }
@@ -3961,7 +3985,7 @@
     CodeGenerator::CreateLoadClassLocationSummary(
         cls,
         LocationFrom(calling_convention.GetRegisterAt(0)),
-        LocationFrom(vixl::x0),
+        LocationFrom(vixl::aarch64::x0),
         /* code_generator_supports_read_barrier */ true);
     return;
   }
@@ -4013,16 +4037,17 @@
       // Add ADRP with its PC-relative type patch.
       const DexFile& dex_file = cls->GetDexFile();
       uint32_t type_index = cls->GetTypeIndex();
-      vixl::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
+      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(adrp_label);
         __ adrp(out.X(), /* offset placeholder */ 0);
       }
       // Add ADD with its PC-relative type patch.
-      vixl::Label* add_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
+      vixl::aarch64::Label* add_label =
+          codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(add_label);
         __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0));
       }
@@ -4055,14 +4080,15 @@
       // Add ADRP with its PC-relative DexCache access patch.
       const DexFile& dex_file = cls->GetDexFile();
       uint32_t element_offset = cls->GetDexCacheElementOffset();
-      vixl::Label* adrp_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
+      vixl::aarch64::Label* adrp_label =
+          codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(adrp_label);
         __ adrp(out.X(), /* offset placeholder */ 0);
       }
       // Add LDR with its PC-relative DexCache access patch.
-      vixl::Label* ldr_label =
+      vixl::aarch64::Label* ldr_label =
           codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
       GenerateGcRootFieldLoad(cls, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
@@ -4182,17 +4208,17 @@
       // Add ADRP with its PC-relative String patch.
       const DexFile& dex_file = load->GetDexFile();
       uint32_t string_index = load->GetStringIndex();
-      vixl::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
+      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(adrp_label);
         __ adrp(out.X(), /* offset placeholder */ 0);
       }
       // Add ADD with its PC-relative String patch.
-      vixl::Label* add_label =
+      vixl::aarch64::Label* add_label =
           codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(add_label);
         __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0));
       }
@@ -4224,14 +4250,15 @@
       // Add ADRP with its PC-relative DexCache access patch.
       const DexFile& dex_file = load->GetDexFile();
       uint32_t element_offset = load->GetDexCacheElementOffset();
-      vixl::Label* adrp_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
+      vixl::aarch64::Label* adrp_label =
+          codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
       {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(adrp_label);
         __ adrp(out.X(), /* offset placeholder */ 0);
       }
       // Add LDR with its PC-relative DexCache access patch.
-      vixl::Label* ldr_label =
+      vixl::aarch64::Label* ldr_label =
           codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
       // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
       GenerateGcRootFieldLoad(load, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
@@ -4273,7 +4300,7 @@
 
 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
 }
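
The pervasive kCall -> kCallOnMainOnly rename in this patch makes the call kind explicit: such a summary calls the runtime on the instruction's main path, as opposed to only from a slow path. A minimal sketch of the distinction, with an assumed enumerator beyond the two visible in this patch (not the actual LocationSummary declaration):

    enum CallKind {
      kNoCall,           // never calls the runtime
      kCallOnSlowPath,   // assumed: calls the runtime only from a slow path
      kCallOnMainOnly    // calls the runtime on the main (non-slow-path) code path
    };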
@@ -4371,7 +4398,7 @@
 
 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
   locations->SetOut(LocationFrom(x0));
@@ -4396,7 +4423,7 @@
 
 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(LocationFrom(kArtMethodRegister));
@@ -4452,7 +4479,7 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
-  __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::Operand(1));
+  __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
 }
 
 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
@@ -4549,7 +4576,8 @@
 void LocationsBuilderARM64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+      Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+                                           : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -4766,7 +4794,7 @@
 
 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
 }
@@ -4884,7 +4912,7 @@
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
   // Roughly set 16 as max average assemblies generated per HIR in a graph.
-  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * vixl::kInstructionSize;
+  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
   // ADR has a limited range (+/-1MB), so we set a threshold for the number of HIRs in the graph to
   // make sure we don't emit it if the target may run out of range.
   // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
@@ -5029,9 +5057,9 @@
 
 void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction,
                                                             Location root,
-                                                            vixl::Register obj,
+                                                            Register obj,
                                                             uint32_t offset,
-                                                            vixl::Label* fixup_label) {
+                                                            vixl::aarch64::Label* fixup_label) {
   Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
@@ -5047,7 +5075,7 @@
       if (fixup_label == nullptr) {
         __ Ldr(root_reg, MemOperand(obj, offset));
       } else {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(fixup_label);
         __ ldr(root_reg, MemOperand(obj, offset));
       }
@@ -5061,7 +5089,7 @@
 
       // Slow path used to mark the GC root `root`.
       SlowPathCodeARM64* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root, root);
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
       codegen_->AddSlowPath(slow_path);
 
       MacroAssembler* masm = GetVIXLAssembler();
@@ -5078,7 +5106,7 @@
       if (fixup_label == nullptr) {
         __ Add(root_reg.X(), obj.X(), offset);
       } else {
-        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ Bind(fixup_label);
         __ add(root_reg.X(), obj.X(), offset);
       }
@@ -5091,7 +5119,7 @@
     if (fixup_label == nullptr) {
       __ Ldr(root_reg, MemOperand(obj, offset));
     } else {
-      vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+      SingleEmissionCheckScope guard(GetVIXLAssembler());
       __ Bind(fixup_label);
       __ ldr(root_reg, MemOperand(obj, offset));
     }
@@ -5102,7 +5130,7 @@
 
 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                Location ref,
-                                                               vixl::Register obj,
+                                                               Register obj,
                                                                uint32_t offset,
                                                                Register temp,
                                                                bool needs_null_check,
@@ -5126,7 +5154,7 @@
 
 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                Location ref,
-                                                               vixl::Register obj,
+                                                               Register obj,
                                                                uint32_t data_offset,
                                                                Location index,
                                                                Register temp,
@@ -5157,7 +5185,7 @@
 
 void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                    Location ref,
-                                                                   vixl::Register obj,
+                                                                   Register obj,
                                                                    uint32_t offset,
                                                                    Location index,
                                                                    size_t scale_factor,
@@ -5206,23 +5234,12 @@
   // /* LockWord */ lock_word = LockWord(monitor)
   static_assert(sizeof(LockWord) == sizeof(int32_t),
                 "art::LockWord and int32_t have different sizes.");
-  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
-  __ Lsr(temp, temp, LockWord::kReadBarrierStateShift);
-  __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask));
-  static_assert(
-      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
-      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
 
-  // Introduce a dependency on the high bits of rb_state, which shall
-  // be all zeroes, to prevent load-load reordering, and without using
+  // Introduce a dependency on the lock_word including rb_state,
+  // to prevent load-load reordering, and without using
   // a memory barrier (which would be more expensive).
-  // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0
-  Register temp2 = temps.AcquireW();
-  __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask));
-  // obj is unchanged by this operation, but its value now depends on
-  // temp2, which depends on temp.
-  __ Add(obj, obj, Operand(temp2));
-  temps.Release(temp2);
+  // obj is unchanged by this operation, but its value now depends on temp.
+  __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
 
   // The actual reference load.
   if (index.IsValid()) {
@@ -5248,7 +5265,7 @@
         uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
         Load(type, ref_reg, HeapOperand(obj, computed_offset));
       } else {
-        temp2 = temps.AcquireW();
+        Register temp2 = temps.AcquireW();
         __ Add(temp2, obj, offset);
         Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor));
         temps.Release(temp2);
@@ -5269,13 +5286,16 @@
 
   // Slow path used to mark the object `ref` when it is gray.
   SlowPathCodeARM64* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref, ref);
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
   AddSlowPath(slow_path);
 
   // if (rb_state == ReadBarrier::gray_ptr_)
   //   ref = ReadBarrier::Mark(ref);
-  __ Cmp(temp, ReadBarrier::gray_ptr_);
-  __ B(eq, slow_path->GetEntryLabel());
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
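
The rewritten fast path above removes the explicit rb_state extraction (LSR/AND) and the BIC/ADD through a scratch register: a single ADD of the lock word's upper 32 bits (which are zero, since the lock word was loaded into a W register) leaves the address unchanged while still creating a load-to-load data dependency, and the gray check collapses into one TBNZ on the read-barrier-state bit. A minimal C++ sketch of the two ideas, illustrative only; the real ordering effect comes from the AArch64 instructions emitted:

    #include <cstdint>

    // Dependency-carrying add: temp_x is the 64-bit view of the W register that
    // received the lock word, so its upper 32 bits are zero and the add is a
    // no-op in value, but not in dependency ordering.
    inline uintptr_t AddLockWordDependency(uintptr_t obj, uint64_t temp_x) {
      return obj + (temp_x >> 32);
    }

    // Gray check: with white == 0, gray == 1, black == 2, testing the low bit of
    // rb_state (bit kReadBarrierStateShift of the lock word) is enough.
    inline bool IsGray(uint32_t lock_word, uint32_t read_barrier_state_shift) {
      return ((lock_word >> read_barrier_state_shift) & 1u) != 0;
    }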
 
@@ -5350,18 +5370,19 @@
 
 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kArm64PointerSize).SizeValue();
+    __ Ldr(XRegisterFrom(locations->Out()),
+           MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
   } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kArm64PointerSize));
     __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
         mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kArm64PointerSize));
+    __ Ldr(XRegisterFrom(locations->Out()),
+           MemOperand(XRegisterFrom(locations->Out()), method_offset));
   }
-  __ Ldr(XRegisterFrom(locations->Out()),
-         MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
 }
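
The restructured VisitClassTableGet (here and in the MIPS and x86 backends below) emits its loads per branch: the vtable case is one load at a statically known offset from the class, while the IMT case first loads the ImTable pointer from the class and then the entry from that table; the old "% ImTable::kSize" also disappears, presumably because GetIndex()/GetImtIndex() is now already within table bounds. A plain-C++ analogue with hypothetical layouts, only to show the one-load versus two-dependent-loads shape:

    #include <cstddef>

    struct ImTable { void* entries_[64]; };      // hypothetical layout
    struct Class {
      void* embedded_vtable_[64];                // hypothetical embedded vtable
      ImTable* imt_;                             // pointer to the interface method table
    };

    void* GetVTableEntry(const Class* klass, size_t index) {
      return klass->embedded_vtable_[index];     // single load from the class object
    }

    void* GetImtEntry(const Class* klass, size_t index) {
      return klass->imt_->entries_[index];       // load imt_, then the entry
    }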
 
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index d4bf695..88e8cea 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -24,11 +24,16 @@
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
+#include "string_reference.h"
 #include "utils/arm64/assembler_arm64.h"
-#include "utils/string_reference.h"
 #include "utils/type_reference.h"
-#include "vixl/a64/disasm-a64.h"
-#include "vixl/a64/macro-assembler-a64.h"
+
+// TODO: make vixl clean wrt -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "a64/disasm-a64.h"
+#include "a64/macro-assembler-a64.h"
+#pragma GCC diagnostic pop
 
 namespace art {
 namespace arm64 {
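
With the VIXL update the AArch64 API lives in vixl::aarch64 rather than vixl. The header spells the namespace out in full, while the .cc hunks above simply drop the prefix from names such as SingleEmissionCheckScope, Register and kInstructionSize, which presumably relies on a local using-directive in the .cc (not shown in this patch). A stand-in sketch of that assumption:

    // Stand-in namespaces, only to illustrate the pattern; not the real VIXL headers.
    namespace vixl { namespace aarch64 { class Label {}; } }

    // Header-style code keeps the full qualification:
    vixl::aarch64::Label* GetLabel();

    // .cc-style code imports the namespace once and uses the short names:
    using namespace vixl::aarch64;  // NOLINT(build/namespaces)
    Label* GetLabel() { static Label label; return &label; }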
@@ -38,32 +43,47 @@
 // Use a local definition to prevent copying mistakes.
 static constexpr size_t kArm64WordSize = kArm64PointerSize;
 
-static const vixl::Register kParameterCoreRegisters[] = {
-  vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7
+static const vixl::aarch64::Register kParameterCoreRegisters[] = {
+  vixl::aarch64::x1,
+  vixl::aarch64::x2,
+  vixl::aarch64::x3,
+  vixl::aarch64::x4,
+  vixl::aarch64::x5,
+  vixl::aarch64::x6,
+  vixl::aarch64::x7
 };
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
-static const vixl::FPRegister kParameterFPRegisters[] = {
-  vixl::d0, vixl::d1, vixl::d2, vixl::d3, vixl::d4, vixl::d5, vixl::d6, vixl::d7
+static const vixl::aarch64::FPRegister kParameterFPRegisters[] = {
+  vixl::aarch64::d0,
+  vixl::aarch64::d1,
+  vixl::aarch64::d2,
+  vixl::aarch64::d3,
+  vixl::aarch64::d4,
+  vixl::aarch64::d5,
+  vixl::aarch64::d6,
+  vixl::aarch64::d7
 };
 static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
 
-const vixl::Register tr = vixl::x19;                        // Thread Register
-static const vixl::Register kArtMethodRegister = vixl::x0;  // Method register on invoke.
+// Thread Register
+const vixl::aarch64::Register tr = vixl::aarch64::x19;
+// Method register on invoke.
+static const vixl::aarch64::Register kArtMethodRegister = vixl::aarch64::x0;
+const vixl::aarch64::CPURegList vixl_reserved_core_registers(vixl::aarch64::ip0,
+                                                             vixl::aarch64::ip1);
+const vixl::aarch64::CPURegList vixl_reserved_fp_registers(vixl::aarch64::d31);
 
-const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
-const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31);
-
-const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
+const vixl::aarch64::CPURegList runtime_reserved_core_registers(tr, vixl::aarch64::lr);
 
 // Callee-saved registers AAPCS64 (without x19 - Thread Register)
-const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister,
-                                                   vixl::kXRegSize,
-                                                   vixl::x20.code(),
-                                                   vixl::x30.code());
-const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister,
-                                                 vixl::kDRegSize,
-                                                 vixl::d8.code(),
-                                                 vixl::d15.code());
+const vixl::aarch64::CPURegList callee_saved_core_registers(vixl::aarch64::CPURegister::kRegister,
+                                                            vixl::aarch64::kXRegSize,
+                                                            vixl::aarch64::x20.GetCode(),
+                                                            vixl::aarch64::x30.GetCode());
+const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kFPRegister,
+                                                          vixl::aarch64::kDRegSize,
+                                                          vixl::aarch64::d8.GetCode(),
+                                                          vixl::aarch64::d15.GetCode());
 Location ARM64ReturnLocation(Primitive::Type return_type);
 
 class SlowPathCodeARM64 : public SlowPathCode {
@@ -71,15 +91,15 @@
   explicit SlowPathCodeARM64(HInstruction* instruction)
       : SlowPathCode(instruction), entry_label_(), exit_label_() {}
 
-  vixl::Label* GetEntryLabel() { return &entry_label_; }
-  vixl::Label* GetExitLabel() { return &exit_label_; }
+  vixl::aarch64::Label* GetEntryLabel() { return &entry_label_; }
+  vixl::aarch64::Label* GetExitLabel() { return &exit_label_; }
 
   void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
   void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
 
  private:
-  vixl::Label entry_label_;
-  vixl::Label exit_label_;
+  vixl::aarch64::Label entry_label_;
+  vixl::aarch64::Label exit_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64);
 };
@@ -89,27 +109,42 @@
   explicit JumpTableARM64(HPackedSwitch* switch_instr)
     : switch_instr_(switch_instr), table_start_() {}
 
-  vixl::Label* GetTableStartLabel() { return &table_start_; }
+  vixl::aarch64::Label* GetTableStartLabel() { return &table_start_; }
 
   void EmitTable(CodeGeneratorARM64* codegen);
 
  private:
   HPackedSwitch* const switch_instr_;
-  vixl::Label table_start_;
+  vixl::aarch64::Label table_start_;
 
   DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
 };
 
-static const vixl::Register kRuntimeParameterCoreRegisters[] =
-    { vixl::x0, vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7 };
+static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] =
+    { vixl::aarch64::x0,
+      vixl::aarch64::x1,
+      vixl::aarch64::x2,
+      vixl::aarch64::x3,
+      vixl::aarch64::x4,
+      vixl::aarch64::x5,
+      vixl::aarch64::x6,
+      vixl::aarch64::x7 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
-static const vixl::FPRegister kRuntimeParameterFpuRegisters[] =
-    { vixl::d0, vixl::d1, vixl::d2, vixl::d3, vixl::d4, vixl::d5, vixl::d6, vixl::d7 };
+static const vixl::aarch64::FPRegister kRuntimeParameterFpuRegisters[] =
+    { vixl::aarch64::d0,
+      vixl::aarch64::d1,
+      vixl::aarch64::d2,
+      vixl::aarch64::d3,
+      vixl::aarch64::d4,
+      vixl::aarch64::d5,
+      vixl::aarch64::d6,
+      vixl::aarch64::d7 };
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
 
-class InvokeRuntimeCallingConvention : public CallingConvention<vixl::Register, vixl::FPRegister> {
+class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::Register,
+                                                                vixl::aarch64::FPRegister> {
  public:
   static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
 
@@ -126,7 +161,8 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
 };
 
-class InvokeDexCallingConvention : public CallingConvention<vixl::Register, vixl::FPRegister> {
+class InvokeDexCallingConvention : public CallingConvention<vixl::aarch64::Register,
+                                                            vixl::aarch64::FPRegister> {
  public:
   InvokeDexCallingConvention()
       : CallingConvention(kParameterCoreRegisters,
@@ -166,23 +202,23 @@
   FieldAccessCallingConventionARM64() {}
 
   Location GetObjectLocation() const OVERRIDE {
-    return helpers::LocationFrom(vixl::x1);
+    return helpers::LocationFrom(vixl::aarch64::x1);
   }
   Location GetFieldIndexLocation() const OVERRIDE {
-    return helpers::LocationFrom(vixl::x0);
+    return helpers::LocationFrom(vixl::aarch64::x0);
   }
   Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
-    return helpers::LocationFrom(vixl::x0);
+    return helpers::LocationFrom(vixl::aarch64::x0);
   }
   Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
     return Primitive::Is64BitType(type)
-        ? helpers::LocationFrom(vixl::x2)
+        ? helpers::LocationFrom(vixl::aarch64::x2)
         : (is_instance
-            ? helpers::LocationFrom(vixl::x2)
-            : helpers::LocationFrom(vixl::x1));
+            ? helpers::LocationFrom(vixl::aarch64::x2)
+            : helpers::LocationFrom(vixl::aarch64::x1));
   }
   Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
-    return helpers::LocationFrom(vixl::d0);
+    return helpers::LocationFrom(vixl::aarch64::d0);
   }
 
  private:
@@ -208,10 +244,11 @@
   }
 
   Arm64Assembler* GetAssembler() const { return assembler_; }
-  vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
 
  private:
-  void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
+  void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
+                                        vixl::aarch64::Register class_reg);
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* instr);
 
@@ -256,9 +293,9 @@
   // while honoring read barriers (if any).
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
-                               vixl::Register obj,
+                               vixl::aarch64::Register obj,
                                uint32_t offset,
-                               vixl::Label* fixup_label = nullptr);
+                               vixl::aarch64::Label* fixup_label = nullptr);
 
   // Generate a floating-point comparison.
   void GenerateFcmp(HInstruction* instruction);
@@ -266,8 +303,8 @@
   void HandleShift(HBinaryOperation* instr);
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
-                             vixl::Label* true_target,
-                             vixl::Label* false_target);
+                             vixl::aarch64::Label* true_target,
+                             vixl::aarch64::Label* false_target);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -327,12 +364,12 @@
 
  private:
   Arm64Assembler* GetAssembler() const;
-  vixl::MacroAssembler* GetVIXLAssembler() const {
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() const {
     return GetAssembler()->vixl_masm_;
   }
 
   CodeGeneratorARM64* const codegen_;
-  vixl::UseScratchRegisterScope vixl_temps_;
+  vixl::aarch64::UseScratchRegisterScope vixl_temps_;
 
   DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64);
 };
@@ -348,12 +385,12 @@
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
 
-  vixl::CPURegList GetFramePreservedCoreRegisters() const;
-  vixl::CPURegList GetFramePreservedFPRegisters() const;
+  vixl::aarch64::CPURegList GetFramePreservedCoreRegisters() const;
+  vixl::aarch64::CPURegList GetFramePreservedFPRegisters() const;
 
   void Bind(HBasicBlock* block) OVERRIDE;
 
-  vixl::Label* GetLabelOf(HBasicBlock* block) {
+  vixl::aarch64::Label* GetLabelOf(HBasicBlock* block) {
     block = FirstNonEmptyBlock(block);
     return &(block_labels_[block->GetBlockId()]);
   }
@@ -368,19 +405,21 @@
   }
 
   uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
-    vixl::Label* block_entry_label = GetLabelOf(block);
+    vixl::aarch64::Label* block_entry_label = GetLabelOf(block);
     DCHECK(block_entry_label->IsBound());
-    return block_entry_label->location();
+    return block_entry_label->GetLocation();
   }
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
   HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
   Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
   const Arm64Assembler& GetAssembler() const OVERRIDE { return assembler_; }
-  vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
 
   // Emit a write barrier.
-  void MarkGCCard(vixl::Register object, vixl::Register value, bool value_can_be_null);
+  void MarkGCCard(vixl::aarch64::Register object,
+                  vixl::aarch64::Register value,
+                  bool value_can_be_null);
 
   void GenerateMemoryBarrier(MemBarrierKind kind);
 
@@ -399,8 +438,8 @@
   // (xzr, wzr), or make for poor allocatable registers (sp alignment
   // requirements, etc.). This also facilitates our task as all other registers
   // can easily be mapped via to or from their type and index or code.
-  static const int kNumberOfAllocatableRegisters = vixl::kNumberOfRegisters - 1;
-  static const int kNumberOfAllocatableFPRegisters = vixl::kNumberOfFPRegisters;
+  static const int kNumberOfAllocatableRegisters = vixl::aarch64::kNumberOfRegisters - 1;
+  static const int kNumberOfAllocatableFPRegisters = vixl::aarch64::kNumberOfFPRegisters;
   static constexpr int kNumberOfAllocatableRegisterPairs = 0;
 
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -418,6 +457,10 @@
     block_labels_.resize(GetGraph()->GetBlocks().size());
   }
 
+  // We want to use the STP and LDP instructions to spill and restore registers for slow paths.
+  // These instructions can only encode offsets that are multiples of the register size accessed.
+  uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return vixl::aarch64::kXRegSizeInBytes; }
+
   JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) {
     jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARM64(switch_instr));
     return jump_tables_.back().get();
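
The new GetPreferredSlotsAlignment override exists because STP/LDP encode their immediate as a signed 7-bit value scaled by the access size, so 8-byte spill slots must sit at 8-byte-aligned offsets to be reachable in a single instruction. A small compile-time sketch of that constraint (the range is the architectural imm7 limit, not an ART constant):

    #include <cstdint>

    // Offsets STP/LDP of X registers can encode directly: a multiple of 8 within
    // the scaled signed 7-bit immediate range [-512, 504].
    constexpr bool IsEncodableXRegPairOffset(int64_t offset) {
      return (offset % 8 == 0) && (offset >= -512) && (offset <= 504);
    }
    static_assert(IsEncodableXRegPairOffset(16), "aligned slot pairs need one STP/LDP");
    static_assert(!IsEncodableXRegPairOffset(12), "misaligned slots need extra instructions");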
@@ -426,18 +469,24 @@
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
   // Code generation helpers.
-  void MoveConstant(vixl::CPURegister destination, HConstant* constant);
+  void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant);
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
   void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
 
-  void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
-  void Store(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
+  void Load(Primitive::Type type,
+            vixl::aarch64::CPURegister dst,
+            const vixl::aarch64::MemOperand& src);
+  void Store(Primitive::Type type,
+             vixl::aarch64::CPURegister src,
+             const vixl::aarch64::MemOperand& dst);
   void LoadAcquire(HInstruction* instruction,
-                   vixl::CPURegister dst,
-                   const vixl::MemOperand& src,
+                   vixl::aarch64::CPURegister dst,
+                   const vixl::aarch64::MemOperand& src,
                    bool needs_null_check);
-  void StoreRelease(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
+  void StoreRelease(Primitive::Type type,
+                    vixl::aarch64::CPURegister src,
+                    const vixl::aarch64::MemOperand& dst);
 
   // Generate code to invoke a runtime entry point.
   void InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -450,6 +499,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; }
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
@@ -484,32 +539,33 @@
   // to be bound before the instruction. The instruction will be either the
   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
   // to the associated ADRP patch label).
-  vixl::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                        uint32_t string_index,
-                                        vixl::Label* adrp_label = nullptr);
+  vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                 uint32_t string_index,
+                                                 vixl::aarch64::Label* adrp_label = nullptr);
 
   // Add a new PC-relative type patch for an instruction and return the label
   // to be bound before the instruction. The instruction will be either the
   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
   // to the associated ADRP patch label).
-  vixl::Label* NewPcRelativeTypePatch(const DexFile& dex_file,
-                                      uint32_t type_index,
-                                      vixl::Label* adrp_label = nullptr);
+  vixl::aarch64::Label* NewPcRelativeTypePatch(const DexFile& dex_file,
+                                               uint32_t type_index,
+                                               vixl::aarch64::Label* adrp_label = nullptr);
 
   // Add a new PC-relative dex cache array patch for an instruction and return
   // the label to be bound before the instruction. The instruction will be
   // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
   // pointing to the associated ADRP patch label).
-  vixl::Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
-                                               uint32_t element_offset,
-                                               vixl::Label* adrp_label = nullptr);
+  vixl::aarch64::Label* NewPcRelativeDexCacheArrayPatch(
+      const DexFile& dex_file,
+      uint32_t element_offset,
+      vixl::aarch64::Label* adrp_label = nullptr);
 
-  vixl::Literal<uint32_t>* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                                             uint32_t string_index);
-  vixl::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
-                                                           uint32_t type_index);
-  vixl::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
-  vixl::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                                                      uint32_t string_index);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+                                                                    uint32_t type_index);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
@@ -517,29 +573,29 @@
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                              Location ref,
-                                             vixl::Register obj,
+                                             vixl::aarch64::Register obj,
                                              uint32_t offset,
-                                             vixl::Register temp,
+                                             vixl::aarch64::Register temp,
                                              bool needs_null_check,
                                              bool use_load_acquire);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                              Location ref,
-                                             vixl::Register obj,
+                                             vixl::aarch64::Register obj,
                                              uint32_t data_offset,
                                              Location index,
-                                             vixl::Register temp,
+                                             vixl::aarch64::Register temp,
                                              bool needs_null_check);
   // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
   void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                  Location ref,
-                                                 vixl::Register obj,
+                                                 vixl::aarch64::Register obj,
                                                  uint32_t offset,
                                                  Location index,
                                                  size_t scale_factor,
-                                                 vixl::Register temp,
+                                                 vixl::aarch64::Register temp,
                                                  bool needs_null_check,
                                                  bool use_load_acquire);
 
@@ -597,24 +653,25 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
  private:
-  using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
-  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::Literal<uint32_t>*>;
+  using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
+  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
-                                          vixl::Literal<uint64_t>*,
+                                          vixl::aarch64::Literal<uint64_t>*,
                                           MethodReferenceComparator>;
   using BootStringToLiteralMap = ArenaSafeMap<StringReference,
-                                              vixl::Literal<uint32_t>*,
+                                              vixl::aarch64::Literal<uint32_t>*,
                                               StringReferenceValueComparator>;
   using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
-                                            vixl::Literal<uint32_t>*,
+                                            vixl::aarch64::Literal<uint32_t>*,
                                             TypeReferenceValueComparator>;
 
-  vixl::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
-  vixl::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
-  vixl::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method,
-                                                    MethodToLiteralMap* map);
-  vixl::Literal<uint64_t>* DeduplicateMethodAddressLiteral(MethodReference target_method);
-  vixl::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value,
+                                                             Uint32ToLiteralMap* map);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method,
+                                                             MethodToLiteralMap* map);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateMethodAddressLiteral(MethodReference target_method);
+  vixl::aarch64::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method);
 
   // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
   // and boot image strings/types. The only difference is the interpretation of the
@@ -626,21 +683,21 @@
     const DexFile& target_dex_file;
     // Either the dex cache array element offset or the string/type index.
     uint32_t offset_or_index;
-    vixl::Label label;
-    vixl::Label* pc_insn_label;
+    vixl::aarch64::Label label;
+    vixl::aarch64::Label* pc_insn_label;
   };
 
-  vixl::Label* NewPcRelativePatch(const DexFile& dex_file,
-                                  uint32_t offset_or_index,
-                                  vixl::Label* adrp_label,
-                                  ArenaDeque<PcRelativePatchInfo>* patches);
+  vixl::aarch64::Label* NewPcRelativePatch(const DexFile& dex_file,
+                                           uint32_t offset_or_index,
+                                           vixl::aarch64::Label* adrp_label,
+                                           ArenaDeque<PcRelativePatchInfo>* patches);
 
   void EmitJumpTables();
 
   // Labels for each block that will be compiled.
-  // We use a deque so that the `vixl::Label` objects do not move in memory.
-  ArenaDeque<vixl::Label> block_labels_;  // Indexed by block id.
-  vixl::Label frame_entry_label_;
+  // We use a deque so that the `vixl::aarch64::Label` objects do not move in memory.
+  ArenaDeque<vixl::aarch64::Label> block_labels_;  // Indexed by block id.
+  vixl::aarch64::Label frame_entry_label_;
   ArenaVector<std::unique_ptr<JumpTableARM64>> jump_tables_;
 
   LocationsBuilderARM64 location_builder_;
@@ -659,7 +716,7 @@
   MethodToLiteralMap call_patches_;
   // Relative call patch info.
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
-  ArenaDeque<MethodPatchInfo<vixl::Label>> relative_call_patches_;
+  ArenaDeque<MethodPatchInfo<vixl::aarch64::Label>> relative_call_patches_;
   // PC-relative DexCache access info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
   // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index b6dca95..39248aa 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -415,7 +415,7 @@
                                   this,
                                   IsDirectEntrypoint(kQuickInstanceofNonTrivial));
       CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
@@ -1855,7 +1855,7 @@
   bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
   if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2467,7 +2467,7 @@
 void LocationsBuilderMIPS::VisitDiv(HDiv* div) {
   Primitive::Type type = div->GetResultType();
   LocationSummary::CallKind call_kind = (type == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -3430,7 +3430,7 @@
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
   bool generate_volatile = field_info.IsVolatile() && is_wide;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+      instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
@@ -3557,7 +3557,7 @@
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
   bool generate_volatile = field_info.IsVolatile() && is_wide;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+      instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
@@ -3791,7 +3791,7 @@
   __ LoadFromOffset(kLoadWord, temp, temp,
       mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex() % ImTable::kSize, kMipsPointerSize));
+      invoke->GetImtIndex(), kMipsPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -4218,7 +4218,7 @@
 
 void LocationsBuilderMIPS::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -4397,7 +4397,7 @@
 
 void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
@@ -4423,7 +4423,7 @@
 
 void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -4593,7 +4593,7 @@
 void LocationsBuilderMIPS::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCall;
+      (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCallOnMainOnly;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -4830,7 +4830,7 @@
 
 void LocationsBuilderMIPS::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -4859,7 +4859,7 @@
   if (!isR6 &&
       ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
        (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) {
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   }
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -5380,22 +5380,25 @@
 
 void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kMipsPointerSize).SizeValue();
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      method_offset);
   } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kMipsPointerSize));
     __ LoadFromOffset(kLoadWord,
                       locations->Out().AsRegister<Register>(),
                       locations->InAt(0).AsRegister<Register>(),
                       mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kMipsPointerSize));
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->Out().AsRegister<Register>(),
+                      method_offset);
   }
-  __ LoadFromOffset(kLoadWord,
-                    locations->Out().AsRegister<Register>(),
-                    locations->InAt(0).AsRegister<Register>(),
-                    method_offset);
 }
 
 #undef __
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 9f2664c..29b8c20 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -362,7 +362,7 @@
                                     dex_pc,
                                     this);
       CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
@@ -1436,7 +1436,7 @@
   bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
   if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2951,7 +2951,7 @@
   __ LoadFromOffset(kLoadDoubleword, temp, temp,
       mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex() % ImTable::kSize, kMips64PointerSize));
+      invoke->GetImtIndex(), kMips64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -3292,7 +3292,7 @@
 
 void LocationsBuilderMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -3419,7 +3419,7 @@
 
 void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
@@ -3440,7 +3440,7 @@
 
 void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -3600,7 +3600,8 @@
 void LocationsBuilderMIPS64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+      Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+                                           : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -3813,7 +3814,7 @@
 
 void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index be20f1f..528e94f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -140,12 +140,29 @@
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
+
+    // Are we using an array length from memory?
+    HInstruction* array_length = instruction_->InputAt(1);
+    Location length_loc = locations->InAt(1);
     InvokeRuntimeCallingConvention calling_convention;
+    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
+      // Load the array length into our temporary.
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<Register>(), len_offset);
+      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+      // Check for conflicts with index.
+      if (length_loc.Equals(locations->InAt(0))) {
+        // We know we aren't using parameter 2.
+        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+      }
+      __ movl(length_loc.AsRegister<Register>(), array_len);
+    }
     x86_codegen->EmitParallelMoves(
         locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        locations->InAt(1),
+        length_loc,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
     uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
@@ -332,7 +349,7 @@
                                  instruction_->GetDexPc(),
                                  this);
       CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -430,8 +447,8 @@
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location out, Location obj)
-      : SlowPathCode(instruction), out_(out), obj_(obj) {
+  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj)
+      : SlowPathCode(instruction), obj_(obj) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -439,9 +456,9 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Register reg_out = out_.AsRegister<Register>();
+    Register reg = obj_.AsRegister<Register>();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
@@ -449,30 +466,39 @@
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
-
-    InvokeRuntimeCallingConvention calling_convention;
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
-    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
-    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
-    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
-    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
-
-    RestoreLiveRegisters(codegen, locations);
+    DCHECK_NE(reg, ESP);
+    DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in EAX):
+    //
+    //   EAX <- obj
+    //   EAX <- ReadBarrierMark(EAX)
+    //   obj <- EAX
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86WordSize>(reg);
+    // This runtime call does not require a stack map.
+    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ jmp(GetExitLabel());
   }
 
  private:
-  const Location out_;
   const Location obj_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
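
The "compact" path above relies on the runtime exposing one mark entrypoint per general-purpose register, so the marked reference never has to round-trip through EAX. The offset returned by GetReadBarrierMarkEntryPointsOffset<kX86WordSize>(reg) is, presumably, a base offset plus the register number times the word size, i.e. one entrypoint slot per register. A sketch of that assumption (not the ART implementation):

    #include <cstdint>

    // Assumed layout: per-register mark entrypoints stored contiguously in the
    // thread-local entrypoint area, indexed by the register number.
    constexpr int32_t MarkEntryPointOffset(int32_t first_entry_offset,
                                           int32_t word_size,
                                           int32_t reg) {
      return first_entry_offset + reg * word_size;
    }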
@@ -518,8 +544,7 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
 
@@ -793,6 +818,13 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                           HInstruction* instruction,
+                                                           SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  __ fs()->call(Address::Absolute(entry_point_offset));
+}
+
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
                                    const X86InstructionSetFeatures& isa_features,
                                    const CompilerOptions& compiler_options,
@@ -2058,7 +2090,7 @@
       Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
   // temp = temp->GetImtEntryAt(method_offset);
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex() % ImTable::kSize, kX86PointerSize));
+      invoke->GetImtIndex(), kX86PointerSize));
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
   __ call(Address(temp,
@@ -2185,7 +2217,7 @@
   LocationSummary::CallKind call_kind =
       ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
        && result_type == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -3440,7 +3472,7 @@
 
 void LocationsBuilderX86::VisitDiv(HDiv* div) {
   LocationSummary::CallKind call_kind = (div->GetResultType() == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
 
@@ -3543,7 +3575,7 @@
   Primitive::Type type = rem->GetResultType();
 
   LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
@@ -3985,7 +4017,7 @@
 
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   locations->SetOut(Location::RegisterLocation(EAX));
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -4018,7 +4050,7 @@
 
 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   locations->SetOut(Location::RegisterLocation(EAX));
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -4073,20 +4105,21 @@
 
 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kX86PointerSize).SizeValue();
+    __ movl(locations->Out().AsRegister<Register>(),
+            Address(locations->InAt(0).AsRegister<Register>(), method_offset));
   } else {
-    __ movl(locations->InAt(0).AsRegister<Register>(),
-        Address(locations->InAt(0).AsRegister<Register>(),
-        mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kX86PointerSize));
+    __ movl(locations->Out().AsRegister<Register>(),
+            Address(locations->InAt(0).AsRegister<Register>(),
+                    mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
     // temp = temp->GetImtEntryAt(method_offset);
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kX86PointerSize));
+    __ movl(locations->Out().AsRegister<Register>(),
+            Address(locations->Out().AsRegister<Register>(), method_offset));
   }
-  __ movl(locations->Out().AsRegister<Register>(),
-          Address(locations->InAt(0).AsRegister<Register>(), method_offset));
 }
 
 void LocationsBuilderX86::VisitNot(HNot* not_) {
@@ -5517,10 +5550,16 @@
 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  if (!instruction->IsEmittedAtUseSite()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
+    return;
+  }
+
   LocationSummary* locations = instruction->GetLocations();
   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
@@ -5535,7 +5574,10 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  HInstruction* length = instruction->InputAt(1);
+  if (!length->IsEmittedAtUseSite()) {
+    locations->SetInAt(1, Location::RegisterOrConstant(length));
+  }
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -5569,12 +5611,28 @@
     codegen_->AddSlowPath(slow_path);
     __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    Register length = length_loc.AsRegister<Register>();
-    if (index_loc.IsConstant()) {
-      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-      __ cmpl(length, Immediate(value));
+    HInstruction* array_length = instruction->InputAt(1);
+    if (array_length->IsEmittedAtUseSite()) {
+      // Address the length field in the array.
+      DCHECK(array_length->IsArrayLength());
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<Register>(), len_offset);
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(array_len, Immediate(value));
+      } else {
+        __ cmpl(array_len, index_loc.AsRegister<Register>());
+      }
+      codegen_->MaybeRecordImplicitNullCheck(array_length);
     } else {
-      __ cmpl(length, index_loc.AsRegister<Register>());
+      Register length = length_loc.AsRegister<Register>();
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(length, Immediate(value));
+      } else {
+        __ cmpl(length, index_loc.AsRegister<Register>());
+      }
     }
     codegen_->AddSlowPath(slow_path);
     __ j(kBelowEqual, slow_path->GetEntryLabel());
@@ -6242,7 +6300,7 @@
 
 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6694,7 +6752,7 @@
 
 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6933,7 +6991,7 @@
 
       // Slow path used to mark the GC root `root`.
       SlowPathCode* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root, root);
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root);
       codegen_->AddSlowPath(slow_path);
 
       __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86WordSize>().Int32Value()),
@@ -7063,7 +7121,7 @@
 
   // Slow path used to mark the object `ref` when it is gray.
   SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref, ref);
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref);
   AddSlowPath(slow_path);
 
   // if (rb_state == ReadBarrier::gray_ptr_)
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 2a9fb80..1290172 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -336,6 +336,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   size_t GetWordSize() const OVERRIDE {
     return kX86WordSize;
   }
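
For intuition, here is a minimal, self-contained sketch of the "compact" marking slow path introduced above; it is not ART code, and kFirstMarkEntrypoint, kMarkEntrypointStride and MarkEntrypointOffsetForReg are hypothetical stand-ins for the real thread-local entrypoint layout. The register that already holds the reference selects a dedicated pReadBarrierMarkRegX entrypoint, so the two moves through the standard calling convention disappear, and because the call cannot trigger a garbage collection, no stack map is recorded (hence InvokeRuntimeWithoutRecordingPcInfo).

    // Standalone illustration (hypothetical offsets, not ART's real layout) of
    // selecting a marking entrypoint by register code instead of shuffling the
    // reference through the standard runtime calling convention.
    #include <cstdint>
    #include <cstdio>

    constexpr int kNumberOfCpuRegs = 8;                        // toy register count
    constexpr int32_t kFirstMarkEntrypoint = 0x200;            // hypothetical TLS offset
    constexpr int32_t kMarkEntrypointStride = sizeof(void*);   // one slot per register

    // GetReadBarrierMarkEntryPointsOffset-style mapping: the register already
    // holding `obj` picks its own entrypoint, so input and output stay in place.
    constexpr int32_t MarkEntrypointOffsetForReg(int reg) {
      return kFirstMarkEntrypoint + reg * kMarkEntrypointStride;
    }

    int main() {
      for (int reg = 0; reg < kNumberOfCpuRegs; ++reg) {
        std::printf("reg %d -> pReadBarrierMarkReg%d at TLS offset %d\n",
                    reg, reg, MarkEntrypointOffsetForReg(reg));
      }
      return 0;
    }
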
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index cac33cd..0f0129b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -194,14 +194,31 @@
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
+    // Are we using an array length from memory?
+    HInstruction* array_length = instruction_->InputAt(1);
+    Location length_loc = locations->InAt(1);
+    InvokeRuntimeCallingConvention calling_convention;
+    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
+      // Load the array length into our temporary.
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
+      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+      // Check for conflicts with index.
+      if (length_loc.Equals(locations->InAt(0))) {
+        // We know we aren't using parameter 2.
+        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+      }
+      __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
+    }
+
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
-    InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
         locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        locations->InAt(1),
+        length_loc,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
     uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
@@ -352,7 +369,7 @@
                                     dex_pc,
                                     this);
       CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -451,8 +468,8 @@
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
-      : SlowPathCode(instruction), out_(out), obj_(obj) {
+  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj)
+      : SlowPathCode(instruction), obj_(obj) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -460,9 +477,9 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Register reg_out = out_.AsRegister<Register>();
+    Register reg = obj_.AsRegister<Register>();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
@@ -470,30 +487,39 @@
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
-
-    InvokeRuntimeCallingConvention calling_convention;
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
-    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
-    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
-    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
-    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
-
-    RestoreLiveRegisters(codegen, locations);
+    DCHECK_NE(reg, RSP);
+    DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // in RDI and output in RAX):
+    //
+    //   RDI <- obj
+    //   RAX <- ReadBarrierMark(RDI)
+    //   obj <- RAX
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64WordSize>(reg);
+    // This runtime call does not require a stack map.
+    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ jmp(GetExitLabel());
   }
 
  private:
-  const Location out_;
   const Location obj_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
@@ -539,8 +565,7 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
-            instruction_->GetLocations()->Intrinsified()))
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
 
@@ -1021,6 +1046,13 @@
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
+void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                              HInstruction* instruction,
+                                                              SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
+}
+
 static constexpr int kNumberOfCpuRegisterPairs = 0;
 // Use a fake return address register to mimic Quick.
 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
@@ -2287,7 +2319,7 @@
       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
   // temp = temp->GetImtEntryAt(method_offset);
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex() % ImTable::kSize, kX86_64PointerSize));
+      invoke->GetImtIndex(), kX86_64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ movq(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
@@ -3913,7 +3945,7 @@
 
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -3946,7 +3978,7 @@
 
 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(RAX));
@@ -4006,19 +4038,20 @@
 
 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
+    __ movq(locations->Out().AsRegister<CpuRegister>(),
+            Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
   } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kX86_64PointerSize));
     __ movq(locations->Out().AsRegister<CpuRegister>(),
             Address(locations->InAt(0).AsRegister<CpuRegister>(),
             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize));
+    __ movq(locations->Out().AsRegister<CpuRegister>(),
+            Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
   }
-  __ movq(locations->Out().AsRegister<CpuRegister>(),
-          Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
 }
 
 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
@@ -4987,10 +5020,16 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  if (!instruction->IsEmittedAtUseSite()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
+    return;
+  }
+
   LocationSummary* locations = instruction->GetLocations();
   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
@@ -5005,7 +5044,10 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  HInstruction* length = instruction->InputAt(1);
+  if (!length->IsEmittedAtUseSite()) {
+    locations->SetInAt(1, Location::RegisterOrConstant(length));
+  }
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -5015,8 +5057,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location index_loc = locations->InAt(0);
   Location length_loc = locations->InAt(1);
-  SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
 
   if (length_loc.IsConstant()) {
     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
@@ -5039,12 +5080,28 @@
     codegen_->AddSlowPath(slow_path);
     __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    CpuRegister length = length_loc.AsRegister<CpuRegister>();
-    if (index_loc.IsConstant()) {
-      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-      __ cmpl(length, Immediate(value));
+    HInstruction* array_length = instruction->InputAt(1);
+    if (array_length->IsEmittedAtUseSite()) {
+      // Address the length field in the array.
+      DCHECK(array_length->IsArrayLength());
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(array_len, Immediate(value));
+      } else {
+        __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
+      }
+      codegen_->MaybeRecordImplicitNullCheck(array_length);
     } else {
-      __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+      CpuRegister length = length_loc.AsRegister<CpuRegister>();
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(length, Immediate(value));
+      } else {
+        __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+      }
     }
     codegen_->AddSlowPath(slow_path);
     __ j(kBelowEqual, slow_path->GetEntryLabel());
@@ -5654,7 +5711,7 @@
 
 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6164,7 +6221,7 @@
 
 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6385,7 +6442,7 @@
 
       // Slow path used to mark the GC root `root`.
       SlowPathCode* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root);
       codegen_->AddSlowPath(slow_path);
 
       __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
@@ -6516,7 +6573,7 @@
 
   // Slow path used to mark the object `ref` when it is gray.
   SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref);
   AddSlowPath(slow_path);
 
   // if (rb_state == ReadBarrier::gray_ptr_)
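
As a rough illustration of the HArrayLength/HBoundsCheck change above: when the length is emitted at its use site, the bounds check compares the index against the length field in memory instead of first materializing it in a register. The sketch below is not ART code; ArrayHeader stands in for the real array layout, and the unsigned compare mirrors the cmpl + j(kBelowEqual) sequence, so both forms agree for every index, including negative ones.

    // Standalone illustration showing that the in-register compare and the
    // memory-operand compare used by the bounds check are equivalent.
    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    struct ArrayHeader {
      int32_t length;  // stands in for the length field at its heap offset
    };

    // Form 1: the HArrayLength result was materialized in a register first.
    bool InBoundsWithMaterializedLength(int32_t index, int32_t length_reg) {
      return static_cast<uint32_t>(index) < static_cast<uint32_t>(length_reg);
    }

    // Form 2: the length is emitted at the use site, so the compare reads the
    // field straight from memory (a single cmp with a memory operand).
    bool InBoundsWithMemoryOperand(int32_t index, const ArrayHeader* array) {
      return static_cast<uint32_t>(index) < static_cast<uint32_t>(array->length);
    }

    int main() {
      ArrayHeader a{10};
      for (int32_t i : {-1, 0, 9, 10, 11}) {
        assert(InBoundsWithMaterializedLength(i, a.length) ==
               InBoundsWithMemoryOperand(i, &a));
      }
      return 0;
    }
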
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d7cfd37..cf92d68 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -318,6 +318,12 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
   size_t GetWordSize() const OVERRIDE {
     return kX86_64WordSize;
   }
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 6be79fa..fe9a7af 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -44,7 +44,7 @@
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "prepare_for_register_allocation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 #include "utils.h"
 #include "utils/arm/managed_register_arm.h"
@@ -219,7 +219,7 @@
 
   PrepareForRegisterAllocation(graph).Run();
   liveness.Analyze();
-  RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+  RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
   hook_before_codegen(graph);
 
   InternalCodeAllocator allocator;
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index a849448..af0ee4e 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -21,8 +21,9 @@
 #include "locations.h"
 #include "nodes.h"
 #include "utils/arm64/assembler_arm64.h"
-#include "vixl/a64/disasm-a64.h"
-#include "vixl/a64/macro-assembler-a64.h"
+
+#include "a64/disasm-a64.h"
+#include "a64/macro-assembler-a64.h"
 
 namespace art {
 namespace arm64 {
@@ -34,87 +35,88 @@
 
 static inline int VIXLRegCodeFromART(int code) {
   if (code == SP) {
-    return vixl::kSPRegInternalCode;
+    return vixl::aarch64::kSPRegInternalCode;
   }
   if (code == XZR) {
-    return vixl::kZeroRegCode;
+    return vixl::aarch64::kZeroRegCode;
   }
   return code;
 }
 
 static inline int ARTRegCodeFromVIXL(int code) {
-  if (code == vixl::kSPRegInternalCode) {
+  if (code == vixl::aarch64::kSPRegInternalCode) {
     return SP;
   }
-  if (code == vixl::kZeroRegCode) {
+  if (code == vixl::aarch64::kZeroRegCode) {
     return XZR;
   }
   return code;
 }
 
-static inline vixl::Register XRegisterFrom(Location location) {
+static inline vixl::aarch64::Register XRegisterFrom(Location location) {
   DCHECK(location.IsRegister()) << location;
-  return vixl::Register::XRegFromCode(VIXLRegCodeFromART(location.reg()));
+  return vixl::aarch64::Register::GetXRegFromCode(VIXLRegCodeFromART(location.reg()));
 }
 
-static inline vixl::Register WRegisterFrom(Location location) {
+static inline vixl::aarch64::Register WRegisterFrom(Location location) {
   DCHECK(location.IsRegister()) << location;
-  return vixl::Register::WRegFromCode(VIXLRegCodeFromART(location.reg()));
+  return vixl::aarch64::Register::GetWRegFromCode(VIXLRegCodeFromART(location.reg()));
 }
 
-static inline vixl::Register RegisterFrom(Location location, Primitive::Type type) {
+static inline vixl::aarch64::Register RegisterFrom(Location location, Primitive::Type type) {
   DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type;
   return type == Primitive::kPrimLong ? XRegisterFrom(location) : WRegisterFrom(location);
 }
 
-static inline vixl::Register OutputRegister(HInstruction* instr) {
+static inline vixl::aarch64::Register OutputRegister(HInstruction* instr) {
   return RegisterFrom(instr->GetLocations()->Out(), instr->GetType());
 }
 
-static inline vixl::Register InputRegisterAt(HInstruction* instr, int input_index) {
+static inline vixl::aarch64::Register InputRegisterAt(HInstruction* instr, int input_index) {
   return RegisterFrom(instr->GetLocations()->InAt(input_index),
                       instr->InputAt(input_index)->GetType());
 }
 
-static inline vixl::FPRegister DRegisterFrom(Location location) {
+static inline vixl::aarch64::FPRegister DRegisterFrom(Location location) {
   DCHECK(location.IsFpuRegister()) << location;
-  return vixl::FPRegister::DRegFromCode(location.reg());
+  return vixl::aarch64::FPRegister::GetDRegFromCode(location.reg());
 }
 
-static inline vixl::FPRegister SRegisterFrom(Location location) {
+static inline vixl::aarch64::FPRegister SRegisterFrom(Location location) {
   DCHECK(location.IsFpuRegister()) << location;
-  return vixl::FPRegister::SRegFromCode(location.reg());
+  return vixl::aarch64::FPRegister::GetSRegFromCode(location.reg());
 }
 
-static inline vixl::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
+static inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
   DCHECK(Primitive::IsFloatingPointType(type)) << type;
   return type == Primitive::kPrimDouble ? DRegisterFrom(location) : SRegisterFrom(location);
 }
 
-static inline vixl::FPRegister OutputFPRegister(HInstruction* instr) {
+static inline vixl::aarch64::FPRegister OutputFPRegister(HInstruction* instr) {
   return FPRegisterFrom(instr->GetLocations()->Out(), instr->GetType());
 }
 
-static inline vixl::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) {
+static inline vixl::aarch64::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) {
   return FPRegisterFrom(instr->GetLocations()->InAt(input_index),
                         instr->InputAt(input_index)->GetType());
 }
 
-static inline vixl::CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
-  return Primitive::IsFloatingPointType(type) ? vixl::CPURegister(FPRegisterFrom(location, type))
-                                              : vixl::CPURegister(RegisterFrom(location, type));
+static inline vixl::aarch64::CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
+  return Primitive::IsFloatingPointType(type)
+      ? vixl::aarch64::CPURegister(FPRegisterFrom(location, type))
+      : vixl::aarch64::CPURegister(RegisterFrom(location, type));
 }
 
-static inline vixl::CPURegister OutputCPURegister(HInstruction* instr) {
+static inline vixl::aarch64::CPURegister OutputCPURegister(HInstruction* instr) {
   return Primitive::IsFloatingPointType(instr->GetType())
-      ? static_cast<vixl::CPURegister>(OutputFPRegister(instr))
-      : static_cast<vixl::CPURegister>(OutputRegister(instr));
+      ? static_cast<vixl::aarch64::CPURegister>(OutputFPRegister(instr))
+      : static_cast<vixl::aarch64::CPURegister>(OutputRegister(instr));
 }
 
-static inline vixl::CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
+static inline vixl::aarch64::CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
   return Primitive::IsFloatingPointType(instr->InputAt(index)->GetType())
-      ? static_cast<vixl::CPURegister>(InputFPRegisterAt(instr, index))
-      : static_cast<vixl::CPURegister>(InputRegisterAt(instr, index));
+      ? static_cast<vixl::aarch64::CPURegister>(InputFPRegisterAt(instr, index))
+      : static_cast<vixl::aarch64::CPURegister>(InputRegisterAt(instr, index));
 }
 
 static inline int64_t Int64ConstantFrom(Location location) {
@@ -129,63 +131,70 @@
   }
 }
 
-static inline vixl::Operand OperandFrom(Location location, Primitive::Type type) {
+static inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::Type type) {
   if (location.IsRegister()) {
-    return vixl::Operand(RegisterFrom(location, type));
+    return vixl::aarch64::Operand(RegisterFrom(location, type));
   } else {
-    return vixl::Operand(Int64ConstantFrom(location));
+    return vixl::aarch64::Operand(Int64ConstantFrom(location));
   }
 }
 
-static inline vixl::Operand InputOperandAt(HInstruction* instr, int input_index) {
+static inline vixl::aarch64::Operand InputOperandAt(HInstruction* instr, int input_index) {
   return OperandFrom(instr->GetLocations()->InAt(input_index),
                      instr->InputAt(input_index)->GetType());
 }
 
-static inline vixl::MemOperand StackOperandFrom(Location location) {
-  return vixl::MemOperand(vixl::sp, location.GetStackIndex());
+static inline vixl::aarch64::MemOperand StackOperandFrom(Location location) {
+  return vixl::aarch64::MemOperand(vixl::aarch64::sp, location.GetStackIndex());
 }
 
-static inline vixl::MemOperand HeapOperand(const vixl::Register& base, size_t offset = 0) {
+static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+                                                    size_t offset = 0) {
   // A heap reference must be 32bit, so fit in a W register.
   DCHECK(base.IsW());
-  return vixl::MemOperand(base.X(), offset);
+  return vixl::aarch64::MemOperand(base.X(), offset);
 }
 
-static inline vixl::MemOperand HeapOperand(const vixl::Register& base,
-                                           const vixl::Register& regoffset,
-                                           vixl::Shift shift = vixl::LSL,
-                                           unsigned shift_amount = 0) {
+static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+                                                    const vixl::aarch64::Register& regoffset,
+                                                    vixl::aarch64::Shift shift = vixl::aarch64::LSL,
+                                                    unsigned shift_amount = 0) {
   // A heap reference must be 32bit, so fit in a W register.
   DCHECK(base.IsW());
-  return vixl::MemOperand(base.X(), regoffset, shift, shift_amount);
+  return vixl::aarch64::MemOperand(base.X(), regoffset, shift, shift_amount);
 }
 
-static inline vixl::MemOperand HeapOperand(const vixl::Register& base, Offset offset) {
+static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+                                                    Offset offset) {
   return HeapOperand(base, offset.SizeValue());
 }
 
-static inline vixl::MemOperand HeapOperandFrom(Location location, Offset offset) {
+static inline vixl::aarch64::MemOperand HeapOperandFrom(Location location, Offset offset) {
   return HeapOperand(RegisterFrom(location, Primitive::kPrimNot), offset);
 }
 
-static inline Location LocationFrom(const vixl::Register& reg) {
-  return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.code()));
+static inline Location LocationFrom(const vixl::aarch64::Register& reg) {
+  return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.GetCode()));
 }
 
-static inline Location LocationFrom(const vixl::FPRegister& fpreg) {
-  return Location::FpuRegisterLocation(fpreg.code());
+static inline Location LocationFrom(const vixl::aarch64::FPRegister& fpreg) {
+  return Location::FpuRegisterLocation(fpreg.GetCode());
 }
 
-static inline vixl::Operand OperandFromMemOperand(const vixl::MemOperand& mem_op) {
+static inline vixl::aarch64::Operand OperandFromMemOperand(
+    const vixl::aarch64::MemOperand& mem_op) {
   if (mem_op.IsImmediateOffset()) {
-    return vixl::Operand(mem_op.offset());
+    return vixl::aarch64::Operand(mem_op.GetOffset());
   } else {
     DCHECK(mem_op.IsRegisterOffset());
-    if (mem_op.extend() != vixl::NO_EXTEND) {
-      return vixl::Operand(mem_op.regoffset(), mem_op.extend(), mem_op.shift_amount());
-    } else if (mem_op.shift() != vixl::NO_SHIFT) {
-      return vixl::Operand(mem_op.regoffset(), mem_op.shift(), mem_op.shift_amount());
+    if (mem_op.GetExtend() != vixl::aarch64::NO_EXTEND) {
+      return vixl::aarch64::Operand(mem_op.GetRegisterOffset(),
+                                    mem_op.GetExtend(),
+                                    mem_op.GetShiftAmount());
+    } else if (mem_op.GetShift() != vixl::aarch64::NO_SHIFT) {
+      return vixl::aarch64::Operand(mem_op.GetRegisterOffset(),
+                                    mem_op.GetShift(),
+                                    mem_op.GetShiftAmount());
     } else {
       LOG(FATAL) << "Should not reach here";
       UNREACHABLE();
@@ -212,13 +221,13 @@
 
   if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
     // Uses logical operations.
-    return vixl::Assembler::IsImmLogical(value, vixl::kXRegSize);
+    return vixl::aarch64::Assembler::IsImmLogical(value, vixl::aarch64::kXRegSize);
   } else if (instr->IsNeg()) {
     // Uses mov -immediate.
-    return vixl::Assembler::IsImmMovn(value, vixl::kXRegSize);
+    return vixl::aarch64::Assembler::IsImmMovn(value, vixl::aarch64::kXRegSize);
   } else {
     DCHECK(instr->IsAdd() ||
-           instr->IsArm64IntermediateAddress() ||
+           instr->IsIntermediateAddress() ||
            instr->IsBoundsCheck() ||
            instr->IsCompare() ||
            instr->IsCondition() ||
@@ -227,7 +236,8 @@
     // Uses aliases of ADD/SUB instructions.
     // If `value` does not fit but `-value` does, VIXL will automatically use
     // the 'opposite' instruction.
-    return vixl::Assembler::IsImmAddSub(value) || vixl::Assembler::IsImmAddSub(-value);
+    return vixl::aarch64::Assembler::IsImmAddSub(value)
+        || vixl::aarch64::Assembler::IsImmAddSub(-value);
   }
 }
 
@@ -263,30 +273,30 @@
   return true;
 }
 
-static inline vixl::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+static inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
   switch (op_kind) {
-    case HArm64DataProcWithShifterOp::kASR: return vixl::ASR;
-    case HArm64DataProcWithShifterOp::kLSL: return vixl::LSL;
-    case HArm64DataProcWithShifterOp::kLSR: return vixl::LSR;
+    case HArm64DataProcWithShifterOp::kASR: return vixl::aarch64::ASR;
+    case HArm64DataProcWithShifterOp::kLSL: return vixl::aarch64::LSL;
+    case HArm64DataProcWithShifterOp::kLSR: return vixl::aarch64::LSR;
     default:
       LOG(FATAL) << "Unexpected op kind " << op_kind;
       UNREACHABLE();
-      return vixl::NO_SHIFT;
+      return vixl::aarch64::NO_SHIFT;
   }
 }
 
-static inline vixl::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+static inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
   switch (op_kind) {
-    case HArm64DataProcWithShifterOp::kUXTB: return vixl::UXTB;
-    case HArm64DataProcWithShifterOp::kUXTH: return vixl::UXTH;
-    case HArm64DataProcWithShifterOp::kUXTW: return vixl::UXTW;
-    case HArm64DataProcWithShifterOp::kSXTB: return vixl::SXTB;
-    case HArm64DataProcWithShifterOp::kSXTH: return vixl::SXTH;
-    case HArm64DataProcWithShifterOp::kSXTW: return vixl::SXTW;
+    case HArm64DataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB;
+    case HArm64DataProcWithShifterOp::kUXTH: return vixl::aarch64::UXTH;
+    case HArm64DataProcWithShifterOp::kUXTW: return vixl::aarch64::UXTW;
+    case HArm64DataProcWithShifterOp::kSXTB: return vixl::aarch64::SXTB;
+    case HArm64DataProcWithShifterOp::kSXTH: return vixl::aarch64::SXTH;
+    case HArm64DataProcWithShifterOp::kSXTW: return vixl::aarch64::SXTW;
     default:
       LOG(FATAL) << "Unexpected op kind " << op_kind;
       UNREACHABLE();
-      return vixl::NO_EXTEND;
+      return vixl::aarch64::NO_EXTEND;
   }
 }
 
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 49cfff4..e1bde7c 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -88,13 +88,207 @@
   }
 }
 
-void HDeadCodeElimination::RemoveDeadBlocks() {
-  if (graph_->HasIrreducibleLoops()) {
-    // Do not eliminate dead blocks if the graph has irreducible loops. We could
-    // support it, but that would require changes in our loop representation to handle
-    // multiple entry points. We decided it was not worth the complexity.
-    return;
+void HDeadCodeElimination::MaybeRecordSimplifyIf() {
+  if (stats_ != nullptr) {
+    stats_->RecordStat(MethodCompilationStat::kSimplifyIf);
   }
+}
+
+static bool HasInput(HCondition* instruction, HInstruction* input) {
+  return (instruction->InputAt(0) == input) ||
+         (instruction->InputAt(1) == input);
+}
+
+static bool HasEquality(IfCondition condition) {
+  switch (condition) {
+    case kCondEQ:
+    case kCondLE:
+    case kCondGE:
+    case kCondBE:
+    case kCondAE:
+      return true;
+    case kCondNE:
+    case kCondLT:
+    case kCondGT:
+    case kCondB:
+    case kCondA:
+      return false;
+  }
+}
+
+static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstruction* right) {
+  if (left == right && !Primitive::IsFloatingPointType(left->GetType())) {
+    return condition->GetBlock()->GetGraph()->GetIntConstant(
+        HasEquality(condition->GetCondition()) ? 1 : 0);
+  }
+
+  if (!left->IsConstant() || !right->IsConstant()) {
+    return nullptr;
+  }
+
+  if (left->IsIntConstant()) {
+    return condition->Evaluate(left->AsIntConstant(), right->AsIntConstant());
+  } else if (left->IsNullConstant()) {
+    return condition->Evaluate(left->AsNullConstant(), right->AsNullConstant());
+  } else if (left->IsLongConstant()) {
+    return condition->Evaluate(left->AsLongConstant(), right->AsLongConstant());
+  } else if (left->IsFloatConstant()) {
+    return condition->Evaluate(left->AsFloatConstant(), right->AsFloatConstant());
+  } else {
+    DCHECK(left->IsDoubleConstant());
+    return condition->Evaluate(left->AsDoubleConstant(), right->AsDoubleConstant());
+  }
+}
+
+// Simplify the pattern:
+//
+//        B1    B2    ...
+//       goto  goto  goto
+//         \    |    /
+//          \   |   /
+//             B3
+//     i1 = phi(input, input)
+//     (i2 = condition on i1)
+//        if i1 (or i2)
+//          /     \
+//         /       \
+//        B4       B5
+//
+// Into:
+//
+//       B1      B2    ...
+//        |      |      |
+//       B4      B5    B?
+//
+// This simplification cannot be applied to loop headers, as they
+// contain a suspend check.
+//
+// Note that we rely on the dead code elimination to get rid of B3.
+bool HDeadCodeElimination::SimplifyIfs() {
+  bool simplified_one_or_more_ifs = false;
+  bool rerun_dominance_and_loop_analysis = false;
+
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    HInstruction* last = block->GetLastInstruction();
+    HInstruction* first = block->GetFirstInstruction();
+    if (last->IsIf() &&
+        block->HasSinglePhi() &&
+        block->GetFirstPhi()->HasOnlyOneNonEnvironmentUse()) {
+      bool has_only_phi_and_if = (last == first) && (last->InputAt(0) == block->GetFirstPhi());
+      bool has_only_phi_condition_and_if =
+          !has_only_phi_and_if &&
+          first->IsCondition() &&
+          HasInput(first->AsCondition(), block->GetFirstPhi()) &&
+          (first->GetNext() == last) &&
+          (last->InputAt(0) == first) &&
+          first->HasOnlyOneNonEnvironmentUse();
+
+      if (has_only_phi_and_if || has_only_phi_condition_and_if) {
+        DCHECK(!block->IsLoopHeader());
+        HPhi* phi = block->GetFirstPhi()->AsPhi();
+        bool phi_input_is_left = (first->InputAt(0) == phi);
+
+        // Walk over all inputs of the phis and update the control flow of
+        // predecessors feeding constants to the phi.
+        // Note that phi->InputCount() may change inside the loop.
+        for (size_t i = 0; i < phi->InputCount();) {
+          HInstruction* input = phi->InputAt(i);
+          HInstruction* value_to_check = nullptr;
+          if (has_only_phi_and_if) {
+            if (input->IsIntConstant()) {
+              value_to_check = input;
+            }
+          } else {
+            DCHECK(has_only_phi_condition_and_if);
+            if (phi_input_is_left) {
+              value_to_check = Evaluate(first->AsCondition(), input, first->InputAt(1));
+            } else {
+              value_to_check = Evaluate(first->AsCondition(), first->InputAt(0), input);
+            }
+          }
+          if (value_to_check == nullptr) {
+            // Could not evaluate to a constant, continue iterating over the inputs.
+            ++i;
+          } else {
+            HBasicBlock* predecessor_to_update = block->GetPredecessors()[i];
+            HBasicBlock* successor_to_update = nullptr;
+            if (value_to_check->AsIntConstant()->IsTrue()) {
+              successor_to_update = last->AsIf()->IfTrueSuccessor();
+            } else {
+              DCHECK(value_to_check->AsIntConstant()->IsFalse())
+                  << value_to_check->AsIntConstant()->GetValue();
+              successor_to_update = last->AsIf()->IfFalseSuccessor();
+            }
+            predecessor_to_update->ReplaceSuccessor(block, successor_to_update);
+            phi->RemoveInputAt(i);
+            simplified_one_or_more_ifs = true;
+            if (block->IsInLoop()) {
+              rerun_dominance_and_loop_analysis = true;
+            }
+            // For simplicity, don't create a dead block, let the dead code elimination
+            // pass deal with it.
+            if (phi->InputCount() == 1) {
+              break;
+            }
+          }
+        }
+        if (block->GetPredecessors().size() == 1) {
+          phi->ReplaceWith(phi->InputAt(0));
+          block->RemovePhi(phi);
+          if (has_only_phi_condition_and_if) {
+            // Evaluate here (and not wait for a constant folding pass) to open
+            // more opportunities for DCE.
+            HInstruction* result = first->AsCondition()->TryStaticEvaluation();
+            if (result != nullptr) {
+              first->ReplaceWith(result);
+              block->RemoveInstruction(first);
+            }
+          }
+        }
+        if (simplified_one_or_more_ifs) {
+          MaybeRecordSimplifyIf();
+        }
+      }
+    }
+  }
+  // We need to re-analyze the graph in order to run DCE afterwards.
+  if (simplified_one_or_more_ifs) {
+    if (rerun_dominance_and_loop_analysis) {
+      graph_->ClearLoopInformation();
+      graph_->ClearDominanceInformation();
+      graph_->BuildDominatorTree();
+    } else {
+      graph_->ClearDominanceInformation();
+      // We have introduced critical edges, remove them.
+      graph_->SimplifyCFG();
+      graph_->ComputeDominanceInformation();
+      graph_->ComputeTryBlockInformation();
+    }
+  }
+
+  return simplified_one_or_more_ifs;
+}
+
+void HDeadCodeElimination::ConnectSuccessiveBlocks() {
+  // Order does not matter.
+  for (HReversePostOrderIterator it(*graph_); !it.Done();) {
+    HBasicBlock* block  = it.Current();
+    if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) {
+      it.Advance();
+      continue;
+    }
+    HBasicBlock* successor = block->GetSingleSuccessor();
+    if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
+      it.Advance();
+      continue;
+    }
+    block->MergeWith(successor);
+    // Reiterate on this block in case it can be merged with its new successor.
+  }
+}
+
+bool HDeadCodeElimination::RemoveDeadBlocks() {
   // Classify blocks as reachable/unreachable.
   ArenaAllocator* allocator = graph_->GetArena();
   ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false, kArenaAllocDCE);
@@ -132,23 +326,7 @@
       graph_->ComputeTryBlockInformation();
     }
   }
-
-  // Connect successive blocks created by dead branches. Order does not matter.
-  for (HReversePostOrderIterator it(*graph_); !it.Done();) {
-    HBasicBlock* block  = it.Current();
-    if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) {
-      it.Advance();
-      continue;
-    }
-    HBasicBlock* successor = block->GetSingleSuccessor();
-    if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
-      it.Advance();
-      continue;
-    }
-    block->MergeWith(successor);
-
-    // Reiterate on this block in case it can be merged with its new successor.
-  }
+  return removed_one_or_more_blocks;
 }
 
 void HDeadCodeElimination::RemoveDeadInstructions() {
@@ -181,7 +359,20 @@
 }
 
 void HDeadCodeElimination::Run() {
-  RemoveDeadBlocks();
+  // Do not eliminate dead blocks if the graph has irreducible loops. We could
+  // support it, but that would require changes in our loop representation to handle
+  // multiple entry points. We decided it was not worth the complexity.
+  if (!graph_->HasIrreducibleLoops()) {
+    // Simplify graph to generate more dead block patterns.
+    ConnectSuccessiveBlocks();
+    bool did_any_simplification = false;
+    did_any_simplification |= SimplifyIfs();
+    did_any_simplification |= RemoveDeadBlocks();
+    if (did_any_simplification) {
+      // Connect successive blocks created by dead branches.
+      ConnectSuccessiveBlocks();
+    }
+  }
   SsaRedundantPhiElimination(graph_).Run();
   RemoveDeadInstructions();
 }
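
A small example of the source shape that produces the phi + if pattern SimplifyIfs() targets (illustrative only; the block names follow the diagram in the comment above):

    // Source-level shape whose HIR contains the phi + if pattern: both
    // predecessors of the merge block feed a constant into the phi, so each
    // predecessor edge can be redirected straight to the matching successor
    // of the if, and the merge block itself becomes dead.
    #include <cstdio>

    int Describe(int x) {
      bool flag;
      if (x > 0) {
        flag = true;    // predecessor B1 contributes constant 1 to the phi
      } else {
        flag = false;   // predecessor B2 contributes constant 0 to the phi
      }
      // Merge block B3: phi(1, 0) followed by an if on that phi.
      if (flag) {
        return 1;
      }
      return -1;
    }

    int main() {
      std::printf("%d %d\n", Describe(5), Describe(-5));
      return 0;
    }
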
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 8d6008b..0ce0ec1 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -41,8 +41,11 @@
 
  private:
   void MaybeRecordDeadBlock(HBasicBlock* block);
-  void RemoveDeadBlocks();
+  void MaybeRecordSimplifyIf();
+  bool RemoveDeadBlocks();
   void RemoveDeadInstructions();
+  bool SimplifyIfs();
+  void ConnectSuccessiveBlocks();
 
   DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination);
 };
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 9d67373..0b4c569 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -31,7 +31,7 @@
 #include "nodes.h"
 #include "optimization.h"
 #include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
 
@@ -401,6 +401,9 @@
   void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
     StartAttributeStream("is_string_length") << std::boolalpha
         << array_length->IsStringLength() << std::noboolalpha;
+    if (array_length->IsEmittedAtUseSite()) {
+      StartAttributeStream("emitted_at_use") << "true";
+    }
   }
 
   void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 6c1292c..a592162 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -35,7 +35,7 @@
 #include "nodes.h"
 #include "optimizing_compiler.h"
 #include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "quick/inline_method_analyser.h"
 #include "sharpening.h"
 #include "ssa_builder.h"
@@ -208,12 +208,8 @@
     DCHECK(cls->IsProxyClass()) << PrettyClass(cls);
     // TODO: deal with proxy classes.
   } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
+    DCHECK_EQ(cls->GetDexCache(), dex_cache.Get());
     index = cls->GetDexTypeIndex();
-  } else {
-    index = cls->FindTypeIndexInOtherDexFile(dex_file);
-  }
-
-  if (index != DexFile::kDexNoIndex) {
     // Update the dex cache to ensure the class is in. The generated code will
     // consider it is. We make it safe by updating the dex cache, as other
     // dex files might also load the class, and there is no guarantee the dex
@@ -221,6 +217,14 @@
     if (dex_cache->GetResolvedType(index) == nullptr) {
       dex_cache->SetResolvedType(index, cls);
     }
+  } else {
+    index = cls->FindTypeIndexInOtherDexFile(dex_file);
+    // We cannot guarantee the entry in the dex cache will resolve to the same class,
+    // as there may be different class loaders. So only return the index if it's
+    // the right class in the dex cache already.
+    if (index != DexFile::kDexNoIndex && dex_cache->GetResolvedType(index) != cls) {
+      index = DexFile::kDexNoIndex;
+    }
   }
 
   return index;
@@ -273,7 +277,7 @@
       return false;
     }
     MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
-    mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file)
+    mirror::DexCache* const dex_cache = IsSameDexFile(caller_dex_file, *ref.dex_file)
         ? caller_compilation_unit_.GetDexCache().Get()
         : class_linker->FindDexCache(soa.Self(), *ref.dex_file);
     resolved_method = dex_cache->GetResolvedMethod(
@@ -657,7 +661,7 @@
     ArtMethod* new_method = nullptr;
     if (invoke_instruction->IsInvokeInterface()) {
       new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get(
-          method_index % ImTable::kSize, pointer_size);
+          method_index, pointer_size);
       if (new_method->IsRuntimeMethod()) {
         // Bail out as soon as we see a conflict trampoline in one of the target's
         // interface table.
@@ -804,8 +808,6 @@
 bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
                                  ArtMethod* method,
                                  HInstruction** return_replacement) {
-  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-
   if (method->IsProxyMethod()) {
     VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because of unimplemented inline support for proxy methods.";
@@ -828,15 +830,6 @@
     return false;
   }
 
-  uint32_t method_index = FindMethodIndexIn(
-      method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
-  if (method_index == DexFile::kDexNoIndex) {
-    VLOG(compiler) << "Call to "
-                   << PrettyMethod(method)
-                   << " cannot be inlined because unaccessible to caller";
-    return false;
-  }
-
   bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile());
 
   const DexFile::CodeItem* code_item = method->GetCodeItem();
@@ -873,7 +866,7 @@
     if (Runtime::Current()->UseJitCompilation() ||
         !compiler_driver_->IsMethodVerifiedWithoutFailures(
             method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
-      VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+      VLOG(compiler) << "Method " << PrettyMethod(method)
                      << " couldn't be verified, so it cannot be inlined";
       return false;
     }
@@ -883,7 +876,7 @@
       invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
     // Case of a static method that cannot be inlined because it implicitly
     // requires an initialization check of its declaring class.
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because it is static and requires a clinit"
                    << " check that cannot be emitted due to Dex cache limitations";
     return false;
@@ -893,7 +886,7 @@
     return false;
   }
 
-  VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, caller_dex_file);
+  VLOG(compiler) << "Successfully inlined " << PrettyMethod(method);
   MaybeRecordStat(kInlinedInvoke);
   return true;
 }
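
A hedged sketch of the guard the inliner now applies when the type index comes from a different dex file (not ART code: the unordered_map stands in for the dex cache's resolved-types array, kNoIndex for DexFile::kDexNoIndex): an index found in the other dex file is only usable if the cache already resolves it to exactly the same class, since other class loaders may have resolved that slot differently.

    // Toy model of the other-dex-file guard in FindMethodIndexIn's helper.
    #include <cassert>
    #include <cstdint>
    #include <unordered_map>

    constexpr uint32_t kNoIndex = 0xFFFFFFFFu;

    struct Class { int id; };

    uint32_t FindUsableTypeIndex(uint32_t index_in_other_dex_file,
                                 const Class* cls,
                                 const std::unordered_map<uint32_t, const Class*>& dex_cache) {
      if (index_in_other_dex_file == kNoIndex) {
        return kNoIndex;
      }
      // Another class loader may resolve this slot to a different class, so the
      // index is only trusted if the cache already holds exactly this class.
      auto it = dex_cache.find(index_in_other_dex_file);
      if (it == dex_cache.end() || it->second != cls) {
        return kNoIndex;
      }
      return index_in_other_dex_file;
    }

    int main() {
      Class a{1}, b{2};
      std::unordered_map<uint32_t, const Class*> cache{{7u, &a}};
      assert(FindUsableTypeIndex(7u, &a, cache) == 7u);        // same class: usable
      assert(FindUsableTypeIndex(7u, &b, cache) == kNoIndex);  // different class: rejected
      assert(FindUsableTypeIndex(9u, &a, cache) == kNoIndex);  // unresolved slot: rejected
      return 0;
    }
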
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index b412529..afac5f9 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -16,6 +16,7 @@
 
 #include "instruction_builder.h"
 
+#include "art_method-inl.h"
 #include "bytecode_utils.h"
 #include "class_linker.h"
 #include "driver/compiler_options.h"
@@ -890,7 +891,7 @@
                                            return_type,
                                            dex_pc,
                                            method_idx,
-                                           resolved_method->GetDexMethodIndex());
+                                           resolved_method->GetImtIndex());
   }
 
   return HandleInvoke(invoke,
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index e0410dc..4ca0600 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -920,6 +920,7 @@
 void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
   HConstant* input_cst = instruction->GetConstantRight();
   HInstruction* input_other = instruction->GetLeastConstantLeft();
+  bool integral_type = Primitive::IsIntegralType(instruction->GetType());
   if ((input_cst != nullptr) && input_cst->IsArithmeticZero()) {
     // Replace code looking like
     //    ADD dst, src, 0
@@ -928,7 +929,7 @@
     // Note that we cannot optimize `x + 0.0` to `x` for floating-point. When
     //    `x` is `-0.0`, the former expression yields `0.0`, while the latter
     // yields `-0.0`.
-    if (Primitive::IsIntegralType(instruction->GetType())) {
+    if (integral_type) {
       instruction->ReplaceWith(input_other);
       instruction->GetBlock()->RemoveInstruction(instruction);
       RecordSimplification();
@@ -974,10 +975,31 @@
   // so no need to return.
   TryHandleAssociativeAndCommutativeOperation(instruction);
 
-  if ((instruction->GetLeft()->IsSub() || instruction->GetRight()->IsSub()) &&
+  if ((left->IsSub() || right->IsSub()) &&
       TrySubtractionChainSimplification(instruction)) {
     return;
   }
+
+  if (integral_type) {
+    // Replace code patterns looking like
+    //    SUB dst1, x, y        SUB dst1, x, y
+    //    ADD dst2, dst1, y     ADD dst2, y, dst1
+    // with
+    //    SUB dst1, x, y
+    // The ADD instruction is not needed in this case; one of the
+    // inputs of the SUB can be used instead.
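+    // In source terms this relies on the identity (x - y) + y == x
+    // (equivalently y + (x - y) == x), which holds for integral
+    // (wrap-around) arithmetic but not for floating point.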
+    if (left->IsSub() && left->InputAt(1) == right) {
+      instruction->ReplaceWith(left->InputAt(0));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    } else if (right->IsSub() && right->InputAt(1) == left) {
+      instruction->ReplaceWith(right->InputAt(0));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    }
+  }
 }
 
 void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) {
@@ -1511,6 +1533,29 @@
   if (TrySubtractionChainSimplification(instruction)) {
     return;
   }
+
+  if (left->IsAdd()) {
+    // Replace code patterns looking like
+    //    ADD dst1, x, y        ADD dst1, x, y
+    //    SUB dst2, dst1, y     SUB dst2, dst1, x
+    // with
+    //    ADD dst1, x, y
+    // The SUB instruction is not needed in this case; one of the
+    // inputs of the ADD can be used instead.
+    // This is applicable to integral types only.
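+    // In source terms this relies on the identities (x + y) - y == x and
+    // (x + y) - x == y, which hold for integral (wrap-around) arithmetic
+    // but not for floating point.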
+    DCHECK(Primitive::IsIntegralType(type));
+    if (left->InputAt(1) == right) {
+      instruction->ReplaceWith(left->InputAt(0));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    } else if (left->InputAt(0) == right) {
+      instruction->ReplaceWith(left->InputAt(1));
+      RecordSimplification();
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    }
+  }
 }
 
 void InstructionSimplifierVisitor::VisitUShr(HUShr* instruction) {
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index cd026b8..495f3fd 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -14,8 +14,10 @@
  * limitations under the License.
  */
 
+#include "code_generator.h"
 #include "instruction_simplifier_arm.h"
 #include "instruction_simplifier_shared.h"
+#include "mirror/array-inl.h"
 
 namespace art {
 namespace arm {
@@ -38,6 +40,46 @@
   }
 }
 
+void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) {
+  size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+  Primitive::Type type = instruction->GetType();
+
+  if (type == Primitive::kPrimLong
+      || type == Primitive::kPrimFloat
+      || type == Primitive::kPrimDouble) {
+    // T32 doesn't support the ShiftedRegOffset memory addressing mode for
+    // these types, so this optimization cannot be applied.
+    return;
+  }
+
+  if (TryExtractArrayAccessAddress(instruction,
+                                   instruction->GetArray(),
+                                   instruction->GetIndex(),
+                                   data_offset)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) {
+  size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
+  size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
+  Primitive::Type type = instruction->GetComponentType();
+
+  if (type == Primitive::kPrimLong
+      || type == Primitive::kPrimFloat
+      || type == Primitive::kPrimDouble) {
+    // T32 doesn't support the ShiftedRegOffset memory addressing mode for
+    // these types, so this optimization cannot be applied.
+    return;
+  }
+
+  if (TryExtractArrayAccessAddress(instruction,
+                                   instruction->GetArray(),
+                                   instruction->GetIndex(),
+                                   data_offset)) {
+    RecordSimplification();
+  }
+}
 
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 14c940e..3d297da 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -38,6 +38,8 @@
   void VisitMul(HMul* instruction) OVERRIDE;
   void VisitOr(HOr* instruction) OVERRIDE;
   void VisitAnd(HAnd* instruction) OVERRIDE;
+  void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
+  void VisitArraySet(HArraySet* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 983d31d..6d107d5 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -28,56 +28,6 @@
 using helpers::HasShifterOperand;
 using helpers::ShifterOperandSupportsExtension;
 
-void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access,
-                                                                     HInstruction* array,
-                                                                     HInstruction* index,
-                                                                     size_t data_offset) {
-  if (kEmitCompilerReadBarrier) {
-    // The read barrier instrumentation does not support the
-    // HArm64IntermediateAddress instruction yet.
-    //
-    // TODO: Handle this case properly in the ARM64 code generator and
-    // re-enable this optimization; otherwise, remove this TODO.
-    // b/26601270
-    return;
-  }
-  if (index->IsConstant() ||
-      (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
-    // When the index is a constant all the addressing can be fitted in the
-    // memory access instruction, so do not split the access.
-    return;
-  }
-  if (access->IsArraySet() &&
-      access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
-    // The access may require a runtime call or the original array pointer.
-    return;
-  }
-
-  // Proceed to extract the base address computation.
-  ArenaAllocator* arena = GetGraph()->GetArena();
-
-  HIntConstant* offset = GetGraph()->GetIntConstant(data_offset);
-  HArm64IntermediateAddress* address =
-      new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc);
-  address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
-  access->GetBlock()->InsertInstructionBefore(address, access);
-  access->ReplaceInput(address, 0);
-  // Both instructions must depend on GC to prevent any instruction that can
-  // trigger GC to be inserted between the two.
-  access->AddSideEffects(SideEffects::DependsOnGC());
-  DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
-  DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
-  // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
-  // is an HArm64IntermediateAddress and generate appropriate code.
-  // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
-  // `HArm64Load` and `HArm64Store`). We defer these changes because these new instructions would
-  // not bring any advantages yet.
-  // Also see the comments in
-  // `InstructionCodeGeneratorARM64::VisitArrayGet()` and
-  // `InstructionCodeGeneratorARM64::VisitArraySet()`.
-  RecordSimplification();
-}
-
 bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
                                                                    HInstruction* bitfield_op,
                                                                    bool do_merge) {
@@ -190,19 +140,23 @@
 
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
   size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
-  TryExtractArrayAccessAddress(instruction,
-                               instruction->GetArray(),
-                               instruction->GetIndex(),
-                               data_offset);
+  if (TryExtractArrayAccessAddress(instruction,
+                                   instruction->GetArray(),
+                                   instruction->GetIndex(),
+                                   data_offset)) {
+    RecordSimplification();
+  }
 }
 
 void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) {
   size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
   size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
-  TryExtractArrayAccessAddress(instruction,
-                               instruction->GetArray(),
-                               instruction->GetIndex(),
-                               data_offset);
+  if (TryExtractArrayAccessAddress(instruction,
+                                   instruction->GetArray(),
+                                   instruction->GetIndex(),
+                                   data_offset)) {
+    RecordSimplification();
+  }
 }
 
 void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 4735f85..28648b3 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -35,10 +35,6 @@
     }
   }
 
-  void TryExtractArrayAccessAddress(HInstruction* access,
-                                    HInstruction* array,
-                                    HInstruction* index,
-                                    size_t data_offset);
   bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
   bool TryMergeIntoShifterOperand(HInstruction* use,
                                   HInstruction* bitfield_op,
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index dab1ebc..8f7778f 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -226,4 +226,59 @@
   return false;
 }
 
+
+bool TryExtractArrayAccessAddress(HInstruction* access,
+                                  HInstruction* array,
+                                  HInstruction* index,
+                                  size_t data_offset) {
+  if (kEmitCompilerReadBarrier) {
+    // The read barrier instrumentation does not support the
+    // HIntermediateAddress instruction yet.
+    //
+    // TODO: Handle this case properly in the ARM64 and ARM code generators and
+    // re-enable this optimization; otherwise, remove this TODO.
+    // b/26601270
+    return false;
+  }
+  if (index->IsConstant() ||
+      (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
+    // When the index is a constant all the addressing can be fitted in the
+    // memory access instruction, so do not split the access.
+    return false;
+  }
+  if (access->IsArraySet() &&
+      access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
+    // The access may require a runtime call or the original array pointer.
+    return false;
+  }
+
+  // Proceed to extract the base address computation.
+  HGraph* graph = access->GetBlock()->GetGraph();
+  ArenaAllocator* arena = graph->GetArena();
+
+  HIntConstant* offset = graph->GetIntConstant(data_offset);
+  HIntermediateAddress* address =
+      new (arena) HIntermediateAddress(array, offset, kNoDexPc);
+  address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
+  access->GetBlock()->InsertInstructionBefore(address, access);
+  access->ReplaceInput(address, 0);
+  // Both instructions must depend on GC to prevent any instruction that can
+  // trigger GC to be inserted between the two.
+  access->AddSideEffects(SideEffects::DependsOnGC());
+  DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+  DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+  // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
+  // is an HIntermediateAddress and generate appropriate code.
+  // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
+  // `HArm64Load` and `HArm64Store`, `HArmLoad` and `HArmStore`). We defer these changes
+  // because these new instructions would not bring any advantages yet.
+  // Also see the comments in
+  // `InstructionCodeGeneratorARM::VisitArrayGet()`
+  // `InstructionCodeGeneratorARM::VisitArraySet()`
+  // `InstructionCodeGeneratorARM64::VisitArrayGet()`
+  // `InstructionCodeGeneratorARM64::VisitArraySet()`.
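+  // Schematically, the rewrite turns
+  //     ArrayGet/ArraySet [array, index, ...]
+  // into
+  //     address = IntermediateAddress [array, data_offset]
+  //     ArrayGet/ArraySet [address, index, ...]
+  // leaving the code generators to fold the address computation into the
+  // memory access.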
+  return true;
+}
+
+
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index b1fe8f4..56804f5 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -26,6 +26,11 @@
 // a negated bitwise instruction.
 bool TryMergeNegatedInput(HBinaryOperation* op);
 
+bool TryExtractArrayAccessAddress(HInstruction* access,
+                                  HInstruction* array,
+                                  HInstruction* index,
+                                  size_t data_offset);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 579fb9d..5ab9389 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1212,7 +1212,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1232,7 +1232,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1250,7 +1250,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1280,7 +1280,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1307,7 +1307,7 @@
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1665,7 +1665,7 @@
   DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
 
   LocationSummary* const locations = new (arena) LocationSummary(invoke,
-                                                                 LocationSummary::kCall,
+                                                                 LocationSummary::kCallOnMainOnly,
                                                                  kIntrinsified);
   const InvokeRuntimeCallingConvention calling_convention;
 
@@ -1692,7 +1692,7 @@
   DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
 
   LocationSummary* const locations = new (arena) LocationSummary(invoke,
-                                                                 LocationSummary::kCall,
+                                                                 LocationSummary::kCallOnMainOnly,
                                                                  kIntrinsified);
   const InvokeRuntimeCallingConvention calling_convention;
 
@@ -1979,6 +1979,51 @@
   __ revsh(out, in);
 }
 
+static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmAssembler* assembler) {
+  DCHECK(Primitive::IsIntOrLongType(type)) << type;
+  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
+  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
+
+  bool is_long = type == Primitive::kPrimLong;
+  LocationSummary* locations = instr->GetLocations();
+  Location in = locations->InAt(0);
+  Register src_0 = is_long ? in.AsRegisterPairLow<Register>() : in.AsRegister<Register>();
+  Register src_1 = is_long ? in.AsRegisterPairHigh<Register>() : src_0;
+  SRegister tmp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
+  DRegister tmp_d = FromLowSToD(tmp_s);
+  Register  out_r = locations->Out().AsRegister<Register>();
+
+  // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
+  // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
+  // transferring data from a core reg to the upper or lower half of a VFP D-reg requires extra latency.
+  // That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
+  __ vmovdrr(tmp_d, src_1, src_0);                         // Temp DReg |--src_1|--src_0|
+  __ vcntd(tmp_d, tmp_d);                                  // Temp DReg |c|c|c|c|c|c|c|c|
+  __ vpaddld(tmp_d, tmp_d, 8, /* is_unsigned */ true);     // Temp DReg |--c|--c|--c|--c|
+  __ vpaddld(tmp_d, tmp_d, 16, /* is_unsigned */ true);    // Temp DReg |------c|------c|
+  if (is_long) {
+    __ vpaddld(tmp_d, tmp_d, 32, /* is_unsigned */ true);  // Temp DReg |--------------c|
+  }
+  __ vmovrs(out_r, tmp_s);
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongBitCount(HInvoke* invoke) {
+  VisitIntegerBitCount(invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
+}
+
 void IntrinsicLocationsBuilderARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
@@ -2119,8 +2164,6 @@
   __ Lsr(out, out, 5);
 }
 
-UNIMPLEMENTED_INTRINSIC(ARM, IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(ARM, LongBitCount)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 1d50753..987d3f8 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -28,10 +28,14 @@
 #include "utils/arm64/assembler_arm64.h"
 #include "utils/arm64/constants_arm64.h"
 
-#include "vixl/a64/disasm-a64.h"
-#include "vixl/a64/macro-assembler-a64.h"
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
-using namespace vixl;   // NOLINT(build/namespaces)
+// TODO: make vixl clean wrt -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "a64/disasm-a64.h"
+#include "a64/macro-assembler-a64.h"
+#pragma GCC diagnostic pop
 
 namespace art {
 
@@ -57,7 +61,7 @@
 
 }  // namespace
 
-vixl::MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
+MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
   return codegen_->GetAssembler()->vixl_masm_;
 }
 
@@ -170,14 +174,14 @@
   locations->SetOut(Location::RequiresFpuRegister());
 }
 
-static void MoveFPToInt(LocationSummary* locations, bool is64bit, vixl::MacroAssembler* masm) {
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
   Location input = locations->InAt(0);
   Location output = locations->Out();
   __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
           is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
 }
 
-static void MoveIntToFP(LocationSummary* locations, bool is64bit, vixl::MacroAssembler* masm) {
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
   Location input = locations->InAt(0);
   Location output = locations->Out();
   __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
@@ -222,7 +226,7 @@
 
 static void GenReverseBytes(LocationSummary* locations,
                             Primitive::Type type,
-                            vixl::MacroAssembler* masm) {
+                            MacroAssembler* masm) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
 
@@ -276,7 +280,7 @@
 
 static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                     Primitive::Type type,
-                                    vixl::MacroAssembler* masm) {
+                                    MacroAssembler* masm) {
   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   Location in = locations->InAt(0);
@@ -303,7 +307,7 @@
 
 static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                      Primitive::Type type,
-                                     vixl::MacroAssembler* masm) {
+                                     MacroAssembler* masm) {
   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   Location in = locations->InAt(0);
@@ -331,7 +335,7 @@
 
 static void GenReverse(LocationSummary* locations,
                        Primitive::Type type,
-                       vixl::MacroAssembler* masm) {
+                       MacroAssembler* masm) {
   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   Location in = locations->InAt(0);
@@ -356,7 +360,7 @@
   GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
 }
 
-static void GenBitCount(HInvoke* instr, Primitive::Type type, vixl::MacroAssembler* masm) {
+static void GenBitCount(HInvoke* instr, Primitive::Type type, MacroAssembler* masm) {
   DCHECK(Primitive::IsIntOrLongType(type)) << type;
   DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
   DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
@@ -397,7 +401,7 @@
   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
 }
 
-static void MathAbsFP(LocationSummary* locations, bool is64bit, vixl::MacroAssembler* masm) {
+static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
 
@@ -433,7 +437,7 @@
 
 static void GenAbsInteger(LocationSummary* locations,
                           bool is64bit,
-                          vixl::MacroAssembler* masm) {
+                          MacroAssembler* masm) {
   Location in = locations->InAt(0);
   Location output = locations->Out();
 
@@ -463,7 +467,7 @@
 static void GenMinMaxFP(LocationSummary* locations,
                         bool is_min,
                         bool is_double,
-                        vixl::MacroAssembler* masm) {
+                        MacroAssembler* masm) {
   Location op1 = locations->InAt(0);
   Location op2 = locations->InAt(1);
   Location out = locations->Out();
@@ -523,7 +527,7 @@
 static void GenMinMax(LocationSummary* locations,
                       bool is_min,
                       bool is_long,
-                      vixl::MacroAssembler* masm) {
+                      MacroAssembler* masm) {
   Location op1 = locations->InAt(0);
   Location op2 = locations->InAt(1);
   Location out = locations->Out();
@@ -574,7 +578,7 @@
 
 void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
@@ -584,7 +588,7 @@
 
 void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
@@ -594,7 +598,7 @@
 
 void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
@@ -604,7 +608,7 @@
 
 void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
@@ -617,7 +621,7 @@
   locations->AddTemp(Location::RequiresFpuRegister());
 }
 
-static void GenMathRound(HInvoke* invoke, bool is_double, vixl::MacroAssembler* masm) {
+static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
   // Java 8 API definition for Math.round():
   // Return the closest long or int to the argument, with ties rounding to positive infinity.
   //
@@ -635,13 +639,13 @@
   FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
   FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
   Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
-  vixl::Label done;
+  vixl::aarch64::Label done;
 
   // Round to nearest integer, ties away from zero.
   __ Fcvtas(out_reg, in_reg);
 
   // For positive values, zero or NaN inputs, rounding is done.
-  __ Tbz(out_reg, out_reg.size() - 1, &done);
+  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);
 
   // Handle input < 0 cases.
   // If input is negative but not a tie, previous result (round to nearest) is valid.
@@ -675,7 +679,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -685,7 +689,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -695,7 +699,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -705,7 +709,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
            AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -723,7 +727,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -733,7 +737,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -743,7 +747,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -753,7 +757,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
 }
@@ -778,7 +782,7 @@
   DCHECK((type == Primitive::kPrimInt) ||
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot));
-  vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+  MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
   Location base_loc = locations->InAt(1);
   Register base = WRegisterFrom(base_loc);      // Object pointer.
   Location offset_loc = locations->InAt(2);
@@ -912,7 +916,7 @@
                          bool is_volatile,
                          bool is_ordered,
                          CodeGeneratorARM64* codegen) {
-  vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+  MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
 
   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
@@ -1031,7 +1035,7 @@
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
-  vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+  MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
 
   Register out = WRegisterFrom(locations->Out());                  // Boolean result.
 
@@ -1070,7 +1074,7 @@
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
   // result = tmp_value != 0;
 
-  vixl::Label loop_head, exit_loop;
+  vixl::aarch64::Label loop_head, exit_loop;
   __ Bind(&loop_head);
   // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
   // the reference stored in the object before attempting the CAS,
@@ -1154,7 +1158,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register str = XRegisterFrom(locations->InAt(0));
@@ -1165,9 +1169,9 @@
   Register temp1 = WRegisterFrom(locations->GetTemp(1));
   Register temp2 = WRegisterFrom(locations->GetTemp(2));
 
-  vixl::Label loop;
-  vixl::Label find_char_diff;
-  vixl::Label end;
+  vixl::aarch64::Label loop;
+  vixl::aarch64::Label find_char_diff;
+  vixl::aarch64::Label end;
 
   // Get offsets of count and value fields within a string object.
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1269,7 +1273,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register str = WRegisterFrom(locations->InAt(0));
@@ -1281,10 +1285,10 @@
   Register temp1 = WRegisterFrom(locations->GetTemp(0));
   Register temp2 = WRegisterFrom(locations->GetTemp(1));
 
-  vixl::Label loop;
-  vixl::Label end;
-  vixl::Label return_true;
-  vixl::Label return_false;
+  vixl::aarch64::Label loop;
+  vixl::aarch64::Label end;
+  vixl::aarch64::Label return_true;
+  vixl::aarch64::Label return_false;
 
   // Get offsets of count, value, and class fields within a string object.
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1357,7 +1361,7 @@
 }
 
 static void GenerateVisitStringIndexOf(HInvoke* invoke,
-                                       vixl::MacroAssembler* masm,
+                                       MacroAssembler* masm,
                                        CodeGeneratorARM64* codegen,
                                        ArenaAllocator* allocator,
                                        bool start_at_zero) {
@@ -1405,7 +1409,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1425,7 +1429,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
   // best to align the inputs accordingly.
@@ -1443,7 +1447,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1454,7 +1458,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register byte_array = WRegisterFrom(locations->InAt(0));
@@ -1473,7 +1477,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1483,7 +1487,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
 
   // No need to emit code checking whether `locations->InAt(2)` is a null
   // pointer, as callers of the native method
@@ -1500,7 +1504,7 @@
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1508,7 +1512,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   Register string_to_copy = WRegisterFrom(locations->InAt(0));
@@ -1531,7 +1535,7 @@
   DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
 
   LocationSummary* const locations = new (arena) LocationSummary(invoke,
-                                                                 LocationSummary::kCall,
+                                                                 LocationSummary::kCallOnMainOnly,
                                                                  kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
 
@@ -1546,7 +1550,7 @@
   DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
 
   LocationSummary* const locations = new (arena) LocationSummary(invoke,
-                                                                 LocationSummary::kCall,
+                                                                 LocationSummary::kCallOnMainOnly,
                                                                  kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
 
@@ -1556,7 +1560,7 @@
 }
 
 static void GenFPToFPCall(HInvoke* invoke,
-                          vixl::MacroAssembler* masm,
+                          MacroAssembler* masm,
                           CodeGeneratorARM64* codegen,
                           QuickEntrypointEnum entry) {
   __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64WordSize>(entry).Int32Value()));
@@ -1716,7 +1720,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   // Check assumption that sizeof(Char) is 2 (used in scaling below).
@@ -1756,9 +1760,9 @@
   __ Sub(num_chr, srcEnd, srcBegin);
 
   // Do the copy.
-  vixl::Label loop;
-  vixl::Label done;
-  vixl::Label remainder;
+  vixl::aarch64::Label loop;
+  vixl::aarch64::Label done;
+  vixl::aarch64::Label remainder;
 
   // Early out for valid zero-length retrievals.
   __ Cbz(num_chr, &done);
@@ -1773,9 +1777,9 @@
   // Main loop used for longer fetches loads and stores 8x16-bit characters at a time.
   // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
   __ Bind(&loop);
-  __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, vixl::PostIndex));
+  __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
   __ Subs(num_chr, num_chr, 8);
-  __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, vixl::PostIndex));
+  __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
   __ B(ge, &loop);
 
   __ Adds(num_chr, num_chr, 8);
@@ -1784,9 +1788,9 @@
   // Main loop for < 8 character case and remainder handling. Loads and stores one
   // 16-bit Java character at a time.
   __ Bind(&remainder);
-  __ Ldrh(tmp1, MemOperand(src_ptr, char_size, vixl::PostIndex));
+  __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
   __ Subs(num_chr, num_chr, 1);
-  __ Strh(tmp1, MemOperand(dst_ptr, char_size, vixl::PostIndex));
+  __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
   __ B(gt, &remainder);
 
   __ Bind(&done);
@@ -1800,7 +1804,7 @@
                                                uint32_t at,
                                                HInstruction* input) {
   HIntConstant* const_input = input->AsIntConstant();
-  if (const_input != nullptr && !vixl::Assembler::IsImmAddSub(const_input->GetValue())) {
+  if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
     locations->SetInAt(at, Location::RequiresRegister());
   } else {
     locations->SetInAt(at, Location::RegisterOrConstant(input));
@@ -1847,7 +1851,7 @@
   locations->AddTemp(Location::RequiresRegister());
 }
 
-static void CheckSystemArrayCopyPosition(vixl::MacroAssembler* masm,
+static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
                                          const Location& pos,
                                          const Register& input,
                                          const Location& length,
@@ -1880,7 +1884,7 @@
   } else {
     // Check that pos >= 0.
     Register pos_reg = WRegisterFrom(pos);
-    __ Tbnz(pos_reg, pos_reg.size() - 1, slow_path->GetEntryLabel());
+    __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
 
     // Check that pos <= length(input) && (length(input) - pos) >= length.
     __ Ldr(temp, MemOperand(input, length_offset));
@@ -1893,7 +1897,7 @@
 
 // Compute base source address, base destination address, and end source address
 // for System.arraycopy* intrinsics.
-static void GenSystemArrayCopyAddresses(vixl::MacroAssembler* masm,
+static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
                                         Primitive::Type type,
                                         const Register& src,
                                         const Location& src_pos,
@@ -1934,7 +1938,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
   Register src = XRegisterFrom(locations->InAt(0));
   Location src_pos = locations->InAt(1);
@@ -2007,12 +2011,12 @@
   const int32_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   UseScratchRegisterScope temps(masm);
   Register tmp = temps.AcquireW();
-  vixl::Label loop, done;
+  vixl::aarch64::Label loop, done;
   __ Bind(&loop);
   __ Cmp(src_curr_addr, src_stop_addr);
   __ B(&done, eq);
-  __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, vixl::PostIndex));
-  __ Strh(tmp, MemOperand(dst_curr_addr, char_size, vixl::PostIndex));
+  __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
+  __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
   __ B(&loop);
   __ Bind(&done);
 
@@ -2088,7 +2092,7 @@
   // intrinsic and re-enable it (b/29516905).
   DCHECK(!kEmitCompilerReadBarrier);
 
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -2107,7 +2111,7 @@
   SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
   codegen_->AddSlowPath(slow_path);
 
-  vixl::Label conditions_on_positions_validated;
+  vixl::aarch64::Label conditions_on_positions_validated;
   SystemArrayCopyOptimizations optimizations(invoke);
 
   // If source and destination are the same, we go to slow path if we need to do
@@ -2230,7 +2234,7 @@
       __ Cmp(temp1, temp2);
 
       if (optimizations.GetDestinationIsTypedObjectArray()) {
-        vixl::Label do_copy;
+        vixl::aarch64::Label do_copy;
         __ B(&do_copy, eq);
         if (!did_unpoison) {
           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
@@ -2278,15 +2282,15 @@
 
     // Iterate over the arrays and do a raw copy of the objects. We don't need to
     // poison/unpoison.
-    vixl::Label loop, done;
+    vixl::aarch64::Label loop, done;
     const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
     __ Bind(&loop);
     __ Cmp(src_curr_addr, src_stop_addr);
     __ B(&done, eq);
     {
       Register tmp = temps.AcquireW();
-      __ Ldr(tmp, MemOperand(src_curr_addr, element_size, vixl::PostIndex));
-      __ Str(tmp, MemOperand(dst_curr_addr, element_size, vixl::PostIndex));
+      __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+      __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
     }
     __ B(&loop);
     __ Bind(&done);
@@ -2299,7 +2303,7 @@
 
 static void GenIsInfinite(LocationSummary* locations,
                           bool is64bit,
-                          vixl::MacroAssembler* masm) {
+                          MacroAssembler* masm) {
   Operand infinity;
   Register out;
 
@@ -2311,7 +2315,7 @@
     out = WRegisterFrom(locations->Out());
   }
 
-  const Register zero = vixl::Assembler::AppropriateZeroRegFor(out);
+  const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);
 
   MoveFPToInt(locations, is64bit, masm);
   __ Eor(out, out, infinity);
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index d47448a..5251536 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -20,10 +20,11 @@
 #include "intrinsics.h"
 
 namespace vixl {
+namespace aarch64 {
 
 class MacroAssembler;
 
-}  // namespace vixl
+}}  // namespace vixl::aarch64
 
 namespace art {
 
@@ -73,7 +74,7 @@
 #undef OPTIMIZING_INTRINSICS
 
  private:
-  vixl::MacroAssembler* GetVIXLAssembler();
+  vixl::aarch64::MacroAssembler* GetVIXLAssembler();
 
   ArenaAllocator* GetAllocator();
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index d4f44d6..0bfa025 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1875,7 +1875,7 @@
 // int java.lang.String.compareTo(String anotherString)
 void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2071,7 +2071,7 @@
 // int java.lang.String.indexOf(int ch)
 void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -2096,7 +2096,7 @@
 // int java.lang.String.indexOf(int ch, int fromIndex)
 void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -2122,7 +2122,7 @@
 // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
 void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2155,7 +2155,7 @@
 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
 void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2187,7 +2187,7 @@
 // java.lang.StringFactory.newStringFromString(String toCopy)
 void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index cc4971b..dfaa84e 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1519,7 +1519,7 @@
 // int java.lang.String.compareTo(String anotherString)
 void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1707,7 +1707,7 @@
 // int java.lang.String.indexOf(int ch)
 void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -1728,7 +1728,7 @@
 // int java.lang.String.indexOf(int ch, int fromIndex)
 void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   // We have a hand-crafted assembly stub that follows the runtime
   // calling convention. So it's best to align the inputs accordingly.
@@ -1748,7 +1748,7 @@
 // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1783,7 +1783,7 @@
 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1816,7 +1816,7 @@
 // java.lang.StringFactory.newStringFromString(String toCopy)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1879,6 +1879,84 @@
   GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
+static void GenHighestOneBit(LocationSummary* locations,
+                             Primitive::Type type,
+                             Mips64Assembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << PrettyDescriptor(type);
+
+  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  if (type == Primitive::kPrimLong) {
+    __ Dclz(TMP, in);
+    __ LoadConst64(AT, INT64_C(0x8000000000000000));
+    __ Dsrlv(out, AT, TMP);
+  } else {
+    __ Clz(TMP, in);
+    __ LoadConst32(AT, 0x80000000);
+    __ Srlv(out, AT, TMP);
+  }
+  // For either value of "type", when "in" is zero, "out" should also
+  // be zero. Without this extra "and" operation, when "in" is zero,
+  // "out" would be either Integer.MIN_VALUE, or Long.MIN_VALUE because
+  // the MIPS logical shift operations "dsrlv", and "srlv" don't use
+  // the shift amount (TMP) directly; they use either (TMP % 64) or
+  // (TMP % 32), respectively.
+  __ And(out, out, in);
+}
+
+// int java.lang.Integer.highestOneBit(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.highestOneBit(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongHighestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongHighestOneBit(HInvoke* invoke) {
+  GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void GenLowestOneBit(LocationSummary* locations,
+                            Primitive::Type type,
+                            Mips64Assembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << PrettyDescriptor(type);
+
+  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  if (type == Primitive::kPrimLong) {
+    __ Dsubu(TMP, ZERO, in);
+  } else {
+    __ Subu(TMP, ZERO, in);
+  }
+  __ And(out, TMP, in);
+}
+
+// int java.lang.Integer.lowestOneBit(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.lowestOneBit(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongLowestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
 UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopyChar)
@@ -1902,11 +1980,6 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, MathTan)
 UNIMPLEMENTED_INTRINSIC(MIPS64, MathTanh)
 
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(MIPS64, LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(MIPS64, LongLowestOneBit)
-
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddLong)
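For context on the MIPS64 highestOneBit/lowestOneBit intrinsics added above, here is a minimal standalone C++ sketch of the same bit tricks, assuming plain integer types instead of the real GpuRegister operands; it is an illustration, not ART code. The explicit zero check below stands in for the final "and" with the input that the generated code uses, which is needed because the MIPS shift instructions reduce the shift amount modulo 32 or 64.

    #include <cstdint>

    // Hypothetical helpers mirroring the intrinsic semantics.
    static uint32_t HighestOneBit32(uint32_t in) {
      if (in == 0) return 0;                          // avoids the (shift % 32) hazard noted above
      int leading_zeros = __builtin_clz(in);          // Clz(TMP, in)
      return UINT32_C(0x80000000) >> leading_zeros;   // Srlv(out, AT, TMP)
    }

    static uint64_t LowestOneBit64(uint64_t in) {
      return (0 - in) & in;                           // Dsubu(TMP, ZERO, in); And(out, TMP, in)
    }

In the generated code the zero case is instead handled by and-ing the shifted mask with the input, which forces the result to zero whenever the input is zero.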
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 812bdf5..6c81421 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -706,7 +706,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::FpuRegisterLocation(XMM0));
@@ -774,7 +774,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(EAX));
@@ -831,7 +831,7 @@
 static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                       HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall,
+                                                           LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -985,7 +985,7 @@
 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                         HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall,
+                                                           LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -1216,7 +1216,7 @@
 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1490,7 +1490,7 @@
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1518,7 +1518,7 @@
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1543,7 +1543,7 @@
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 891aaf5..28f1f4f 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -526,7 +526,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::FpuRegisterLocation(XMM0));
@@ -588,7 +588,7 @@
 
   // We have to fall back to a call to the intrinsic.
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall);
+                                                           LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(RAX));
@@ -699,7 +699,7 @@
 static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                       HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall,
+                                                           LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -839,7 +839,7 @@
 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                         HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kCall,
+                                                           LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -1303,7 +1303,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1577,7 +1577,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1606,7 +1606,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1632,7 +1632,7 @@
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            LocationSummary::kCallOnMainOnly,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 3f27c91..7a78bfd 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -481,7 +481,7 @@
   enum CallKind {
     kNoCall,
     kCallOnSlowPath,
-    kCall
+    kCallOnMainOnly
   };
 
   LocationSummary(HInstruction* instruction,
@@ -541,7 +541,7 @@
   Location Out() const { return output_; }
 
   bool CanCall() const { return call_kind_ != kNoCall; }
-  bool WillCall() const { return call_kind_ == kCall; }
+  bool WillCall() const { return call_kind_ == kCallOnMainOnly; }
   bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; }
   bool NeedsSafepoint() const { return CanCall(); }
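The rename above is mechanical: kCall becomes kCallOnMainOnly and WillCall() now tests against it, distinguishing instructions that call the runtime on their main path from those that only call on a slow path. A minimal sketch, using only the APIs visible in this patch, of how the intrinsic location builders shown earlier consume it:

    // Sketch only; mirrors the VisitString* builders above.
    LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                              LocationSummary::kCallOnMainOnly,
                                                              kIntrinsified);
    InvokeRuntimeCallingConvention calling_convention;
    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
    // Because the call kind is kCallOnMainOnly, WillCall() and NeedsSafepoint() both hold.
    DCHECK(locations->WillCall());
    DCHECK(locations->NeedsSafepoint());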
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0f0ef26..23ac457 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1289,7 +1289,8 @@
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                         \
   M(BitwiseNegatedRight, Instruction)                                   \
-  M(MultiplyAccumulate, Instruction)
+  M(MultiplyAccumulate, Instruction)                                    \
+  M(IntermediateAddress, Instruction)
 #endif
 
 #ifndef ART_ENABLE_CODEGEN_arm
@@ -1303,8 +1304,7 @@
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
-  M(Arm64DataProcWithShifterOp, Instruction)                            \
-  M(Arm64IntermediateAddress, Instruction)
+  M(Arm64DataProcWithShifterOp, Instruction)
 #endif
 
 #ifndef ART_ENABLE_CODEGEN_mips
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 06b073c..3f88717 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -94,32 +94,6 @@
 
 std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op);
 
-// This instruction computes an intermediate address pointing in the 'middle' of an object. The
-// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
-// never used across anything that can trigger GC.
-class HArm64IntermediateAddress FINAL : public HExpression<2> {
- public:
-  HArm64IntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
-      : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
-    SetRawInputAt(0, base_address);
-    SetRawInputAt(1, offset);
-  }
-
-  bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
-    return true;
-  }
-  bool IsActualObject() const OVERRIDE { return false; }
-
-  HInstruction* GetBaseAddress() const { return InputAt(0); }
-  HInstruction* GetOffset() const { return InputAt(1); }
-
-  DECLARE_INSTRUCTION(Arm64IntermediateAddress);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
-};
-
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index f2d5cf3..8bd8667 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -113,6 +113,34 @@
   DISALLOW_COPY_AND_ASSIGN(HBitwiseNegatedRight);
 };
 
+
+// This instruction computes an intermediate address pointing in the 'middle' of an object. The
+// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
+// never used across anything that can trigger GC.
+class HIntermediateAddress FINAL : public HExpression<2> {
+ public:
+  HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
+      : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
+    SetRawInputAt(0, base_address);
+    SetRawInputAt(1, offset);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
+  bool IsActualObject() const OVERRIDE { return false; }
+
+  HInstruction* GetBaseAddress() const { return InputAt(0); }
+  HInstruction* GetOffset() const { return InputAt(1); }
+
+  DECLARE_INSTRUCTION(IntermediateAddress);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
+};
+
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
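HIntermediateAddress above is a verbatim move of the former HArm64IntermediateAddress into the shared nodes so that other back ends can reuse it. A minimal sketch of how a simplifier pass might create one for an array access, assuming hypothetical locals (arena, array, data_offset, access) and using only the constructor and InsertInstructionBefore() visible in this patch:

    // Sketch only; the pass that actually emits this node is not part of this diff.
    HIntermediateAddress* address =
        new (arena) HIntermediateAddress(array,        // base object reference
                                         data_offset,  // constant offset to the array data
                                         access->GetDexPc());
    access->GetBlock()->InsertInstructionBefore(address, access);
    // The result is not a real object reference (IsActualObject() is false), so it
    // must not be kept live across anything that can trigger GC.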
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d703b0f..aedfcb4 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -37,6 +37,10 @@
 #include "pc_relative_fixups_x86.h"
 #endif
 
+#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
+#include "x86_memory_gen.h"
+#endif
+
 #include "art_method-inl.h"
 #include "base/arena_allocator.h"
 #include "base/arena_containers.h"
@@ -77,7 +81,7 @@
 #include "oat_quick_method_header.h"
 #include "prepare_for_register_allocation.h"
 #include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "select_generator.h"
 #include "sharpening.h"
 #include "side_effects_analysis.h"
@@ -301,6 +305,18 @@
       OVERRIDE
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+ protected:
+  virtual void RunOptimizations(HGraph* graph,
+                                CodeGenerator* codegen,
+                                CompilerDriver* driver,
+                                const DexCompilationUnit& dex_compilation_unit,
+                                PassObserver* pass_observer,
+                                StackHandleScopeCollection* handles) const;
+
+  virtual void RunOptimizations(HOptimization* optimizations[],
+                                size_t length,
+                                PassObserver* pass_observer) const;
+
  private:
   // Create a 'CompiledMethod' for an optimized graph.
   CompiledMethod* Emit(ArenaAllocator* arena,
@@ -329,6 +345,18 @@
                             ArtMethod* method,
                             bool osr) const;
 
+  void MaybeRunInliner(HGraph* graph,
+                       CodeGenerator* codegen,
+                       CompilerDriver* driver,
+                       const DexCompilationUnit& dex_compilation_unit,
+                       PassObserver* pass_observer,
+                       StackHandleScopeCollection* handles) const;
+
+  void RunArchOptimizations(InstructionSet instruction_set,
+                            HGraph* graph,
+                            CodeGenerator* codegen,
+                            PassObserver* pass_observer) const;
+
   std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
 
   std::unique_ptr<std::ostream> visualizer_output_;
@@ -392,22 +420,22 @@
       || instruction_set == kX86_64;
 }
 
-static void RunOptimizations(HOptimization* optimizations[],
-                             size_t length,
-                             PassObserver* pass_observer) {
+void OptimizingCompiler::RunOptimizations(HOptimization* optimizations[],
+                                          size_t length,
+                                          PassObserver* pass_observer) const {
   for (size_t i = 0; i < length; ++i) {
     PassScope scope(optimizations[i]->GetPassName(), pass_observer);
     optimizations[i]->Run();
   }
 }
 
-static void MaybeRunInliner(HGraph* graph,
-                            CodeGenerator* codegen,
-                            CompilerDriver* driver,
-                            OptimizingCompilerStats* stats,
-                            const DexCompilationUnit& dex_compilation_unit,
-                            PassObserver* pass_observer,
-                            StackHandleScopeCollection* handles) {
+void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
+                                         CodeGenerator* codegen,
+                                         CompilerDriver* driver,
+                                         const DexCompilationUnit& dex_compilation_unit,
+                                         PassObserver* pass_observer,
+                                         StackHandleScopeCollection* handles) const {
+  OptimizingCompilerStats* stats = compilation_stats_.get();
   const CompilerOptions& compiler_options = driver->GetCompilerOptions();
   bool should_inline = (compiler_options.GetInlineDepthLimit() > 0)
       && (compiler_options.GetInlineMaxCodeUnits() > 0);
@@ -431,11 +459,11 @@
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
 }
 
-static void RunArchOptimizations(InstructionSet instruction_set,
-                                 HGraph* graph,
-                                 CodeGenerator* codegen,
-                                 OptimizingCompilerStats* stats,
-                                 PassObserver* pass_observer) {
+void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
+                                              HGraph* graph,
+                                              CodeGenerator* codegen,
+                                              PassObserver* pass_observer) const {
+  OptimizingCompilerStats* stats = compilation_stats_.get();
   ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
@@ -444,8 +472,12 @@
       arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
       arm::InstructionSimplifierArm* simplifier =
           new (arena) arm::InstructionSimplifierArm(graph, stats);
+      SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
+      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch");
       HOptimization* arm_optimizations[] = {
         simplifier,
+        side_effects,
+        gvn,
         fixups
       };
       RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
@@ -485,13 +517,27 @@
     case kX86: {
       x86::PcRelativeFixups* pc_relative_fixups =
           new (arena) x86::PcRelativeFixups(graph, codegen, stats);
+      x86::X86MemoryOperandGeneration* memory_gen =
+          new (arena) x86::X86MemoryOperandGeneration(graph, stats, codegen);
       HOptimization* x86_optimizations[] = {
-          pc_relative_fixups
+          pc_relative_fixups,
+          memory_gen
       };
       RunOptimizations(x86_optimizations, arraysize(x86_optimizations), pass_observer);
       break;
     }
 #endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+    case kX86_64: {
+      x86::X86MemoryOperandGeneration* memory_gen =
+          new (arena) x86::X86MemoryOperandGeneration(graph, stats, codegen);
+      HOptimization* x86_64_optimizations[] = {
+          memory_gen
+      };
+      RunOptimizations(x86_64_optimizations, arraysize(x86_64_optimizations), pass_observer);
+      break;
+    }
+#endif
     default:
       break;
   }
@@ -513,17 +559,17 @@
   }
   {
     PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
-    RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+    RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
   }
 }
 
-static void RunOptimizations(HGraph* graph,
-                             CodeGenerator* codegen,
-                             CompilerDriver* driver,
-                             OptimizingCompilerStats* stats,
-                             const DexCompilationUnit& dex_compilation_unit,
-                             PassObserver* pass_observer,
-                             StackHandleScopeCollection* handles) {
+void OptimizingCompiler::RunOptimizations(HGraph* graph,
+                                          CodeGenerator* codegen,
+                                          CompilerDriver* driver,
+                                          const DexCompilationUnit& dex_compilation_unit,
+                                          PassObserver* pass_observer,
+                                          StackHandleScopeCollection* handles) const {
+  OptimizingCompilerStats* stats = compilation_stats_.get();
   ArenaAllocator* arena = graph->GetArena();
   HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
       graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
@@ -556,7 +602,7 @@
   };
   RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
 
-  MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles);
+  MaybeRunInliner(graph, codegen, driver, dex_compilation_unit, pass_observer, handles);
 
   HOptimization* optimizations2[] = {
     // SelectGenerator depends on the InstructionSimplifier removing
@@ -579,7 +625,7 @@
   };
   RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
 
-  RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, stats, pass_observer);
+  RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, pass_observer);
   AllocateRegisters(graph, codegen, pass_observer);
 }
 
@@ -791,7 +837,6 @@
     RunOptimizations(graph,
                      codegen.get(),
                      compiler_driver,
-                     compilation_stats_.get(),
                      dex_compilation_unit,
                      &pass_observer,
                      &handles);
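The refactoring above turns the free functions MaybeRunInliner, RunArchOptimizations and RunOptimizations into members so they can read compilation_stats_ directly instead of threading a stats pointer through every call. The pass pipeline itself keeps the same shape; a standalone sketch of that pattern, with stand-in types rather than the real ART classes:

    #include <cstddef>
    #include <cstdio>

    // Stand-in for HOptimization; real passes mutate the HGraph in place.
    struct Pass {
      const char* name;
      void Run() const { std::printf("running %s\n", name); }
    };

    static void RunPasses(const Pass* passes[], size_t length) {
      for (size_t i = 0; i < length; ++i) {
        passes[i]->Run();  // analogous to the PassScope + Run() loop above
      }
    }

    int main() {
      const Pass simplifier{"instruction_simplifier_arm"};
      const Pass side_effects{"side_effects"};
      const Pass gvn{"GVN_after_arch"};
      const Pass* arm_passes[] = {&simplifier, &side_effects, &gvn};
      RunPasses(arm_passes, sizeof(arm_passes) / sizeof(arm_passes[0]));
      return 0;
    }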
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 9cc6ea4..c8d1ce0 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -65,6 +65,7 @@
   kInlinedInvokeVirtualOrInterface,
   kImplicitNullCheckGenerated,
   kExplicitNullCheckGenerated,
+  kSimplifyIf,
   kLastStat
 };
 
@@ -143,6 +144,7 @@
       case kInlinedInvokeVirtualOrInterface: name = "InlinedInvokeVirtualOrInterface"; break;
       case kImplicitNullCheckGenerated: name = "ImplicitNullCheckGenerated"; break;
       case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break;
+      case kSimplifyIf: name = "SimplifyIf"; break;
 
       case kLastStat:
         LOG(FATAL) << "invalid stat "
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
new file mode 100644
index 0000000..3450286
--- /dev/null
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -0,0 +1,653 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocation_resolver.h"
+
+#include "code_generator.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
+RegisterAllocationResolver::RegisterAllocationResolver(ArenaAllocator* allocator,
+                                                       CodeGenerator* codegen,
+                                                       const SsaLivenessAnalysis& liveness)
+      : allocator_(allocator),
+        codegen_(codegen),
+        liveness_(liveness) {}
+
+void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs,
+                                         size_t max_safepoint_live_fp_regs,
+                                         size_t reserved_out_slots,
+                                         size_t int_spill_slots,
+                                         size_t long_spill_slots,
+                                         size_t float_spill_slots,
+                                         size_t double_spill_slots,
+                                         size_t catch_phi_spill_slots,
+                                         const ArenaVector<LiveInterval*>& temp_intervals) {
+  size_t spill_slots = int_spill_slots
+                     + long_spill_slots
+                     + float_spill_slots
+                     + double_spill_slots
+                     + catch_phi_spill_slots;
+
+  // Computes frame size and spill mask.
+  codegen_->InitializeCodeGeneration(spill_slots,
+                                     max_safepoint_live_core_regs,
+                                     max_safepoint_live_fp_regs,
+                                     reserved_out_slots,  // Includes slot(s) for the art method.
+                                     codegen_->GetGraph()->GetLinearOrder());
+
+  // Resolve outputs, including stack locations.
+  // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
+  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+    LiveInterval* current = instruction->GetLiveInterval();
+    LocationSummary* locations = instruction->GetLocations();
+    Location location = locations->Out();
+    if (instruction->IsParameterValue()) {
+      // Now that we know the frame size, adjust the parameter's location.
+      if (location.IsStackSlot()) {
+        location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+        current->SetSpillSlot(location.GetStackIndex());
+        locations->UpdateOut(location);
+      } else if (location.IsDoubleStackSlot()) {
+        location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+        current->SetSpillSlot(location.GetStackIndex());
+        locations->UpdateOut(location);
+      } else if (current->HasSpillSlot()) {
+        current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
+      }
+    } else if (instruction->IsCurrentMethod()) {
+      // The current method is always at offset 0.
+      DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0));
+    } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+      DCHECK(current->HasSpillSlot());
+      size_t slot = current->GetSpillSlot()
+                    + spill_slots
+                    + reserved_out_slots
+                    - catch_phi_spill_slots;
+      current->SetSpillSlot(slot * kVRegSize);
+    } else if (current->HasSpillSlot()) {
+      // Adjust the stack slot, now that we know the number of them for each type.
+      // The way this implementation lays out the stack is the following:
+      // [parameter slots       ]
+      // [catch phi spill slots ]
+      // [double spill slots    ]
+      // [long spill slots      ]
+      // [float spill slots     ]
+      // [int/ref values        ]
+      // [maximum out values    ] (number of arguments for calls)
+      // [art method            ].
+      size_t slot = current->GetSpillSlot();
+      switch (current->GetType()) {
+        case Primitive::kPrimDouble:
+          slot += long_spill_slots;
+          FALLTHROUGH_INTENDED;
+        case Primitive::kPrimLong:
+          slot += float_spill_slots;
+          FALLTHROUGH_INTENDED;
+        case Primitive::kPrimFloat:
+          slot += int_spill_slots;
+          FALLTHROUGH_INTENDED;
+        case Primitive::kPrimNot:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimShort:
+          slot += reserved_out_slots;
+          break;
+        case Primitive::kPrimVoid:
+          LOG(FATAL) << "Unexpected type for interval " << current->GetType();
+      }
+      current->SetSpillSlot(slot * kVRegSize);
+    }
+
+    Location source = current->ToLocation();
+
+    if (location.IsUnallocated()) {
+      if (location.GetPolicy() == Location::kSameAsFirstInput) {
+        if (locations->InAt(0).IsUnallocated()) {
+          locations->SetInAt(0, source);
+        } else {
+          DCHECK(locations->InAt(0).Equals(source));
+        }
+      }
+      locations->UpdateOut(source);
+    } else {
+      DCHECK(source.Equals(location));
+    }
+  }
+
+  // Connect siblings and resolve inputs.
+  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+    ConnectSiblings(instruction->GetLiveInterval(),
+                    max_safepoint_live_core_regs + max_safepoint_live_fp_regs);
+  }
+
+  // Resolve non-linear control flow across branches. Order does not matter.
+  for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (block->IsCatchBlock() ||
+        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+      // Instructions live at the top of catch blocks or irreducible loop headers
+      // were forced to spill.
+      if (kIsDebugBuild) {
+        BitVector* live = liveness_.GetLiveInSet(*block);
+        for (uint32_t idx : live->Indexes()) {
+          LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
+          LiveInterval* sibling = interval->GetSiblingAt(block->GetLifetimeStart());
+          // `GetSiblingAt` returns the sibling that contains a position, but there could be
+          // a lifetime hole in it. `CoversSlow` returns whether the interval is live at that
+          // position.
+          if ((sibling != nullptr) && sibling->CoversSlow(block->GetLifetimeStart())) {
+            DCHECK(!sibling->HasRegister());
+          }
+        }
+      }
+    } else {
+      BitVector* live = liveness_.GetLiveInSet(*block);
+      for (uint32_t idx : live->Indexes()) {
+        LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
+        for (HBasicBlock* predecessor : block->GetPredecessors()) {
+          ConnectSplitSiblings(interval, predecessor, block);
+        }
+      }
+    }
+  }
+
+  // Resolve phi inputs. Order does not matter.
+  for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* current = it.Current();
+    if (current->IsCatchBlock()) {
+      // Catch phi values are set at runtime by the exception delivery mechanism.
+    } else {
+      for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+        HInstruction* phi = inst_it.Current();
+        for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
+          HBasicBlock* predecessor = current->GetPredecessors()[i];
+          DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
+          HInstruction* input = phi->InputAt(i);
+          Location source = input->GetLiveInterval()->GetLocationAt(
+              predecessor->GetLifetimeEnd() - 1);
+          Location destination = phi->GetLiveInterval()->ToLocation();
+          InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
+        }
+      }
+    }
+  }
+
+  // Resolve temp locations.
+  for (LiveInterval* temp : temp_intervals) {
+    if (temp->IsHighInterval()) {
+      // High intervals can be skipped, they are already handled by the low interval.
+      continue;
+    }
+    HInstruction* at = liveness_.GetTempUser(temp);
+    size_t temp_index = liveness_.GetTempIndex(temp);
+    LocationSummary* locations = at->GetLocations();
+    switch (temp->GetType()) {
+      case Primitive::kPrimInt:
+        locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister()));
+        break;
+
+      case Primitive::kPrimDouble:
+        if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+          Location location = Location::FpuRegisterPairLocation(
+              temp->GetRegister(), temp->GetHighInterval()->GetRegister());
+          locations->SetTempAt(temp_index, location);
+        } else {
+          locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister()));
+        }
+        break;
+
+      default:
+        LOG(FATAL) << "Unexpected type for temporary location "
+                   << temp->GetType();
+    }
+  }
+}
+
+void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval,
+                                                 size_t max_safepoint_live_regs) {
+  LiveInterval* current = interval;
+  if (current->HasSpillSlot()
+      && current->HasRegister()
+      // Currently, we unconditionally spill the current method in the code generators.
+      && !interval->GetDefinedBy()->IsCurrentMethod()) {
+    // We spill eagerly, so move must be at definition.
+    InsertMoveAfter(interval->GetDefinedBy(),
+                    interval->ToLocation(),
+                    interval->NeedsTwoSpillSlots()
+                        ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
+                        : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+  }
+  UsePosition* use = current->GetFirstUse();
+  UsePosition* env_use = current->GetFirstEnvironmentUse();
+
+  // Walk over all siblings, updating locations of use positions, and
+  // connecting them when they are adjacent.
+  do {
+    Location source = current->ToLocation();
+
+    // Walk over all uses covered by this interval, and update the location
+    // information.
+
+    LiveRange* range = current->GetFirstRange();
+    while (range != nullptr) {
+      while (use != nullptr && use->GetPosition() < range->GetStart()) {
+        DCHECK(use->IsSynthesized());
+        use = use->GetNext();
+      }
+      while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
+        DCHECK(!use->GetIsEnvironment());
+        DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
+        if (!use->IsSynthesized()) {
+          LocationSummary* locations = use->GetUser()->GetLocations();
+          Location expected_location = locations->InAt(use->GetInputIndex());
+          // The expected (actual) location may be invalid if the input is unused. Currently
+          // this only happens for intrinsics.
+          if (expected_location.IsValid()) {
+            if (expected_location.IsUnallocated()) {
+              locations->SetInAt(use->GetInputIndex(), source);
+            } else if (!expected_location.IsConstant()) {
+              AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
+            }
+          } else {
+            DCHECK(use->GetUser()->IsInvoke());
+            DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
+          }
+        }
+        use = use->GetNext();
+      }
+
+      // Walk over the environment uses, and update their locations.
+      while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
+        env_use = env_use->GetNext();
+      }
+
+      while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
+        DCHECK(current->CoversSlow(env_use->GetPosition())
+               || (env_use->GetPosition() == range->GetEnd()));
+        HEnvironment* environment = env_use->GetEnvironment();
+        environment->SetLocationAt(env_use->GetInputIndex(), source);
+        env_use = env_use->GetNext();
+      }
+
+      range = range->GetNext();
+    }
+
+    // If the next interval starts just after this one, and has a register,
+    // insert a move.
+    LiveInterval* next_sibling = current->GetNextSibling();
+    if (next_sibling != nullptr
+        && next_sibling->HasRegister()
+        && current->GetEnd() == next_sibling->GetStart()) {
+      Location destination = next_sibling->ToLocation();
+      InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
+    }
+
+    for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
+         safepoint_position != nullptr;
+         safepoint_position = safepoint_position->GetNext()) {
+      DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
+
+      LocationSummary* locations = safepoint_position->GetLocations();
+      if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
+        DCHECK(interval->GetDefinedBy()->IsActualObject())
+            << interval->GetDefinedBy()->DebugName()
+            << "@" << safepoint_position->GetInstruction()->DebugName();
+        locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
+      }
+
+      switch (source.GetKind()) {
+        case Location::kRegister: {
+          locations->AddLiveRegister(source);
+          if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
+            DCHECK_LE(locations->GetNumberOfLiveRegisters(),
+                      max_safepoint_live_regs);
+          }
+          if (current->GetType() == Primitive::kPrimNot) {
+            DCHECK(interval->GetDefinedBy()->IsActualObject())
+                << interval->GetDefinedBy()->DebugName()
+                << "@" << safepoint_position->GetInstruction()->DebugName();
+            locations->SetRegisterBit(source.reg());
+          }
+          break;
+        }
+        case Location::kFpuRegister: {
+          locations->AddLiveRegister(source);
+          break;
+        }
+
+        case Location::kRegisterPair:
+        case Location::kFpuRegisterPair: {
+          locations->AddLiveRegister(source.ToLow());
+          locations->AddLiveRegister(source.ToHigh());
+          break;
+        }
+        case Location::kStackSlot:  // Fall-through
+        case Location::kDoubleStackSlot:  // Fall-through
+        case Location::kConstant: {
+          // Nothing to do.
+          break;
+        }
+        default: {
+          LOG(FATAL) << "Unexpected location for object";
+        }
+      }
+    }
+    current = next_sibling;
+  } while (current != nullptr);
+
+  if (kIsDebugBuild) {
+    // Following uses can only be synthesized uses.
+    while (use != nullptr) {
+      DCHECK(use->IsSynthesized());
+      use = use->GetNext();
+    }
+  }
+}
+
+static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
+    HInstruction* instruction) {
+  return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
+         (instruction->IsConstant() || instruction->IsCurrentMethod());
+}
+
+void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
+                                                      HBasicBlock* from,
+                                                      HBasicBlock* to) const {
+  if (interval->GetNextSibling() == nullptr) {
+    // Nothing to connect. The whole range was allocated to the same location.
+    return;
+  }
+
+  // Find the intervals that cover `from` and `to`.
+  size_t destination_position = to->GetLifetimeStart();
+  size_t source_position = from->GetLifetimeEnd() - 1;
+  LiveInterval* destination = interval->GetSiblingAt(destination_position);
+  LiveInterval* source = interval->GetSiblingAt(source_position);
+
+  if (destination == source) {
+    // Interval was not split.
+    return;
+  }
+
+  LiveInterval* parent = interval->GetParent();
+  HInstruction* defined_by = parent->GetDefinedBy();
+  if (codegen_->GetGraph()->HasIrreducibleLoops() &&
+      (destination == nullptr || !destination->CoversSlow(destination_position))) {
+    // Our live_in fixed point calculation has found that the instruction is live
+    // in the `to` block because it will eventually enter an irreducible loop. Our
+    // live interval computation however does not compute a fixed point, and
+    // therefore will not have a location for that instruction for `to`.
+    // Because the instruction is a constant or the ArtMethod, we don't need to
+    // do anything: it will be materialized in the irreducible loop.
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
+        << defined_by->DebugName() << ":" << defined_by->GetId()
+        << " " << from->GetBlockId() << " -> " << to->GetBlockId();
+    return;
+  }
+
+  if (!destination->HasRegister()) {
+    // Values are eagerly spilled. Spill slot already contains appropriate value.
+    return;
+  }
+
+  Location location_source;
+  // `GetSiblingAt` returns the interval whose start and end cover `position`,
+  // but does not check whether the interval is inactive at that position.
+  // The only situation where the interval is inactive at that position is in the
+  // presence of irreducible loops for constants and ArtMethod.
+  if (codegen_->GetGraph()->HasIrreducibleLoops() &&
+      (source == nullptr || !source->CoversSlow(source_position))) {
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+    if (defined_by->IsConstant()) {
+      location_source = defined_by->GetLocations()->Out();
+    } else {
+      DCHECK(defined_by->IsCurrentMethod());
+      location_source = parent->NeedsTwoSpillSlots()
+          ? Location::DoubleStackSlot(parent->GetSpillSlot())
+          : Location::StackSlot(parent->GetSpillSlot());
+    }
+  } else {
+    DCHECK(source != nullptr);
+    DCHECK(source->CoversSlow(source_position));
+    DCHECK(destination->CoversSlow(destination_position));
+    location_source = source->ToLocation();
+  }
+
+  // If `from` has only one successor, we can put the moves at the exit of it. Otherwise
+  // we need to put the moves at the entry of `to`.
+  if (from->GetNormalSuccessors().size() == 1) {
+    InsertParallelMoveAtExitOf(from,
+                               defined_by,
+                               location_source,
+                               destination->ToLocation());
+  } else {
+    DCHECK_EQ(to->GetPredecessors().size(), 1u);
+    InsertParallelMoveAtEntryOf(to,
+                                defined_by,
+                                location_source,
+                                destination->ToLocation());
+  }
+}
+
+static bool IsValidDestination(Location destination) {
+  return destination.IsRegister()
+      || destination.IsRegisterPair()
+      || destination.IsFpuRegister()
+      || destination.IsFpuRegisterPair()
+      || destination.IsStackSlot()
+      || destination.IsDoubleStackSlot();
+}
+
+void RegisterAllocationResolver::AddMove(HParallelMove* move,
+                                         Location source,
+                                         Location destination,
+                                         HInstruction* instruction,
+                                         Primitive::Type type) const {
+  if (type == Primitive::kPrimLong
+      && codegen_->ShouldSplitLongMoves()
+      // The parallel move resolver knows how to deal with long constants.
+      && !source.IsConstant()) {
+    move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction);
+    move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr);
+  } else {
+    move->AddMove(source, destination, type, instruction);
+  }
+}
+
+void RegisterAllocationResolver::AddInputMoveFor(HInstruction* input,
+                                                 HInstruction* user,
+                                                 Location source,
+                                                 Location destination) const {
+  if (source.Equals(destination)) return;
+
+  DCHECK(!user->IsPhi());
+
+  HInstruction* previous = user->GetPrevious();
+  HParallelMove* move = nullptr;
+  if (previous == nullptr
+      || !previous->IsParallelMove()
+      || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(user->GetLifetimePosition());
+    user->GetBlock()->InsertInstructionBefore(move, user);
+  } else {
+    move = previous->AsParallelMove();
+  }
+  DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
+  AddMove(move, source, destination, nullptr, input->GetType());
+}
+
+static bool IsInstructionStart(size_t position) {
+  return (position & 1) == 0;
+}
+
+static bool IsInstructionEnd(size_t position) {
+  return (position & 1) == 1;
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAt(size_t position,
+                                                      HInstruction* instruction,
+                                                      Location source,
+                                                      Location destination) const {
+  DCHECK(IsValidDestination(destination)) << destination;
+  if (source.Equals(destination)) return;
+
+  HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
+  HParallelMove* move;
+  if (at == nullptr) {
+    if (IsInstructionStart(position)) {
+      // Block boundary; don't do anything, the connection of split siblings will handle it.
+      return;
+    } else {
+      // Move must happen before the first instruction of the block.
+      at = liveness_.GetInstructionFromPosition((position + 1) / 2);
+      // Note that parallel moves may have already been inserted, so we explicitly
+      // ask for the first instruction of the block: `GetInstructionFromPosition` does
+      // not contain the `HParallelMove` instructions.
+      at = at->GetBlock()->GetFirstInstruction();
+
+      if (at->GetLifetimePosition() < position) {
+        // We may insert moves for split siblings and phi spills at the beginning of the block.
+        // Since this is a different lifetime position, we need to go to the next instruction.
+        DCHECK(at->IsParallelMove());
+        at = at->GetNext();
+      }
+
+      if (at->GetLifetimePosition() != position) {
+        DCHECK_GT(at->GetLifetimePosition(), position);
+        move = new (allocator_) HParallelMove(allocator_);
+        move->SetLifetimePosition(position);
+        at->GetBlock()->InsertInstructionBefore(move, at);
+      } else {
+        DCHECK(at->IsParallelMove());
+        move = at->AsParallelMove();
+      }
+    }
+  } else if (IsInstructionEnd(position)) {
+    // Move must happen after the instruction.
+    DCHECK(!at->IsControlFlow());
+    move = at->GetNext()->AsParallelMove();
+    // This is a parallel move for connecting siblings in the same block. We need to
+    // differentiate it from moves for connecting blocks and from input moves.
+    if (move == nullptr || move->GetLifetimePosition() > position) {
+      move = new (allocator_) HParallelMove(allocator_);
+      move->SetLifetimePosition(position);
+      at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
+    }
+  } else {
+    // Move must happen before the instruction.
+    HInstruction* previous = at->GetPrevious();
+    if (previous == nullptr
+        || !previous->IsParallelMove()
+        || previous->GetLifetimePosition() != position) {
+      // If the previous is a parallel move, then its position must be lower
+      // than the given `position`: it was added just after the non-parallel
+      // move instruction that precedes `instruction`.
+      DCHECK(previous == nullptr
+             || !previous->IsParallelMove()
+             || previous->GetLifetimePosition() < position);
+      move = new (allocator_) HParallelMove(allocator_);
+      move->SetLifetimePosition(position);
+      at->GetBlock()->InsertInstructionBefore(move, at);
+    } else {
+      move = previous->AsParallelMove();
+    }
+  }
+  DCHECK_EQ(move->GetLifetimePosition(), position);
+  AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAtExitOf(HBasicBlock* block,
+                                                            HInstruction* instruction,
+                                                            Location source,
+                                                            Location destination) const {
+  DCHECK(IsValidDestination(destination)) << destination;
+  if (source.Equals(destination)) return;
+
+  DCHECK_EQ(block->GetNormalSuccessors().size(), 1u);
+  HInstruction* last = block->GetLastInstruction();
+  // We insert moves at exit for phi predecessors and connecting blocks.
+  // A block ending with an if or a packed switch cannot branch to a block
+  // with phis because we do not allow critical edges. It also cannot connect
+  // a split interval between two blocks: the move has to happen in the successor.
+  DCHECK(!last->IsIf() && !last->IsPackedSwitch());
+  HInstruction* previous = last->GetPrevious();
+  HParallelMove* move;
+  // This is a parallel move for connecting blocks. We need to differentiate
+  // it from moves for connecting siblings in the same block and from output moves.
+  size_t position = last->GetLifetimePosition();
+  if (previous == nullptr || !previous->IsParallelMove()
+      || previous->AsParallelMove()->GetLifetimePosition() != position) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(position);
+    block->InsertInstructionBefore(move, last);
+  } else {
+    move = previous->AsParallelMove();
+  }
+  AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAtEntryOf(HBasicBlock* block,
+                                                             HInstruction* instruction,
+                                                             Location source,
+                                                             Location destination) const {
+  DCHECK(IsValidDestination(destination)) << destination;
+  if (source.Equals(destination)) return;
+
+  HInstruction* first = block->GetFirstInstruction();
+  HParallelMove* move = first->AsParallelMove();
+  size_t position = block->GetLifetimeStart();
+  // This is a parallel move for connecting blocks. We need to differentiate
+  // it from moves for connecting siblings in the same block and from input moves.
+  if (move == nullptr || move->GetLifetimePosition() != position) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(position);
+    block->InsertInstructionBefore(move, first);
+  }
+  AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertMoveAfter(HInstruction* instruction,
+                                                 Location source,
+                                                 Location destination) const {
+  DCHECK(IsValidDestination(destination)) << destination;
+  if (source.Equals(destination)) return;
+
+  if (instruction->IsPhi()) {
+    InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination);
+    return;
+  }
+
+  size_t position = instruction->GetLifetimePosition() + 1;
+  HParallelMove* move = instruction->GetNext()->AsParallelMove();
+  // This is a parallel move for moving the output of an instruction. We need
+  // to differentiate it from input moves, moves for connecting siblings in the
+  // same block, and moves for connecting blocks.
+  if (move == nullptr || move->GetLifetimePosition() != position) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(position);
+    instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
+  }
+  AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+}  // namespace art
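One detail of the new resolver worth spelling out is the spill-slot numbering in Resolve(): the switch deliberately falls through so that each type's slot index is offset by the slot counts of every category laid out below it (see the stack layout comment in that function). A standalone sketch of the same computation with stand-in types; the slot counts in the example are made up:

    #include <cstddef>
    #include <cstdio>

    // Mirrors the fall-through adjustment in RegisterAllocationResolver::Resolve(); not ART code.
    enum class SlotType { kInt, kFloat, kLong, kDouble };

    static size_t AdjustSlot(size_t slot, SlotType type, size_t reserved_out_slots,
                             size_t int_slots, size_t float_slots, size_t long_slots) {
      switch (type) {
        case SlotType::kDouble: slot += long_slots;   // fall through
        case SlotType::kLong:   slot += float_slots;  // fall through
        case SlotType::kFloat:  slot += int_slots;    // fall through
        case SlotType::kInt:    slot += reserved_out_slots; break;
      }
      return slot;  // the resolver then multiplies by kVRegSize to get a byte offset
    }

    int main() {
      // First double slot, with 2 reserved out slots, 3 int, 1 float and 2 long slots:
      std::printf("%zu\n", AdjustSlot(0, SlotType::kDouble, 2, 3, 1, 2));  // prints 8
      return 0;
    }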
diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h
new file mode 100644
index 0000000..6ceb9bc
--- /dev/null
+++ b/compiler/optimizing/register_allocation_resolver.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
+
+#include "base/arena_containers.h"
+#include "base/value_object.h"
+#include "primitive.h"
+
+namespace art {
+
+class ArenaAllocator;
+class CodeGenerator;
+class HBasicBlock;
+class HInstruction;
+class HParallelMove;
+class LiveInterval;
+class Location;
+class SsaLivenessAnalysis;
+
+/**
+ * Reconciles the locations assigned to live intervals with the location
+ * summary of each instruction, and inserts moves to resolve split intervals,
+ * nonlinear control flow, and phi inputs.
+ */
+class RegisterAllocationResolver : ValueObject {
+ public:
+  RegisterAllocationResolver(ArenaAllocator* allocator,
+                             CodeGenerator* codegen,
+                             const SsaLivenessAnalysis& liveness);
+
+  void Resolve(size_t max_safepoint_live_core_regs,
+               size_t max_safepoint_live_fp_regs,
+               size_t reserved_out_slots,  // Includes slot(s) for the art method.
+               size_t int_spill_slots,
+               size_t long_spill_slots,
+               size_t float_spill_slots,
+               size_t double_spill_slots,
+               size_t catch_phi_spill_slots,
+               const ArenaVector<LiveInterval*>& temp_intervals);
+
+ private:
+  // Connect adjacent siblings within blocks, and resolve inputs along the way.
+  // Uses max_safepoint_live_regs to check that we did not underestimate the
+  // number of live registers at safepoints.
+  void ConnectSiblings(LiveInterval* interval, size_t max_safepoint_live_regs);
+
+  // Connect siblings between block entries and exits.
+  void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
+
+  // Helper methods for inserting parallel moves in the graph.
+  void InsertParallelMoveAtExitOf(HBasicBlock* block,
+                                  HInstruction* instruction,
+                                  Location source,
+                                  Location destination) const;
+  void InsertParallelMoveAtEntryOf(HBasicBlock* block,
+                                   HInstruction* instruction,
+                                   Location source,
+                                   Location destination) const;
+  void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const;
+  void AddInputMoveFor(HInstruction* input,
+                       HInstruction* user,
+                       Location source,
+                       Location destination) const;
+  void InsertParallelMoveAt(size_t position,
+                            HInstruction* instruction,
+                            Location source,
+                            Location destination) const;
+  void AddMove(HParallelMove* move,
+               Location source,
+               Location destination,
+               HInstruction* instruction,
+               Primitive::Type type) const;
+
+  ArenaAllocator* const allocator_;
+  CodeGenerator* const codegen_;
+  const SsaLivenessAnalysis& liveness_;
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterAllocationResolver);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
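(The register_allocator.cc hunks below strip the linear-scan machinery out of RegisterAllocator, leaving a thin base class whose Create() factory dispatches on a Strategy value (currently only kRegisterAllocatorLinearScan) and instantiates RegisterAllocatorLinearScan. A rough standalone sketch of that factory shape, with simplified stand-in types instead of ART's arena-allocated classes:

    #include <iostream>
    #include <memory>

    // Simplified stand-ins for ART's ArenaAllocator, CodeGenerator and
    // SsaLivenessAnalysis; the real classes are arena-allocated and far richer.
    struct ArenaAllocator {};
    struct CodeGenerator {};
    struct SsaLivenessAnalysis {};

    class RegisterAllocator {
     public:
      enum Strategy { kRegisterAllocatorLinearScan };

      // Factory dispatching on the requested allocation strategy, mirroring the
      // shape of RegisterAllocator::Create() in the hunk below (which uses
      // placement new into the arena instead of std::unique_ptr).
      static std::unique_ptr<RegisterAllocator> Create(ArenaAllocator* allocator,
                                                       CodeGenerator* codegen,
                                                       const SsaLivenessAnalysis& liveness,
                                                       Strategy strategy);

      virtual ~RegisterAllocator() = default;
      virtual void AllocateRegisters() = 0;

     protected:
      RegisterAllocator(ArenaAllocator* allocator,
                        CodeGenerator* codegen,
                        const SsaLivenessAnalysis& liveness)
          : allocator_(allocator), codegen_(codegen), liveness_(liveness) {}

      ArenaAllocator* const allocator_;
      CodeGenerator* const codegen_;
      const SsaLivenessAnalysis& liveness_;
    };

    class RegisterAllocatorLinearScan : public RegisterAllocator {
     public:
      RegisterAllocatorLinearScan(ArenaAllocator* allocator,
                                  CodeGenerator* codegen,
                                  const SsaLivenessAnalysis& liveness)
          : RegisterAllocator(allocator, codegen, liveness) {}

      void AllocateRegisters() override {
        std::cout << "running linear scan\n";  // placeholder for the real pass
      }
    };

    std::unique_ptr<RegisterAllocator> RegisterAllocator::Create(
        ArenaAllocator* allocator,
        CodeGenerator* codegen,
        const SsaLivenessAnalysis& liveness,
        Strategy strategy) {
      switch (strategy) {
        case kRegisterAllocatorLinearScan:
          return std::make_unique<RegisterAllocatorLinearScan>(allocator, codegen, liveness);
      }
      return nullptr;  // unreachable for a valid strategy
    }

    int main() {
      ArenaAllocator arena;
      CodeGenerator codegen;
      SsaLivenessAnalysis liveness;
      std::unique_ptr<RegisterAllocator> ra = RegisterAllocator::Create(
          &arena, &codegen, liveness, RegisterAllocator::kRegisterAllocatorLinearScan);
      ra->AllocateRegisters();
    }

The real Create() arena-allocates the concrete allocator with `new (allocator)` and hits LOG(FATAL)/UNREACHABLE() on an unknown strategy; the sketch only illustrates the dispatch and ownership shape.)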
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 9d99668..2367ce1 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,65 +21,30 @@
 
 #include "base/bit_vector-inl.h"
 #include "code_generator.h"
+#include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 
+
 namespace art {
 
-static constexpr size_t kMaxLifetimePosition = -1;
-static constexpr size_t kDefaultNumberOfSpillSlots = 4;
-
-// For simplicity, we implement register pairs as (reg, reg + 1).
-// Note that this is a requirement for double registers on ARM, since we
-// allocate SRegister.
-static int GetHighForLowRegister(int reg) { return reg + 1; }
-static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
-static bool IsLowOfUnalignedPairInterval(LiveInterval* low) {
-  return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
-}
-
 RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
                                      CodeGenerator* codegen,
                                      const SsaLivenessAnalysis& liveness)
-      : allocator_(allocator),
-        codegen_(codegen),
-        liveness_(liveness),
-        unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        unhandled_(nullptr),
-        handled_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        active_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        catch_phi_spill_slots_(0),
-        safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
-        processing_core_registers_(false),
-        number_of_registers_(-1),
-        registers_array_(nullptr),
-        blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
-        blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
-        reserved_out_slots_(0),
-        maximum_number_of_live_core_registers_(0),
-        maximum_number_of_live_fp_registers_(0) {
-  temp_intervals_.reserve(4);
-  int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
-  long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
-  float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
-  double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+    : allocator_(allocator),
+      codegen_(codegen),
+      liveness_(liveness) {}
 
-  codegen->SetupBlockedRegisters();
-  physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
-  physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
-  // Always reserve for the current method and the graph's max out registers.
-  // TODO: compute it instead.
-  // ArtMethod* takes 2 vregs for 64 bits.
-  reserved_out_slots_ = InstructionSetPointerSize(codegen->GetInstructionSet()) / kVRegSize +
-      codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
+RegisterAllocator* RegisterAllocator::Create(ArenaAllocator* allocator,
+                                             CodeGenerator* codegen,
+                                             const SsaLivenessAnalysis& analysis,
+                                             Strategy strategy) {
+  switch (strategy) {
+    case kRegisterAllocatorLinearScan:
+      return new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis);
+    default:
+      LOG(FATAL) << "Invalid register allocation strategy: " << strategy;
+      UNREACHABLE();
+  }
 }
 
 bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED,
@@ -93,328 +58,6 @@
       || instruction_set == kX86_64;
 }
 
-static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
-  if (interval == nullptr) return false;
-  bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
-      && (interval->GetType() != Primitive::kPrimFloat);
-  return processing_core_registers == is_core_register;
-}
-
-void RegisterAllocator::AllocateRegisters() {
-  AllocateRegistersInternal();
-  Resolve();
-
-  if (kIsDebugBuild) {
-    processing_core_registers_ = true;
-    ValidateInternal(true);
-    processing_core_registers_ = false;
-    ValidateInternal(true);
-    // Check that the linear order is still correct with regards to lifetime positions.
-    // Since only parallel moves have been inserted during the register allocation,
-    // these checks are mostly for making sure these moves have been added correctly.
-    size_t current_liveness = 0;
-    for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
-      HBasicBlock* block = it.Current();
-      for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
-        HInstruction* instruction = inst_it.Current();
-        DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
-        current_liveness = instruction->GetLifetimePosition();
-      }
-      for (HInstructionIterator inst_it(block->GetInstructions());
-           !inst_it.Done();
-           inst_it.Advance()) {
-        HInstruction* instruction = inst_it.Current();
-        DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName();
-        current_liveness = instruction->GetLifetimePosition();
-      }
-    }
-  }
-}
-
-void RegisterAllocator::BlockRegister(Location location, size_t start, size_t end) {
-  int reg = location.reg();
-  DCHECK(location.IsRegister() || location.IsFpuRegister());
-  LiveInterval* interval = location.IsRegister()
-      ? physical_core_register_intervals_[reg]
-      : physical_fp_register_intervals_[reg];
-  Primitive::Type type = location.IsRegister()
-      ? Primitive::kPrimInt
-      : Primitive::kPrimFloat;
-  if (interval == nullptr) {
-    interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
-    if (location.IsRegister()) {
-      physical_core_register_intervals_[reg] = interval;
-    } else {
-      physical_fp_register_intervals_[reg] = interval;
-    }
-  }
-  DCHECK(interval->GetRegister() == reg);
-  interval->AddRange(start, end);
-}
-
-void RegisterAllocator::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
-  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
-    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
-      BlockRegister(Location::RegisterLocation(i), start, end);
-    }
-  }
-  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
-    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
-      BlockRegister(Location::FpuRegisterLocation(i), start, end);
-    }
-  }
-}
-
-void RegisterAllocator::AllocateRegistersInternal() {
-  // Iterate post-order, to ensure the list is sorted, and the last added interval
-  // is the one with the lowest start position.
-  for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
-         back_it.Advance()) {
-      ProcessInstruction(back_it.Current());
-    }
-    for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
-      ProcessInstruction(inst_it.Current());
-    }
-
-    if (block->IsCatchBlock() ||
-        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
-      // By blocking all registers at the top of each catch block or irreducible loop, we force
-      // intervals belonging to the live-in set of the catch/header block to be spilled.
-      // TODO(ngeoffray): Phis in this block could be allocated in register.
-      size_t position = block->GetLifetimeStart();
-      BlockRegisters(position, position + 1);
-    }
-  }
-
-  number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
-  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
-                                                    kArenaAllocRegisterAllocator);
-  processing_core_registers_ = true;
-  unhandled_ = &unhandled_core_intervals_;
-  for (LiveInterval* fixed : physical_core_register_intervals_) {
-    if (fixed != nullptr) {
-      // Fixed interval is added to inactive_ instead of unhandled_.
-      // It's also the only type of inactive interval whose start position
-      // can be after the current interval during linear scan.
-      // Fixed interval is never split and never moves to unhandled_.
-      inactive_.push_back(fixed);
-    }
-  }
-  LinearScan();
-
-  inactive_.clear();
-  active_.clear();
-  handled_.clear();
-
-  number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
-  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
-                                                    kArenaAllocRegisterAllocator);
-  processing_core_registers_ = false;
-  unhandled_ = &unhandled_fp_intervals_;
-  for (LiveInterval* fixed : physical_fp_register_intervals_) {
-    if (fixed != nullptr) {
-      // Fixed interval is added to inactive_ instead of unhandled_.
-      // It's also the only type of inactive interval whose start position
-      // can be after the current interval during linear scan.
-      // Fixed interval is never split and never moves to unhandled_.
-      inactive_.push_back(fixed);
-    }
-  }
-  LinearScan();
-}
-
-void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  size_t position = instruction->GetLifetimePosition();
-
-  if (locations == nullptr) return;
-
-  // Create synthesized intervals for temporaries.
-  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
-    Location temp = locations->GetTemp(i);
-    if (temp.IsRegister() || temp.IsFpuRegister()) {
-      BlockRegister(temp, position, position + 1);
-      // Ensure that an explicit temporary register is marked as being allocated.
-      codegen_->AddAllocatedRegister(temp);
-    } else {
-      DCHECK(temp.IsUnallocated());
-      switch (temp.GetPolicy()) {
-        case Location::kRequiresRegister: {
-          LiveInterval* interval =
-              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
-          temp_intervals_.push_back(interval);
-          interval->AddTempUse(instruction, i);
-          unhandled_core_intervals_.push_back(interval);
-          break;
-        }
-
-        case Location::kRequiresFpuRegister: {
-          LiveInterval* interval =
-              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
-          temp_intervals_.push_back(interval);
-          interval->AddTempUse(instruction, i);
-          if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
-            interval->AddHighInterval(/* is_temp */ true);
-            LiveInterval* high = interval->GetHighInterval();
-            temp_intervals_.push_back(high);
-            unhandled_fp_intervals_.push_back(high);
-          }
-          unhandled_fp_intervals_.push_back(interval);
-          break;
-        }
-
-        default:
-          LOG(FATAL) << "Unexpected policy for temporary location "
-                     << temp.GetPolicy();
-      }
-    }
-  }
-
-  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
-      && (instruction->GetType() != Primitive::kPrimFloat);
-
-  if (locations->NeedsSafepoint()) {
-    if (codegen_->IsLeafMethod()) {
-      // TODO: We do this here because we do not want the suspend check to artificially
-      // create live registers. We should find another place, but this is currently the
-      // simplest.
-      DCHECK(instruction->IsSuspendCheckEntry());
-      instruction->GetBlock()->RemoveInstruction(instruction);
-      return;
-    }
-    safepoints_.push_back(instruction);
-    if (locations->OnlyCallsOnSlowPath()) {
-      // We add a synthesized range at this position to record the live registers
-      // at this position. Ideally, we could just update the safepoints when locations
-      // are updated, but we currently need to know the full stack size before updating
-      // locations (because of parameters and the fact that we don't have a frame pointer).
-      // And knowing the full stack size requires to know the maximum number of live
-      // registers at calls in slow paths.
-      // By adding the following interval in the algorithm, we can compute this
-      // maximum before updating locations.
-      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
-      interval->AddRange(position, position + 1);
-      AddSorted(&unhandled_core_intervals_, interval);
-      AddSorted(&unhandled_fp_intervals_, interval);
-    }
-  }
-
-  if (locations->WillCall()) {
-    BlockRegisters(position, position + 1, /* caller_save_only */ true);
-  }
-
-  for (size_t i = 0; i < locations->GetInputCount(); ++i) {
-    Location input = locations->InAt(i);
-    if (input.IsRegister() || input.IsFpuRegister()) {
-      BlockRegister(input, position, position + 1);
-    } else if (input.IsPair()) {
-      BlockRegister(input.ToLow(), position, position + 1);
-      BlockRegister(input.ToHigh(), position, position + 1);
-    }
-  }
-
-  LiveInterval* current = instruction->GetLiveInterval();
-  if (current == nullptr) return;
-
-  ArenaVector<LiveInterval*>& unhandled = core_register
-      ? unhandled_core_intervals_
-      : unhandled_fp_intervals_;
-
-  DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back()));
-
-  if (codegen_->NeedsTwoRegisters(current->GetType())) {
-    current->AddHighInterval();
-  }
-
-  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
-    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
-    size_t safepoint_position = safepoint->GetLifetimePosition();
-
-    // Test that safepoints are ordered in the optimal way.
-    DCHECK(safepoint_index == safepoints_.size() ||
-           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
-
-    if (safepoint_position == current->GetStart()) {
-      // The safepoint is for this instruction, so the location of the instruction
-      // does not need to be saved.
-      DCHECK_EQ(safepoint_index, safepoints_.size());
-      DCHECK_EQ(safepoint, instruction);
-      continue;
-    } else if (current->IsDeadAt(safepoint_position)) {
-      break;
-    } else if (!current->Covers(safepoint_position)) {
-      // Hole in the interval.
-      continue;
-    }
-    current->AddSafepoint(safepoint);
-  }
-  current->ResetSearchCache();
-
-  // Some instructions define their output in fixed register/stack slot. We need
-  // to ensure we know these locations before doing register allocation. For a
-  // given register, we create an interval that covers these locations. The register
-  // will be unavailable at these locations when trying to allocate one for an
-  // interval.
-  //
-  // The backwards walking ensures the ranges are ordered on increasing start positions.
-  Location output = locations->Out();
-  if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) {
-    Location first = locations->InAt(0);
-    if (first.IsRegister() || first.IsFpuRegister()) {
-      current->SetFrom(position + 1);
-      current->SetRegister(first.reg());
-    } else if (first.IsPair()) {
-      current->SetFrom(position + 1);
-      current->SetRegister(first.low());
-      LiveInterval* high = current->GetHighInterval();
-      high->SetRegister(first.high());
-      high->SetFrom(position + 1);
-    }
-  } else if (output.IsRegister() || output.IsFpuRegister()) {
-    // Shift the interval's start by one to account for the blocked register.
-    current->SetFrom(position + 1);
-    current->SetRegister(output.reg());
-    BlockRegister(output, position, position + 1);
-  } else if (output.IsPair()) {
-    current->SetFrom(position + 1);
-    current->SetRegister(output.low());
-    LiveInterval* high = current->GetHighInterval();
-    high->SetRegister(output.high());
-    high->SetFrom(position + 1);
-    BlockRegister(output.ToLow(), position, position + 1);
-    BlockRegister(output.ToHigh(), position, position + 1);
-  } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
-    current->SetSpillSlot(output.GetStackIndex());
-  } else {
-    DCHECK(output.IsUnallocated() || output.IsConstant());
-  }
-
-  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
-    AllocateSpillSlotForCatchPhi(instruction->AsPhi());
-  }
-
-  // If needed, add interval to the list of unhandled intervals.
-  if (current->HasSpillSlot() || instruction->IsConstant()) {
-    // Split just before first register use.
-    size_t first_register_use = current->FirstRegisterUse();
-    if (first_register_use != kNoLifetime) {
-      LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
-      // Don't add directly to `unhandled`, it needs to be sorted and the start
-      // of this new interval might be after intervals already in the list.
-      AddSorted(&unhandled, split);
-    } else {
-      // Nothing to do, we won't allocate a register for this value.
-    }
-  } else {
-    // Don't add directly to `unhandled`, temp or safepoint intervals
-    // for this instruction may have been added, and those can be
-    // processed first.
-    AddSorted(&unhandled, current);
-  }
-}
-
 class AllRangesIterator : public ValueObject {
  public:
   explicit AllRangesIterator(LiveInterval* interval)
@@ -442,36 +85,6 @@
   DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
 };
 
-bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const {
-  // To simplify unit testing, we eagerly create the array of intervals, and
-  // call the helper method.
-  ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
-  for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
-    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
-    if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
-      intervals.push_back(instruction->GetLiveInterval());
-    }
-  }
-
-  const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_
-      ? &physical_core_register_intervals_
-      : &physical_fp_register_intervals_;
-  for (LiveInterval* fixed : *physical_register_intervals) {
-    if (fixed != nullptr) {
-      intervals.push_back(fixed);
-    }
-  }
-
-  for (LiveInterval* temp : temp_intervals_) {
-    if (ShouldProcess(processing_core_registers_, temp)) {
-      intervals.push_back(temp);
-    }
-  }
-
-  return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
-                           allocator_, processing_core_registers_, log_fatal_on_failure);
-}
-
 bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
                                           size_t number_of_spill_slots,
                                           size_t number_of_out_slots,
@@ -564,638 +177,30 @@
   return true;
 }
 
-void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
-  interval->Dump(stream);
-  stream << ": ";
-  if (interval->HasRegister()) {
-    if (interval->IsFloatingPoint()) {
-      codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
-    } else {
-      codegen_->DumpCoreRegister(stream, interval->GetRegister());
-    }
-  } else {
-    stream << "spilled";
-  }
-  stream << std::endl;
-}
-
-void RegisterAllocator::DumpAllIntervals(std::ostream& stream) const {
-  stream << "inactive: " << std::endl;
-  for (LiveInterval* inactive_interval : inactive_) {
-    DumpInterval(stream, inactive_interval);
-  }
-  stream << "active: " << std::endl;
-  for (LiveInterval* active_interval : active_) {
-    DumpInterval(stream, active_interval);
-  }
-  stream << "unhandled: " << std::endl;
-  auto unhandled = (unhandled_ != nullptr) ?
-      unhandled_ : &unhandled_core_intervals_;
-  for (LiveInterval* unhandled_interval : *unhandled) {
-    DumpInterval(stream, unhandled_interval);
-  }
-  stream << "handled: " << std::endl;
-  for (LiveInterval* handled_interval : handled_) {
-    DumpInterval(stream, handled_interval);
-  }
-}
-
-// By the book implementation of a linear scan register allocator.
-void RegisterAllocator::LinearScan() {
-  while (!unhandled_->empty()) {
-    // (1) Remove interval with the lowest start position from unhandled.
-    LiveInterval* current = unhandled_->back();
-    unhandled_->pop_back();
-
-    // Make sure the interval is an expected state.
-    DCHECK(!current->IsFixed() && !current->HasSpillSlot());
-    // Make sure we are going in the right order.
-    DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart());
-    // Make sure a low interval is always with a high.
-    DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval());
-    // Make sure a high interval is always with a low.
-    DCHECK(current->IsLowInterval() ||
-           unhandled_->empty() ||
-           !unhandled_->back()->IsHighInterval());
-
-    size_t position = current->GetStart();
-
-    // Remember the inactive_ size here since the ones moved to inactive_ from
-    // active_ below shouldn't need to be re-checked.
-    size_t inactive_intervals_to_handle = inactive_.size();
-
-    // (2) Remove currently active intervals that are dead at this position.
-    //     Move active intervals that have a lifetime hole at this position
-    //     to inactive.
-    auto active_kept_end = std::remove_if(
-        active_.begin(),
-        active_.end(),
-        [this, position](LiveInterval* interval) {
-          if (interval->IsDeadAt(position)) {
-            handled_.push_back(interval);
-            return true;
-          } else if (!interval->Covers(position)) {
-            inactive_.push_back(interval);
-            return true;
-          } else {
-            return false;  // Keep this interval.
-          }
-        });
-    active_.erase(active_kept_end, active_.end());
-
-    // (3) Remove currently inactive intervals that are dead at this position.
-    //     Move inactive intervals that cover this position to active.
-    auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
-    auto inactive_kept_end = std::remove_if(
-        inactive_.begin(),
-        inactive_to_handle_end,
-        [this, position](LiveInterval* interval) {
-          DCHECK(interval->GetStart() < position || interval->IsFixed());
-          if (interval->IsDeadAt(position)) {
-            handled_.push_back(interval);
-            return true;
-          } else if (interval->Covers(position)) {
-            active_.push_back(interval);
-            return true;
-          } else {
-            return false;  // Keep this interval.
-          }
-        });
-    inactive_.erase(inactive_kept_end, inactive_to_handle_end);
-
-    if (current->IsSlowPathSafepoint()) {
-      // Synthesized interval to record the maximum number of live registers
-      // at safepoints. No need to allocate a register for it.
-      if (processing_core_registers_) {
-        maximum_number_of_live_core_registers_ =
-          std::max(maximum_number_of_live_core_registers_, active_.size());
-      } else {
-        maximum_number_of_live_fp_registers_ =
-          std::max(maximum_number_of_live_fp_registers_, active_.size());
-      }
-      DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
-      continue;
-    }
-
-    if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
-      DCHECK(!current->HasRegister());
-      // Allocating the low part was unsucessful. The splitted interval for the high part
-      // will be handled next (it is in the `unhandled_` list).
-      continue;
-    }
-
-    // (4) Try to find an available register.
-    bool success = TryAllocateFreeReg(current);
-
-    // (5) If no register could be found, we need to spill.
-    if (!success) {
-      success = AllocateBlockedReg(current);
-    }
-
-    // (6) If the interval had a register allocated, add it to the list of active
-    //     intervals.
-    if (success) {
-      codegen_->AddAllocatedRegister(processing_core_registers_
-          ? Location::RegisterLocation(current->GetRegister())
-          : Location::FpuRegisterLocation(current->GetRegister()));
-      active_.push_back(current);
-      if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
-        current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
-      }
-    }
-  }
-}
-
-static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) {
-  DCHECK(!interval->IsHighInterval());
-  // Note that the same instruction may occur multiple times in the input list,
-  // so `free_until` may have changed already.
-  // Since `position` is not the current scan position, we need to use CoversSlow.
-  if (interval->IsDeadAt(position)) {
-    // Set the register to be free. Note that inactive intervals might later
-    // update this.
-    free_until[interval->GetRegister()] = kMaxLifetimePosition;
+LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
+  DCHECK_GE(position, interval->GetStart());
+  DCHECK(!interval->IsDeadAt(position));
+  if (position == interval->GetStart()) {
+    // Spill slot will be allocated when handling `interval` again.
+    interval->ClearRegister();
     if (interval->HasHighInterval()) {
-      DCHECK(interval->GetHighInterval()->IsDeadAt(position));
-      free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition;
+      interval->GetHighInterval()->ClearRegister();
+    } else if (interval->HasLowInterval()) {
+      interval->GetLowInterval()->ClearRegister();
     }
-  } else if (!interval->CoversSlow(position)) {
-    // The interval becomes inactive at `defined_by`. We make its register
-    // available only until the next use strictly after `defined_by`.
-    free_until[interval->GetRegister()] = interval->FirstUseAfter(position);
+    return interval;
+  } else {
+    LiveInterval* new_interval = interval->SplitAt(position);
     if (interval->HasHighInterval()) {
-      DCHECK(!interval->GetHighInterval()->CoversSlow(position));
-      free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()];
+      LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
+      new_interval->SetHighInterval(high);
+      high->SetLowInterval(new_interval);
+    } else if (interval->HasLowInterval()) {
+      LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
+      new_interval->SetLowInterval(low);
+      low->SetHighInterval(new_interval);
     }
-  }
-}
-
-// Find a free register. If multiple are found, pick the register that
-// is free the longest.
-bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) {
-  size_t* free_until = registers_array_;
-
-  // First set all registers to be free.
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    free_until[i] = kMaxLifetimePosition;
-  }
-
-  // For each active interval, set its register to not free.
-  for (LiveInterval* interval : active_) {
-    DCHECK(interval->HasRegister());
-    free_until[interval->GetRegister()] = 0;
-  }
-
-  // An interval that starts an instruction (that is, it is not split), may
-  // re-use the registers used by the inputs of that instruciton, based on the
-  // location summary.
-  HInstruction* defined_by = current->GetDefinedBy();
-  if (defined_by != nullptr && !current->IsSplit()) {
-    LocationSummary* locations = defined_by->GetLocations();
-    if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
-      HInputsRef inputs = defined_by->GetInputs();
-      for (size_t i = 0; i < inputs.size(); ++i) {
-        // Take the last interval of the input. It is the location of that interval
-        // that will be used at `defined_by`.
-        LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
-        // Note that interval may have not been processed yet.
-        // TODO: Handle non-split intervals last in the work list.
-        if (locations->InAt(i).IsValid()
-            && interval->HasRegister()
-            && interval->SameRegisterKind(*current)) {
-          // The input must be live until the end of `defined_by`, to comply to
-          // the linear scan algorithm. So we use `defined_by`'s end lifetime
-          // position to check whether the input is dead or is inactive after
-          // `defined_by`.
-          DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
-          size_t position = defined_by->GetLifetimePosition() + 1;
-          FreeIfNotCoverAt(interval, position, free_until);
-        }
-      }
-    }
-  }
-
-  // For each inactive interval, set its register to be free until
-  // the next intersection with `current`.
-  for (LiveInterval* inactive : inactive_) {
-    // Temp/Slow-path-safepoint interval has no holes.
-    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
-    if (!current->IsSplit() && !inactive->IsFixed()) {
-      // Neither current nor inactive are fixed.
-      // Thanks to SSA, a non-split interval starting in a hole of an
-      // inactive interval should never intersect with that inactive interval.
-      // Only if it's not fixed though, because fixed intervals don't come from SSA.
-      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
-      continue;
-    }
-
-    DCHECK(inactive->HasRegister());
-    if (free_until[inactive->GetRegister()] == 0) {
-      // Already used by some active interval. No need to intersect.
-      continue;
-    }
-    size_t next_intersection = inactive->FirstIntersectionWith(current);
-    if (next_intersection != kNoLifetime) {
-      free_until[inactive->GetRegister()] =
-          std::min(free_until[inactive->GetRegister()], next_intersection);
-    }
-  }
-
-  int reg = kNoRegister;
-  if (current->HasRegister()) {
-    // Some instructions have a fixed register output.
-    reg = current->GetRegister();
-    if (free_until[reg] == 0) {
-      DCHECK(current->IsHighInterval());
-      // AllocateBlockedReg will spill the holder of the register.
-      return false;
-    }
-  } else {
-    DCHECK(!current->IsHighInterval());
-    int hint = current->FindFirstRegisterHint(free_until, liveness_);
-    if ((hint != kNoRegister)
-        // For simplicity, if the hint we are getting for a pair cannot be used,
-        // we are just going to allocate a new pair.
-        && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) {
-      DCHECK(!IsBlocked(hint));
-      reg = hint;
-    } else if (current->IsLowInterval()) {
-      reg = FindAvailableRegisterPair(free_until, current->GetStart());
-    } else {
-      reg = FindAvailableRegister(free_until, current);
-    }
-  }
-
-  DCHECK_NE(reg, kNoRegister);
-  // If we could not find a register, we need to spill.
-  if (free_until[reg] == 0) {
-    return false;
-  }
-
-  if (current->IsLowInterval()) {
-    // If the high register of this interval is not available, we need to spill.
-    int high_reg = current->GetHighInterval()->GetRegister();
-    if (high_reg == kNoRegister) {
-      high_reg = GetHighForLowRegister(reg);
-    }
-    if (free_until[high_reg] == 0) {
-      return false;
-    }
-  }
-
-  current->SetRegister(reg);
-  if (!current->IsDeadAt(free_until[reg])) {
-    // If the register is only available for a subset of live ranges
-    // covered by `current`, split `current` before the position where
-    // the register is not available anymore.
-    LiveInterval* split = SplitBetween(current, current->GetStart(), free_until[reg]);
-    DCHECK(split != nullptr);
-    AddSorted(unhandled_, split);
-  }
-  return true;
-}
-
-bool RegisterAllocator::IsBlocked(int reg) const {
-  return processing_core_registers_
-      ? blocked_core_registers_[reg]
-      : blocked_fp_registers_[reg];
-}
-
-int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
-  int reg = kNoRegister;
-  // Pick the register pair that is used the last.
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    if (IsBlocked(i)) continue;
-    if (!IsLowRegister(i)) continue;
-    int high_register = GetHighForLowRegister(i);
-    if (IsBlocked(high_register)) continue;
-    int existing_high_register = GetHighForLowRegister(reg);
-    if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
-                        && next_use[high_register] >= next_use[existing_high_register])) {
-      reg = i;
-      if (next_use[i] == kMaxLifetimePosition
-          && next_use[high_register] == kMaxLifetimePosition) {
-        break;
-      }
-    } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
-      // If one of the current register is known to be unavailable, just unconditionally
-      // try a new one.
-      reg = i;
-    }
-  }
-  return reg;
-}
-
-bool RegisterAllocator::IsCallerSaveRegister(int reg) const {
-  return processing_core_registers_
-      ? !codegen_->IsCoreCalleeSaveRegister(reg)
-      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
-}
-
-int RegisterAllocator::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
-  // We special case intervals that do not span a safepoint to try to find a caller-save
-  // register if one is available. We iterate from 0 to the number of registers,
-  // so if there are caller-save registers available at the end, we continue the iteration.
-  bool prefers_caller_save = !current->HasWillCallSafepoint();
-  int reg = kNoRegister;
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    if (IsBlocked(i)) {
-      // Register cannot be used. Continue.
-      continue;
-    }
-
-    // Best case: we found a register fully available.
-    if (next_use[i] == kMaxLifetimePosition) {
-      if (prefers_caller_save && !IsCallerSaveRegister(i)) {
-        // We can get shorter encodings on some platforms by using
-        // small register numbers. So only update the candidate if the previous
-        // one was not available for the whole method.
-        if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) {
-          reg = i;
-        }
-        // Continue the iteration in the hope of finding a caller save register.
-        continue;
-      } else {
-        reg = i;
-        // We know the register is good enough. Return it.
-        break;
-      }
-    }
-
-    // If we had no register before, take this one as a reference.
-    if (reg == kNoRegister) {
-      reg = i;
-      continue;
-    }
-
-    // Pick the register that is used the last.
-    if (next_use[i] > next_use[reg]) {
-      reg = i;
-      continue;
-    }
-  }
-  return reg;
-}
-
-// Remove interval and its other half if any. Return iterator to the following element.
-static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf(
-    ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) {
-  DCHECK(intervals->begin() <= pos && pos < intervals->end());
-  LiveInterval* interval = *pos;
-  if (interval->IsLowInterval()) {
-    DCHECK(pos + 1 < intervals->end());
-    DCHECK_EQ(*(pos + 1), interval->GetHighInterval());
-    return intervals->erase(pos, pos + 2);
-  } else if (interval->IsHighInterval()) {
-    DCHECK(intervals->begin() < pos);
-    DCHECK_EQ(*(pos - 1), interval->GetLowInterval());
-    return intervals->erase(pos - 1, pos + 1);
-  } else {
-    return intervals->erase(pos);
-  }
-}
-
-bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
-                                                                 size_t first_register_use,
-                                                                 size_t* next_use) {
-  for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
-    LiveInterval* active = *it;
-    DCHECK(active->HasRegister());
-    if (active->IsFixed()) continue;
-    if (active->IsHighInterval()) continue;
-    if (first_register_use > next_use[active->GetRegister()]) continue;
-
-    // Split the first interval found that is either:
-    // 1) A non-pair interval.
-    // 2) A pair interval whose high is not low + 1.
-    // 3) A pair interval whose low is not even.
-    if (!active->IsLowInterval() ||
-        IsLowOfUnalignedPairInterval(active) ||
-        !IsLowRegister(active->GetRegister())) {
-      LiveInterval* split = Split(active, position);
-      if (split != active) {
-        handled_.push_back(active);
-      }
-      RemoveIntervalAndPotentialOtherHalf(&active_, it);
-      AddSorted(unhandled_, split);
-      return true;
-    }
-  }
-  return false;
-}
-
-// Find the register that is used the last, and spill the interval
-// that holds it. If the first use of `current` is after that register
-// we spill `current` instead.
-bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
-  size_t first_register_use = current->FirstRegisterUse();
-  if (current->HasRegister()) {
-    DCHECK(current->IsHighInterval());
-    // The low interval has allocated the register for the high interval. In
-    // case the low interval had to split both intervals, we may end up in a
-    // situation where the high interval does not have a register use anymore.
-    // We must still proceed in order to split currently active and inactive
-    // uses of the high interval's register, and put the high interval in the
-    // active set.
-    DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr));
-  } else if (first_register_use == kNoLifetime) {
-    AllocateSpillSlotFor(current);
-    return false;
-  }
-
-  // First set all registers as not being used.
-  size_t* next_use = registers_array_;
-  for (size_t i = 0; i < number_of_registers_; ++i) {
-    next_use[i] = kMaxLifetimePosition;
-  }
-
-  // For each active interval, find the next use of its register after the
-  // start of current.
-  for (LiveInterval* active : active_) {
-    DCHECK(active->HasRegister());
-    if (active->IsFixed()) {
-      next_use[active->GetRegister()] = current->GetStart();
-    } else {
-      size_t use = active->FirstRegisterUseAfter(current->GetStart());
-      if (use != kNoLifetime) {
-        next_use[active->GetRegister()] = use;
-      }
-    }
-  }
-
-  // For each inactive interval, find the next use of its register after the
-  // start of current.
-  for (LiveInterval* inactive : inactive_) {
-    // Temp/Slow-path-safepoint interval has no holes.
-    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
-    if (!current->IsSplit() && !inactive->IsFixed()) {
-      // Neither current nor inactive are fixed.
-      // Thanks to SSA, a non-split interval starting in a hole of an
-      // inactive interval should never intersect with that inactive interval.
-      // Only if it's not fixed though, because fixed intervals don't come from SSA.
-      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
-      continue;
-    }
-    DCHECK(inactive->HasRegister());
-    size_t next_intersection = inactive->FirstIntersectionWith(current);
-    if (next_intersection != kNoLifetime) {
-      if (inactive->IsFixed()) {
-        next_use[inactive->GetRegister()] =
-            std::min(next_intersection, next_use[inactive->GetRegister()]);
-      } else {
-        size_t use = inactive->FirstUseAfter(current->GetStart());
-        if (use != kNoLifetime) {
-          next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
-        }
-      }
-    }
-  }
-
-  int reg = kNoRegister;
-  bool should_spill = false;
-  if (current->HasRegister()) {
-    DCHECK(current->IsHighInterval());
-    reg = current->GetRegister();
-    // When allocating the low part, we made sure the high register was available.
-    DCHECK_LT(first_register_use, next_use[reg]);
-  } else if (current->IsLowInterval()) {
-    reg = FindAvailableRegisterPair(next_use, first_register_use);
-    // We should spill if both registers are not available.
-    should_spill = (first_register_use >= next_use[reg])
-      || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
-  } else {
-    DCHECK(!current->IsHighInterval());
-    reg = FindAvailableRegister(next_use, current);
-    should_spill = (first_register_use >= next_use[reg]);
-  }
-
-  DCHECK_NE(reg, kNoRegister);
-  if (should_spill) {
-    DCHECK(!current->IsHighInterval());
-    bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1));
-    if (is_allocation_at_use_site) {
-      if (!current->IsLowInterval()) {
-        DumpInterval(std::cerr, current);
-        DumpAllIntervals(std::cerr);
-        // This situation has the potential to infinite loop, so we make it a non-debug CHECK.
-        HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
-        CHECK(false) << "There is not enough registers available for "
-          << current->GetParent()->GetDefinedBy()->DebugName() << " "
-          << current->GetParent()->GetDefinedBy()->GetId()
-          << " at " << first_register_use - 1 << " "
-          << (at == nullptr ? "" : at->DebugName());
-      }
-
-      // If we're allocating a register for `current` because the instruction at
-      // that position requires it, but we think we should spill, then there are
-      // non-pair intervals or unaligned pair intervals blocking the allocation.
-      // We split the first interval found, and put ourselves first in the
-      // `unhandled_` list.
-      bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(),
-                                                              first_register_use,
-                                                              next_use);
-      DCHECK(success);
-      LiveInterval* existing = unhandled_->back();
-      DCHECK(existing->IsHighInterval());
-      DCHECK_EQ(existing->GetLowInterval(), current);
-      unhandled_->push_back(current);
-    } else {
-      // If the first use of that instruction is after the last use of the found
-      // register, we split this interval just before its first register use.
-      AllocateSpillSlotFor(current);
-      LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
-      DCHECK(current != split);
-      AddSorted(unhandled_, split);
-    }
-    return false;
-  } else {
-    // Use this register and spill the active and inactives interval that
-    // have that register.
-    current->SetRegister(reg);
-
-    for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
-      LiveInterval* active = *it;
-      if (active->GetRegister() == reg) {
-        DCHECK(!active->IsFixed());
-        LiveInterval* split = Split(active, current->GetStart());
-        if (split != active) {
-          handled_.push_back(active);
-        }
-        RemoveIntervalAndPotentialOtherHalf(&active_, it);
-        AddSorted(unhandled_, split);
-        break;
-      }
-    }
-
-    // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body.
-    for (auto it = inactive_.begin(); it != inactive_.end(); ) {
-      LiveInterval* inactive = *it;
-      bool erased = false;
-      if (inactive->GetRegister() == reg) {
-        if (!current->IsSplit() && !inactive->IsFixed()) {
-          // Neither current nor inactive are fixed.
-          // Thanks to SSA, a non-split interval starting in a hole of an
-          // inactive interval should never intersect with that inactive interval.
-          // Only if it's not fixed though, because fixed intervals don't come from SSA.
-          DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
-        } else {
-          size_t next_intersection = inactive->FirstIntersectionWith(current);
-          if (next_intersection != kNoLifetime) {
-            if (inactive->IsFixed()) {
-              LiveInterval* split = Split(current, next_intersection);
-              DCHECK_NE(split, current);
-              AddSorted(unhandled_, split);
-            } else {
-              // Split at the start of `current`, which will lead to splitting
-              // at the end of the lifetime hole of `inactive`.
-              LiveInterval* split = Split(inactive, current->GetStart());
-              // If it's inactive, it must start before the current interval.
-              DCHECK_NE(split, inactive);
-              it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it);
-              erased = true;
-              handled_.push_back(inactive);
-              AddSorted(unhandled_, split);
-            }
-          }
-        }
-      }
-      // If we have erased the element, `it` already points to the next element.
-      // Otherwise we need to move to the next element.
-      if (!erased) {
-        ++it;
-      }
-    }
-
-    return true;
-  }
-}
-
-void RegisterAllocator::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
-  DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
-  size_t insert_at = 0;
-  for (size_t i = array->size(); i > 0; --i) {
-    LiveInterval* current = (*array)[i - 1u];
-    // High intervals must be processed right after their low equivalent.
-    if (current->StartsAfter(interval) && !current->IsHighInterval()) {
-      insert_at = i;
-      break;
-    } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
-      // Ensure the slow path interval is the last to be processed at its location: we want the
-      // interval to know all live registers at this location.
-      DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
-      insert_at = i;
-      break;
-    }
-  }
-
-  // Insert the high interval before the low, to ensure the low is processed before.
-  auto insert_pos = array->begin() + insert_at;
-  if (interval->HasHighInterval()) {
-    array->insert(insert_pos, { interval->GetHighInterval(), interval });
-  } else if (interval->HasLowInterval()) {
-    array->insert(insert_pos, { interval, interval->GetLowInterval() });
-  } else {
-    array->insert(insert_pos, interval);
+    return new_interval;
   }
 }
 
@@ -1258,748 +263,4 @@
   return Split(interval, block_to->GetLifetimeStart());
 }
 
-LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
-  DCHECK_GE(position, interval->GetStart());
-  DCHECK(!interval->IsDeadAt(position));
-  if (position == interval->GetStart()) {
-    // Spill slot will be allocated when handling `interval` again.
-    interval->ClearRegister();
-    if (interval->HasHighInterval()) {
-      interval->GetHighInterval()->ClearRegister();
-    } else if (interval->HasLowInterval()) {
-      interval->GetLowInterval()->ClearRegister();
-    }
-    return interval;
-  } else {
-    LiveInterval* new_interval = interval->SplitAt(position);
-    if (interval->HasHighInterval()) {
-      LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
-      new_interval->SetHighInterval(high);
-      high->SetLowInterval(new_interval);
-    } else if (interval->HasLowInterval()) {
-      LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
-      new_interval->SetLowInterval(low);
-      low->SetHighInterval(new_interval);
-    }
-    return new_interval;
-  }
-}
-
-void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
-  if (interval->IsHighInterval()) {
-    // The low interval already took care of allocating the spill slot.
-    DCHECK(!interval->GetLowInterval()->HasRegister());
-    DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot());
-    return;
-  }
-
-  LiveInterval* parent = interval->GetParent();
-
-  // An instruction gets a spill slot for its entire lifetime. If the parent
-  // of this interval already has a spill slot, there is nothing to do.
-  if (parent->HasSpillSlot()) {
-    return;
-  }
-
-  HInstruction* defined_by = parent->GetDefinedBy();
-  DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi());
-
-  if (defined_by->IsParameterValue()) {
-    // Parameters have their own stack slot.
-    parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
-    return;
-  }
-
-  if (defined_by->IsCurrentMethod()) {
-    parent->SetSpillSlot(0);
-    return;
-  }
-
-  if (defined_by->IsConstant()) {
-    // Constants don't need a spill slot.
-    return;
-  }
-
-  ArenaVector<size_t>* spill_slots = nullptr;
-  switch (interval->GetType()) {
-    case Primitive::kPrimDouble:
-      spill_slots = &double_spill_slots_;
-      break;
-    case Primitive::kPrimLong:
-      spill_slots = &long_spill_slots_;
-      break;
-    case Primitive::kPrimFloat:
-      spill_slots = &float_spill_slots_;
-      break;
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimShort:
-      spill_slots = &int_spill_slots_;
-      break;
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
-  }
-
-  // Find an available spill slot.
-  size_t slot = 0;
-  for (size_t e = spill_slots->size(); slot < e; ++slot) {
-    if ((*spill_slots)[slot] <= parent->GetStart()
-        && (slot == (e - 1) || (*spill_slots)[slot + 1] <= parent->GetStart())) {
-      break;
-    }
-  }
-
-  size_t end = interval->GetLastSibling()->GetEnd();
-  if (parent->NeedsTwoSpillSlots()) {
-    if (slot + 2u > spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->resize(slot + 2u, end);
-    }
-    (*spill_slots)[slot] = end;
-    (*spill_slots)[slot + 1] = end;
-  } else {
-    if (slot == spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->push_back(end);
-    } else {
-      (*spill_slots)[slot] = end;
-    }
-  }
-
-  // Note that the exact spill slot location will be computed when we resolve,
-  // that is when we know the number of spill slots for each type.
-  parent->SetSpillSlot(slot);
-}
-
-static bool IsValidDestination(Location destination) {
-  return destination.IsRegister()
-      || destination.IsRegisterPair()
-      || destination.IsFpuRegister()
-      || destination.IsFpuRegisterPair()
-      || destination.IsStackSlot()
-      || destination.IsDoubleStackSlot();
-}
-
-void RegisterAllocator::AllocateSpillSlotForCatchPhi(HPhi* phi) {
-  LiveInterval* interval = phi->GetLiveInterval();
-
-  HInstruction* previous_phi = phi->GetPrevious();
-  DCHECK(previous_phi == nullptr ||
-         previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
-      << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
-
-  if (phi->IsVRegEquivalentOf(previous_phi)) {
-    // This is an equivalent of the previous phi. We need to assign the same
-    // catch phi slot.
-    DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
-    interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
-  } else {
-    // Allocate a new spill slot for this catch phi.
-    // TODO: Reuse spill slots when intervals of phis from different catch
-    //       blocks do not overlap.
-    interval->SetSpillSlot(catch_phi_spill_slots_);
-    catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
-  }
-}
-
-void RegisterAllocator::AddMove(HParallelMove* move,
-                                Location source,
-                                Location destination,
-                                HInstruction* instruction,
-                                Primitive::Type type) const {
-  if (type == Primitive::kPrimLong
-      && codegen_->ShouldSplitLongMoves()
-      // The parallel move resolver knows how to deal with long constants.
-      && !source.IsConstant()) {
-    move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction);
-    move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr);
-  } else {
-    move->AddMove(source, destination, type, instruction);
-  }
-}
-
-void RegisterAllocator::AddInputMoveFor(HInstruction* input,
-                                        HInstruction* user,
-                                        Location source,
-                                        Location destination) const {
-  if (source.Equals(destination)) return;
-
-  DCHECK(!user->IsPhi());
-
-  HInstruction* previous = user->GetPrevious();
-  HParallelMove* move = nullptr;
-  if (previous == nullptr
-      || !previous->IsParallelMove()
-      || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
-    move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(user->GetLifetimePosition());
-    user->GetBlock()->InsertInstructionBefore(move, user);
-  } else {
-    move = previous->AsParallelMove();
-  }
-  DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
-  AddMove(move, source, destination, nullptr, input->GetType());
-}
-
-static bool IsInstructionStart(size_t position) {
-  return (position & 1) == 0;
-}
-
-static bool IsInstructionEnd(size_t position) {
-  return (position & 1) == 1;
-}
-
-void RegisterAllocator::InsertParallelMoveAt(size_t position,
-                                             HInstruction* instruction,
-                                             Location source,
-                                             Location destination) const {
-  DCHECK(IsValidDestination(destination)) << destination;
-  if (source.Equals(destination)) return;
-
-  HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
-  HParallelMove* move;
-  if (at == nullptr) {
-    if (IsInstructionStart(position)) {
-      // Block boundary; don't do anything, the connection of split siblings will handle it.
-      return;
-    } else {
-      // Move must happen before the first instruction of the block.
-      at = liveness_.GetInstructionFromPosition((position + 1) / 2);
-      // Note that parallel moves may have already been inserted, so we explicitly
-      // ask for the first instruction of the block: `GetInstructionFromPosition` does
-      // not contain the `HParallelMove` instructions.
-      at = at->GetBlock()->GetFirstInstruction();
-
-      if (at->GetLifetimePosition() < position) {
-        // We may insert moves for split siblings and phi spills at the beginning of the block.
-        // Since this is a different lifetime position, we need to go to the next instruction.
-        DCHECK(at->IsParallelMove());
-        at = at->GetNext();
-      }
-
-      if (at->GetLifetimePosition() != position) {
-        DCHECK_GT(at->GetLifetimePosition(), position);
-        move = new (allocator_) HParallelMove(allocator_);
-        move->SetLifetimePosition(position);
-        at->GetBlock()->InsertInstructionBefore(move, at);
-      } else {
-        DCHECK(at->IsParallelMove());
-        move = at->AsParallelMove();
-      }
-    }
-  } else if (IsInstructionEnd(position)) {
-    // Move must happen after the instruction.
-    DCHECK(!at->IsControlFlow());
-    move = at->GetNext()->AsParallelMove();
-    // This is a parallel move for connecting siblings in the same block. We need to
-    // differentiate it from moves for connecting blocks, and from input moves.
-    if (move == nullptr || move->GetLifetimePosition() > position) {
-      move = new (allocator_) HParallelMove(allocator_);
-      move->SetLifetimePosition(position);
-      at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
-    }
-  } else {
-    // Move must happen before the instruction.
-    HInstruction* previous = at->GetPrevious();
-    if (previous == nullptr
-        || !previous->IsParallelMove()
-        || previous->GetLifetimePosition() != position) {
-      // If the previous is a parallel move, then its position must be lower
-      // than the given `position`: it was added just after the non-parallel
-      // move instruction that precedes `instruction`.
-      DCHECK(previous == nullptr
-             || !previous->IsParallelMove()
-             || previous->GetLifetimePosition() < position);
-      move = new (allocator_) HParallelMove(allocator_);
-      move->SetLifetimePosition(position);
-      at->GetBlock()->InsertInstructionBefore(move, at);
-    } else {
-      move = previous->AsParallelMove();
-    }
-  }
-  DCHECK_EQ(move->GetLifetimePosition(), position);
-  AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block,
-                                                   HInstruction* instruction,
-                                                   Location source,
-                                                   Location destination) const {
-  DCHECK(IsValidDestination(destination)) << destination;
-  if (source.Equals(destination)) return;
-
-  DCHECK_EQ(block->GetNormalSuccessors().size(), 1u);
-  HInstruction* last = block->GetLastInstruction();
-  // We insert moves at exit for phi predecessors and connecting blocks.
-  // A block ending with an if or a packed switch cannot branch to a block
-  // with phis because we do not allow critical edges. Nor can it connect
-  // a split interval between two blocks: the move has to happen in the successor.
-  DCHECK(!last->IsIf() && !last->IsPackedSwitch());
-  HInstruction* previous = last->GetPrevious();
-  HParallelMove* move;
-  // This is a parallel move for connecting blocks. We need to differentiate
-  // it from moves for connecting siblings in the same block, and from output moves.
-  size_t position = last->GetLifetimePosition();
-  if (previous == nullptr || !previous->IsParallelMove()
-      || previous->AsParallelMove()->GetLifetimePosition() != position) {
-    move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(position);
-    block->InsertInstructionBefore(move, last);
-  } else {
-    move = previous->AsParallelMove();
-  }
-  AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block,
-                                                    HInstruction* instruction,
-                                                    Location source,
-                                                    Location destination) const {
-  DCHECK(IsValidDestination(destination)) << destination;
-  if (source.Equals(destination)) return;
-
-  HInstruction* first = block->GetFirstInstruction();
-  HParallelMove* move = first->AsParallelMove();
-  size_t position = block->GetLifetimeStart();
-  // This is a parallel move for connecting blocks. We need to differentiate
-  // it from moves for connecting siblings in the same block, and from input moves.
-  if (move == nullptr || move->GetLifetimePosition() != position) {
-    move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(position);
-    block->InsertInstructionBefore(move, first);
-  }
-  AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertMoveAfter(HInstruction* instruction,
-                                        Location source,
-                                        Location destination) const {
-  DCHECK(IsValidDestination(destination)) << destination;
-  if (source.Equals(destination)) return;
-
-  if (instruction->IsPhi()) {
-    InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination);
-    return;
-  }
-
-  size_t position = instruction->GetLifetimePosition() + 1;
-  HParallelMove* move = instruction->GetNext()->AsParallelMove();
-  // This is a parallel move for moving the output of an instruction. We need
-  // to differentiate it from input moves, moves for connecting siblings in the
-  // same block, and moves for connecting blocks.
-  if (move == nullptr || move->GetLifetimePosition() != position) {
-    move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(position);
-    instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
-  }
-  AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
-  LiveInterval* current = interval;
-  if (current->HasSpillSlot()
-      && current->HasRegister()
-      // Currently, we unconditionally spill the current method in the code generators.
-      && !interval->GetDefinedBy()->IsCurrentMethod()) {
-    // We spill eagerly, so move must be at definition.
-    InsertMoveAfter(interval->GetDefinedBy(),
-                    interval->ToLocation(),
-                    interval->NeedsTwoSpillSlots()
-                        ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
-                        : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
-  }
-  UsePosition* use = current->GetFirstUse();
-  UsePosition* env_use = current->GetFirstEnvironmentUse();
-
-  // Walk over all siblings, updating locations of use positions, and
-  // connecting them when they are adjacent.
-  do {
-    Location source = current->ToLocation();
-
-    // Walk over all uses covered by this interval, and update the location
-    // information.
-
-    LiveRange* range = current->GetFirstRange();
-    while (range != nullptr) {
-      while (use != nullptr && use->GetPosition() < range->GetStart()) {
-        DCHECK(use->IsSynthesized());
-        use = use->GetNext();
-      }
-      while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
-        DCHECK(!use->GetIsEnvironment());
-        DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
-        if (!use->IsSynthesized()) {
-          LocationSummary* locations = use->GetUser()->GetLocations();
-          Location expected_location = locations->InAt(use->GetInputIndex());
-          // The expected (actual) location may be invalid in case the input is unused. Currently
-          // this only happens for intrinsics.
-          if (expected_location.IsValid()) {
-            if (expected_location.IsUnallocated()) {
-              locations->SetInAt(use->GetInputIndex(), source);
-            } else if (!expected_location.IsConstant()) {
-              AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
-            }
-          } else {
-            DCHECK(use->GetUser()->IsInvoke());
-            DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
-          }
-        }
-        use = use->GetNext();
-      }
-
-      // Walk over the environment uses, and update their locations.
-      while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
-        env_use = env_use->GetNext();
-      }
-
-      while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
-        DCHECK(current->CoversSlow(env_use->GetPosition())
-               || (env_use->GetPosition() == range->GetEnd()));
-        HEnvironment* environment = env_use->GetEnvironment();
-        environment->SetLocationAt(env_use->GetInputIndex(), source);
-        env_use = env_use->GetNext();
-      }
-
-      range = range->GetNext();
-    }
-
-    // If the next interval starts just after this one, and has a register,
-    // insert a move.
-    LiveInterval* next_sibling = current->GetNextSibling();
-    if (next_sibling != nullptr
-        && next_sibling->HasRegister()
-        && current->GetEnd() == next_sibling->GetStart()) {
-      Location destination = next_sibling->ToLocation();
-      InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
-    }
-
-    for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
-         safepoint_position != nullptr;
-         safepoint_position = safepoint_position->GetNext()) {
-      DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
-
-      LocationSummary* locations = safepoint_position->GetLocations();
-      if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
-        DCHECK(interval->GetDefinedBy()->IsActualObject())
-            << interval->GetDefinedBy()->DebugName()
-            << "@" << safepoint_position->GetInstruction()->DebugName();
-        locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
-      }
-
-      switch (source.GetKind()) {
-        case Location::kRegister: {
-          locations->AddLiveRegister(source);
-          if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
-            DCHECK_LE(locations->GetNumberOfLiveRegisters(),
-                      maximum_number_of_live_core_registers_ +
-                      maximum_number_of_live_fp_registers_);
-          }
-          if (current->GetType() == Primitive::kPrimNot) {
-            DCHECK(interval->GetDefinedBy()->IsActualObject())
-                << interval->GetDefinedBy()->DebugName()
-                << "@" << safepoint_position->GetInstruction()->DebugName();
-            locations->SetRegisterBit(source.reg());
-          }
-          break;
-        }
-        case Location::kFpuRegister: {
-          locations->AddLiveRegister(source);
-          break;
-        }
-
-        case Location::kRegisterPair:
-        case Location::kFpuRegisterPair: {
-          locations->AddLiveRegister(source.ToLow());
-          locations->AddLiveRegister(source.ToHigh());
-          break;
-        }
-        case Location::kStackSlot:  // Fall-through
-        case Location::kDoubleStackSlot:  // Fall-through
-        case Location::kConstant: {
-          // Nothing to do.
-          break;
-        }
-        default: {
-          LOG(FATAL) << "Unexpected location for object";
-        }
-      }
-    }
-    current = next_sibling;
-  } while (current != nullptr);
-
-  if (kIsDebugBuild) {
-    // Following uses can only be synthesized uses.
-    while (use != nullptr) {
-      DCHECK(use->IsSynthesized());
-      use = use->GetNext();
-    }
-  }
-}
-
-static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
-    HInstruction* instruction) {
-  return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
-         (instruction->IsConstant() || instruction->IsCurrentMethod());
-}
-
-void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
-                                             HBasicBlock* from,
-                                             HBasicBlock* to) const {
-  if (interval->GetNextSibling() == nullptr) {
-    // Nothing to connect. The whole range was allocated to the same location.
-    return;
-  }
-
-  // Find the intervals that cover `from` and `to`.
-  size_t destination_position = to->GetLifetimeStart();
-  size_t source_position = from->GetLifetimeEnd() - 1;
-  LiveInterval* destination = interval->GetSiblingAt(destination_position);
-  LiveInterval* source = interval->GetSiblingAt(source_position);
-
-  if (destination == source) {
-    // Interval was not split.
-    return;
-  }
-
-  LiveInterval* parent = interval->GetParent();
-  HInstruction* defined_by = parent->GetDefinedBy();
-  if (codegen_->GetGraph()->HasIrreducibleLoops() &&
-      (destination == nullptr || !destination->CoversSlow(destination_position))) {
-    // Our live_in fixed point calculation has found that the instruction is live
-    // in the `to` block because it will eventually enter an irreducible loop. Our
-    // live interval computation however does not compute a fixed point, and
-    // therefore will not have a location for that instruction for `to`.
-    // Because the instruction is a constant or the ArtMethod, we don't need to
-    // do anything: it will be materialized in the irreducible loop.
-    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
-        << defined_by->DebugName() << ":" << defined_by->GetId()
-        << " " << from->GetBlockId() << " -> " << to->GetBlockId();
-    return;
-  }
-
-  if (!destination->HasRegister()) {
-    // Values are eagerly spilled. Spill slot already contains appropriate value.
-    return;
-  }
-
-  Location location_source;
-  // `GetSiblingAt` returns the interval whose start and end cover `position`,
-  // but does not check whether the interval is inactive at that position.
-  // The only situation where the interval is inactive at that position is in the
-  // presence of irreducible loops for constants and ArtMethod.
-  if (codegen_->GetGraph()->HasIrreducibleLoops() &&
-      (source == nullptr || !source->CoversSlow(source_position))) {
-    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
-    if (defined_by->IsConstant()) {
-      location_source = defined_by->GetLocations()->Out();
-    } else {
-      DCHECK(defined_by->IsCurrentMethod());
-      location_source = parent->NeedsTwoSpillSlots()
-          ? Location::DoubleStackSlot(parent->GetSpillSlot())
-          : Location::StackSlot(parent->GetSpillSlot());
-    }
-  } else {
-    DCHECK(source != nullptr);
-    DCHECK(source->CoversSlow(source_position));
-    DCHECK(destination->CoversSlow(destination_position));
-    location_source = source->ToLocation();
-  }
-
-  // If `from` has only one successor, we can put the moves at the exit of it. Otherwise
-  // we need to put the moves at the entry of `to`.
-  if (from->GetNormalSuccessors().size() == 1) {
-    InsertParallelMoveAtExitOf(from,
-                               defined_by,
-                               location_source,
-                               destination->ToLocation());
-  } else {
-    DCHECK_EQ(to->GetPredecessors().size(), 1u);
-    InsertParallelMoveAtEntryOf(to,
-                                defined_by,
-                                location_source,
-                                destination->ToLocation());
-  }
-}
-
-void RegisterAllocator::Resolve() {
-  codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(),
-                                     maximum_number_of_live_core_registers_,
-                                     maximum_number_of_live_fp_registers_,
-                                     reserved_out_slots_,
-                                     codegen_->GetGraph()->GetLinearOrder());
-
-  // Adjust the Out Location of instructions.
-  // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
-  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
-    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
-    LiveInterval* current = instruction->GetLiveInterval();
-    LocationSummary* locations = instruction->GetLocations();
-    Location location = locations->Out();
-    if (instruction->IsParameterValue()) {
-      // Now that we know the frame size, adjust the parameter's location.
-      if (location.IsStackSlot()) {
-        location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
-        current->SetSpillSlot(location.GetStackIndex());
-        locations->UpdateOut(location);
-      } else if (location.IsDoubleStackSlot()) {
-        location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
-        current->SetSpillSlot(location.GetStackIndex());
-        locations->UpdateOut(location);
-      } else if (current->HasSpillSlot()) {
-        current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
-      }
-    } else if (instruction->IsCurrentMethod()) {
-      // The current method is always at offset 0.
-      DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0));
-    } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
-      DCHECK(current->HasSpillSlot());
-      size_t slot = current->GetSpillSlot()
-                    + GetNumberOfSpillSlots()
-                    + reserved_out_slots_
-                    - catch_phi_spill_slots_;
-      current->SetSpillSlot(slot * kVRegSize);
-    } else if (current->HasSpillSlot()) {
-      // Adjust the stack slot, now that we know the number of them for each type.
-      // The way this implementation lays out the stack is the following:
-      // [parameter slots       ]
-      // [catch phi spill slots ]
-      // [double spill slots    ]
-      // [long spill slots      ]
-      // [float spill slots     ]
-      // [int/ref values        ]
-      // [maximum out values    ] (number of arguments for calls)
-      // [art method            ].
-      size_t slot = current->GetSpillSlot();
-      switch (current->GetType()) {
-        case Primitive::kPrimDouble:
-          slot += long_spill_slots_.size();
-          FALLTHROUGH_INTENDED;
-        case Primitive::kPrimLong:
-          slot += float_spill_slots_.size();
-          FALLTHROUGH_INTENDED;
-        case Primitive::kPrimFloat:
-          slot += int_spill_slots_.size();
-          FALLTHROUGH_INTENDED;
-        case Primitive::kPrimNot:
-        case Primitive::kPrimInt:
-        case Primitive::kPrimChar:
-        case Primitive::kPrimByte:
-        case Primitive::kPrimBoolean:
-        case Primitive::kPrimShort:
-          slot += reserved_out_slots_;
-          break;
-        case Primitive::kPrimVoid:
-          LOG(FATAL) << "Unexpected type for interval " << current->GetType();
-      }
-      current->SetSpillSlot(slot * kVRegSize);
-    }
-
-    Location source = current->ToLocation();
-
-    if (location.IsUnallocated()) {
-      if (location.GetPolicy() == Location::kSameAsFirstInput) {
-        if (locations->InAt(0).IsUnallocated()) {
-          locations->SetInAt(0, source);
-        } else {
-          DCHECK(locations->InAt(0).Equals(source));
-        }
-      }
-      locations->UpdateOut(source);
-    } else {
-      DCHECK(source.Equals(location));
-    }
-  }
-
-  // Connect siblings.
-  for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
-    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
-    ConnectSiblings(instruction->GetLiveInterval());
-  }
-
-  // Resolve non-linear control flow across branches. Order does not matter.
-  for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    if (block->IsCatchBlock() ||
-        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
-      // Instructions live at the top of catch blocks or irreducible loop headers
-      // were forced to spill.
-      if (kIsDebugBuild) {
-        BitVector* live = liveness_.GetLiveInSet(*block);
-        for (uint32_t idx : live->Indexes()) {
-          LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
-          LiveInterval* sibling = interval->GetSiblingAt(block->GetLifetimeStart());
-          // `GetSiblingAt` returns the sibling that contains a position, but there could be
-          // a lifetime hole in it. `CoversSlow` returns whether the interval is live at that
-          // position.
-          if ((sibling != nullptr) && sibling->CoversSlow(block->GetLifetimeStart())) {
-            DCHECK(!sibling->HasRegister());
-          }
-        }
-      }
-    } else {
-      BitVector* live = liveness_.GetLiveInSet(*block);
-      for (uint32_t idx : live->Indexes()) {
-        LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
-        for (HBasicBlock* predecessor : block->GetPredecessors()) {
-          ConnectSplitSiblings(interval, predecessor, block);
-        }
-      }
-    }
-  }
-
-  // Resolve phi inputs. Order does not matter.
-  for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
-    HBasicBlock* current = it.Current();
-    if (current->IsCatchBlock()) {
-      // Catch phi values are set at runtime by the exception delivery mechanism.
-    } else {
-      for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
-        HInstruction* phi = inst_it.Current();
-        for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
-          HBasicBlock* predecessor = current->GetPredecessors()[i];
-          DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
-          HInstruction* input = phi->InputAt(i);
-          Location source = input->GetLiveInterval()->GetLocationAt(
-              predecessor->GetLifetimeEnd() - 1);
-          Location destination = phi->GetLiveInterval()->ToLocation();
-          InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
-        }
-      }
-    }
-  }
-
-  // Assign temp locations.
-  for (LiveInterval* temp : temp_intervals_) {
-    if (temp->IsHighInterval()) {
-      // High intervals can be skipped, they are already handled by the low interval.
-      continue;
-    }
-    HInstruction* at = liveness_.GetTempUser(temp);
-    size_t temp_index = liveness_.GetTempIndex(temp);
-    LocationSummary* locations = at->GetLocations();
-    switch (temp->GetType()) {
-      case Primitive::kPrimInt:
-        locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister()));
-        break;
-
-      case Primitive::kPrimDouble:
-        if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
-          Location location = Location::FpuRegisterPairLocation(
-              temp->GetRegister(), temp->GetHighInterval()->GetRegister());
-          locations->SetTempAt(temp_index, location);
-        } else {
-          locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister()));
-        }
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected type for temporary location "
-                   << temp->GetType();
-    }
-  }
-}
-
 }  // namespace art
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 58600b7..729eede 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@
 
 #include "arch/instruction_set.h"
 #include "base/arena_containers.h"
+#include "base/arena_object.h"
 #include "base/macros.h"
 #include "primitive.h"
 
@@ -29,36 +30,40 @@
 class HGraph;
 class HInstruction;
 class HParallelMove;
-class HPhi;
 class LiveInterval;
 class Location;
 class SsaLivenessAnalysis;
 
 /**
- * An implementation of a linear scan register allocator on an `HGraph` with SSA form.
+ * Base class for any register allocator.
  */
-class RegisterAllocator {
+class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> {
  public:
-  RegisterAllocator(ArenaAllocator* allocator,
-                    CodeGenerator* codegen,
-                    const SsaLivenessAnalysis& analysis);
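+  // The register allocation strategies this base class can create; only linear scan for now.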
+  enum Strategy {
+    kRegisterAllocatorLinearScan
+  };
+
+  static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan;
+
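+  // Creates an allocator implementing the requested strategy.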
+  static RegisterAllocator* Create(ArenaAllocator* allocator,
+                                   CodeGenerator* codegen,
+                                   const SsaLivenessAnalysis& analysis,
+                                   Strategy strategy = kRegisterAllocatorDefault);
+
+  virtual ~RegisterAllocator() = default;
 
   // Main entry point for the register allocator. Given the liveness analysis,
   // allocates registers to live intervals.
-  void AllocateRegisters();
+  virtual void AllocateRegisters() = 0;
 
   // Validate that the register allocator did not allocate the same register to
-  // intervals that intersect each other. Returns false if it did not.
-  bool Validate(bool log_fatal_on_failure) {
-    processing_core_registers_ = true;
-    if (!ValidateInternal(log_fatal_on_failure)) {
-      return false;
-    }
-    processing_core_registers_ = false;
-    return ValidateInternal(log_fatal_on_failure);
-  }
+  // intervals that intersect each other. Returns false if it failed.
+  virtual bool Validate(bool log_fatal_on_failure) = 0;
 
-  // Helper method for validation. Used by unit testing.
+  static bool CanAllocateRegistersFor(const HGraph& graph,
+                                      InstructionSet instruction_set);
+
+  // Verifies that live intervals do not conflict. Used by unit testing.
   static bool ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
                                 size_t number_of_spill_slots,
                                 size_t number_of_out_slots,
@@ -67,178 +72,25 @@
                                 bool processing_core_registers,
                                 bool log_fatal_on_failure);
 
-  static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
-
-  size_t GetNumberOfSpillSlots() const {
-    return int_spill_slots_.size()
-        + long_spill_slots_.size()
-        + float_spill_slots_.size()
-        + double_spill_slots_.size()
-        + catch_phi_spill_slots_;
-  }
-
   static constexpr const char* kRegisterAllocatorPassName = "register";
 
- private:
-  // Main methods of the allocator.
-  void LinearScan();
-  bool TryAllocateFreeReg(LiveInterval* interval);
-  bool AllocateBlockedReg(LiveInterval* interval);
-  void Resolve();
-
-  // Add `interval` in the given sorted list.
-  static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval);
+ protected:
+  RegisterAllocator(ArenaAllocator* allocator,
+                    CodeGenerator* codegen,
+                    const SsaLivenessAnalysis& analysis);
 
   // Split `interval` at the position `position`. The new interval starts at `position`.
-  LiveInterval* Split(LiveInterval* interval, size_t position);
+  // If `position` is at the start of `interval`, returns `interval` with its
+  // register location(s) cleared.
+  static LiveInterval* Split(LiveInterval* interval, size_t position);
 
   // Split `interval` at a position between `from` and `to`. The method will try
   // to find an optimal split position.
   LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to);
 
-  // Returns whether `reg` is blocked by the code generator.
-  bool IsBlocked(int reg) const;
-
-  // Update the interval for the register in `location` to cover [start, end).
-  void BlockRegister(Location location, size_t start, size_t end);
-  void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
-
-  // Allocate a spill slot for the given interval. Should be called in linear
-  // order of interval starting positions.
-  void AllocateSpillSlotFor(LiveInterval* interval);
-
-  // Allocate a spill slot for the given catch phi. Will allocate the same slot
-  // for phis which share the same vreg. Must be called in reverse linear order
-  // of lifetime positions and ascending vreg numbers for correctness.
-  void AllocateSpillSlotForCatchPhi(HPhi* phi);
-
-  // Connect adjacent siblings within blocks.
-  void ConnectSiblings(LiveInterval* interval);
-
-  // Connect siblings between block entries and exits.
-  void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
-
-  // Helper methods to insert parallel moves in the graph.
-  void InsertParallelMoveAtExitOf(HBasicBlock* block,
-                                  HInstruction* instruction,
-                                  Location source,
-                                  Location destination) const;
-  void InsertParallelMoveAtEntryOf(HBasicBlock* block,
-                                   HInstruction* instruction,
-                                   Location source,
-                                   Location destination) const;
-  void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const;
-  void AddInputMoveFor(HInstruction* input,
-                       HInstruction* user,
-                       Location source,
-                       Location destination) const;
-  void InsertParallelMoveAt(size_t position,
-                            HInstruction* instruction,
-                            Location source,
-                            Location destination) const;
-
-  void AddMove(HParallelMove* move,
-               Location source,
-               Location destination,
-               HInstruction* instruction,
-               Primitive::Type type) const;
-
-  // Helper methods.
-  void AllocateRegistersInternal();
-  void ProcessInstruction(HInstruction* instruction);
-  bool ValidateInternal(bool log_fatal_on_failure) const;
-  void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
-  void DumpAllIntervals(std::ostream& stream) const;
-  int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
-  int FindAvailableRegister(size_t* next_use, LiveInterval* current) const;
-  bool IsCallerSaveRegister(int reg) const;
-
-  // Try splitting an active non-pair or unaligned pair interval at the given `position`.
-  // Returns whether it was successful at finding such an interval.
-  bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
-                                                size_t first_register_use,
-                                                size_t* next_use);
-
   ArenaAllocator* const allocator_;
   CodeGenerator* const codegen_;
   const SsaLivenessAnalysis& liveness_;
-
-  // List of intervals for core registers that must be processed, ordered by start
-  // position. Last entry is the interval that has the lowest start position.
-  // This list is initially populated before doing the linear scan.
-  ArenaVector<LiveInterval*> unhandled_core_intervals_;
-
-  // List of intervals for floating-point registers. Same comments as above.
-  ArenaVector<LiveInterval*> unhandled_fp_intervals_;
-
-  // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_`
-  // or `unhandled_fp_intervals_`.
-  ArenaVector<LiveInterval*>* unhandled_;
-
-  // List of intervals that have been processed.
-  ArenaVector<LiveInterval*> handled_;
-
-  // List of intervals that are currently active when processing a new live interval.
-  // That is, they have a live range that spans the start of the new interval.
-  ArenaVector<LiveInterval*> active_;
-
-  // List of intervals that are currently inactive when processing a new live interval.
-  // That is, they have a lifetime hole that spans the start of the new interval.
-  ArenaVector<LiveInterval*> inactive_;
-
-  // Fixed intervals for physical registers. Such intervals cover the positions
-  // where an instruction requires a specific register.
-  ArenaVector<LiveInterval*> physical_core_register_intervals_;
-  ArenaVector<LiveInterval*> physical_fp_register_intervals_;
-
-  // Intervals for temporaries. Such intervals cover the positions
-  // where an instruction requires a temporary.
-  ArenaVector<LiveInterval*> temp_intervals_;
-
-  // The spill slots allocated for live intervals. We ensure spill slots
-  // are typed to avoid (1) doing moves and swaps between two different kinds
-  // of registers, and (2) swapping between a single stack slot and a double
-  // stack slot. This simplifies the parallel move resolver.
-  ArenaVector<size_t> int_spill_slots_;
-  ArenaVector<size_t> long_spill_slots_;
-  ArenaVector<size_t> float_spill_slots_;
-  ArenaVector<size_t> double_spill_slots_;
-
-  // Spill slots allocated to catch phis. This category is special-cased because
-  // (1) slots are allocated prior to linear scan and in reverse linear order,
-  // (2) equivalent phis need to share slots despite having different types.
-  size_t catch_phi_spill_slots_;
-
-  // Instructions that need a safepoint.
-  ArenaVector<HInstruction*> safepoints_;
-
-  // True if processing core registers. False if processing floating
-  // point registers.
-  bool processing_core_registers_;
-
-  // Number of registers for the current register kind (core or floating point).
-  size_t number_of_registers_;
-
-  // Temporary array, allocated ahead of time for simplicity.
-  size_t* registers_array_;
-
-  // Blocked registers, as decided by the code generator.
-  bool* const blocked_core_registers_;
-  bool* const blocked_fp_registers_;
-
-  // Slots reserved for out arguments.
-  size_t reserved_out_slots_;
-
-  // The maximum live core registers at safepoints.
-  size_t maximum_number_of_live_core_registers_;
-
-  // The maximum live FP registers at safepoints.
-  size_t maximum_number_of_live_fp_registers_;
-
-  ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
-  ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
-
-  DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
 };
 
 }  // namespace art
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
new file mode 100644
index 0000000..a9151ba
--- /dev/null
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -0,0 +1,1224 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocator_linear_scan.h"
+
+#include <iostream>
+#include <sstream>
+
+#include "base/bit_vector-inl.h"
+#include "code_generator.h"
+#include "register_allocation_resolver.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
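+// Note: -1 converted to size_t wraps to its maximum value, serving as a sentinel lifetime position.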
+static constexpr size_t kMaxLifetimePosition = -1;
+static constexpr size_t kDefaultNumberOfSpillSlots = 4;
+
+// For simplicity, we implement register pairs as (reg, reg + 1).
+// Note that this is a requirement for double registers on ARM, since we
+// allocate SRegister.
+static int GetHighForLowRegister(int reg) { return reg + 1; }
+static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
+static bool IsLowOfUnalignedPairInterval(LiveInterval* low) {
+  return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
+}
+
+RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ArenaAllocator* allocator,
+                                                         CodeGenerator* codegen,
+                                                         const SsaLivenessAnalysis& liveness)
+      : RegisterAllocator(allocator, codegen, liveness),
+        unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        unhandled_(nullptr),
+        handled_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        active_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        catch_phi_spill_slots_(0),
+        safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        processing_core_registers_(false),
+        number_of_registers_(-1),
+        registers_array_(nullptr),
+        blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
+        blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
+        reserved_out_slots_(0),
+        maximum_number_of_live_core_registers_(0),
+        maximum_number_of_live_fp_registers_(0) {
+  temp_intervals_.reserve(4);
+  int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+  long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+  float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+  double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+
+  codegen->SetupBlockedRegisters();
+  physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
+  physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
+  // Always reserve for the current method and the graph's max out registers.
+  // TODO: compute it instead.
+  // ArtMethod* takes 2 vregs for 64 bits.
+  reserved_out_slots_ = InstructionSetPointerSize(codegen->GetInstructionSet()) / kVRegSize +
+      codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
+}
+
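+// Returns whether `interval` belongs to the register kind (core or floating point) currently being processed.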
+static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
+  if (interval == nullptr) return false;
+  bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
+      && (interval->GetType() != Primitive::kPrimFloat);
+  return processing_core_registers == is_core_register;
+}
+
+void RegisterAllocatorLinearScan::AllocateRegisters() {
+  AllocateRegistersInternal();
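+  // Location resolution (laying out spill slots and connecting split siblings with moves) is delegated to the resolver.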
+  RegisterAllocationResolver(allocator_, codegen_, liveness_)
+      .Resolve(maximum_number_of_live_core_registers_,
+               maximum_number_of_live_fp_registers_,
+               reserved_out_slots_,
+               int_spill_slots_.size(),
+               long_spill_slots_.size(),
+               float_spill_slots_.size(),
+               double_spill_slots_.size(),
+               catch_phi_spill_slots_,
+               temp_intervals_);
+
+  if (kIsDebugBuild) {
+    processing_core_registers_ = true;
+    ValidateInternal(true);
+    processing_core_registers_ = false;
+    ValidateInternal(true);
+    // Check that the linear order is still correct with regard to lifetime positions.
+    // Since only parallel moves have been inserted during the register allocation,
+    // these checks are mostly for making sure these moves have been added correctly.
+    size_t current_liveness = 0;
+    for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+      HBasicBlock* block = it.Current();
+      for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+        HInstruction* instruction = inst_it.Current();
+        DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
+        current_liveness = instruction->GetLifetimePosition();
+      }
+      for (HInstructionIterator inst_it(block->GetInstructions());
+           !inst_it.Done();
+           inst_it.Advance()) {
+        HInstruction* instruction = inst_it.Current();
+        DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName();
+        current_liveness = instruction->GetLifetimePosition();
+      }
+    }
+  }
+}
+
+void RegisterAllocatorLinearScan::BlockRegister(Location location, size_t start, size_t end) {
+  int reg = location.reg();
+  DCHECK(location.IsRegister() || location.IsFpuRegister());
+  LiveInterval* interval = location.IsRegister()
+      ? physical_core_register_intervals_[reg]
+      : physical_fp_register_intervals_[reg];
+  Primitive::Type type = location.IsRegister()
+      ? Primitive::kPrimInt
+      : Primitive::kPrimFloat;
+  if (interval == nullptr) {
+    interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
+    if (location.IsRegister()) {
+      physical_core_register_intervals_[reg] = interval;
+    } else {
+      physical_fp_register_intervals_[reg] = interval;
+    }
+  }
+  DCHECK(interval->GetRegister() == reg);
+  interval->AddRange(start, end);
+}
+
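+// Blocks all core and floating-point registers over [start, end); with `caller_save_only`, callee-save registers are skipped.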
+void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
+  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
+      BlockRegister(Location::RegisterLocation(i), start, end);
+    }
+  }
+  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
+      BlockRegister(Location::FpuRegisterLocation(i), start, end);
+    }
+  }
+}
+
+void RegisterAllocatorLinearScan::AllocateRegistersInternal() {
+  // Iterate post-order, to ensure the list is sorted, and the last added interval
+  // is the one with the lowest start position.
+  for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
+         back_it.Advance()) {
+      ProcessInstruction(back_it.Current());
+    }
+    for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+      ProcessInstruction(inst_it.Current());
+    }
+
+    if (block->IsCatchBlock() ||
+        (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+      // By blocking all registers at the top of each catch block or irreducible loop, we force
+      // intervals belonging to the live-in set of the catch/header block to be spilled.
+      // TODO(ngeoffray): Phis in this block could be allocated in registers.
+      size_t position = block->GetLifetimeStart();
+      BlockRegisters(position, position + 1);
+    }
+  }
+
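+  // First pass: run the linear scan over core-register intervals.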
+  number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
+  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
+                                                    kArenaAllocRegisterAllocator);
+  processing_core_registers_ = true;
+  unhandled_ = &unhandled_core_intervals_;
+  for (LiveInterval* fixed : physical_core_register_intervals_) {
+    if (fixed != nullptr) {
+      // Fixed interval is added to inactive_ instead of unhandled_.
+      // It's also the only type of inactive interval whose start position
+      // can be after the current interval during linear scan.
+      // Fixed interval is never split and never moves to unhandled_.
+      inactive_.push_back(fixed);
+    }
+  }
+  LinearScan();
+
+  inactive_.clear();
+  active_.clear();
+  handled_.clear();
+
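+  // Second pass: run the linear scan over floating-point intervals.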
+  number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
+  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
+                                                    kArenaAllocRegisterAllocator);
+  processing_core_registers_ = false;
+  unhandled_ = &unhandled_fp_intervals_;
+  for (LiveInterval* fixed : physical_fp_register_intervals_) {
+    if (fixed != nullptr) {
+      // Fixed interval is added to inactive_ instead of unhandled_.
+      // It's also the only type of inactive interval whose start position
+      // can be after the current interval during linear scan.
+      // Fixed interval is never split and never moves to unhandled_.
+      inactive_.push_back(fixed);
+    }
+  }
+  LinearScan();
+}
+
+void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  size_t position = instruction->GetLifetimePosition();
+
+  if (locations == nullptr) return;
+
+  // Create synthesized intervals for temporaries.
+  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
+    Location temp = locations->GetTemp(i);
+    if (temp.IsRegister() || temp.IsFpuRegister()) {
+      BlockRegister(temp, position, position + 1);
+      // Ensure that an explicit temporary register is marked as being allocated.
+      codegen_->AddAllocatedRegister(temp);
+    } else {
+      DCHECK(temp.IsUnallocated());
+      switch (temp.GetPolicy()) {
+        case Location::kRequiresRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
+          temp_intervals_.push_back(interval);
+          interval->AddTempUse(instruction, i);
+          unhandled_core_intervals_.push_back(interval);
+          break;
+        }
+
+        case Location::kRequiresFpuRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
+          temp_intervals_.push_back(interval);
+          interval->AddTempUse(instruction, i);
+          if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+            interval->AddHighInterval(/* is_temp */ true);
+            LiveInterval* high = interval->GetHighInterval();
+            temp_intervals_.push_back(high);
+            unhandled_fp_intervals_.push_back(high);
+          }
+          unhandled_fp_intervals_.push_back(interval);
+          break;
+        }
+
+        default:
+          LOG(FATAL) << "Unexpected policy for temporary location "
+                     << temp.GetPolicy();
+      }
+    }
+  }
+
+  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
+      && (instruction->GetType() != Primitive::kPrimFloat);
+
+  if (locations->NeedsSafepoint()) {
+    if (codegen_->IsLeafMethod()) {
+      // TODO: We do this here because we do not want the suspend check to artificially
+      // create live registers. We should find another place, but this is currently the
+      // simplest.
+      DCHECK(instruction->IsSuspendCheckEntry());
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    }
+    safepoints_.push_back(instruction);
+    if (locations->OnlyCallsOnSlowPath()) {
+      // We add a synthesized range at this position to record the live registers
+      // at this position. Ideally, we could just update the safepoints when locations
+      // are updated, but we currently need to know the full stack size before updating
+      // locations (because of parameters and the fact that we don't have a frame pointer).
+      // And knowing the full stack size requires to know the maximum number of live
+      // registers at calls in slow paths.
+      // By adding the following interval in the algorithm, we can compute this
+      // maximum before updating locations.
+      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+      interval->AddRange(position, position + 1);
+      AddSorted(&unhandled_core_intervals_, interval);
+      AddSorted(&unhandled_fp_intervals_, interval);
+    }
+  }
+
+  if (locations->WillCall()) {
+    BlockRegisters(position, position + 1, /* caller_save_only */ true);
+  }
+
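+  // Inputs already assigned a fixed register block that register at this instruction's position.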
+  for (size_t i = 0; i < locations->GetInputCount(); ++i) {
+    Location input = locations->InAt(i);
+    if (input.IsRegister() || input.IsFpuRegister()) {
+      BlockRegister(input, position, position + 1);
+    } else if (input.IsPair()) {
+      BlockRegister(input.ToLow(), position, position + 1);
+      BlockRegister(input.ToHigh(), position, position + 1);
+    }
+  }
+
+  LiveInterval* current = instruction->GetLiveInterval();
+  if (current == nullptr) return;
+
+  ArenaVector<LiveInterval*>& unhandled = core_register
+      ? unhandled_core_intervals_
+      : unhandled_fp_intervals_;
+
+  DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back()));
+
+  if (codegen_->NeedsTwoRegisters(current->GetType())) {
+    current->AddHighInterval();
+  }
+
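+  // Walk the collected safepoints in reverse order and attach each one covered by this interval.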
+  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
+    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
+    size_t safepoint_position = safepoint->GetLifetimePosition();
+
+    // Test that safepoints are ordered in the optimal way.
+    DCHECK(safepoint_index == safepoints_.size() ||
+           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
+
+    if (safepoint_position == current->GetStart()) {
+      // The safepoint is for this instruction, so the location of the instruction
+      // does not need to be saved.
+      DCHECK_EQ(safepoint_index, safepoints_.size());
+      DCHECK_EQ(safepoint, instruction);
+      continue;
+    } else if (current->IsDeadAt(safepoint_position)) {
+      break;
+    } else if (!current->Covers(safepoint_position)) {
+      // Hole in the interval.
+      continue;
+    }
+    current->AddSafepoint(safepoint);
+  }
+  current->ResetSearchCache();
+
+  // Some instructions define their output in a fixed register or stack slot. We need
+  // to ensure we know these locations before doing register allocation. For a
+  // given register, we create an interval that covers these locations. The register
+  // will be unavailable at these locations when trying to allocate one for an
+  // interval.
+  //
+  // The backwards walking ensures the ranges are ordered on increasing start positions.
+  Location output = locations->Out();
+  if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) {
+    Location first = locations->InAt(0);
+    if (first.IsRegister() || first.IsFpuRegister()) {
+      current->SetFrom(position + 1);
+      current->SetRegister(first.reg());
+    } else if (first.IsPair()) {
+      current->SetFrom(position + 1);
+      current->SetRegister(first.low());
+      LiveInterval* high = current->GetHighInterval();
+      high->SetRegister(first.high());
+      high->SetFrom(position + 1);
+    }
+  } else if (output.IsRegister() || output.IsFpuRegister()) {
+    // Shift the interval's start by one to account for the blocked register.
+    current->SetFrom(position + 1);
+    current->SetRegister(output.reg());
+    BlockRegister(output, position, position + 1);
+  } else if (output.IsPair()) {
+    current->SetFrom(position + 1);
+    current->SetRegister(output.low());
+    LiveInterval* high = current->GetHighInterval();
+    high->SetRegister(output.high());
+    high->SetFrom(position + 1);
+    BlockRegister(output.ToLow(), position, position + 1);
+    BlockRegister(output.ToHigh(), position, position + 1);
+  } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
+    current->SetSpillSlot(output.GetStackIndex());
+  } else {
+    DCHECK(output.IsUnallocated() || output.IsConstant());
+  }
+
+  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+    AllocateSpillSlotForCatchPhi(instruction->AsPhi());
+  }
+
+  // If needed, add interval to the list of unhandled intervals.
+  if (current->HasSpillSlot() || instruction->IsConstant()) {
+    // Split just before first register use.
+    size_t first_register_use = current->FirstRegisterUse();
+    if (first_register_use != kNoLifetime) {
+      LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
+      // Don't add directly to `unhandled`; it needs to stay sorted, and the start
+      // of this new interval might be after intervals already in the list.
+      AddSorted(&unhandled, split);
+    } else {
+      // Nothing to do, we won't allocate a register for this value.
+    }
+  } else {
+    // Don't add directly to `unhandled`; temp or safepoint intervals
+    // for this instruction may have been added, and those can be
+    // processed first.
+    AddSorted(&unhandled, current);
+  }
+}
+
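+// Iterates over every live range of an interval and of all its split siblings.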
+class AllRangesIterator : public ValueObject {
+ public:
+  explicit AllRangesIterator(LiveInterval* interval)
+      : current_interval_(interval),
+        current_range_(interval->GetFirstRange()) {}
+
+  bool Done() const { return current_interval_ == nullptr; }
+  LiveRange* CurrentRange() const { return current_range_; }
+  LiveInterval* CurrentInterval() const { return current_interval_; }
+
+  void Advance() {
+    current_range_ = current_range_->GetNext();
+    if (current_range_ == nullptr) {
+      current_interval_ = current_interval_->GetNextSibling();
+      if (current_interval_ != nullptr) {
+        current_range_ = current_interval_->GetFirstRange();
+      }
+    }
+  }
+
+ private:
+  LiveInterval* current_interval_;
+  LiveRange* current_range_;
+
+  DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
+};
+
+bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) const {
+  // To simplify unit testing, we eagerly create the array of intervals, and
+  // call the helper method.
+  ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
+  for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
+    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+    if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
+      intervals.push_back(instruction->GetLiveInterval());
+    }
+  }
+
+  const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_
+      ? &physical_core_register_intervals_
+      : &physical_fp_register_intervals_;
+  for (LiveInterval* fixed : *physical_register_intervals) {
+    if (fixed != nullptr) {
+      intervals.push_back(fixed);
+    }
+  }
+
+  for (LiveInterval* temp : temp_intervals_) {
+    if (ShouldProcess(processing_core_registers_, temp)) {
+      intervals.push_back(temp);
+    }
+  }
+
+  return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
+                           allocator_, processing_core_registers_, log_fatal_on_failure);
+}
+
+void RegisterAllocatorLinearScan::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
+  interval->Dump(stream);
+  stream << ": ";
+  if (interval->HasRegister()) {
+    if (interval->IsFloatingPoint()) {
+      codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
+    } else {
+      codegen_->DumpCoreRegister(stream, interval->GetRegister());
+    }
+  } else {
+    stream << "spilled";
+  }
+  stream << std::endl;
+}
+
+void RegisterAllocatorLinearScan::DumpAllIntervals(std::ostream& stream) const {
+  stream << "inactive: " << std::endl;
+  for (LiveInterval* inactive_interval : inactive_) {
+    DumpInterval(stream, inactive_interval);
+  }
+  stream << "active: " << std::endl;
+  for (LiveInterval* active_interval : active_) {
+    DumpInterval(stream, active_interval);
+  }
+  stream << "unhandled: " << std::endl;
+  auto unhandled = (unhandled_ != nullptr) ?
+      unhandled_ : &unhandled_core_intervals_;
+  for (LiveInterval* unhandled_interval : *unhandled) {
+    DumpInterval(stream, unhandled_interval);
+  }
+  stream << "handled: " << std::endl;
+  for (LiveInterval* handled_interval : handled_) {
+    DumpInterval(stream, handled_interval);
+  }
+}
+
+// A by-the-book implementation of a linear scan register allocator.
+void RegisterAllocatorLinearScan::LinearScan() {
+  while (!unhandled_->empty()) {
+    // (1) Remove interval with the lowest start position from unhandled.
+    LiveInterval* current = unhandled_->back();
+    unhandled_->pop_back();
+
+    // Make sure the interval is in an expected state.
+    DCHECK(!current->IsFixed() && !current->HasSpillSlot());
+    // Make sure we are going in the right order.
+    DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart());
+    // Make sure a low interval is always with a high.
+    DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval());
+    // Make sure a high interval is always with a low.
+    DCHECK(current->IsLowInterval() ||
+           unhandled_->empty() ||
+           !unhandled_->back()->IsHighInterval());
+
+    size_t position = current->GetStart();
+
+    // Remember the inactive_ size here since the ones moved to inactive_ from
+    // active_ below shouldn't need to be re-checked.
+    size_t inactive_intervals_to_handle = inactive_.size();
+
+    // (2) Remove currently active intervals that are dead at this position.
+    //     Move active intervals that have a lifetime hole at this position
+    //     to inactive.
+    auto active_kept_end = std::remove_if(
+        active_.begin(),
+        active_.end(),
+        [this, position](LiveInterval* interval) {
+          if (interval->IsDeadAt(position)) {
+            handled_.push_back(interval);
+            return true;
+          } else if (!interval->Covers(position)) {
+            inactive_.push_back(interval);
+            return true;
+          } else {
+            return false;  // Keep this interval.
+          }
+        });
+    active_.erase(active_kept_end, active_.end());
+
+    // (3) Remove currently inactive intervals that are dead at this position.
+    //     Move inactive intervals that cover this position to active.
+    auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
+    auto inactive_kept_end = std::remove_if(
+        inactive_.begin(),
+        inactive_to_handle_end,
+        [this, position](LiveInterval* interval) {
+          DCHECK(interval->GetStart() < position || interval->IsFixed());
+          if (interval->IsDeadAt(position)) {
+            handled_.push_back(interval);
+            return true;
+          } else if (interval->Covers(position)) {
+            active_.push_back(interval);
+            return true;
+          } else {
+            return false;  // Keep this interval.
+          }
+        });
+    inactive_.erase(inactive_kept_end, inactive_to_handle_end);
+
+    if (current->IsSlowPathSafepoint()) {
+      // Synthesized interval to record the maximum number of live registers
+      // at safepoints. No need to allocate a register for it.
+      if (processing_core_registers_) {
+        maximum_number_of_live_core_registers_ =
+          std::max(maximum_number_of_live_core_registers_, active_.size());
+      } else {
+        maximum_number_of_live_fp_registers_ =
+          std::max(maximum_number_of_live_fp_registers_, active_.size());
+      }
+      DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
+      continue;
+    }
+
+    if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
+      DCHECK(!current->HasRegister());
+      // Allocating the low part was unsuccessful. The split interval for the high part
+      // will be handled next (it is in the `unhandled_` list).
+      continue;
+    }
+
+    // (4) Try to find an available register.
+    bool success = TryAllocateFreeReg(current);
+
+    // (5) If no register could be found, we need to spill.
+    if (!success) {
+      success = AllocateBlockedReg(current);
+    }
+
+    // (6) If the interval had a register allocated, add it to the list of active
+    //     intervals.
+    if (success) {
+      codegen_->AddAllocatedRegister(processing_core_registers_
+          ? Location::RegisterLocation(current->GetRegister())
+          : Location::FpuRegisterLocation(current->GetRegister()));
+      active_.push_back(current);
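+      // If only the low half of a pair received a register, assign the paired
+      // high register to the high half now.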
+      if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
+        current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
+      }
+    }
+  }
+}
+
+static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) {
+  DCHECK(!interval->IsHighInterval());
+  // Note that the same instruction may occur multiple times in the input list,
+  // so `free_until` may have changed already.
+  // Since `position` is not the current scan position, we need to use CoversSlow.
+  if (interval->IsDeadAt(position)) {
+    // Set the register to be free. Note that inactive intervals might later
+    // update this.
+    free_until[interval->GetRegister()] = kMaxLifetimePosition;
+    if (interval->HasHighInterval()) {
+      DCHECK(interval->GetHighInterval()->IsDeadAt(position));
+      free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition;
+    }
+  } else if (!interval->CoversSlow(position)) {
+    // The interval becomes inactive at `defined_by`. We make its register
+    // available only until the next use strictly after `defined_by`.
+    free_until[interval->GetRegister()] = interval->FirstUseAfter(position);
+    if (interval->HasHighInterval()) {
+      DCHECK(!interval->GetHighInterval()->CoversSlow(position));
+      free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()];
+    }
+  }
+}
+
+// Find a free register. If multiple are found, pick the register that
+// is free the longest.
+bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) {
+  size_t* free_until = registers_array_;
+
+  // First set all registers to be free.
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    free_until[i] = kMaxLifetimePosition;
+  }
+
+  // For each active interval, set its register to not free.
+  for (LiveInterval* interval : active_) {
+    DCHECK(interval->HasRegister());
+    free_until[interval->GetRegister()] = 0;
+  }
+
+  // An interval that starts at its defining instruction (that is, it is not split)
+  // may reuse the registers used by the inputs of that instruction, based on
+  // the location summary.
+  HInstruction* defined_by = current->GetDefinedBy();
+  if (defined_by != nullptr && !current->IsSplit()) {
+    LocationSummary* locations = defined_by->GetLocations();
+    if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
+      HInputsRef inputs = defined_by->GetInputs();
+      for (size_t i = 0; i < inputs.size(); ++i) {
+        // Take the last interval of the input. It is the location of that interval
+        // that will be used at `defined_by`.
+        LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
+        // Note that the interval may not have been processed yet.
+        // TODO: Handle non-split intervals last in the work list.
+        if (locations->InAt(i).IsValid()
+            && interval->HasRegister()
+            && interval->SameRegisterKind(*current)) {
+          // The input must be live until the end of `defined_by`, to comply with
+          // the linear scan algorithm. So we use `defined_by`'s end lifetime
+          // position to check whether the input is dead or is inactive after
+          // `defined_by`.
+          DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
+          size_t position = defined_by->GetLifetimePosition() + 1;
+          FreeIfNotCoverAt(interval, position, free_until);
+        }
+      }
+    }
+  }
+
+  // For each inactive interval, set its register to be free until
+  // the next intersection with `current`.
+  for (LiveInterval* inactive : inactive_) {
+    // Temp and slow-path-safepoint intervals have no holes.
+    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+    if (!current->IsSplit() && !inactive->IsFixed()) {
+      // Neither current nor inactive is fixed.
+      // Thanks to SSA, a non-split interval starting in a hole of an
+      // inactive interval should never intersect with that inactive interval.
+      // This only holds for non-fixed intervals, since fixed intervals don't come from SSA.
+      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+      continue;
+    }
+
+    DCHECK(inactive->HasRegister());
+    if (free_until[inactive->GetRegister()] == 0) {
+      // Already used by some active interval. No need to intersect.
+      continue;
+    }
+    size_t next_intersection = inactive->FirstIntersectionWith(current);
+    if (next_intersection != kNoLifetime) {
+      free_until[inactive->GetRegister()] =
+          std::min(free_until[inactive->GetRegister()], next_intersection);
+    }
+  }
+
+  int reg = kNoRegister;
+  if (current->HasRegister()) {
+    // Some instructions have a fixed register output.
+    reg = current->GetRegister();
+    if (free_until[reg] == 0) {
+      DCHECK(current->IsHighInterval());
+      // AllocateBlockedReg will spill the holder of the register.
+      return false;
+    }
+  } else {
+    DCHECK(!current->IsHighInterval());
+    int hint = current->FindFirstRegisterHint(free_until, liveness_);
+    if ((hint != kNoRegister)
+        // For simplicity, if the hint we are getting for a pair cannot be used,
+        // we are just going to allocate a new pair.
+        && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) {
+      DCHECK(!IsBlocked(hint));
+      reg = hint;
+    } else if (current->IsLowInterval()) {
+      reg = FindAvailableRegisterPair(free_until, current->GetStart());
+    } else {
+      reg = FindAvailableRegister(free_until, current);
+    }
+  }
+
+  DCHECK_NE(reg, kNoRegister);
+  // If we could not find a register, we need to spill.
+  if (free_until[reg] == 0) {
+    return false;
+  }
+
+  if (current->IsLowInterval()) {
+    // If the high register of this interval is not available, we need to spill.
+    int high_reg = current->GetHighInterval()->GetRegister();
+    if (high_reg == kNoRegister) {
+      high_reg = GetHighForLowRegister(reg);
+    }
+    if (free_until[high_reg] == 0) {
+      return false;
+    }
+  }
+
+  current->SetRegister(reg);
+  if (!current->IsDeadAt(free_until[reg])) {
+    // If the register is only available for a subset of live ranges
+    // covered by `current`, split `current` before the position where
+    // the register is not available anymore.
+    LiveInterval* split = SplitBetween(current, current->GetStart(), free_until[reg]);
+    DCHECK(split != nullptr);
+    AddSorted(unhandled_, split);
+  }
+  return true;
+}
+
+bool RegisterAllocatorLinearScan::IsBlocked(int reg) const {
+  return processing_core_registers_
+      ? blocked_core_registers_[reg]
+      : blocked_fp_registers_[reg];
+}
+
+int RegisterAllocatorLinearScan::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
+  int reg = kNoRegister;
+  // Pick the register pair that is used last.
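+  // Only unblocked low registers whose paired high register is also unblocked
+  // are considered.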
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    if (IsBlocked(i)) continue;
+    if (!IsLowRegister(i)) continue;
+    int high_register = GetHighForLowRegister(i);
+    if (IsBlocked(high_register)) continue;
+    int existing_high_register = GetHighForLowRegister(reg);
+    if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
+                        && next_use[high_register] >= next_use[existing_high_register])) {
+      reg = i;
+      if (next_use[i] == kMaxLifetimePosition
+          && next_use[high_register] == kMaxLifetimePosition) {
+        break;
+      }
+    } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
+      // If one of the current registers is known to be unavailable, just unconditionally
+      // try a new one.
+      reg = i;
+    }
+  }
+  return reg;
+}
+
+bool RegisterAllocatorLinearScan::IsCallerSaveRegister(int reg) const {
+  return processing_core_registers_
+      ? !codegen_->IsCoreCalleeSaveRegister(reg)
+      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+}
+
+int RegisterAllocatorLinearScan::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
+  // We special case intervals that do not span a safepoint to try to find a caller-save
+  // register if one is available. We iterate from 0 to the number of registers,
+  // so if there are caller-save registers available at the end, we continue the iteration.
+  bool prefers_caller_save = !current->HasWillCallSafepoint();
+  int reg = kNoRegister;
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    if (IsBlocked(i)) {
+      // Register cannot be used. Continue.
+      continue;
+    }
+
+    // Best case: we found a register fully available.
+    if (next_use[i] == kMaxLifetimePosition) {
+      if (prefers_caller_save && !IsCallerSaveRegister(i)) {
+        // We can get shorter encodings on some platforms by using
+        // small register numbers. So only update the candidate if the previous
+        // one was not available for the whole method.
+        if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) {
+          reg = i;
+        }
+        // Continue the iteration in the hope of finding a caller save register.
+        continue;
+      } else {
+        reg = i;
+        // We know the register is good enough. Return it.
+        break;
+      }
+    }
+
+    // If we had no register before, take this one as a reference.
+    if (reg == kNoRegister) {
+      reg = i;
+      continue;
+    }
+
+    // Pick the register that is used last.
+    if (next_use[i] > next_use[reg]) {
+      reg = i;
+      continue;
+    }
+  }
+  return reg;
+}
+
+// Remove interval and its other half if any. Return iterator to the following element.
+static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf(
+    ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) {
+  DCHECK(intervals->begin() <= pos && pos < intervals->end());
+  LiveInterval* interval = *pos;
+  if (interval->IsLowInterval()) {
+    DCHECK(pos + 1 < intervals->end());
+    DCHECK_EQ(*(pos + 1), interval->GetHighInterval());
+    return intervals->erase(pos, pos + 2);
+  } else if (interval->IsHighInterval()) {
+    DCHECK(intervals->begin() < pos);
+    DCHECK_EQ(*(pos - 1), interval->GetLowInterval());
+    return intervals->erase(pos - 1, pos + 1);
+  } else {
+    return intervals->erase(pos);
+  }
+}
+
+bool RegisterAllocatorLinearScan::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+                                                                           size_t first_register_use,
+                                                                           size_t* next_use) {
+  for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+    LiveInterval* active = *it;
+    DCHECK(active->HasRegister());
+    if (active->IsFixed()) continue;
+    if (active->IsHighInterval()) continue;
+    if (first_register_use > next_use[active->GetRegister()]) continue;
+
+    // Split the first interval found that is either:
+    // 1) A non-pair interval.
+    // 2) A pair interval whose high is not low + 1.
+    // 3) A pair interval whose low is not even.
+    if (!active->IsLowInterval() ||
+        IsLowOfUnalignedPairInterval(active) ||
+        !IsLowRegister(active->GetRegister())) {
+      LiveInterval* split = Split(active, position);
+      if (split != active) {
+        handled_.push_back(active);
+      }
+      RemoveIntervalAndPotentialOtherHalf(&active_, it);
+      AddSorted(unhandled_, split);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Find the register that is used last, and spill the interval
+// that holds it. If the first register use of `current` comes after the next
+// use of that register, we spill `current` instead.
+bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) {
+  size_t first_register_use = current->FirstRegisterUse();
+  if (current->HasRegister()) {
+    DCHECK(current->IsHighInterval());
+    // The low interval has allocated the register for the high interval. In
+    // case the low interval had to split both intervals, we may end up in a
+    // situation where the high interval does not have a register use anymore.
+    // We must still proceed in order to split currently active and inactive
+    // uses of the high interval's register, and put the high interval in the
+    // active set.
+    DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr));
+  } else if (first_register_use == kNoLifetime) {
+    AllocateSpillSlotFor(current);
+    return false;
+  }
+
+  // First set all registers as not being used.
+  size_t* next_use = registers_array_;
+  for (size_t i = 0; i < number_of_registers_; ++i) {
+    next_use[i] = kMaxLifetimePosition;
+  }
+
+  // For each active interval, find the next use of its register after the
+  // start of current.
+  for (LiveInterval* active : active_) {
+    DCHECK(active->HasRegister());
+    if (active->IsFixed()) {
+      next_use[active->GetRegister()] = current->GetStart();
+    } else {
+      size_t use = active->FirstRegisterUseAfter(current->GetStart());
+      if (use != kNoLifetime) {
+        next_use[active->GetRegister()] = use;
+      }
+    }
+  }
+
+  // For each inactive interval, find the next use of its register after the
+  // start of current.
+  for (LiveInterval* inactive : inactive_) {
+    // Temp and slow-path-safepoint intervals have no holes.
+    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+    if (!current->IsSplit() && !inactive->IsFixed()) {
+      // Neither current nor inactive is fixed.
+      // Thanks to SSA, a non-split interval starting in a hole of an
+      // inactive interval should never intersect with that inactive interval.
+      // This only holds for non-fixed intervals, since fixed intervals don't come from SSA.
+      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+      continue;
+    }
+    DCHECK(inactive->HasRegister());
+    size_t next_intersection = inactive->FirstIntersectionWith(current);
+    if (next_intersection != kNoLifetime) {
+      if (inactive->IsFixed()) {
+        next_use[inactive->GetRegister()] =
+            std::min(next_intersection, next_use[inactive->GetRegister()]);
+      } else {
+        size_t use = inactive->FirstUseAfter(current->GetStart());
+        if (use != kNoLifetime) {
+          next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
+        }
+      }
+    }
+  }
+
+  int reg = kNoRegister;
+  bool should_spill = false;
+  if (current->HasRegister()) {
+    DCHECK(current->IsHighInterval());
+    reg = current->GetRegister();
+    // When allocating the low part, we made sure the high register was available.
+    DCHECK_LT(first_register_use, next_use[reg]);
+  } else if (current->IsLowInterval()) {
+    reg = FindAvailableRegisterPair(next_use, first_register_use);
+    // We should spill if either register of the pair is not available.
+    should_spill = (first_register_use >= next_use[reg])
+      || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
+  } else {
+    DCHECK(!current->IsHighInterval());
+    reg = FindAvailableRegister(next_use, current);
+    should_spill = (first_register_use >= next_use[reg]);
+  }
+
+  DCHECK_NE(reg, kNoRegister);
+  if (should_spill) {
+    DCHECK(!current->IsHighInterval());
+    bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1));
+    if (is_allocation_at_use_site) {
+      if (!current->IsLowInterval()) {
+        DumpInterval(std::cerr, current);
+        DumpAllIntervals(std::cerr);
+        // This situation can lead to an infinite loop, so we make it a non-debug CHECK.
+        HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
+        CHECK(false) << "There are not enough registers available for "
+          << current->GetParent()->GetDefinedBy()->DebugName() << " "
+          << current->GetParent()->GetDefinedBy()->GetId()
+          << " at " << first_register_use - 1 << " "
+          << (at == nullptr ? "" : at->DebugName());
+      }
+
+      // If we're allocating a register for `current` because the instruction at
+      // that position requires it, but we think we should spill, then there are
+      // non-pair intervals or unaligned pair intervals blocking the allocation.
+      // We split the first interval found, and put ourselves first in the
+      // `unhandled_` list.
+      bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(),
+                                                              first_register_use,
+                                                              next_use);
+      DCHECK(success);
+      LiveInterval* existing = unhandled_->back();
+      DCHECK(existing->IsHighInterval());
+      DCHECK_EQ(existing->GetLowInterval(), current);
+      unhandled_->push_back(current);
+    } else {
+      // If the first register use of `current` is after the next use of the
+      // found register, we split this interval just before its first register use.
+      AllocateSpillSlotFor(current);
+      LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
+      DCHECK(current != split);
+      AddSorted(unhandled_, split);
+    }
+    return false;
+  } else {
+    // Use this register and spill the active and inactive intervals that
+    // hold that register.
+    current->SetRegister(reg);
+
+    for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+      LiveInterval* active = *it;
+      if (active->GetRegister() == reg) {
+        DCHECK(!active->IsFixed());
+        LiveInterval* split = Split(active, current->GetStart());
+        if (split != active) {
+          handled_.push_back(active);
+        }
+        RemoveIntervalAndPotentialOtherHalf(&active_, it);
+        AddSorted(unhandled_, split);
+        break;
+      }
+    }
+
+    // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body.
+    for (auto it = inactive_.begin(); it != inactive_.end(); ) {
+      LiveInterval* inactive = *it;
+      bool erased = false;
+      if (inactive->GetRegister() == reg) {
+        if (!current->IsSplit() && !inactive->IsFixed()) {
+          // Neither current nor inactive is fixed.
+          // Thanks to SSA, a non-split interval starting in a hole of an
+          // inactive interval should never intersect with that inactive interval.
+          // This only holds for non-fixed intervals, since fixed intervals don't come from SSA.
+          DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+        } else {
+          size_t next_intersection = inactive->FirstIntersectionWith(current);
+          if (next_intersection != kNoLifetime) {
+            if (inactive->IsFixed()) {
+              LiveInterval* split = Split(current, next_intersection);
+              DCHECK_NE(split, current);
+              AddSorted(unhandled_, split);
+            } else {
+              // Split at the start of `current`, which will lead to splitting
+              // at the end of the lifetime hole of `inactive`.
+              LiveInterval* split = Split(inactive, current->GetStart());
+              // If it's inactive, it must start before the current interval.
+              DCHECK_NE(split, inactive);
+              it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it);
+              erased = true;
+              handled_.push_back(inactive);
+              AddSorted(unhandled_, split);
+            }
+          }
+        }
+      }
+      // If we have erased the element, `it` already points to the next element.
+      // Otherwise we need to move to the next element.
+      if (!erased) {
+        ++it;
+      }
+    }
+
+    return true;
+  }
+}
+
+void RegisterAllocatorLinearScan::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
+  DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
+  size_t insert_at = 0;
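+  // The list is sorted by decreasing start position (the last entry has the
+  // lowest start), so walk from the back to find the insertion point.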
+  for (size_t i = array->size(); i > 0; --i) {
+    LiveInterval* current = (*array)[i - 1u];
+    // High intervals must be processed right after their low equivalent.
+    if (current->StartsAfter(interval) && !current->IsHighInterval()) {
+      insert_at = i;
+      break;
+    } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
+      // Ensure the slow path interval is the last to be processed at its location: we want the
+      // interval to know all live registers at this location.
+      DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
+      insert_at = i;
+      break;
+    }
+  }
+
+  // Insert the high interval before the low, to ensure the low is processed before.
+  auto insert_pos = array->begin() + insert_at;
+  if (interval->HasHighInterval()) {
+    array->insert(insert_pos, { interval->GetHighInterval(), interval });
+  } else if (interval->HasLowInterval()) {
+    array->insert(insert_pos, { interval, interval->GetLowInterval() });
+  } else {
+    array->insert(insert_pos, interval);
+  }
+}
+
+void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
+  if (interval->IsHighInterval()) {
+    // The low interval already took care of allocating the spill slot.
+    DCHECK(!interval->GetLowInterval()->HasRegister());
+    DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot());
+    return;
+  }
+
+  LiveInterval* parent = interval->GetParent();
+
+  // An instruction gets a spill slot for its entire lifetime. If the parent
+  // of this interval already has a spill slot, there is nothing to do.
+  if (parent->HasSpillSlot()) {
+    return;
+  }
+
+  HInstruction* defined_by = parent->GetDefinedBy();
+  DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi());
+
+  if (defined_by->IsParameterValue()) {
+    // Parameters have their own stack slot.
+    parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
+    return;
+  }
+
+  if (defined_by->IsCurrentMethod()) {
+    parent->SetSpillSlot(0);
+    return;
+  }
+
+  if (defined_by->IsConstant()) {
+    // Constants don't need a spill slot.
+    return;
+  }
+
+  ArenaVector<size_t>* spill_slots = nullptr;
+  switch (interval->GetType()) {
+    case Primitive::kPrimDouble:
+      spill_slots = &double_spill_slots_;
+      break;
+    case Primitive::kPrimLong:
+      spill_slots = &long_spill_slots_;
+      break;
+    case Primitive::kPrimFloat:
+      spill_slots = &float_spill_slots_;
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimShort:
+      spill_slots = &int_spill_slots_;
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
+  }
+
+  // Find an available spill slot.
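+  // Each entry of `spill_slots` records the lifetime position at which the slot
+  // becomes free again, so a slot can be reused once that position is not after
+  // the start of `parent`.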
+  size_t slot = 0;
+  for (size_t e = spill_slots->size(); slot < e; ++slot) {
+    if ((*spill_slots)[slot] <= parent->GetStart()) {
+      if (!parent->NeedsTwoSpillSlots()) {
+        // One spill slot is sufficient.
+        break;
+      }
+      if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
+        // Two spill slots are available.
+        break;
+      }
+    }
+  }
+
+  size_t end = interval->GetLastSibling()->GetEnd();
+  if (parent->NeedsTwoSpillSlots()) {
+    if (slot + 2u > spill_slots->size()) {
+      // We need a new spill slot.
+      spill_slots->resize(slot + 2u, end);
+    }
+    (*spill_slots)[slot] = end;
+    (*spill_slots)[slot + 1] = end;
+  } else {
+    if (slot == spill_slots->size()) {
+      // We need a new spill slot.
+      spill_slots->push_back(end);
+    } else {
+      (*spill_slots)[slot] = end;
+    }
+  }
+
+  // Note that the exact spill slot location will be computed when we resolve,
+  // that is when we know the number of spill slots for each type.
+  parent->SetSpillSlot(slot);
+}
+
+void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
+  LiveInterval* interval = phi->GetLiveInterval();
+
+  HInstruction* previous_phi = phi->GetPrevious();
+  DCHECK(previous_phi == nullptr ||
+         previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+      << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
+
+  if (phi->IsVRegEquivalentOf(previous_phi)) {
+    // This is an equivalent of the previous phi. We need to assign the same
+    // catch phi slot.
+    DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
+    interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
+  } else {
+    // Allocate a new spill slot for this catch phi.
+    // TODO: Reuse spill slots when intervals of phis from different catch
+    //       blocks do not overlap.
+    interval->SetSpillSlot(catch_phi_spill_slots_);
+    catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
new file mode 100644
index 0000000..b6e4f92
--- /dev/null
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
+
+#include "arch/instruction_set.h"
+#include "base/arena_containers.h"
+#include "base/macros.h"
+#include "primitive.h"
+#include "register_allocator.h"
+
+namespace art {
+
+class CodeGenerator;
+class HBasicBlock;
+class HGraph;
+class HInstruction;
+class HParallelMove;
+class HPhi;
+class LiveInterval;
+class Location;
+class SsaLivenessAnalysis;
+
+/**
+ * An implementation of a linear scan register allocator on an `HGraph` with SSA form.
+ */
+class RegisterAllocatorLinearScan : public RegisterAllocator {
+ public:
+  RegisterAllocatorLinearScan(ArenaAllocator* allocator,
+                              CodeGenerator* codegen,
+                              const SsaLivenessAnalysis& analysis);
+
+  void AllocateRegisters() OVERRIDE;
+
+  bool Validate(bool log_fatal_on_failure) OVERRIDE {
+    processing_core_registers_ = true;
+    if (!ValidateInternal(log_fatal_on_failure)) {
+      return false;
+    }
+    processing_core_registers_ = false;
+    return ValidateInternal(log_fatal_on_failure);
+  }
+
+  size_t GetNumberOfSpillSlots() const {
+    return int_spill_slots_.size()
+        + long_spill_slots_.size()
+        + float_spill_slots_.size()
+        + double_spill_slots_.size()
+        + catch_phi_spill_slots_;
+  }
+
+ private:
+  // Main methods of the allocator.
+  void LinearScan();
+  bool TryAllocateFreeReg(LiveInterval* interval);
+  bool AllocateBlockedReg(LiveInterval* interval);
+
+  // Add `interval` to the given sorted list.
+  static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval);
+
+  // Returns whether `reg` is blocked by the code generator.
+  bool IsBlocked(int reg) const;
+
+  // Update the interval for the register in `location` to cover [start, end).
+  void BlockRegister(Location location, size_t start, size_t end);
+  void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
+
+  // Allocate a spill slot for the given interval. Should be called in linear
+  // order of interval starting positions.
+  void AllocateSpillSlotFor(LiveInterval* interval);
+
+  // Allocate a spill slot for the given catch phi. Will allocate the same slot
+  // for phis which share the same vreg. Must be called in reverse linear order
+  // of lifetime positions and ascending vreg numbers for correctness.
+  void AllocateSpillSlotForCatchPhi(HPhi* phi);
+
+  // Helper methods.
+  void AllocateRegistersInternal();
+  void ProcessInstruction(HInstruction* instruction);
+  bool ValidateInternal(bool log_fatal_on_failure) const;
+  void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
+  void DumpAllIntervals(std::ostream& stream) const;
+  int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
+  int FindAvailableRegister(size_t* next_use, LiveInterval* current) const;
+  bool IsCallerSaveRegister(int reg) const;
+
+  // Try splitting an active non-pair or unaligned pair interval at the given `position`.
+  // Returns whether it was successful at finding such an interval.
+  bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+                                                size_t first_register_use,
+                                                size_t* next_use);
+
+  // List of intervals for core registers that must be processed, ordered by start
+  // position. Last entry is the interval that has the lowest start position.
+  // This list is initially populated before doing the linear scan.
+  ArenaVector<LiveInterval*> unhandled_core_intervals_;
+
+  // List of intervals for floating-point registers. Same comments as above.
+  ArenaVector<LiveInterval*> unhandled_fp_intervals_;
+
+  // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_`
+  // or `unhandled_fp_intervals_`.
+  ArenaVector<LiveInterval*>* unhandled_;
+
+  // List of intervals that have been processed.
+  ArenaVector<LiveInterval*> handled_;
+
+  // List of intervals that are currently active when processing a new live interval.
+  // That is, they have a live range that spans the start of the new interval.
+  ArenaVector<LiveInterval*> active_;
+
+  // List of intervals that are currently inactive when processing a new live interval.
+  // That is, they have a lifetime hole that spans the start of the new interval.
+  ArenaVector<LiveInterval*> inactive_;
+
+  // Fixed intervals for physical registers. Such intervals cover the positions
+  // where an instruction requires a specific register.
+  ArenaVector<LiveInterval*> physical_core_register_intervals_;
+  ArenaVector<LiveInterval*> physical_fp_register_intervals_;
+
+  // Intervals for temporaries. Such intervals cover the positions
+  // where an instruction requires a temporary.
+  ArenaVector<LiveInterval*> temp_intervals_;
+
+  // The spill slots allocated for live intervals. We ensure spill slots
+  // are typed to avoid (1) doing moves and swaps between two different kinds
+  // of registers, and (2) swapping between a single stack slot and a double
+  // stack slot. This simplifies the parallel move resolver.
+  ArenaVector<size_t> int_spill_slots_;
+  ArenaVector<size_t> long_spill_slots_;
+  ArenaVector<size_t> float_spill_slots_;
+  ArenaVector<size_t> double_spill_slots_;
+
+  // Spill slots allocated to catch phis. This category is special-cased because
+  // (1) slots are allocated prior to linear scan and in reverse linear order,
+  // (2) equivalent phis need to share slots despite having different types.
+  size_t catch_phi_spill_slots_;
+
+  // Instructions that need a safepoint.
+  ArenaVector<HInstruction*> safepoints_;
+
+  // True if processing core registers. False if processing floating
+  // point registers.
+  bool processing_core_registers_;
+
+  // Number of registers for the current register kind (core or floating point).
+  size_t number_of_registers_;
+
+  // Temporary array, allocated ahead of time for simplicity.
+  size_t* registers_array_;
+
+  // Blocked registers, as decided by the code generator.
+  bool* const blocked_core_registers_;
+  bool* const blocked_fp_registers_;
+
+  // Slots reserved for out arguments.
+  size_t reserved_out_slots_;
+
+  // The maximum number of live core registers at safepoints.
+  size_t maximum_number_of_live_core_registers_;
+
+  // The maximum number of live FP registers at safepoints.
+  size_t maximum_number_of_live_fp_registers_;
+
+  ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
+  ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorLinearScan);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index a9de7c3..cbb7b2f 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -25,6 +25,7 @@
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 #include "ssa_phi_elimination.h"
 
@@ -44,9 +45,9 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-  register_allocator.AllocateRegisters();
-  return register_allocator.Validate(false);
+  RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+  register_allocator->AllocateRegisters();
+  return register_allocator->Validate(false);
 }
 
 /**
@@ -295,9 +296,9 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-  register_allocator.AllocateRegisters();
-  ASSERT_TRUE(register_allocator.Validate(false));
+  RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+  register_allocator->AllocateRegisters();
+  ASSERT_TRUE(register_allocator->Validate(false));
 
   HBasicBlock* loop_header = graph->GetBlocks()[2];
   HPhi* phi = loop_header->GetFirstPhi()->AsPhi();
@@ -384,9 +385,9 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-  register_allocator.AllocateRegisters();
-  ASSERT_TRUE(register_allocator.Validate(false));
+  RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+  register_allocator->AllocateRegisters();
+  ASSERT_TRUE(register_allocator->Validate(false));
 }
 
 /**
@@ -408,7 +409,7 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+  RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness);
 
   // Add an artificial range to cover the temps that will be put in the unhandled list.
   LiveInterval* unhandled = graph->GetEntryBlock()->GetFirstInstruction()->GetLiveInterval();
@@ -541,8 +542,9 @@
     liveness.Analyze();
 
     // Check that the register allocator is deterministic.
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0);
     ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 0);
@@ -560,8 +562,9 @@
     // Set the phi to a specific register, and check that the inputs get allocated
     // the same register.
     phi->GetLocations()->UpdateOut(Location::RegisterLocation(2));
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
     ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -579,8 +582,9 @@
     // Set input1 to a specific register, and check that the phi and other input get allocated
     // the same register.
     input1->GetLocations()->UpdateOut(Location::RegisterLocation(2));
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
     ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -598,8 +602,9 @@
     // Set input2 to a specific register, and check that the phi and other input get allocated
     // the same register.
     input2->GetLocations()->UpdateOut(Location::RegisterLocation(2));
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
     ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -658,8 +663,9 @@
     SsaLivenessAnalysis liveness(graph, &codegen);
     liveness.Analyze();
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness);
+    register_allocator->AllocateRegisters();
 
     // Sanity check that in normal conditions, the register should be hinted to 0 (EAX).
     ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 0);
@@ -677,8 +683,9 @@
     // Don't use SetInAt because we are overriding an already allocated location.
     ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2);
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2);
   }
@@ -726,8 +733,9 @@
     SsaLivenessAnalysis liveness(graph, &codegen);
     liveness.Analyze();
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness);
+    register_allocator->AllocateRegisters();
 
     // Sanity check that in normal conditions, the registers are the same.
     ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 1);
@@ -748,8 +756,9 @@
     ASSERT_EQ(first_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
     ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness);
+    register_allocator->AllocateRegisters();
 
     ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2);
     ASSERT_EQ(second_sub->GetLiveInterval()->GetRegister(), 2);
@@ -795,8 +804,9 @@
     SsaLivenessAnalysis liveness(graph, &codegen);
     liveness.Analyze();
 
-    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-    register_allocator.AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(&allocator, &codegen, liveness);
+    register_allocator->AllocateRegisters();
 
     // div on x86 requires its first input in eax and the output be the same as the first input.
     ASSERT_EQ(div->GetLiveInterval()->GetRegister(), 0);
@@ -892,7 +902,7 @@
     liveness.instructions_from_lifetime_position_.push_back(user);
   }
 
-  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+  RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness);
   register_allocator.unhandled_core_intervals_.push_back(fourth);
   register_allocator.unhandled_core_intervals_.push_back(third);
   register_allocator.unhandled_core_intervals_.push_back(second);
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
new file mode 100644
index 0000000..195159f
--- /dev/null
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "x86_memory_gen.h"
+#include "code_generator.h"
+
+namespace art {
+namespace x86 {
+
+/**
+ * Replace instructions with memory operand forms.
+ */
+class MemoryOperandVisitor : public HGraphVisitor {
+ public:
+  MemoryOperandVisitor(HGraph* graph, bool do_implicit_null_checks)
+      : HGraphVisitor(graph),
+        do_implicit_null_checks_(do_implicit_null_checks) {}
+
+ private:
+  void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE {
+    // Replace the length by the array itself, so that we can do compares to memory.
+    HArrayLength* array_len = check->InputAt(1)->AsArrayLength();
+
+    // We only want to replace an ArrayLength.
+    if (array_len == nullptr) {
+      return;
+    }
+
+    HInstruction* array = array_len->InputAt(0);
+    DCHECK_EQ(array->GetType(), Primitive::kPrimNot);
+
+    // Don't apply this optimization when the array is nullptr.
+    if (array->IsConstant() || (array->IsNullCheck() && array->InputAt(0)->IsConstant())) {
+      return;
+    }
+
+    // Is there a null check that could be an implicit check?
+    if (array->IsNullCheck() && do_implicit_null_checks_) {
+      // The ArrayLen may generate the implicit null check.  Can the
+      // bounds check do so as well?
+      if (array_len->GetNextDisregardingMoves() != check) {
+        // No, it won't.  Leave as is.
+        return;
+      }
+    }
+
+    // Can we suppress the ArrayLength and generate it at the BoundsCheck?
+    if (array_len->HasOnlyOneNonEnvironmentUse()) {
+      array_len->MarkEmittedAtUseSite();
+      // We need the ArrayLength just before the BoundsCheck.
+      array_len->MoveBefore(check);
+    }
+  }
+
+  bool do_implicit_null_checks_;
+};
+
+X86MemoryOperandGeneration::X86MemoryOperandGeneration(HGraph* graph,
+                                                       OptimizingCompilerStats* stats,
+                                                       CodeGenerator* codegen)
+    : HOptimization(graph, kX86MemoryOperandGenerationPassName, stats),
+      do_implicit_null_checks_(codegen->GetCompilerOptions().GetImplicitNullChecks()) {
+}
+
+void X86MemoryOperandGeneration::Run() {
+  MemoryOperandVisitor visitor(graph_, do_implicit_null_checks_);
+  visitor.VisitInsertionOrder();
+}
+
+}  // namespace x86
+}  // namespace art
diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h
new file mode 100644
index 0000000..7e88681
--- /dev/null
+++ b/compiler/optimizing/x86_memory_gen.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+#define ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+class CodeGenerator;
+
+namespace x86 {
+
+class X86MemoryOperandGeneration : public HOptimization {
+ public:
+  X86MemoryOperandGeneration(HGraph* graph,
+                             OptimizingCompilerStats* stats,
+                             CodeGenerator* codegen);
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kX86MemoryOperandGenerationPassName =
+          "x86_memory_operand_generation";
+
+ private:
+  bool do_implicit_null_checks_;
+};
+
+}  // namespace x86
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 8747dad..353c729 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2456,6 +2456,9 @@
         } else if (!byte) {
           encoding |= B22;
         }
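+        // Sign-extending byte/halfword loads (ldrsb/ldrsh) use encodings with
+        // B24 set.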
+        if (load && is_signed && (byte || half)) {
+          encoding |= B24;
+        }
         Emit32(encoding);
       } else {
         // 16 bit register offset.
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index f3fa72c..abb09f7 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -1450,4 +1450,23 @@
   DriverStr(expected, "vpaddl");
 }
 
+TEST_F(AssemblerThumb2Test, LoadFromShiftedRegOffset) {
+  arm::Address mem_address(arm::R0, arm::R1, arm::Shift::LSL, 2);
+
+  __ ldrsb(arm::R2, mem_address);
+  __ ldrb(arm::R2, mem_address);
+  __ ldrsh(arm::R2, mem_address);
+  __ ldrh(arm::R2, mem_address);
+  __ ldr(arm::R2, mem_address);
+
+  std::string expected =
+      "ldrsb r2, [r0, r1, LSL #2]\n"
+      "ldrb r2, [r0, r1, LSL #2]\n"
+      "ldrsh r2, [r0, r1, LSL #2]\n"
+      "ldrh r2, [r0, r1, LSL #2]\n"
+      "ldr r2, [r0, r1, LSL #2]\n";
+
+  DriverStr(expected, "LoadFromShiftedRegOffset");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 54ed62b..9f2027f 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -20,7 +20,7 @@
 #include "offsets.h"
 #include "thread.h"
 
-using namespace vixl;  // NOLINT(build/namespaces)
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
 namespace art {
 namespace arm64 {
@@ -39,7 +39,7 @@
 }
 
 size_t Arm64Assembler::CodeSize() const {
-  return vixl_masm_->BufferCapacity() - vixl_masm_->RemainingBufferSpace();
+  return vixl_masm_->GetBufferCapacity() - vixl_masm_->GetRemainingBufferSpace();
 }
 
 const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const {
@@ -86,9 +86,9 @@
   } else {
     // temp = rd + value
     // rd = cond ? temp : rn
-    vixl::UseScratchRegisterScope temps(vixl_masm_);
+    UseScratchRegisterScope temps(vixl_masm_);
     temps.Exclude(reg_x(rd), reg_x(rn));
-    vixl::Register temp = temps.AcquireX();
+    Register temp = temps.AcquireX();
     ___ Add(temp, reg_x(rn), value);
     ___ Csel(reg_x(rd), temp, reg_x(rd), cond);
   }
@@ -182,8 +182,8 @@
 }
 
 void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset<8> tr_offs) {
-  vixl::UseScratchRegisterScope temps(vixl_masm_);
-  vixl::Register temp = temps.AcquireX();
+  UseScratchRegisterScope temps(vixl_masm_);
+  Register temp = temps.AcquireX();
   ___ Mov(temp, reg_x(SP));
   ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
 }
@@ -206,9 +206,9 @@
     // temp = value
     // rd = cond ? temp : rd
     if (value != 0) {
-      vixl::UseScratchRegisterScope temps(vixl_masm_);
+      UseScratchRegisterScope temps(vixl_masm_);
       temps.Exclude(reg_x(dest));
-      vixl::Register temp = temps.AcquireX();
+      Register temp = temps.AcquireX();
       ___ Mov(temp, value);
       ___ Csel(reg_x(dest), temp, reg_x(dest), cond);
     } else {
@@ -313,7 +313,7 @@
   Arm64ManagedRegister base = m_base.AsArm64();
   CHECK(dst.IsXRegister() && base.IsXRegister());
   // Remove dst and base from the temp list - higher level API uses IP1, IP0.
-  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  UseScratchRegisterScope temps(vixl_masm_);
   temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister()));
   ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
 }
@@ -479,7 +479,7 @@
 
 void Arm64Assembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) {
   // TODO: Should we check that m_scratch is IP? - see arm.
-  ___ Dmb(vixl::InnerShareable, vixl::BarrierAll);
+  ___ Dmb(InnerShareable, BarrierAll);
 }
 
 void Arm64Assembler::SignExtend(ManagedRegister mreg, size_t size) {
@@ -527,7 +527,7 @@
   CHECK(base.IsXRegister()) << base;
   CHECK(scratch.IsXRegister()) << scratch;
   // Remove base and scratch from the temp list - higher level API uses IP1, IP0.
-  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  UseScratchRegisterScope temps(vixl_masm_);
   temps.Exclude(reg_x(base.AsXRegister()), reg_x(scratch.AsXRegister()));
   ___ Ldr(reg_x(scratch.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
   ___ Br(reg_x(scratch.AsXRegister()));
@@ -598,7 +598,7 @@
   Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
   CHECK(out_reg.IsXRegister()) << out_reg;
   CHECK(in_reg.IsXRegister()) << in_reg;
-  vixl::Label exit;
+  vixl::aarch64::Label exit;
   if (!out_reg.Equals(in_reg)) {
     // FIXME: Who sets the flags here?
     LoadImmediate(out_reg.AsXRegister(), 0, eq);
@@ -617,9 +617,9 @@
 }
 
 void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) {
-  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  UseScratchRegisterScope temps(vixl_masm_);
   temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
-  vixl::Register temp = temps.AcquireX();
+  Register temp = temps.AcquireX();
 
   // Bind exception poll entry.
   ___ Bind(exception->Entry());
@@ -638,26 +638,26 @@
 
 static inline dwarf::Reg DWARFReg(CPURegister reg) {
   if (reg.IsFPRegister()) {
-    return dwarf::Reg::Arm64Fp(reg.code());
+    return dwarf::Reg::Arm64Fp(reg.GetCode());
   } else {
-    DCHECK_LT(reg.code(), 31u);  // X0 - X30.
-    return dwarf::Reg::Arm64Core(reg.code());
+    DCHECK_LT(reg.GetCode(), 31u);  // X0 - X30.
+    return dwarf::Reg::Arm64Core(reg.GetCode());
   }
 }
 
-void Arm64Assembler::SpillRegisters(vixl::CPURegList registers, int offset) {
-  int size = registers.RegisterSizeInBytes();
+void Arm64Assembler::SpillRegisters(CPURegList registers, int offset) {
+  int size = registers.GetRegisterSizeInBytes();
   const Register sp = vixl_masm_->StackPointer();
   // Since we are operating on register pairs, we would like to align on
   // double the standard size; on the other hand, we don't want to insert
   // an extra store, which will happen if the number of registers is even.
-  if (!IsAlignedParam(offset, 2 * size) && registers.Count() % 2 != 0) {
+  if (!IsAlignedParam(offset, 2 * size) && registers.GetCount() % 2 != 0) {
     const CPURegister& dst0 = registers.PopLowestIndex();
     ___ Str(dst0, MemOperand(sp, offset));
     cfi_.RelOffset(DWARFReg(dst0), offset);
     offset += size;
   }
-  while (registers.Count() >= 2) {
+  while (registers.GetCount() >= 2) {
     const CPURegister& dst0 = registers.PopLowestIndex();
     const CPURegister& dst1 = registers.PopLowestIndex();
     ___ Stp(dst0, dst1, MemOperand(sp, offset));
@@ -673,17 +673,17 @@
   DCHECK(registers.IsEmpty());
 }
 
-void Arm64Assembler::UnspillRegisters(vixl::CPURegList registers, int offset) {
-  int size = registers.RegisterSizeInBytes();
+void Arm64Assembler::UnspillRegisters(CPURegList registers, int offset) {
+  int size = registers.GetRegisterSizeInBytes();
   const Register sp = vixl_masm_->StackPointer();
   // Be consistent with the logic for spilling registers.
-  if (!IsAlignedParam(offset, 2 * size) && registers.Count() % 2 != 0) {
+  if (!IsAlignedParam(offset, 2 * size) && registers.GetCount() % 2 != 0) {
     const CPURegister& dst0 = registers.PopLowestIndex();
     ___ Ldr(dst0, MemOperand(sp, offset));
     cfi_.Restore(DWARFReg(dst0));
     offset += size;
   }
-  while (registers.Count() >= 2) {
+  while (registers.GetCount() >= 2) {
     const CPURegister& dst0 = registers.PopLowestIndex();
     const CPURegister& dst1 = registers.PopLowestIndex();
     ___ Ldp(dst0, dst1, MemOperand(sp, offset));
@@ -709,14 +709,14 @@
   for (auto r : callee_save_regs) {
     Arm64ManagedRegister reg = r.AsArm64();
     if (reg.IsXRegister()) {
-      core_reg_list.Combine(reg_x(reg.AsXRegister()).code());
+      core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode());
     } else {
       DCHECK(reg.IsDRegister());
-      fp_reg_list.Combine(reg_d(reg.AsDRegister()).code());
+      fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode());
     }
   }
-  size_t core_reg_size = core_reg_list.TotalSizeInBytes();
-  size_t fp_reg_size = fp_reg_list.TotalSizeInBytes();
+  size_t core_reg_size = core_reg_list.GetTotalSizeInBytes();
+  size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes();
 
   // Increase frame to required size.
   DCHECK_ALIGNED(frame_size, kStackAlignment);
@@ -765,14 +765,14 @@
   for (auto r : callee_save_regs) {
     Arm64ManagedRegister reg = r.AsArm64();
     if (reg.IsXRegister()) {
-      core_reg_list.Combine(reg_x(reg.AsXRegister()).code());
+      core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode());
     } else {
       DCHECK(reg.IsDRegister());
-      fp_reg_list.Combine(reg_d(reg.AsDRegister()).code());
+      fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode());
     }
   }
-  size_t core_reg_size = core_reg_list.TotalSizeInBytes();
-  size_t fp_reg_size = fp_reg_list.TotalSizeInBytes();
+  size_t core_reg_size = core_reg_list.GetTotalSizeInBytes();
+  size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes();
 
   // For now we only check that the size of the frame is large enough to hold spills and method
   // reference.
@@ -798,19 +798,19 @@
   cfi_.DefCFAOffset(frame_size);
 }
 
-void Arm64Assembler::PoisonHeapReference(vixl::Register reg) {
+void Arm64Assembler::PoisonHeapReference(Register reg) {
   DCHECK(reg.IsW());
   // reg = -reg.
-  ___ Neg(reg, vixl::Operand(reg));
+  ___ Neg(reg, Operand(reg));
 }
 
-void Arm64Assembler::UnpoisonHeapReference(vixl::Register reg) {
+void Arm64Assembler::UnpoisonHeapReference(Register reg) {
   DCHECK(reg.IsW());
   // reg = -reg.
-  ___ Neg(reg, vixl::Operand(reg));
+  ___ Neg(reg, Operand(reg));
 }
 
-void Arm64Assembler::MaybeUnpoisonHeapReference(vixl::Register reg) {
+void Arm64Assembler::MaybeUnpoisonHeapReference(Register reg) {
   if (kPoisonHeapReferences) {
     UnpoisonHeapReference(reg);
   }
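
The hunks above track the VIXL update in which the AArch64 back end moved into the vixl::aarch64 namespace and accessors grew a Get prefix (BufferCapacity() -> GetBufferCapacity(), Count() -> GetCount(), code() -> GetCode(), TotalSizeInBytes() -> GetTotalSizeInBytes(), and so on). A minimal sketch of the new spelling, in the spirit of SpillRegisters() above; SpillPairsSketch and its parameters are illustrative names, not ART helpers.

#include "a64/macro-assembler-a64.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

// Sketch only: stores registers in pairs with Stp, single leftover with Str.
void SpillPairsSketch(MacroAssembler* masm, CPURegList regs, int offset) {
  int size = regs.GetRegisterSizeInBytes();   // was RegisterSizeInBytes()
  while (regs.GetCount() >= 2) {              // was Count()
    const CPURegister& r0 = regs.PopLowestIndex();
    const CPURegister& r1 = regs.PopLowestIndex();
    masm->Stp(r0, r1, MemOperand(masm->StackPointer(), offset));
    offset += 2 * size;
  }
  if (regs.GetCount() == 1) {
    masm->Str(regs.PopLowestIndex(), MemOperand(masm->StackPointer(), offset));
  }
}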
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 91171a8..a481544 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -28,19 +28,19 @@
 #include "utils/assembler.h"
 #include "offsets.h"
 
-// TODO: make vixl clean wrt -Wshadow.
+// TODO: make vixl clean wrt -Wshadow, -Wunknown-pragmas, -Wmissing-noreturn
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunknown-pragmas"
 #pragma GCC diagnostic ignored "-Wshadow"
 #pragma GCC diagnostic ignored "-Wmissing-noreturn"
-#include "vixl/a64/macro-assembler-a64.h"
-#include "vixl/a64/disasm-a64.h"
+#include "a64/disasm-a64.h"
+#include "a64/macro-assembler-a64.h"
 #pragma GCC diagnostic pop
 
 namespace art {
 namespace arm64 {
 
-#define MEM_OP(...)      vixl::MemOperand(__VA_ARGS__)
+#define MEM_OP(...)      vixl::aarch64::MemOperand(__VA_ARGS__)
 
 enum LoadOperandType {
   kLoadSignedByte,
@@ -68,7 +68,7 @@
       : scratch_(scratch), stack_adjust_(stack_adjust) {
     }
 
-  vixl::Label* Entry() { return &exception_entry_; }
+  vixl::aarch64::Label* Entry() { return &exception_entry_; }
 
   // Register used for passing Thread::Current()->exception_ .
   const Arm64ManagedRegister scratch_;
@@ -76,7 +76,7 @@
   // Stack adjust for ExceptionPool.
   const size_t stack_adjust_;
 
-  vixl::Label exception_entry_;
+  vixl::aarch64::Label exception_entry_;
 
   friend class Arm64Assembler;
   DISALLOW_COPY_AND_ASSIGN(Arm64Exception);
@@ -89,7 +89,7 @@
   explicit Arm64Assembler(ArenaAllocator* arena)
       : Assembler(arena),
         exception_blocks_(arena->Adapter(kArenaAllocAssembler)),
-        vixl_masm_(new vixl::MacroAssembler(kArm64BaseBufferSize)) {}
+        vixl_masm_(new vixl::aarch64::MacroAssembler(kArm64BaseBufferSize)) {}
 
   virtual ~Arm64Assembler() {
     delete vixl_masm_;
@@ -105,8 +105,8 @@
   // Copy instructions out of assembly buffer into the given region of memory.
   void FinalizeInstructions(const MemoryRegion& region);
 
-  void SpillRegisters(vixl::CPURegList registers, int offset);
-  void UnspillRegisters(vixl::CPURegList registers, int offset);
+  void SpillRegisters(vixl::aarch64::CPURegList registers, int offset);
+  void UnspillRegisters(vixl::aarch64::CPURegList registers, int offset);
 
   // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size,
@@ -177,13 +177,17 @@
   // value is null and null_allowed. in_reg holds a possibly stale reference
   // that can be used to avoid loading the handle scope entry to see if the value is
   // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                       ManagedRegister in_reg, bool null_allowed) OVERRIDE;
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) OVERRIDE;
 
   // Set up out_off to hold a Object** into the handle scope, or to be null if the
   // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                       ManagedRegister scratch, bool null_allowed) OVERRIDE;
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister scratch,
+                              bool null_allowed) OVERRIDE;
 
   // src holds a handle scope entry (Object**) load this into dst.
   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
@@ -210,11 +214,11 @@
   //
 
   // Poison a heap reference contained in `reg`.
-  void PoisonHeapReference(vixl::Register reg);
+  void PoisonHeapReference(vixl::aarch64::Register reg);
   // Unpoison a heap reference contained in `reg`.
-  void UnpoisonHeapReference(vixl::Register reg);
+  void UnpoisonHeapReference(vixl::aarch64::Register reg);
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
-  void MaybeUnpoisonHeapReference(vixl::Register reg);
+  void MaybeUnpoisonHeapReference(vixl::aarch64::Register reg);
 
   void Bind(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
     UNIMPLEMENTED(FATAL) << "Do not use Bind for ARM64";
@@ -224,32 +228,32 @@
   }
 
  private:
-  static vixl::Register reg_x(int code) {
+  static vixl::aarch64::Register reg_x(int code) {
     CHECK(code < kNumberOfXRegisters) << code;
     if (code == SP) {
-      return vixl::sp;
+      return vixl::aarch64::sp;
     } else if (code == XZR) {
-      return vixl::xzr;
+      return vixl::aarch64::xzr;
     }
-    return vixl::Register::XRegFromCode(code);
+    return vixl::aarch64::Register::GetXRegFromCode(code);
   }
 
-  static vixl::Register reg_w(int code) {
+  static vixl::aarch64::Register reg_w(int code) {
     CHECK(code < kNumberOfWRegisters) << code;
     if (code == WSP) {
-      return vixl::wsp;
+      return vixl::aarch64::wsp;
     } else if (code == WZR) {
-      return vixl::wzr;
+      return vixl::aarch64::wzr;
     }
-    return vixl::Register::WRegFromCode(code);
+    return vixl::aarch64::Register::GetWRegFromCode(code);
   }
 
-  static vixl::FPRegister reg_d(int code) {
-    return vixl::FPRegister::DRegFromCode(code);
+  static vixl::aarch64::FPRegister reg_d(int code) {
+    return vixl::aarch64::FPRegister::GetDRegFromCode(code);
   }
 
-  static vixl::FPRegister reg_s(int code) {
-    return vixl::FPRegister::SRegFromCode(code);
+  static vixl::aarch64::FPRegister reg_s(int code) {
+    return vixl::aarch64::FPRegister::GetSRegFromCode(code);
   }
 
   // Emits Exception block.
@@ -261,22 +265,31 @@
   void StoreSToOffset(SRegister source, XRegister base, int32_t offset);
   void StoreDToOffset(DRegister source, XRegister base, int32_t offset);
 
-  void LoadImmediate(XRegister dest, int32_t value, vixl::Condition cond = vixl::al);
+  void LoadImmediate(XRegister dest,
+                     int32_t value,
+                     vixl::aarch64::Condition cond = vixl::aarch64::al);
   void Load(Arm64ManagedRegister dst, XRegister src, int32_t src_offset, size_t size);
-  void LoadWFromOffset(LoadOperandType type, WRegister dest,
-                      XRegister base, int32_t offset);
+  void LoadWFromOffset(LoadOperandType type,
+                       WRegister dest,
+                       XRegister base,
+                       int32_t offset);
   void LoadFromOffset(XRegister dest, XRegister base, int32_t offset);
   void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset);
   void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset);
-  void AddConstant(XRegister rd, int32_t value, vixl::Condition cond = vixl::al);
-  void AddConstant(XRegister rd, XRegister rn, int32_t value, vixl::Condition cond = vixl::al);
+  void AddConstant(XRegister rd,
+                   int32_t value,
+                   vixl::aarch64::Condition cond = vixl::aarch64::al);
+  void AddConstant(XRegister rd,
+                   XRegister rn,
+                   int32_t value,
+                   vixl::aarch64::Condition cond = vixl::aarch64::al);
 
   // List of exception blocks to generate at the end of the code cache.
   ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_;
 
  public:
   // Vixl assembler.
-  vixl::MacroAssembler* const vixl_masm_;
+  vixl::aarch64::MacroAssembler* const vixl_masm_;
 
   // Used for testing.
   friend class Arm64ManagedRegister_VixlRegisters_Test;
diff --git a/compiler/utils/arm64/managed_register_arm64_test.cc b/compiler/utils/arm64/managed_register_arm64_test.cc
index e27115d..79076b8 100644
--- a/compiler/utils/arm64/managed_register_arm64_test.cc
+++ b/compiler/utils/arm64/managed_register_arm64_test.cc
@@ -591,149 +591,149 @@
 
 TEST(Arm64ManagedRegister, VixlRegisters) {
   // X Registers.
-  EXPECT_TRUE(vixl::x0.Is(Arm64Assembler::reg_x(X0)));
-  EXPECT_TRUE(vixl::x1.Is(Arm64Assembler::reg_x(X1)));
-  EXPECT_TRUE(vixl::x2.Is(Arm64Assembler::reg_x(X2)));
-  EXPECT_TRUE(vixl::x3.Is(Arm64Assembler::reg_x(X3)));
-  EXPECT_TRUE(vixl::x4.Is(Arm64Assembler::reg_x(X4)));
-  EXPECT_TRUE(vixl::x5.Is(Arm64Assembler::reg_x(X5)));
-  EXPECT_TRUE(vixl::x6.Is(Arm64Assembler::reg_x(X6)));
-  EXPECT_TRUE(vixl::x7.Is(Arm64Assembler::reg_x(X7)));
-  EXPECT_TRUE(vixl::x8.Is(Arm64Assembler::reg_x(X8)));
-  EXPECT_TRUE(vixl::x9.Is(Arm64Assembler::reg_x(X9)));
-  EXPECT_TRUE(vixl::x10.Is(Arm64Assembler::reg_x(X10)));
-  EXPECT_TRUE(vixl::x11.Is(Arm64Assembler::reg_x(X11)));
-  EXPECT_TRUE(vixl::x12.Is(Arm64Assembler::reg_x(X12)));
-  EXPECT_TRUE(vixl::x13.Is(Arm64Assembler::reg_x(X13)));
-  EXPECT_TRUE(vixl::x14.Is(Arm64Assembler::reg_x(X14)));
-  EXPECT_TRUE(vixl::x15.Is(Arm64Assembler::reg_x(X15)));
-  EXPECT_TRUE(vixl::x16.Is(Arm64Assembler::reg_x(X16)));
-  EXPECT_TRUE(vixl::x17.Is(Arm64Assembler::reg_x(X17)));
-  EXPECT_TRUE(vixl::x18.Is(Arm64Assembler::reg_x(X18)));
-  EXPECT_TRUE(vixl::x19.Is(Arm64Assembler::reg_x(X19)));
-  EXPECT_TRUE(vixl::x20.Is(Arm64Assembler::reg_x(X20)));
-  EXPECT_TRUE(vixl::x21.Is(Arm64Assembler::reg_x(X21)));
-  EXPECT_TRUE(vixl::x22.Is(Arm64Assembler::reg_x(X22)));
-  EXPECT_TRUE(vixl::x23.Is(Arm64Assembler::reg_x(X23)));
-  EXPECT_TRUE(vixl::x24.Is(Arm64Assembler::reg_x(X24)));
-  EXPECT_TRUE(vixl::x25.Is(Arm64Assembler::reg_x(X25)));
-  EXPECT_TRUE(vixl::x26.Is(Arm64Assembler::reg_x(X26)));
-  EXPECT_TRUE(vixl::x27.Is(Arm64Assembler::reg_x(X27)));
-  EXPECT_TRUE(vixl::x28.Is(Arm64Assembler::reg_x(X28)));
-  EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(X29)));
-  EXPECT_TRUE(vixl::x30.Is(Arm64Assembler::reg_x(X30)));
+  EXPECT_TRUE(vixl::aarch64::x0.Is(Arm64Assembler::reg_x(X0)));
+  EXPECT_TRUE(vixl::aarch64::x1.Is(Arm64Assembler::reg_x(X1)));
+  EXPECT_TRUE(vixl::aarch64::x2.Is(Arm64Assembler::reg_x(X2)));
+  EXPECT_TRUE(vixl::aarch64::x3.Is(Arm64Assembler::reg_x(X3)));
+  EXPECT_TRUE(vixl::aarch64::x4.Is(Arm64Assembler::reg_x(X4)));
+  EXPECT_TRUE(vixl::aarch64::x5.Is(Arm64Assembler::reg_x(X5)));
+  EXPECT_TRUE(vixl::aarch64::x6.Is(Arm64Assembler::reg_x(X6)));
+  EXPECT_TRUE(vixl::aarch64::x7.Is(Arm64Assembler::reg_x(X7)));
+  EXPECT_TRUE(vixl::aarch64::x8.Is(Arm64Assembler::reg_x(X8)));
+  EXPECT_TRUE(vixl::aarch64::x9.Is(Arm64Assembler::reg_x(X9)));
+  EXPECT_TRUE(vixl::aarch64::x10.Is(Arm64Assembler::reg_x(X10)));
+  EXPECT_TRUE(vixl::aarch64::x11.Is(Arm64Assembler::reg_x(X11)));
+  EXPECT_TRUE(vixl::aarch64::x12.Is(Arm64Assembler::reg_x(X12)));
+  EXPECT_TRUE(vixl::aarch64::x13.Is(Arm64Assembler::reg_x(X13)));
+  EXPECT_TRUE(vixl::aarch64::x14.Is(Arm64Assembler::reg_x(X14)));
+  EXPECT_TRUE(vixl::aarch64::x15.Is(Arm64Assembler::reg_x(X15)));
+  EXPECT_TRUE(vixl::aarch64::x16.Is(Arm64Assembler::reg_x(X16)));
+  EXPECT_TRUE(vixl::aarch64::x17.Is(Arm64Assembler::reg_x(X17)));
+  EXPECT_TRUE(vixl::aarch64::x18.Is(Arm64Assembler::reg_x(X18)));
+  EXPECT_TRUE(vixl::aarch64::x19.Is(Arm64Assembler::reg_x(X19)));
+  EXPECT_TRUE(vixl::aarch64::x20.Is(Arm64Assembler::reg_x(X20)));
+  EXPECT_TRUE(vixl::aarch64::x21.Is(Arm64Assembler::reg_x(X21)));
+  EXPECT_TRUE(vixl::aarch64::x22.Is(Arm64Assembler::reg_x(X22)));
+  EXPECT_TRUE(vixl::aarch64::x23.Is(Arm64Assembler::reg_x(X23)));
+  EXPECT_TRUE(vixl::aarch64::x24.Is(Arm64Assembler::reg_x(X24)));
+  EXPECT_TRUE(vixl::aarch64::x25.Is(Arm64Assembler::reg_x(X25)));
+  EXPECT_TRUE(vixl::aarch64::x26.Is(Arm64Assembler::reg_x(X26)));
+  EXPECT_TRUE(vixl::aarch64::x27.Is(Arm64Assembler::reg_x(X27)));
+  EXPECT_TRUE(vixl::aarch64::x28.Is(Arm64Assembler::reg_x(X28)));
+  EXPECT_TRUE(vixl::aarch64::x29.Is(Arm64Assembler::reg_x(X29)));
+  EXPECT_TRUE(vixl::aarch64::x30.Is(Arm64Assembler::reg_x(X30)));
 
-  EXPECT_TRUE(vixl::x19.Is(Arm64Assembler::reg_x(TR)));
-  EXPECT_TRUE(vixl::ip0.Is(Arm64Assembler::reg_x(IP0)));
-  EXPECT_TRUE(vixl::ip1.Is(Arm64Assembler::reg_x(IP1)));
-  EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(FP)));
-  EXPECT_TRUE(vixl::lr.Is(Arm64Assembler::reg_x(LR)));
-  EXPECT_TRUE(vixl::sp.Is(Arm64Assembler::reg_x(SP)));
-  EXPECT_TRUE(vixl::xzr.Is(Arm64Assembler::reg_x(XZR)));
+  EXPECT_TRUE(vixl::aarch64::x19.Is(Arm64Assembler::reg_x(TR)));
+  EXPECT_TRUE(vixl::aarch64::ip0.Is(Arm64Assembler::reg_x(IP0)));
+  EXPECT_TRUE(vixl::aarch64::ip1.Is(Arm64Assembler::reg_x(IP1)));
+  EXPECT_TRUE(vixl::aarch64::x29.Is(Arm64Assembler::reg_x(FP)));
+  EXPECT_TRUE(vixl::aarch64::lr.Is(Arm64Assembler::reg_x(LR)));
+  EXPECT_TRUE(vixl::aarch64::sp.Is(Arm64Assembler::reg_x(SP)));
+  EXPECT_TRUE(vixl::aarch64::xzr.Is(Arm64Assembler::reg_x(XZR)));
 
   // W Registers.
-  EXPECT_TRUE(vixl::w0.Is(Arm64Assembler::reg_w(W0)));
-  EXPECT_TRUE(vixl::w1.Is(Arm64Assembler::reg_w(W1)));
-  EXPECT_TRUE(vixl::w2.Is(Arm64Assembler::reg_w(W2)));
-  EXPECT_TRUE(vixl::w3.Is(Arm64Assembler::reg_w(W3)));
-  EXPECT_TRUE(vixl::w4.Is(Arm64Assembler::reg_w(W4)));
-  EXPECT_TRUE(vixl::w5.Is(Arm64Assembler::reg_w(W5)));
-  EXPECT_TRUE(vixl::w6.Is(Arm64Assembler::reg_w(W6)));
-  EXPECT_TRUE(vixl::w7.Is(Arm64Assembler::reg_w(W7)));
-  EXPECT_TRUE(vixl::w8.Is(Arm64Assembler::reg_w(W8)));
-  EXPECT_TRUE(vixl::w9.Is(Arm64Assembler::reg_w(W9)));
-  EXPECT_TRUE(vixl::w10.Is(Arm64Assembler::reg_w(W10)));
-  EXPECT_TRUE(vixl::w11.Is(Arm64Assembler::reg_w(W11)));
-  EXPECT_TRUE(vixl::w12.Is(Arm64Assembler::reg_w(W12)));
-  EXPECT_TRUE(vixl::w13.Is(Arm64Assembler::reg_w(W13)));
-  EXPECT_TRUE(vixl::w14.Is(Arm64Assembler::reg_w(W14)));
-  EXPECT_TRUE(vixl::w15.Is(Arm64Assembler::reg_w(W15)));
-  EXPECT_TRUE(vixl::w16.Is(Arm64Assembler::reg_w(W16)));
-  EXPECT_TRUE(vixl::w17.Is(Arm64Assembler::reg_w(W17)));
-  EXPECT_TRUE(vixl::w18.Is(Arm64Assembler::reg_w(W18)));
-  EXPECT_TRUE(vixl::w19.Is(Arm64Assembler::reg_w(W19)));
-  EXPECT_TRUE(vixl::w20.Is(Arm64Assembler::reg_w(W20)));
-  EXPECT_TRUE(vixl::w21.Is(Arm64Assembler::reg_w(W21)));
-  EXPECT_TRUE(vixl::w22.Is(Arm64Assembler::reg_w(W22)));
-  EXPECT_TRUE(vixl::w23.Is(Arm64Assembler::reg_w(W23)));
-  EXPECT_TRUE(vixl::w24.Is(Arm64Assembler::reg_w(W24)));
-  EXPECT_TRUE(vixl::w25.Is(Arm64Assembler::reg_w(W25)));
-  EXPECT_TRUE(vixl::w26.Is(Arm64Assembler::reg_w(W26)));
-  EXPECT_TRUE(vixl::w27.Is(Arm64Assembler::reg_w(W27)));
-  EXPECT_TRUE(vixl::w28.Is(Arm64Assembler::reg_w(W28)));
-  EXPECT_TRUE(vixl::w29.Is(Arm64Assembler::reg_w(W29)));
-  EXPECT_TRUE(vixl::w30.Is(Arm64Assembler::reg_w(W30)));
-  EXPECT_TRUE(vixl::w31.Is(Arm64Assembler::reg_w(WZR)));
-  EXPECT_TRUE(vixl::wzr.Is(Arm64Assembler::reg_w(WZR)));
-  EXPECT_TRUE(vixl::wsp.Is(Arm64Assembler::reg_w(WSP)));
+  EXPECT_TRUE(vixl::aarch64::w0.Is(Arm64Assembler::reg_w(W0)));
+  EXPECT_TRUE(vixl::aarch64::w1.Is(Arm64Assembler::reg_w(W1)));
+  EXPECT_TRUE(vixl::aarch64::w2.Is(Arm64Assembler::reg_w(W2)));
+  EXPECT_TRUE(vixl::aarch64::w3.Is(Arm64Assembler::reg_w(W3)));
+  EXPECT_TRUE(vixl::aarch64::w4.Is(Arm64Assembler::reg_w(W4)));
+  EXPECT_TRUE(vixl::aarch64::w5.Is(Arm64Assembler::reg_w(W5)));
+  EXPECT_TRUE(vixl::aarch64::w6.Is(Arm64Assembler::reg_w(W6)));
+  EXPECT_TRUE(vixl::aarch64::w7.Is(Arm64Assembler::reg_w(W7)));
+  EXPECT_TRUE(vixl::aarch64::w8.Is(Arm64Assembler::reg_w(W8)));
+  EXPECT_TRUE(vixl::aarch64::w9.Is(Arm64Assembler::reg_w(W9)));
+  EXPECT_TRUE(vixl::aarch64::w10.Is(Arm64Assembler::reg_w(W10)));
+  EXPECT_TRUE(vixl::aarch64::w11.Is(Arm64Assembler::reg_w(W11)));
+  EXPECT_TRUE(vixl::aarch64::w12.Is(Arm64Assembler::reg_w(W12)));
+  EXPECT_TRUE(vixl::aarch64::w13.Is(Arm64Assembler::reg_w(W13)));
+  EXPECT_TRUE(vixl::aarch64::w14.Is(Arm64Assembler::reg_w(W14)));
+  EXPECT_TRUE(vixl::aarch64::w15.Is(Arm64Assembler::reg_w(W15)));
+  EXPECT_TRUE(vixl::aarch64::w16.Is(Arm64Assembler::reg_w(W16)));
+  EXPECT_TRUE(vixl::aarch64::w17.Is(Arm64Assembler::reg_w(W17)));
+  EXPECT_TRUE(vixl::aarch64::w18.Is(Arm64Assembler::reg_w(W18)));
+  EXPECT_TRUE(vixl::aarch64::w19.Is(Arm64Assembler::reg_w(W19)));
+  EXPECT_TRUE(vixl::aarch64::w20.Is(Arm64Assembler::reg_w(W20)));
+  EXPECT_TRUE(vixl::aarch64::w21.Is(Arm64Assembler::reg_w(W21)));
+  EXPECT_TRUE(vixl::aarch64::w22.Is(Arm64Assembler::reg_w(W22)));
+  EXPECT_TRUE(vixl::aarch64::w23.Is(Arm64Assembler::reg_w(W23)));
+  EXPECT_TRUE(vixl::aarch64::w24.Is(Arm64Assembler::reg_w(W24)));
+  EXPECT_TRUE(vixl::aarch64::w25.Is(Arm64Assembler::reg_w(W25)));
+  EXPECT_TRUE(vixl::aarch64::w26.Is(Arm64Assembler::reg_w(W26)));
+  EXPECT_TRUE(vixl::aarch64::w27.Is(Arm64Assembler::reg_w(W27)));
+  EXPECT_TRUE(vixl::aarch64::w28.Is(Arm64Assembler::reg_w(W28)));
+  EXPECT_TRUE(vixl::aarch64::w29.Is(Arm64Assembler::reg_w(W29)));
+  EXPECT_TRUE(vixl::aarch64::w30.Is(Arm64Assembler::reg_w(W30)));
+  EXPECT_TRUE(vixl::aarch64::w31.Is(Arm64Assembler::reg_w(WZR)));
+  EXPECT_TRUE(vixl::aarch64::wzr.Is(Arm64Assembler::reg_w(WZR)));
+  EXPECT_TRUE(vixl::aarch64::wsp.Is(Arm64Assembler::reg_w(WSP)));
 
   // D Registers.
-  EXPECT_TRUE(vixl::d0.Is(Arm64Assembler::reg_d(D0)));
-  EXPECT_TRUE(vixl::d1.Is(Arm64Assembler::reg_d(D1)));
-  EXPECT_TRUE(vixl::d2.Is(Arm64Assembler::reg_d(D2)));
-  EXPECT_TRUE(vixl::d3.Is(Arm64Assembler::reg_d(D3)));
-  EXPECT_TRUE(vixl::d4.Is(Arm64Assembler::reg_d(D4)));
-  EXPECT_TRUE(vixl::d5.Is(Arm64Assembler::reg_d(D5)));
-  EXPECT_TRUE(vixl::d6.Is(Arm64Assembler::reg_d(D6)));
-  EXPECT_TRUE(vixl::d7.Is(Arm64Assembler::reg_d(D7)));
-  EXPECT_TRUE(vixl::d8.Is(Arm64Assembler::reg_d(D8)));
-  EXPECT_TRUE(vixl::d9.Is(Arm64Assembler::reg_d(D9)));
-  EXPECT_TRUE(vixl::d10.Is(Arm64Assembler::reg_d(D10)));
-  EXPECT_TRUE(vixl::d11.Is(Arm64Assembler::reg_d(D11)));
-  EXPECT_TRUE(vixl::d12.Is(Arm64Assembler::reg_d(D12)));
-  EXPECT_TRUE(vixl::d13.Is(Arm64Assembler::reg_d(D13)));
-  EXPECT_TRUE(vixl::d14.Is(Arm64Assembler::reg_d(D14)));
-  EXPECT_TRUE(vixl::d15.Is(Arm64Assembler::reg_d(D15)));
-  EXPECT_TRUE(vixl::d16.Is(Arm64Assembler::reg_d(D16)));
-  EXPECT_TRUE(vixl::d17.Is(Arm64Assembler::reg_d(D17)));
-  EXPECT_TRUE(vixl::d18.Is(Arm64Assembler::reg_d(D18)));
-  EXPECT_TRUE(vixl::d19.Is(Arm64Assembler::reg_d(D19)));
-  EXPECT_TRUE(vixl::d20.Is(Arm64Assembler::reg_d(D20)));
-  EXPECT_TRUE(vixl::d21.Is(Arm64Assembler::reg_d(D21)));
-  EXPECT_TRUE(vixl::d22.Is(Arm64Assembler::reg_d(D22)));
-  EXPECT_TRUE(vixl::d23.Is(Arm64Assembler::reg_d(D23)));
-  EXPECT_TRUE(vixl::d24.Is(Arm64Assembler::reg_d(D24)));
-  EXPECT_TRUE(vixl::d25.Is(Arm64Assembler::reg_d(D25)));
-  EXPECT_TRUE(vixl::d26.Is(Arm64Assembler::reg_d(D26)));
-  EXPECT_TRUE(vixl::d27.Is(Arm64Assembler::reg_d(D27)));
-  EXPECT_TRUE(vixl::d28.Is(Arm64Assembler::reg_d(D28)));
-  EXPECT_TRUE(vixl::d29.Is(Arm64Assembler::reg_d(D29)));
-  EXPECT_TRUE(vixl::d30.Is(Arm64Assembler::reg_d(D30)));
-  EXPECT_TRUE(vixl::d31.Is(Arm64Assembler::reg_d(D31)));
+  EXPECT_TRUE(vixl::aarch64::d0.Is(Arm64Assembler::reg_d(D0)));
+  EXPECT_TRUE(vixl::aarch64::d1.Is(Arm64Assembler::reg_d(D1)));
+  EXPECT_TRUE(vixl::aarch64::d2.Is(Arm64Assembler::reg_d(D2)));
+  EXPECT_TRUE(vixl::aarch64::d3.Is(Arm64Assembler::reg_d(D3)));
+  EXPECT_TRUE(vixl::aarch64::d4.Is(Arm64Assembler::reg_d(D4)));
+  EXPECT_TRUE(vixl::aarch64::d5.Is(Arm64Assembler::reg_d(D5)));
+  EXPECT_TRUE(vixl::aarch64::d6.Is(Arm64Assembler::reg_d(D6)));
+  EXPECT_TRUE(vixl::aarch64::d7.Is(Arm64Assembler::reg_d(D7)));
+  EXPECT_TRUE(vixl::aarch64::d8.Is(Arm64Assembler::reg_d(D8)));
+  EXPECT_TRUE(vixl::aarch64::d9.Is(Arm64Assembler::reg_d(D9)));
+  EXPECT_TRUE(vixl::aarch64::d10.Is(Arm64Assembler::reg_d(D10)));
+  EXPECT_TRUE(vixl::aarch64::d11.Is(Arm64Assembler::reg_d(D11)));
+  EXPECT_TRUE(vixl::aarch64::d12.Is(Arm64Assembler::reg_d(D12)));
+  EXPECT_TRUE(vixl::aarch64::d13.Is(Arm64Assembler::reg_d(D13)));
+  EXPECT_TRUE(vixl::aarch64::d14.Is(Arm64Assembler::reg_d(D14)));
+  EXPECT_TRUE(vixl::aarch64::d15.Is(Arm64Assembler::reg_d(D15)));
+  EXPECT_TRUE(vixl::aarch64::d16.Is(Arm64Assembler::reg_d(D16)));
+  EXPECT_TRUE(vixl::aarch64::d17.Is(Arm64Assembler::reg_d(D17)));
+  EXPECT_TRUE(vixl::aarch64::d18.Is(Arm64Assembler::reg_d(D18)));
+  EXPECT_TRUE(vixl::aarch64::d19.Is(Arm64Assembler::reg_d(D19)));
+  EXPECT_TRUE(vixl::aarch64::d20.Is(Arm64Assembler::reg_d(D20)));
+  EXPECT_TRUE(vixl::aarch64::d21.Is(Arm64Assembler::reg_d(D21)));
+  EXPECT_TRUE(vixl::aarch64::d22.Is(Arm64Assembler::reg_d(D22)));
+  EXPECT_TRUE(vixl::aarch64::d23.Is(Arm64Assembler::reg_d(D23)));
+  EXPECT_TRUE(vixl::aarch64::d24.Is(Arm64Assembler::reg_d(D24)));
+  EXPECT_TRUE(vixl::aarch64::d25.Is(Arm64Assembler::reg_d(D25)));
+  EXPECT_TRUE(vixl::aarch64::d26.Is(Arm64Assembler::reg_d(D26)));
+  EXPECT_TRUE(vixl::aarch64::d27.Is(Arm64Assembler::reg_d(D27)));
+  EXPECT_TRUE(vixl::aarch64::d28.Is(Arm64Assembler::reg_d(D28)));
+  EXPECT_TRUE(vixl::aarch64::d29.Is(Arm64Assembler::reg_d(D29)));
+  EXPECT_TRUE(vixl::aarch64::d30.Is(Arm64Assembler::reg_d(D30)));
+  EXPECT_TRUE(vixl::aarch64::d31.Is(Arm64Assembler::reg_d(D31)));
 
   // S Registers.
-  EXPECT_TRUE(vixl::s0.Is(Arm64Assembler::reg_s(S0)));
-  EXPECT_TRUE(vixl::s1.Is(Arm64Assembler::reg_s(S1)));
-  EXPECT_TRUE(vixl::s2.Is(Arm64Assembler::reg_s(S2)));
-  EXPECT_TRUE(vixl::s3.Is(Arm64Assembler::reg_s(S3)));
-  EXPECT_TRUE(vixl::s4.Is(Arm64Assembler::reg_s(S4)));
-  EXPECT_TRUE(vixl::s5.Is(Arm64Assembler::reg_s(S5)));
-  EXPECT_TRUE(vixl::s6.Is(Arm64Assembler::reg_s(S6)));
-  EXPECT_TRUE(vixl::s7.Is(Arm64Assembler::reg_s(S7)));
-  EXPECT_TRUE(vixl::s8.Is(Arm64Assembler::reg_s(S8)));
-  EXPECT_TRUE(vixl::s9.Is(Arm64Assembler::reg_s(S9)));
-  EXPECT_TRUE(vixl::s10.Is(Arm64Assembler::reg_s(S10)));
-  EXPECT_TRUE(vixl::s11.Is(Arm64Assembler::reg_s(S11)));
-  EXPECT_TRUE(vixl::s12.Is(Arm64Assembler::reg_s(S12)));
-  EXPECT_TRUE(vixl::s13.Is(Arm64Assembler::reg_s(S13)));
-  EXPECT_TRUE(vixl::s14.Is(Arm64Assembler::reg_s(S14)));
-  EXPECT_TRUE(vixl::s15.Is(Arm64Assembler::reg_s(S15)));
-  EXPECT_TRUE(vixl::s16.Is(Arm64Assembler::reg_s(S16)));
-  EXPECT_TRUE(vixl::s17.Is(Arm64Assembler::reg_s(S17)));
-  EXPECT_TRUE(vixl::s18.Is(Arm64Assembler::reg_s(S18)));
-  EXPECT_TRUE(vixl::s19.Is(Arm64Assembler::reg_s(S19)));
-  EXPECT_TRUE(vixl::s20.Is(Arm64Assembler::reg_s(S20)));
-  EXPECT_TRUE(vixl::s21.Is(Arm64Assembler::reg_s(S21)));
-  EXPECT_TRUE(vixl::s22.Is(Arm64Assembler::reg_s(S22)));
-  EXPECT_TRUE(vixl::s23.Is(Arm64Assembler::reg_s(S23)));
-  EXPECT_TRUE(vixl::s24.Is(Arm64Assembler::reg_s(S24)));
-  EXPECT_TRUE(vixl::s25.Is(Arm64Assembler::reg_s(S25)));
-  EXPECT_TRUE(vixl::s26.Is(Arm64Assembler::reg_s(S26)));
-  EXPECT_TRUE(vixl::s27.Is(Arm64Assembler::reg_s(S27)));
-  EXPECT_TRUE(vixl::s28.Is(Arm64Assembler::reg_s(S28)));
-  EXPECT_TRUE(vixl::s29.Is(Arm64Assembler::reg_s(S29)));
-  EXPECT_TRUE(vixl::s30.Is(Arm64Assembler::reg_s(S30)));
-  EXPECT_TRUE(vixl::s31.Is(Arm64Assembler::reg_s(S31)));
+  EXPECT_TRUE(vixl::aarch64::s0.Is(Arm64Assembler::reg_s(S0)));
+  EXPECT_TRUE(vixl::aarch64::s1.Is(Arm64Assembler::reg_s(S1)));
+  EXPECT_TRUE(vixl::aarch64::s2.Is(Arm64Assembler::reg_s(S2)));
+  EXPECT_TRUE(vixl::aarch64::s3.Is(Arm64Assembler::reg_s(S3)));
+  EXPECT_TRUE(vixl::aarch64::s4.Is(Arm64Assembler::reg_s(S4)));
+  EXPECT_TRUE(vixl::aarch64::s5.Is(Arm64Assembler::reg_s(S5)));
+  EXPECT_TRUE(vixl::aarch64::s6.Is(Arm64Assembler::reg_s(S6)));
+  EXPECT_TRUE(vixl::aarch64::s7.Is(Arm64Assembler::reg_s(S7)));
+  EXPECT_TRUE(vixl::aarch64::s8.Is(Arm64Assembler::reg_s(S8)));
+  EXPECT_TRUE(vixl::aarch64::s9.Is(Arm64Assembler::reg_s(S9)));
+  EXPECT_TRUE(vixl::aarch64::s10.Is(Arm64Assembler::reg_s(S10)));
+  EXPECT_TRUE(vixl::aarch64::s11.Is(Arm64Assembler::reg_s(S11)));
+  EXPECT_TRUE(vixl::aarch64::s12.Is(Arm64Assembler::reg_s(S12)));
+  EXPECT_TRUE(vixl::aarch64::s13.Is(Arm64Assembler::reg_s(S13)));
+  EXPECT_TRUE(vixl::aarch64::s14.Is(Arm64Assembler::reg_s(S14)));
+  EXPECT_TRUE(vixl::aarch64::s15.Is(Arm64Assembler::reg_s(S15)));
+  EXPECT_TRUE(vixl::aarch64::s16.Is(Arm64Assembler::reg_s(S16)));
+  EXPECT_TRUE(vixl::aarch64::s17.Is(Arm64Assembler::reg_s(S17)));
+  EXPECT_TRUE(vixl::aarch64::s18.Is(Arm64Assembler::reg_s(S18)));
+  EXPECT_TRUE(vixl::aarch64::s19.Is(Arm64Assembler::reg_s(S19)));
+  EXPECT_TRUE(vixl::aarch64::s20.Is(Arm64Assembler::reg_s(S20)));
+  EXPECT_TRUE(vixl::aarch64::s21.Is(Arm64Assembler::reg_s(S21)));
+  EXPECT_TRUE(vixl::aarch64::s22.Is(Arm64Assembler::reg_s(S22)));
+  EXPECT_TRUE(vixl::aarch64::s23.Is(Arm64Assembler::reg_s(S23)));
+  EXPECT_TRUE(vixl::aarch64::s24.Is(Arm64Assembler::reg_s(S24)));
+  EXPECT_TRUE(vixl::aarch64::s25.Is(Arm64Assembler::reg_s(S25)));
+  EXPECT_TRUE(vixl::aarch64::s26.Is(Arm64Assembler::reg_s(S26)));
+  EXPECT_TRUE(vixl::aarch64::s27.Is(Arm64Assembler::reg_s(S27)));
+  EXPECT_TRUE(vixl::aarch64::s28.Is(Arm64Assembler::reg_s(S28)));
+  EXPECT_TRUE(vixl::aarch64::s29.Is(Arm64Assembler::reg_s(S29)));
+  EXPECT_TRUE(vixl::aarch64::s30.Is(Arm64Assembler::reg_s(S30)));
+  EXPECT_TRUE(vixl::aarch64::s31.Is(Arm64Assembler::reg_s(S31)));
 }
 
 }  // namespace arm64
diff --git a/compiler/utils/string_reference_test.cc b/compiler/utils/string_reference_test.cc
index df5080e..0fd9e5b 100644
--- a/compiler/utils/string_reference_test.cc
+++ b/compiler/utils/string_reference_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "utils/string_reference.h"
+#include "string_reference.h"
 
 #include <memory>
 
diff --git a/compiler/utils/type_reference.h b/compiler/utils/type_reference.h
index bd0739f..d0c1656 100644
--- a/compiler/utils/type_reference.h
+++ b/compiler/utils/type_reference.h
@@ -20,7 +20,7 @@
 #include <stdint.h>
 
 #include "base/logging.h"
-#include "utils/string_reference.h"
+#include "string_reference.h"
 
 namespace art {
 
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index dfc379f..f5f02cd 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -62,7 +62,7 @@
   libnativebridge \
   libnativeloader \
   libsigchain_dummy \
-  libvixl \
+  libvixl-arm64 \
   liblog \
   libz \
   libbacktrace \
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index c133980..0d1d4d7 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -2038,7 +2038,7 @@
           location.c_str(), location.c_str(), kVerifyChecksum, &error_msg, opened_dex_files)) {
         // If we fail to open the dex file because it's been stripped, try to open the dex file
         // from its corresponding oat file.
-        OatFileAssistant oat_file_assistant(location.c_str(), isa, false, false);
+        OatFileAssistant oat_file_assistant(location.c_str(), isa, false);
         std::unique_ptr<OatFile> oat_file(oat_file_assistant.GetBestOatFile());
         if (oat_file == nullptr) {
           LOG(WARNING) << "Failed to open dex file and associated oat file for '" << location
@@ -2145,7 +2145,8 @@
     TimingLogger::ScopedTiming t2("AddDexFileSources", timings_);
     if (zip_fd_ != -1) {
       DCHECK_EQ(oat_writers_.size(), 1u);
-      if (!oat_writers_[0]->AddZippedDexFilesSource(ScopedFd(zip_fd_), zip_location_.c_str())) {
+      if (!oat_writers_[0]->AddZippedDexFilesSource(File(zip_fd_, /* check_usage */ false),
+                                                    zip_location_.c_str())) {
         return false;
       }
     } else if (oat_writers_.size() > 1u) {
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 93351e9..c076b5a 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 
+#include <regex>
+#include <sstream>
 #include <string>
 #include <vector>
-#include <sstream>
 
 #include "common_runtime_test.h"
 
@@ -207,7 +208,7 @@
     std::string dex_location = GetScratchDir() + "/Dex2OatSwapTest.jar";
     std::string odex_location = GetOdexDir() + "/Dex2OatSwapTest.odex";
 
-    Copy(GetDexSrc1(), dex_location);
+    Copy(GetTestDexFileName(), dex_location);
 
     std::vector<std::string> copy(extra_args);
 
@@ -226,7 +227,11 @@
     CheckResult(expect_use);
   }
 
-  void CheckResult(bool expect_use) {
+  virtual std::string GetTestDexFileName() {
+    return GetDexSrc1();
+  }
+
+  virtual void CheckResult(bool expect_use) {
     if (kIsTargetBuild) {
       CheckTargetResult(expect_use);
     } else {
@@ -234,13 +239,13 @@
     }
   }
 
-  void CheckTargetResult(bool expect_use ATTRIBUTE_UNUSED) {
+  virtual void CheckTargetResult(bool expect_use ATTRIBUTE_UNUSED) {
     // TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do
     //       something for variants with file descriptor where we can control the lifetime of
     //       the swap file and thus take a look at it.
   }
 
-  void CheckHostResult(bool expect_use) {
+  virtual void CheckHostResult(bool expect_use) {
     if (!kIsTargetBuild) {
       if (expect_use) {
         EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos)
@@ -253,7 +258,7 @@
   }
 
   // Check whether the dex2oat run was really successful.
-  void CheckValidity() {
+  virtual void CheckValidity() {
     if (kIsTargetBuild) {
       CheckTargetValidity();
     } else {
@@ -261,14 +266,14 @@
     }
   }
 
-  void CheckTargetValidity() {
+  virtual void CheckTargetValidity() {
     // TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do
     //       something for variants with file descriptor where we can control the lifetime of
     //       the swap file and thus take a look at it.
   }
 
   // On the host, we can get the dex2oat output. Here, look for "dex2oat took."
-  void CheckHostValidity() {
+  virtual void CheckHostValidity() {
     EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
   }
 };
@@ -297,6 +302,122 @@
           { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
 }
 
+class Dex2oatSwapUseTest : public Dex2oatSwapTest {
+ protected:
+  void CheckHostResult(bool expect_use) OVERRIDE {
+    if (!kIsTargetBuild) {
+      if (expect_use) {
+        EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos)
+            << output_;
+      } else {
+        EXPECT_EQ(output_.find("Large app, accepted running with swap."), std::string::npos)
+            << output_;
+      }
+    }
+  }
+
+  std::string GetTestDexFileName() OVERRIDE {
+    // Use Statics as it has a handful of functions.
+    return CommonRuntimeTest::GetTestDexFileName("Statics");
+  }
+
+  void GrabResult1() {
+    if (!kIsTargetBuild) {
+      native_alloc_1_ = ParseNativeAlloc();
+      swap_1_ = ParseSwap(false /* expected */);
+    } else {
+      native_alloc_1_ = std::numeric_limits<size_t>::max();
+      swap_1_ = 0;
+    }
+  }
+
+  void GrabResult2() {
+    if (!kIsTargetBuild) {
+      native_alloc_2_ = ParseNativeAlloc();
+      swap_2_ = ParseSwap(true /* expected */);
+    } else {
+      native_alloc_2_ = 0;
+      swap_2_ = std::numeric_limits<size_t>::max();
+    }
+  }
+
+ private:
+  size_t ParseNativeAlloc() {
+    std::regex native_alloc_regex("dex2oat took.*native alloc=[^ ]+ \\(([0-9]+)B\\)");
+    std::smatch native_alloc_match;
+    bool found = std::regex_search(output_, native_alloc_match, native_alloc_regex);
+    if (!found) {
+      EXPECT_TRUE(found);
+      return 0;
+    }
+    if (native_alloc_match.size() != 2U) {
+      EXPECT_EQ(native_alloc_match.size(), 2U);
+      return 0;
+    }
+
+    std::istringstream stream(native_alloc_match[1].str());
+    size_t value;
+    stream >> value;
+
+    return value;
+  }
+
+  size_t ParseSwap(bool expected) {
+    std::regex swap_regex("dex2oat took[^\\n]+swap=[^ ]+ \\(([0-9]+)B\\)");
+    std::smatch swap_match;
+    bool found = std::regex_search(output_, swap_match, swap_regex);
+    if (found != expected) {
+      EXPECT_EQ(expected, found);
+      return 0;
+    }
+
+    if (!found) {
+      return 0;
+    }
+
+    if (swap_match.size() != 2U) {
+      EXPECT_EQ(swap_match.size(), 2U);
+      return 0;
+    }
+
+    std::istringstream stream(swap_match[1].str());
+    size_t value;
+    stream >> value;
+
+    return value;
+  }
+
+ protected:
+  size_t native_alloc_1_;
+  size_t native_alloc_2_;
+
+  size_t swap_1_;
+  size_t swap_2_;
+};
+
+TEST_F(Dex2oatSwapUseTest, CheckSwapUsage) {
+  RunTest(false /* use_fd */,
+          false /* expect_use */);
+  GrabResult1();
+  std::string output_1 = output_;
+
+  output_ = "";
+
+  RunTest(false /* use_fd */,
+          true /* expect_use */,
+          { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
+  GrabResult2();
+  std::string output_2 = output_;
+
+  if (native_alloc_2_ >= native_alloc_1_ || swap_1_ >= swap_2_) {
+    EXPECT_LT(native_alloc_2_, native_alloc_1_);
+    EXPECT_LT(swap_1_, swap_2_);
+
+    LOG(ERROR) << output_1;
+    LOG(ERROR) << output_2;
+  }
+}
+
 class Dex2oatVeryLargeTest : public Dex2oatTest {
  protected:
   void CheckFilter(CompilerFilter::Filter input ATTRIBUTE_UNUSED,
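
The new Dex2oatSwapUseTest above drives dex2oat twice and compares native allocation and swap numbers scraped from its timing output. A condensed, standalone restatement of that scraping, assuming the same regex as ParseSwap(); ParseSwapBytes is a hypothetical free function, not part of the test.

#include <cstddef>
#include <regex>
#include <sstream>
#include <string>

// Returns the byte count from the "swap=... (<N>B)" field of the
// "dex2oat took ..." line, or 0 when the field is absent.
size_t ParseSwapBytes(const std::string& output) {
  std::regex swap_regex("dex2oat took[^\\n]+swap=[^ ]+ \\(([0-9]+)B\\)");
  std::smatch match;
  if (!std::regex_search(output, match, swap_regex) || match.size() != 2u) {
    return 0;
  }
  std::istringstream stream(match[1].str());
  size_t value = 0;
  stream >> value;
  return value;
}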
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 565a8f0..96c3267 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -118,7 +118,7 @@
  * "[I" becomes "int[]".  Also converts '$' to '.', which means this
  * form can't be converted back to a descriptor.
  */
-static char* descriptorToDot(const char* str) {
+static std::unique_ptr<char[]> descriptorToDot(const char* str) {
   int targetLen = strlen(str);
   int offset = 0;
 
@@ -145,8 +145,7 @@
   }
 
   // Copy class name over.
-  char* newStr = reinterpret_cast<char*>(
-      malloc(targetLen + arrayDepth * 2 + 1));
+  std::unique_ptr<char[]> newStr(new char[targetLen + arrayDepth * 2 + 1]);
   int i = 0;
   for (; i < targetLen; i++) {
     const char ch = str[offset + i];
@@ -165,12 +164,10 @@
 
 /*
  * Converts the class name portion of a type descriptor to human-readable
- * "dotted" form.
- *
- * Returns a newly-allocated string.
+ * "dotted" form. For example, "Ljava/lang/String;" becomes "String".
  */
-static char* descriptorClassToDot(const char* str) {
-  // Reduce to just the class name, trimming trailing ';'.
+static std::unique_ptr<char[]> descriptorClassToDot(const char* str) {
+  // Reduce to just the class name prefix.
   const char* lastSlash = strrchr(str, '/');
   if (lastSlash == nullptr) {
     lastSlash = str + 1;  // start past 'L'
@@ -178,13 +175,14 @@
     lastSlash++;          // start past '/'
   }
 
-  char* newStr = strdup(lastSlash);
-  newStr[strlen(lastSlash) - 1] = '\0';
-  for (char* cp = newStr; *cp != '\0'; cp++) {
-    if (*cp == '$') {
-      *cp = '.';
-    }
+  // Copy class name over, trimming trailing ';'.
+  const int targetLen = strlen(lastSlash);
+  std::unique_ptr<char[]> newStr(new char[targetLen]);
+  for (int i = 0; i < targetLen - 1; i++) {
+    const char ch = lastSlash[i];
+    newStr[i] = ch == '$' ? '.' : ch;
   }  // for
+  newStr[targetLen - 1] = '\0';
   return newStr;
 }
 
@@ -723,9 +721,8 @@
   if (gOptions.outputFormat == OUTPUT_PLAIN) {
     fprintf(gOutFile, "    #%d              : '%s'\n", i, interfaceName);
   } else {
-    char* dotted = descriptorToDot(interfaceName);
-    fprintf(gOutFile, "<implements name=\"%s\">\n</implements>\n", dotted);
-    free(dotted);
+    std::unique_ptr<char[]> dot(descriptorToDot(interfaceName));
+    fprintf(gOutFile, "<implements name=\"%s\">\n</implements>\n", dot.get());
   }
 }
 
@@ -1128,11 +1125,9 @@
   const char* backDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
 
   // Generate header.
-  char* tmp = descriptorToDot(backDescriptor);
-  fprintf(gOutFile, "%06x:                                        "
-          "|[%06x] %s.%s:%s\n",
-          codeOffset, codeOffset, tmp, name, signature.ToString().c_str());
-  free(tmp);
+  std::unique_ptr<char[]> dot(descriptorToDot(backDescriptor));
+  fprintf(gOutFile, "%06x:                                        |[%06x] %s.%s:%s\n",
+          codeOffset, codeOffset, dot.get(), name, signature.ToString().c_str());
 
   // Iterate over all instructions.
   const u2* insns = pCode->insns_;
@@ -1211,12 +1206,10 @@
 
     // Method name and prototype.
     if (constructor) {
-      char* tmp = descriptorClassToDot(backDescriptor);
-      fprintf(gOutFile, "<constructor name=\"%s\"\n", tmp);
-      free(tmp);
-      tmp = descriptorToDot(backDescriptor);
-      fprintf(gOutFile, " type=\"%s\"\n", tmp);
-      free(tmp);
+      std::unique_ptr<char[]> dot(descriptorClassToDot(backDescriptor));
+      fprintf(gOutFile, "<constructor name=\"%s\"\n", dot.get());
+      dot = descriptorToDot(backDescriptor);
+      fprintf(gOutFile, " type=\"%s\"\n", dot.get());
     } else {
       fprintf(gOutFile, "<method name=\"%s\"\n", name);
       const char* returnType = strrchr(typeDescriptor, ')');
@@ -1224,9 +1217,8 @@
         fprintf(stderr, "bad method type descriptor '%s'\n", typeDescriptor);
         goto bail;
       }
-      char* tmp = descriptorToDot(returnType+1);
-      fprintf(gOutFile, " return=\"%s\"\n", tmp);
-      free(tmp);
+      std::unique_ptr<char[]> dot(descriptorToDot(returnType + 1));
+      fprintf(gOutFile, " return=\"%s\"\n", dot.get());
       fprintf(gOutFile, " abstract=%s\n", quotedBool((flags & kAccAbstract) != 0));
       fprintf(gOutFile, " native=%s\n", quotedBool((flags & kAccNative) != 0));
       fprintf(gOutFile, " synchronized=%s\n", quotedBool(
@@ -1259,7 +1251,7 @@
         } while (*cp++ != ';');
       } else {
         // Primitive char, copy it.
-        if (strchr("ZBCSIFJD", *base) == NULL) {
+        if (strchr("ZBCSIFJD", *base) == nullptr) {
           fprintf(stderr, "ERROR: bad method signature '%s'\n", base);
           break;  // while
         }
@@ -1267,10 +1259,9 @@
       }
       // Null terminate and display.
       *cp++ = '\0';
-      char* tmp = descriptorToDot(tmpBuf);
+      std::unique_ptr<char[]> dot(descriptorToDot(tmpBuf));
       fprintf(gOutFile, "<parameter name=\"arg%d\" type=\"%s\">\n"
-                        "</parameter>\n", argNum++, tmp);
-      free(tmp);
+                        "</parameter>\n", argNum++, dot.get());
     }  // while
     free(tmpBuf);
     if (constructor) {
@@ -1312,9 +1303,8 @@
     }
   } else if (gOptions.outputFormat == OUTPUT_XML) {
     fprintf(gOutFile, "<field name=\"%s\"\n", name);
-    char *tmp = descriptorToDot(typeDescriptor);
-    fprintf(gOutFile, " type=\"%s\"\n", tmp);
-    free(tmp);
+    std::unique_ptr<char[]> dot(descriptorToDot(typeDescriptor));
+    fprintf(gOutFile, " type=\"%s\"\n", dot.get());
     fprintf(gOutFile, " transient=%s\n", quotedBool((flags & kAccTransient) != 0));
     fprintf(gOutFile, " volatile=%s\n", quotedBool((flags & kAccVolatile) != 0));
     // The "value=" is not knowable w/o parsing annotations.
@@ -1469,13 +1459,11 @@
     }
     fprintf(gOutFile, "  Interfaces        -\n");
   } else {
-    char* tmp = descriptorClassToDot(classDescriptor);
-    fprintf(gOutFile, "<class name=\"%s\"\n", tmp);
-    free(tmp);
+    std::unique_ptr<char[]> dot(descriptorClassToDot(classDescriptor));
+    fprintf(gOutFile, "<class name=\"%s\"\n", dot.get());
     if (superclassDescriptor != nullptr) {
-      tmp = descriptorToDot(superclassDescriptor);
-      fprintf(gOutFile, " extends=\"%s\"\n", tmp);
-      free(tmp);
+      dot = descriptorToDot(superclassDescriptor);
+      fprintf(gOutFile, " extends=\"%s\"\n", dot.get());
     }
     fprintf(gOutFile, " interface=%s\n",
             quotedBool((pClassDef.access_flags_ & kAccInterface) != 0));
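
The dexdump changes above swap malloc()/free() pairs for std::unique_ptr<char[]>, so every descriptorToDot()/descriptorClassToDot() result now frees itself when the caller's scope ends. A minimal stand-in showing the caller side, assuming only the plain Lpackage/Class; case (no array or '$' handling, unlike the real descriptorToDot()).

#include <cstdio>
#include <cstring>
#include <memory>

static std::unique_ptr<char[]> ToDotSketch(const char* descriptor) {
  size_t len = strlen(descriptor);
  if (descriptor[0] == 'L') {
    descriptor++;   // skip the leading 'L'
    len -= 2;       // and drop the trailing ';'
  }
  std::unique_ptr<char[]> out(new char[len + 1]);
  for (size_t i = 0; i < len; ++i) {
    out[i] = (descriptor[i] == '/') ? '.' : descriptor[i];
  }
  out[len] = '\0';
  return out;
}

int main() {
  std::unique_ptr<char[]> dot(ToDotSketch("Ljava/lang/String;"));
  printf(" type=\"%s\"\n", dot.get());  // prints java.lang.String, no free() required
  return 0;
}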
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index 6f19df5..a1bde0e 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -60,18 +60,17 @@
  * final ";" (if any) have been removed and all occurrences of '/'
  * have been changed to '.'.
  */
-static char* descriptorToDot(const char* str) {
-  size_t at = strlen(str);
+static std::unique_ptr<char[]> descriptorToDot(const char* str) {
+  size_t len = strlen(str);
   if (str[0] == 'L') {
-    at -= 2;  // Two fewer chars to copy.
-    str++;
+    len -= 2;  // Two fewer chars to copy (trims L and ;).
+    str++;     // Start past 'L'.
   }
-  char* newStr = reinterpret_cast<char*>(malloc(at + 1));
-  newStr[at] = '\0';
-  while (at > 0) {
-    at--;
-    newStr[at] = (str[at] == '/') ? '.' : str[at];
+  std::unique_ptr<char[]> newStr(new char[len + 1]);
+  for (size_t i = 0; i < len; i++) {
+    newStr[i] = (str[i] == '/') ? '.' : str[i];
   }
+  newStr[len] = '\0';
   return newStr;
 }
 
@@ -103,14 +102,13 @@
   const DexFile::MethodId& pMethodId = pDexFile->GetMethodId(idx);
   const char* methodName = pDexFile->StringDataByIdx(pMethodId.name_idx_);
   const char* classDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
-  char* className = descriptorToDot(classDescriptor);
+  std::unique_ptr<char[]> className(descriptorToDot(classDescriptor));
   const u4 insnsOff = codeOffset + 0x10;
 
   // Don't list methods that do not match a particular query.
   if (gOptions.methodToFind != nullptr &&
-      (strcmp(gOptions.classToFind, className) != 0 ||
+      (strcmp(gOptions.classToFind, className.get()) != 0 ||
        strcmp(gOptions.methodToFind, methodName) != 0)) {
-    free(className);
     return;
   }
 
@@ -130,10 +128,9 @@
   // Dump actual method information.
   fprintf(gOutFile, "0x%08x %d %s %s %s %s %d\n",
           insnsOff, pCode->insns_size_in_code_units_ * 2,
-          className, methodName, typeDesc, fileName, firstLine);
+          className.get(), methodName, typeDesc, fileName, firstLine);
 
   free(typeDesc);
-  free(className);
 }
 
 /*
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index d76bbb8..6905f88 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -91,9 +91,9 @@
   LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
   # For disassembler_arm64.
   ifeq ($$(art_ndebug_or_debug),debug)
-     LOCAL_SHARED_LIBRARIES += libvixl
+     LOCAL_SHARED_LIBRARIES += libvixl-arm64
   else
-     LOCAL_SHARED_LIBRARIES += libvixl
+     LOCAL_SHARED_LIBRARIES += libvixl-arm64
   endif
   ifeq ($$(art_target_or_host),target)
     include $(BUILD_SHARED_LIBRARY)
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 1a3e3f5..c410cd9 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -782,23 +782,13 @@
         args << Rm;
 
         // Shift operand.
-        bool noShift = (imm5 == 0 && shift_type != 0x3);
+        bool noShift = (imm5 == 0 && shift_type == 0x0);
         if (!noShift) {
           args << ", ";
-          switch (shift_type) {
-            case 0x0: args << "lsl"; break;
-            case 0x1: args << "lsr"; break;
-            case 0x2: args << "asr"; break;
-            case 0x3:
-              if (imm5 == 0) {
-                args << "rrx";
-              } else {
-                args << "ror #" << imm5;
-              }
-              break;
-          }
-          if (shift_type != 0x3 /* rrx */) {
-            args << StringPrintf(" #%d", (0 != imm5 || 0 == shift_type) ? imm5 : 32);
+          if (shift_type == 0x3u && imm5 == 0u) {
+            args << "rrx";
+          } else {
+            args << kThumb2ShiftOperations[shift_type] << " #" << ((0 != imm5) ? imm5 : 32);
           }
         }
 
@@ -1516,82 +1506,82 @@
           }
           break;
         }
-      default:      // more formats
-        if ((op2 >> 4) == 2) {      // 010xxxx
-          // data processing (register)
-          if ((instr & 0x0080f0f0) == 0x0000f000) {
-            // LSL, LSR, ASR, ROR
-            uint32_t shift_op = (instr >> 21) & 3;
-            uint32_t S = (instr >> 20) & 1;
-            ArmRegister Rd(instr, 8);
+        default:      // more formats
+          if ((op2 >> 4) == 2) {      // 010xxxx
+            // data processing (register)
+            if ((instr & 0x0080f0f0) == 0x0000f000) {
+              // LSL, LSR, ASR, ROR
+              uint32_t shift_op = (instr >> 21) & 3;
+              uint32_t S = (instr >> 20) & 1;
+              ArmRegister Rd(instr, 8);
+              ArmRegister Rn(instr, 16);
+              ArmRegister Rm(instr, 0);
+              opcode << kThumb2ShiftOperations[shift_op] << (S != 0 ? "s" : "");
+              args << Rd << ", " << Rn << ", " << Rm;
+            }
+          } else if ((op2 >> 3) == 6) {       // 0110xxx
+            // Multiply, multiply accumulate, and absolute difference
+            op1 = (instr >> 20) & 0x7;
+            op2 = (instr >> 4) & 0x1;
+            ArmRegister Ra(instr, 12);
             ArmRegister Rn(instr, 16);
             ArmRegister Rm(instr, 0);
-            opcode << kThumb2ShiftOperations[shift_op] << (S != 0 ? "s" : "");
-            args << Rd << ", " << Rn << ", " << Rm;
-          }
-        } else if ((op2 >> 3) == 6) {       // 0110xxx
-          // Multiply, multiply accumulate, and absolute difference
-          op1 = (instr >> 20) & 0x7;
-          op2 = (instr >> 4) & 0x1;
-          ArmRegister Ra(instr, 12);
-          ArmRegister Rn(instr, 16);
-          ArmRegister Rm(instr, 0);
-          ArmRegister Rd(instr, 8);
-          switch (op1) {
-          case 0:
-            if (op2 == 0) {
-              if (Ra.r == 0xf) {
-                opcode << "mul";
-                args << Rd << ", " << Rn << ", " << Rm;
+            ArmRegister Rd(instr, 8);
+            switch (op1) {
+            case 0:
+              if (op2 == 0) {
+                if (Ra.r == 0xf) {
+                  opcode << "mul";
+                  args << Rd << ", " << Rn << ", " << Rm;
+                } else {
+                  opcode << "mla";
+                  args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
+                }
               } else {
-                opcode << "mla";
+                opcode << "mls";
                 args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
               }
-            } else {
-              opcode << "mls";
-              args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
+              break;
+            case 1:
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+            case 6:
+                break;        // do these sometime
             }
-            break;
-          case 1:
-          case 2:
-          case 3:
-          case 4:
-          case 5:
-          case 6:
-              break;        // do these sometime
+          } else if ((op2 >> 3) == 7) {       // 0111xxx
+            // Long multiply, long multiply accumulate, and divide
+            op1 = (instr >> 20) & 0x7;
+            op2 = (instr >> 4) & 0xf;
+            ArmRegister Rn(instr, 16);
+            ArmRegister Rm(instr, 0);
+            ArmRegister Rd(instr, 8);
+            ArmRegister RdHi(instr, 8);
+            ArmRegister RdLo(instr, 12);
+            switch (op1) {
+            case 0:
+              opcode << "smull";
+              args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
+              break;
+            case 1:
+              opcode << "sdiv";
+              args << Rd << ", " << Rn << ", " << Rm;
+              break;
+            case 2:
+              opcode << "umull";
+              args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
+              break;
+            case 3:
+              opcode << "udiv";
+              args << Rd << ", " << Rn << ", " << Rm;
+              break;
+            case 4:
+            case 5:
+            case 6:
+              break;      // TODO: when we generate these...
+            }
           }
-        } else if ((op2 >> 3) == 7) {       // 0111xxx
-          // Long multiply, long multiply accumulate, and divide
-          op1 = (instr >> 20) & 0x7;
-          op2 = (instr >> 4) & 0xf;
-          ArmRegister Rn(instr, 16);
-          ArmRegister Rm(instr, 0);
-          ArmRegister Rd(instr, 8);
-          ArmRegister RdHi(instr, 8);
-          ArmRegister RdLo(instr, 12);
-          switch (op1) {
-          case 0:
-            opcode << "smull";
-            args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
-            break;
-          case 1:
-            opcode << "sdiv";
-            args << Rd << ", " << Rn << ", " << Rm;
-            break;
-          case 2:
-            opcode << "umull";
-            args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
-            break;
-          case 3:
-            opcode << "udiv";
-            args << Rd << ", " << Rn << ", " << Rm;
-            break;
-          case 4:
-          case 5:
-          case 6:
-            break;      // TODO: when we generate these...
-          }
-        }
       }
       break;
     default:
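
The disassembler_arm.cc hunk above folds the per-shift switch into a table lookup and tightens the no-shift test: only lsl #0 is a plain register operand, ror #0 prints as rrx, and lsr/asr with imm5 == 0 print #32. A standalone sketch of the same formatting rule; kShiftOps assumes kThumb2ShiftOperations maps 0..3 to lsl/lsr/asr/ror.

#include <cstdint>
#include <sstream>
#include <string>

std::string FormatShiftOperand(uint32_t shift_type, uint32_t imm5) {
  static const char* const kShiftOps[] = { "lsl", "lsr", "asr", "ror" };
  if (shift_type == 0x0u && imm5 == 0u) {
    return "";  // no shift to print
  }
  std::ostringstream args;
  if (shift_type == 0x3u && imm5 == 0u) {
    args << ", rrx";  // ror #0 encodes rrx
  } else {
    args << ", " << kShiftOps[shift_type] << " #" << (imm5 != 0u ? imm5 : 32u);
  }
  return args.str();
}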
diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc
index 6a9afe5..a93f7d5 100644
--- a/disassembler/disassembler_arm64.cc
+++ b/disassembler/disassembler_arm64.cc
@@ -24,6 +24,8 @@
 #include "base/stringprintf.h"
 #include "thread.h"
 
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
+
 namespace art {
 namespace arm64 {
 
@@ -38,15 +40,14 @@
   LR  = 30
 };
 
-void CustomDisassembler::AppendRegisterNameToOutput(
-    const vixl::Instruction* instr,
-    const vixl::CPURegister& reg) {
+void CustomDisassembler::AppendRegisterNameToOutput(const Instruction* instr,
+                                                    const CPURegister& reg) {
   USE(instr);
   if (reg.IsRegister() && reg.Is64Bits()) {
-    if (reg.code() == TR) {
+    if (reg.GetCode() == TR) {
       AppendToOutput("tr");
       return;
-    } else if (reg.code() == LR) {
+    } else if (reg.GetCode() == LR) {
       AppendToOutput("lr");
       return;
     }
@@ -56,7 +57,7 @@
   Disassembler::AppendRegisterNameToOutput(instr, reg);
 }
 
-void CustomDisassembler::VisitLoadLiteral(const vixl::Instruction* instr) {
+void CustomDisassembler::VisitLoadLiteral(const Instruction* instr) {
   Disassembler::VisitLoadLiteral(instr);
 
   if (!read_literals_) {
@@ -66,27 +67,27 @@
   // Get address of literal. Bail if not within expected buffer range to
   // avoid trying to fetch invalid literals (we can encounter this when
   // interpreting raw data as instructions).
-  void* data_address = instr->LiteralAddress<void*>();
+  void* data_address = instr->GetLiteralAddress<void*>();
   if (data_address < base_address_ || data_address >= end_address_) {
     AppendToOutput(" (?)");
     return;
   }
 
   // Output information on literal.
-  vixl::Instr op = instr->Mask(vixl::LoadLiteralMask);
+  Instr op = instr->Mask(LoadLiteralMask);
   switch (op) {
-    case vixl::LDR_w_lit:
-    case vixl::LDR_x_lit:
-    case vixl::LDRSW_x_lit: {
-      int64_t data = op == vixl::LDR_x_lit ? *reinterpret_cast<int64_t*>(data_address)
-                                           : *reinterpret_cast<int32_t*>(data_address);
+    case LDR_w_lit:
+    case LDR_x_lit:
+    case LDRSW_x_lit: {
+      int64_t data = op == LDR_x_lit ? *reinterpret_cast<int64_t*>(data_address)
+                                     : *reinterpret_cast<int32_t*>(data_address);
       AppendToOutput(" (0x%" PRIx64 " / %" PRId64 ")", data, data);
       break;
     }
-    case vixl::LDR_s_lit:
-    case vixl::LDR_d_lit: {
-      double data = (op == vixl::LDR_s_lit) ? *reinterpret_cast<float*>(data_address)
-                                            : *reinterpret_cast<double*>(data_address);
+    case LDR_s_lit:
+    case LDR_d_lit: {
+      double data = (op == LDR_s_lit) ? *reinterpret_cast<float*>(data_address)
+                                      : *reinterpret_cast<double*>(data_address);
       AppendToOutput(" (%g)", data);
       break;
     }
@@ -95,11 +96,11 @@
   }
 }
 
-void CustomDisassembler::VisitLoadStoreUnsignedOffset(const vixl::Instruction* instr) {
+void CustomDisassembler::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
   Disassembler::VisitLoadStoreUnsignedOffset(instr);
 
-  if (instr->Rn() == TR) {
-    int64_t offset = instr->ImmLSUnsigned() << instr->SizeLS();
+  if (instr->GetRn() == TR) {
+    int64_t offset = instr->GetImmLSUnsigned() << instr->GetSizeLS();
     std::ostringstream tmp_stream;
     Thread::DumpThreadOffset<8>(tmp_stream, static_cast<uint32_t>(offset));
     AppendToOutput(" ; %s", tmp_stream.str().c_str());
@@ -107,15 +108,15 @@
 }
 
 size_t DisassemblerArm64::Dump(std::ostream& os, const uint8_t* begin) {
-  const vixl::Instruction* instr = reinterpret_cast<const vixl::Instruction*>(begin);
+  const Instruction* instr = reinterpret_cast<const Instruction*>(begin);
   decoder.Decode(instr);
     os << FormatInstructionPointer(begin)
-     << StringPrintf(": %08x\t%s\n", instr->InstructionBits(), disasm.GetOutput());
-  return vixl::kInstructionSize;
+     << StringPrintf(": %08x\t%s\n", instr->GetInstructionBits(), disasm.GetOutput());
+  return kInstructionSize;
 }
 
 void DisassemblerArm64::Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) {
-  for (const uint8_t* cur = begin; cur < end; cur += vixl::kInstructionSize) {
+  for (const uint8_t* cur = begin; cur < end; cur += kInstructionSize) {
     Dump(os, cur);
   }
 }
diff --git a/disassembler/disassembler_arm64.h b/disassembler/disassembler_arm64.h
index a4e5ee8..c64d8ea 100644
--- a/disassembler/disassembler_arm64.h
+++ b/disassembler/disassembler_arm64.h
@@ -21,34 +21,35 @@
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wshadow"
-#include "vixl/a64/decoder-a64.h"
-#include "vixl/a64/disasm-a64.h"
+#include "a64/decoder-a64.h"
+#include "a64/disasm-a64.h"
 #pragma GCC diagnostic pop
 
 namespace art {
 namespace arm64 {
 
-class CustomDisassembler FINAL : public vixl::Disassembler {
+class CustomDisassembler FINAL : public vixl::aarch64::Disassembler {
  public:
   explicit CustomDisassembler(DisassemblerOptions* options)
-      : vixl::Disassembler(),
+      : vixl::aarch64::Disassembler(),
         read_literals_(options->can_read_literals_),
         base_address_(options->base_address_),
         end_address_(options->end_address_) {
     if (!options->absolute_addresses_) {
-      MapCodeAddress(0, reinterpret_cast<const vixl::Instruction*>(options->base_address_));
+      MapCodeAddress(0,
+                     reinterpret_cast<const vixl::aarch64::Instruction*>(options->base_address_));
     }
   }
 
   // Use register aliases in the disassembly.
-  void AppendRegisterNameToOutput(const vixl::Instruction* instr,
-                                  const vixl::CPURegister& reg) OVERRIDE;
+  void AppendRegisterNameToOutput(const vixl::aarch64::Instruction* instr,
+                                  const vixl::aarch64::CPURegister& reg) OVERRIDE;
 
   // Improve the disassembly of literal load instructions.
-  void VisitLoadLiteral(const vixl::Instruction* instr) OVERRIDE;
+  void VisitLoadLiteral(const vixl::aarch64::Instruction* instr) OVERRIDE;
 
   // Improve the disassembly of thread offset.
-  void VisitLoadStoreUnsignedOffset(const vixl::Instruction* instr) OVERRIDE;
+  void VisitLoadStoreUnsignedOffset(const vixl::aarch64::Instruction* instr) OVERRIDE;
 
  private:
   // Indicate if the disassembler should read data loaded from literal pools.
@@ -75,7 +76,7 @@
   void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE;
 
  private:
-  vixl::Decoder decoder;
+  vixl::aarch64::Decoder decoder;
   CustomDisassembler disasm;
 
   DISALLOW_COPY_AND_ASSIGN(DisassemblerArm64);
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 1f74c93..7f6a7ba 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -243,7 +243,38 @@
   return address.str();
 }
 
+size_t DisassemblerX86::DumpNops(std::ostream& os, const uint8_t* instr) {
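+  // Recognized NOP encodings; row i holds an i-byte NOP sequence.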
+  static constexpr uint8_t kNops[][10] = {
+      { },
+      { 0x90 },
+      { 0x66, 0x90 },
+      { 0x0f, 0x1f, 0x00 },
+      { 0x0f, 0x1f, 0x40, 0x00 },
+      { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
+      { 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 },
+      { 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 },
+      { 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+      { 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }
+  };
+
+  for (size_t i = 1; i < arraysize(kNops); ++i) {
+    if (memcmp(instr, kNops[i], i) == 0) {
+      os << FormatInstructionPointer(instr)
+         << StringPrintf(": %22s    \t       nop \n", DumpCodeHex(instr, instr + i).c_str());
+      return i;
+    }
+  }
+
+  return 0;
+}
+
 size_t DisassemblerX86::DumpInstruction(std::ostream& os, const uint8_t* instr) {
+  size_t nop_size = DumpNops(os, instr);
+  if (nop_size != 0u) {
+    return nop_size;
+  }
+
   const uint8_t* begin_instr = instr;
   bool have_prefixes = true;
   uint8_t prefix[4] = {0, 0, 0, 0};
@@ -400,6 +431,7 @@
   case 0x89: opcode1 = "mov"; store = true; has_modrm = true; break;
   case 0x8A: opcode1 = "mov"; load = true; has_modrm = true; byte_operand = true; break;
   case 0x8B: opcode1 = "mov"; load = true; has_modrm = true; break;
+  case 0x9D: opcode1 = "popf"; break;
 
   case 0x0F:  // 2 byte extended opcode
     instr++;
diff --git a/disassembler/disassembler_x86.h b/disassembler/disassembler_x86.h
index 71c3e41..31b62bc 100644
--- a/disassembler/disassembler_x86.h
+++ b/disassembler/disassembler_x86.h
@@ -33,6 +33,7 @@
   void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE;
 
  private:
+  size_t DumpNops(std::ostream& os, const uint8_t* instr);
   size_t DumpInstruction(std::ostream& os, const uint8_t* instr);
 
   std::string DumpAddress(uint8_t mod, uint8_t rm, uint8_t rex64, uint8_t rex_w, bool no_ops,
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index 214222d..f5669d7 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -729,7 +729,7 @@
           os << "        " << reinterpret_cast<void*>(obj) << " ";
           os << "  entryPointFromJni: "
              << reinterpret_cast<const void*>(
-                    art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", ";
+                    art_method->GetDataPtrSize(pointer_size)) << ", ";
           os << "  entryPointFromQuickCompiledCode: "
              << reinterpret_cast<const void*>(
                     art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size))
@@ -810,7 +810,7 @@
           os << "        " << reinterpret_cast<void*>(obj) << " ";
           os << "  entryPointFromJni: "
              << reinterpret_cast<const void*>(
-                    art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", ";
+                    art_method->GetDataPtrSize(pointer_size)) << ", ";
           os << "  entryPointFromQuickCompiledCode: "
              << reinterpret_cast<const void*>(
                     art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size))
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 3f031a3..64349b5 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -56,8 +56,9 @@
 #include "os.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
-#include "stack_map.h"
 #include "ScopedLocalRef.h"
+#include "stack_map.h"
+#include "string_reference.h"
 #include "thread_list.h"
 #include "type_lookup_table.h"
 #include "verifier/method_verifier.h"
@@ -447,6 +448,28 @@
       os << StringPrintf("0x%08x\n\n", resolved_addr2instr_);
     }
 
+    // Dumping the dex file overview is compact enough to do even if header only.
+    DexFileData cumulative;
+    for (size_t i = 0; i < oat_dex_files_.size(); i++) {
+      const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
+      CHECK(oat_dex_file != nullptr);
+      std::string error_msg;
+      const DexFile* const dex_file = OpenDexFile(oat_dex_file, &error_msg);
+      if (dex_file == nullptr) {
+        os << "Failed to open dex file '" << oat_dex_file->GetDexFileLocation() << "': "
+           << error_msg;
+        continue;
+      }
+      DexFileData data(*dex_file);
+      os << "Dex file data for " << dex_file->GetLocation() << "\n";
+      data.Dump(os);
+      os << "\n";
+      cumulative.Add(data);
+    }
+    os << "Cumulative dex file data\n";
+    cumulative.Dump(os);
+    os << "\n";
+
     if (!options_.dump_header_only_) {
       for (size_t i = 0; i < oat_dex_files_.size(); i++) {
         const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
@@ -568,6 +591,122 @@
     offsets_.insert(oat_method.GetVmapTableOffset());
   }
 
+  // Dex file data, may be for multiple different dex files.
+  class DexFileData {
+   public:
+    DexFileData() {}
+
+    explicit DexFileData(const DexFile& dex_file)
+        : num_string_ids_(dex_file.NumStringIds()),
+          num_method_ids_(dex_file.NumMethodIds()),
+          num_field_ids_(dex_file.NumFieldIds()),
+          num_type_ids_(dex_file.NumTypeIds()),
+          num_class_defs_(dex_file.NumClassDefs()) {
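+      // Walk every class def to gather per-code-item and string-load statistics.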
+      for (size_t class_def_index = 0; class_def_index < num_class_defs_; ++class_def_index) {
+        const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
+        WalkClass(dex_file, class_def);
+      }
+    }
+
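+    // Merge counters and sets from another DexFileData; used to build the cumulative summary.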
+    void Add(const DexFileData& other) {
+      AddAll(unique_string_ids_from_code_, other.unique_string_ids_from_code_);
+      num_string_ids_from_code_ += other.num_string_ids_from_code_;
+      AddAll(dex_code_item_ptrs_, other.dex_code_item_ptrs_);
+      dex_code_bytes_ += other.dex_code_bytes_;
+      num_string_ids_ += other.num_string_ids_;
+      num_method_ids_ += other.num_method_ids_;
+      num_field_ids_ += other.num_field_ids_;
+      num_type_ids_ += other.num_type_ids_;
+      num_class_defs_ += other.num_class_defs_;
+    }
+
+    void Dump(std::ostream& os) {
+      os << "Num string ids: " << num_string_ids_ << "\n";
+      os << "Num method ids: " << num_method_ids_ << "\n";
+      os << "Num field ids: " << num_field_ids_ << "\n";
+      os << "Num type ids: " << num_type_ids_ << "\n";
+      os << "Num class defs: " << num_class_defs_ << "\n";
+      os << "Unique strings loaded from dex code: " << unique_string_ids_from_code_.size() << "\n";
+      os << "Total strings loaded from dex code: " << num_string_ids_from_code_ << "\n";
+      os << "Number of unique dex code items: " << dex_code_item_ptrs_.size() << "\n";
+      os << "Total number of dex code bytes: " << dex_code_bytes_ << "\n";
+    }
+
+   private:
+    void WalkClass(const DexFile& dex_file, const DexFile::ClassDef& class_def) {
+      const uint8_t* class_data = dex_file.GetClassData(class_def);
+      if (class_data == nullptr) {  // empty class such as a marker interface?
+        return;
+      }
+      ClassDataItemIterator it(dex_file, class_data);
+      SkipAllFields(it);
+      while (it.HasNextDirectMethod()) {
+        WalkCodeItem(dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      while (it.HasNextVirtualMethod()) {
+        WalkCodeItem(dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      DCHECK(!it.HasNext());
+    }
+
+    void WalkCodeItem(const DexFile& dex_file, const DexFile::CodeItem* code_item) {
+      if (code_item == nullptr) {
+        return;
+      }
+      const size_t code_item_size = code_item->insns_size_in_code_units_;
+      const uint16_t* code_ptr = code_item->insns_;
+      const uint16_t* code_end = code_item->insns_ + code_item_size;
+
+      // If we inserted a new dex code item pointer, add to total code bytes.
+      if (dex_code_item_ptrs_.insert(code_ptr).second) {
+        dex_code_bytes_ += code_item_size * sizeof(code_ptr[0]);
+      }
+
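+      // Scan the bytecode for const-string and const-string/jumbo to count string loads.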
+      while (code_ptr < code_end) {
+        const Instruction* inst = Instruction::At(code_ptr);
+        switch (inst->Opcode()) {
+          case Instruction::CONST_STRING: {
+            const uint32_t string_index = inst->VRegB_21c();
+            unique_string_ids_from_code_.insert(StringReference(&dex_file, string_index));
+            ++num_string_ids_from_code_;
+            break;
+          }
+          case Instruction::CONST_STRING_JUMBO: {
+            const uint32_t string_index = inst->VRegB_31c();
+            unique_string_ids_from_code_.insert(StringReference(&dex_file, string_index));
+            ++num_string_ids_from_code_;
+            break;
+          }
+          default:
+            break;
+        }
+
+        code_ptr += inst->SizeInCodeUnits();
+      }
+    }
+
+    // Unique string ids loaded from dex code.
+    std::set<StringReference, StringReferenceComparator> unique_string_ids_from_code_;
+
+    // Total string ids loaded from dex code.
+    size_t num_string_ids_from_code_ = 0;
+
+    // Unique code pointers.
+    std::set<const void*> dex_code_item_ptrs_;
+
+    // Total "unique" dex code bytes.
+    size_t dex_code_bytes_ = 0;
+
+    // Other dex ids.
+    size_t num_string_ids_ = 0;
+    size_t num_method_ids_ = 0;
+    size_t num_field_ids_ = 0;
+    size_t num_type_ids_ = 0;
+    size_t num_class_defs_ = 0;
+  };
+
   bool DumpOatDexFile(std::ostream& os, const OatFile::OatDexFile& oat_dex_file) {
     bool success = true;
     bool stop_analysis = false;
@@ -578,7 +717,6 @@
     // Print embedded dex file data range.
     const uint8_t* const oat_file_begin = oat_dex_file.GetOatFile()->Begin();
     const uint8_t* const dex_file_pointer = oat_dex_file.GetDexFilePointer();
-    std::set<uint32_t> string_ids;
     uint32_t dex_offset = dchecked_integral_cast<uint32_t>(dex_file_pointer - oat_file_begin);
     os << StringPrintf("dex-file: 0x%08x..0x%08x\n",
                        dex_offset,
@@ -623,8 +761,10 @@
          << " (" << oat_class.GetStatus() << ")"
          << " (" << oat_class.GetType() << ")\n";
       // TODO: include bitmap here if type is kOatClassSomeCompiled?
-      if (options_.list_classes_) continue;
-      if (!DumpOatClass(&vios, oat_class, *dex_file, class_def, &stop_analysis, string_ids)) {
+      if (options_.list_classes_) {
+        continue;
+      }
+      if (!DumpOatClass(&vios, oat_class, *dex_file, class_def, &stop_analysis)) {
         success = false;
       }
       if (stop_analysis) {
@@ -632,7 +772,7 @@
         return success;
       }
     }
-    os << "Number of unique strings loaded from dex code: " << string_ids.size() << "\n";
+    os << "\n";
     os << std::flush;
     return success;
   }
@@ -726,8 +866,7 @@
 
   bool DumpOatClass(VariableIndentationOutputStream* vios,
                     const OatFile::OatClass& oat_class, const DexFile& dex_file,
-                    const DexFile::ClassDef& class_def, bool* stop_analysis,
-                    std::set<uint32_t>& string_ids) {
+                    const DexFile::ClassDef& class_def, bool* stop_analysis) {
     bool success = true;
     bool addr_found = false;
     const uint8_t* class_data = dex_file.GetClassData(class_def);
@@ -741,7 +880,7 @@
     while (it.HasNextDirectMethod()) {
       if (!DumpOatMethod(vios, class_def, class_method_index, oat_class, dex_file,
                          it.GetMemberIndex(), it.GetMethodCodeItem(),
-                         it.GetRawMemberAccessFlags(), &addr_found, string_ids)) {
+                         it.GetRawMemberAccessFlags(), &addr_found)) {
         success = false;
       }
       if (addr_found) {
@@ -754,7 +893,7 @@
     while (it.HasNextVirtualMethod()) {
       if (!DumpOatMethod(vios, class_def, class_method_index, oat_class, dex_file,
                          it.GetMemberIndex(), it.GetMethodCodeItem(),
-                         it.GetRawMemberAccessFlags(), &addr_found, string_ids)) {
+                         it.GetRawMemberAccessFlags(), &addr_found)) {
         success = false;
       }
       if (addr_found) {
@@ -779,35 +918,9 @@
                      uint32_t class_method_index,
                      const OatFile::OatClass& oat_class, const DexFile& dex_file,
                      uint32_t dex_method_idx, const DexFile::CodeItem* code_item,
-                     uint32_t method_access_flags, bool* addr_found,
-                     std::set<uint32_t>& string_ids) {
+                     uint32_t method_access_flags, bool* addr_found) {
     bool success = true;
 
-    if (code_item != nullptr) {
-      const uint16_t* code_ptr = code_item->insns_;
-      const uint16_t* code_end = code_item->insns_ + code_item->insns_size_in_code_units_;
-
-      while (code_ptr < code_end) {
-        const Instruction* inst = Instruction::At(code_ptr);
-        switch (inst->Opcode()) {
-          case Instruction::CONST_STRING: {
-            uint32_t string_index = inst->VRegB_21c();
-            string_ids.insert(string_index);
-            break;
-          }
-          case Instruction::CONST_STRING_JUMBO: {
-            uint32_t string_index = inst->VRegB_31c();
-            string_ids.insert(string_index);
-            break;
-          }
-
-          default:
-            break;
-        }
-
-        code_ptr += inst->SizeInCodeUnits();
-      }
-    }
     // TODO: Support regex
     std::string method_name = dex_file.GetMethodName(dex_file.GetMethodId(dex_method_idx));
     if (method_name.find(options_.method_filter_) == std::string::npos) {
diff --git a/oatdump/oatdump_test.cc b/oatdump/oatdump_test.cc
index c7ced8a..63dc476 100644
--- a/oatdump/oatdump_test.cc
+++ b/oatdump/oatdump_test.cc
@@ -14,13 +14,14 @@
  * limitations under the License.
  */
 
+#include <sstream>
 #include <string>
 #include <vector>
-#include <sstream>
 
 #include "common_runtime_test.h"
 
 #include "base/stringprintf.h"
+#include "base/unix_file/fd_file.h"
 #include "runtime/arch/instruction_set.h"
 #include "runtime/gc/heap.h"
 #include "runtime/gc/space/image_space.h"
@@ -58,26 +59,130 @@
   };
 
   // Run the test with custom arguments.
-  bool Exec(Mode mode, const std::vector<std::string>& args, std::string* error_msg) {
+  bool Exec(Mode mode,
+            const std::vector<std::string>& args,
+            bool list_only,
+            std::string* error_msg) {
     std::string file_path = GetOatDumpFilePath();
 
     EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
 
     std::vector<std::string> exec_argv = { file_path };
+    std::vector<std::string> expected_prefixes;
     if (mode == kModeSymbolize) {
       exec_argv.push_back("--symbolize=" + core_oat_location_);
       exec_argv.push_back("--output=" + core_oat_location_ + ".symbolize");
-    } else if (mode == kModeArt) {
-      exec_argv.push_back("--image=" + core_art_location_);
-      exec_argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(kRuntimeISA)));
-      exec_argv.push_back("--output=/dev/null");
     } else {
-      CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
-      exec_argv.push_back("--oat-file=" + core_oat_location_);
-      exec_argv.push_back("--output=/dev/null");
+      expected_prefixes.push_back("Dex file data for");
+      expected_prefixes.push_back("Num string ids:");
+      expected_prefixes.push_back("Num field ids:");
+      expected_prefixes.push_back("Num method ids:");
+      expected_prefixes.push_back("LOCATION:");
+      expected_prefixes.push_back("MAGIC:");
+      expected_prefixes.push_back("DEX FILE COUNT:");
+      if (!list_only) {
+        // Code and dex code do not show up if list only.
+        expected_prefixes.push_back("DEX CODE:");
+        expected_prefixes.push_back("CODE:");
+      }
+      if (mode == kModeArt) {
+        exec_argv.push_back("--image=" + core_art_location_);
+        exec_argv.push_back("--instruction-set=" + std::string(
+            GetInstructionSetString(kRuntimeISA)));
+        expected_prefixes.push_back("IMAGE LOCATION:");
+        expected_prefixes.push_back("IMAGE BEGIN:");
+        expected_prefixes.push_back("kDexCaches:");
+      } else {
+        CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
+        exec_argv.push_back("--oat-file=" + core_oat_location_);
+      }
     }
     exec_argv.insert(exec_argv.end(), args.begin(), args.end());
-    return ::art::Exec(exec_argv, error_msg);
+
+    bool result = true;
+    // Capture the tool's stdout through a pipe so we can scan it for the expected prefixes.
+    int link[2];
+    if (pipe(link) == -1) {
+      return false;
+    }
+
+    const pid_t pid = fork();
+    if (pid == -1) {
+      return false;
+    }
+
+    if (pid == 0) {
+      dup2(link[1], STDOUT_FILENO);
+      close(link[0]);
+      close(link[1]);
+      bool res = ::art::Exec(exec_argv, error_msg);
+      // Delete the runtime to prevent memory leaks and please valgrind.
+      delete Runtime::Current();
+      exit(res ? 0 : 1);
+    } else {
+      close(link[1]);
+      static const size_t kLineMax = 256;
+      char line[kLineMax] = {};
+      size_t line_len = 0;
+      size_t total = 0;
+      std::vector<bool> found(expected_prefixes.size(), false);
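+      // Consume the child's stdout, matching each line against the expected prefixes.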
+      while (true) {
+        while (true) {
+          size_t spaces = 0;
+          // Trim spaces at the start of the line.
+          for (; spaces < line_len && isspace(line[spaces]); ++spaces) {}
+          if (spaces > 0) {
+            line_len -= spaces;
+            memmove(&line[0], &line[spaces], line_len);
+          }
+          ssize_t bytes_read =
+              TEMP_FAILURE_RETRY(read(link[0], &line[line_len], kLineMax - line_len));
+          if (bytes_read <= 0) {
+            break;
+          }
+          line_len += bytes_read;
+          total += bytes_read;
+        }
+        if (line_len == 0) {
+          break;
+        }
+        // Check contents.
+        for (size_t i = 0; i < expected_prefixes.size(); ++i) {
+          const std::string& expected = expected_prefixes[i];
+          if (!found[i] &&
+              line_len >= expected.length() &&
+              memcmp(line, expected.c_str(), expected.length()) == 0) {
+            found[i] = true;
+          }
+        }
+        // Skip to next line.
+        size_t next_line = 0;
+        for (; next_line + 1 < line_len && line[next_line] != '\n'; ++next_line) {}
+        line_len -= next_line + 1;
+        memmove(&line[0], &line[next_line + 1], line_len);
+      }
+      if (mode == kModeSymbolize) {
+        EXPECT_EQ(total, 0u);
+      } else {
+        EXPECT_GT(total, 0u);
+      }
+      LOG(INFO) << "Processed bytes " << total;
+      close(link[0]);
+      int status = 0;
+      if (waitpid(pid, &status, 0) != -1) {
+        result = (status == 0);
+      }
+
+      for (size_t i = 0; i < expected_prefixes.size(); ++i) {
+        if (!found[i]) {
+          LOG(ERROR) << "Did not find prefix " << expected_prefixes[i];
+          result = false;
+        }
+      }
+    }
+
+    return result;
   }
 
  private:
@@ -89,37 +194,37 @@
 #if !defined(__arm__) && !defined(__mips__)
 TEST_F(OatDumpTest, TestImage) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestOatImage) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeOat, {}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeOat, {}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestNoDumpVmap) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--no-dump:vmap"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--no-dump:vmap"}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestNoDisassemble) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--no-disassemble"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--no-disassemble"}, /*list_only*/ false, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestListClasses) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--list-classes"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--list-classes"}, /*list_only*/ true, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestListMethods) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeArt, {"--list-methods"}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeArt, {"--list-methods"}, /*list_only*/ true, &error_msg)) << error_msg;
 }
 
 TEST_F(OatDumpTest, TestSymbolize) {
   std::string error_msg;
-  ASSERT_TRUE(Exec(kModeSymbolize, {}, &error_msg)) << error_msg;
+  ASSERT_TRUE(Exec(kModeSymbolize, {}, /*list_only*/ true, &error_msg)) << error_msg;
 }
 #endif
 }  // namespace art
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 5bb61bb..569c5e9 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -748,8 +748,8 @@
   copy->SetEntryPointFromQuickCompiledCodePtrSize(RelocatedAddressOfPointer(
       object->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size)), pointer_size);
   // No special handling for IMT conflict table since all pointers are moved by the same offset.
-  copy->SetEntryPointFromJniPtrSize(RelocatedAddressOfPointer(
-      object->GetEntryPointFromJniPtrSize(pointer_size)), pointer_size);
+  copy->SetDataPtrSize(RelocatedAddressOfPointer(
+      object->GetDataPtrSize(pointer_size)), pointer_size);
 }
 
 bool PatchOat::Patch(File* input_oat, off_t delta, File* output_oat, TimingLogger* timings,
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 4c68862..966587d 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -27,9 +27,27 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
 
+// Read barrier entrypoints.
+// art_quick_read_barrier_mark_regX uses a non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg04(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
 
 // Used by soft float.
 // Single-precision FP arithmetics.
@@ -103,7 +121,39 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMark = artReadBarrierMark;
+  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = art_quick_read_barrier_mark_reg04;
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
+  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
+  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
+  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
+  qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12;
+  qpoints->pReadBarrierMarkReg13 = nullptr;  // Cannot use register 13 (SP) to pass arguments.
+  qpoints->pReadBarrierMarkReg14 = nullptr;  // Cannot use register 14 (LR) to pass arguments.
+  qpoints->pReadBarrierMarkReg15 = nullptr;  // Cannot use register 15 (PC) to pass arguments.
+  // ARM has only 16 core registers.
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  qpoints->pReadBarrierMarkReg29 = nullptr;
+  qpoints->pReadBarrierMarkReg30 = nullptr;
+  qpoints->pReadBarrierMarkReg31 = nullptr;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 }
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index d940164..34d3158 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -672,6 +672,12 @@
     .endif
 .endm
 
+// Save rReg's value to [sp, #offset].
+.macro PUSH_REG rReg, offset
+    str \rReg, [sp, #\offset]       @ save rReg
+    .cfi_rel_offset \rReg, \offset
+.endm
+
     /*
      * Macro to insert read barrier, only used in art_quick_aput_obj.
      * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
@@ -1042,6 +1048,18 @@
 #endif
     POISON_HEAP_REF r2
     str    r2, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
+                                                              // Fence. This is "ish" not "ishst" so
+                                                              // that it also ensures ordering of
+                                                              // the class status load with respect
+                                                              // to later accesses to the class
+                                                              // object. Alternatively we could use
+                                                              // "ishst" if we use load-acquire for
+                                                              // the class status load.)
+                                                              // Needs to be done before pushing on
+                                                              // allocation since Heap::VisitObjects
+                                                              // relies on seeing the class pointer.
+                                                              // b/28790624
+    dmb    ish
                                                               // Push the new object onto the thread
                                                               // local allocation stack and
                                                               // increment the thread local
@@ -1056,14 +1074,7 @@
                                                               // and the list head store above using
                                                               // strd.
     str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
-                                                              // Fence. This is "ish" not "ishst" so
-                                                              // that the code after this allocation
-                                                              // site will see the right values in
-                                                              // the fields of the class.
-                                                              // Alternatively we could use "ishst"
-                                                              // if we use load-acquire for the
-                                                              // class status load.)
-    dmb    ish
+
     mov    r0, r3                                             // Set the return value and return.
     bx     lr
 
@@ -1743,3 +1754,96 @@
     .cfi_adjust_cfa_offset -4
     pop   {pc}
 END art_quick_l2f
+
+    /*
+     * Create a function `name` calling the ReadBarrier::Mark routine,
+     * getting its argument and returning its result through register
+     * `reg`, saving and restoring all caller-save registers.
+     *
+     * If `reg` is different from `r0`, the generated function follows a
+     * non-standard runtime calling convention:
+     * - register `reg` is used to pass the (sole) argument of this
+     *   function (instead of R0);
+     * - register `reg` is used to return the result of this function
+     *   (instead of R0);
+     * - R0 is treated like a normal (non-argument) caller-save register;
+     * - everything else is the same as in the standard runtime calling
+     *   convention (e.g. standard callee-save registers are preserved).
+     */
+.macro READ_BARRIER_MARK_REG name, reg
+ENTRY \name
+    push  {r0-r4, r9, r12, lr}          @ save return address and core caller-save registers
+    .cfi_adjust_cfa_offset 32
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset r4, 16
+    .cfi_rel_offset r9, 20
+    .cfi_rel_offset r12, 24
+    .cfi_rel_offset lr, 28
+    vpush {s0-s15}                      @ save floating-point caller-save registers
+    .cfi_adjust_cfa_offset 64
+
+    .ifnc \reg, r0
+      mov   r0, \reg                    @ pass arg1 - obj from `reg`
+    .endif
+    bl    artReadBarrierMark            @ r0 <- artReadBarrierMark(obj)
+
+    vpop {s0-s15}                       @ restore floating-point registers
+    .cfi_adjust_cfa_offset -64
+    @ If `reg` is a caller-save register, save the result to its
+    @ corresponding stack slot; it will be restored by the "pop"
+    @ instruction below. Otherwise, move result into `reg`.
+    @
+    @ (Note that saving `reg` to its stack slot will overwrite the value
+    @ previously stored by the "push" instruction above. That is
+    @ alright, as in that case we know that `reg` is not a live
+    @ register, as it is used to pass the argument and return the result
+    @ of this function.)
+    .ifc \reg, r0
+      PUSH_REG r0, 0                    @ copy result to r0's stack location
+    .else
+      .ifc \reg, r1
+        PUSH_REG r0, 4                  @ copy result to r1's stack location
+      .else
+        .ifc \reg, r2
+          PUSH_REG r0, 8                @ copy result to r2's stack location
+        .else
+          .ifc \reg, r3
+            PUSH_REG r0, 12             @ copy result to r3's stack location
+          .else
+            .ifc \reg, r4
+              PUSH_REG r0, 16           @ copy result to r4's stack location
+            .else
+              .ifc \reg, r9
+                PUSH_REG r0, 20         @ copy result to r9's stack location
+              .else
+                .ifc \reg, r12
+                  PUSH_REG r0, 24       @ copy result to r12's stack location
+                .else
+                  mov   \reg, r0        @ return result into `reg`
+                .endif
+              .endif
+            .endif
+          .endif
+        .endif
+      .endif
+    .endif
+    pop   {r0-r4, r9, r12, pc}          @ restore caller-save registers and return
+END \name
+.endm
+
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index bf0f647..2e5f5ad 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -27,8 +27,46 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
+
+// Read barrier entrypoints.
+// art_quick_read_barrier_mark_regX uses a non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg04(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg13(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg14(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg15(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg16(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg17(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg18(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg19(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg20(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg21(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg23(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg24(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg25(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg26(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg27(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg28(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg29(mirror::Object*);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
@@ -86,7 +124,38 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMark = artReadBarrierMark;
+  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = art_quick_read_barrier_mark_reg04;
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
+  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
+  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
+  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
+  qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12;
+  qpoints->pReadBarrierMarkReg13 = art_quick_read_barrier_mark_reg13;
+  qpoints->pReadBarrierMarkReg14 = art_quick_read_barrier_mark_reg14;
+  qpoints->pReadBarrierMarkReg15 = art_quick_read_barrier_mark_reg15;
+  qpoints->pReadBarrierMarkReg16 = art_quick_read_barrier_mark_reg16;
+  qpoints->pReadBarrierMarkReg17 = art_quick_read_barrier_mark_reg17;
+  qpoints->pReadBarrierMarkReg18 = art_quick_read_barrier_mark_reg18;
+  qpoints->pReadBarrierMarkReg19 = art_quick_read_barrier_mark_reg19;
+  qpoints->pReadBarrierMarkReg20 = art_quick_read_barrier_mark_reg20;
+  qpoints->pReadBarrierMarkReg21 = art_quick_read_barrier_mark_reg21;
+  qpoints->pReadBarrierMarkReg22 = art_quick_read_barrier_mark_reg22;
+  qpoints->pReadBarrierMarkReg23 = art_quick_read_barrier_mark_reg23;
+  qpoints->pReadBarrierMarkReg24 = art_quick_read_barrier_mark_reg24;
+  qpoints->pReadBarrierMarkReg25 = art_quick_read_barrier_mark_reg25;
+  qpoints->pReadBarrierMarkReg26 = art_quick_read_barrier_mark_reg26;
+  qpoints->pReadBarrierMarkReg27 = art_quick_read_barrier_mark_reg27;
+  qpoints->pReadBarrierMarkReg28 = art_quick_read_barrier_mark_reg28;
+  qpoints->pReadBarrierMarkReg29 = art_quick_read_barrier_mark_reg29;
+  qpoints->pReadBarrierMarkReg30 = nullptr;  // Cannot use register 30 (LR) to pass arguments.
+  qpoints->pReadBarrierMarkReg31 = nullptr;  // Cannot use register 31 (SP/XZR) to pass arguments.
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 10ee63f..a5be52d 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1253,6 +1253,22 @@
     .endif
 .endm
 
+// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
+// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
+.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
+    .ifc \xReg1, \xExclude
+        ldr \xReg2, [sp, #(\offset + 8)]        // restore xReg2
+    .else
+        .ifc \xReg2, \xExclude
+            ldr \xReg1, [sp, #\offset]          // restore xReg1
+        .else
+            ldp \xReg1, \xReg2, [sp, #\offset]  // restore xReg1 and xReg2
+        .endif
+    .endif
+    .cfi_restore \xReg1
+    .cfi_restore \xReg2
+.endm
+
     /*
      * Macro to insert read barrier, only used in art_quick_aput_obj.
      * xDest, wDest and xObj are registers, offset is a defined literal such as
@@ -1260,8 +1276,18 @@
      * name mismatch between instructions. This macro uses the lower 32b of register when possible.
      * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
      */
-.macro READ_BARRIER xDest, wDest, xObj, offset
+.macro READ_BARRIER xDest, wDest, xObj, xTemp, wTemp, offset, number
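+// xTemp/wTemp: scratch registers that hold the lock word on the Baker fast path.
+// number: suffix keeping the local labels unique across macro expansions.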
 #ifdef USE_READ_BARRIER
+#ifdef USE_BAKER_READ_BARRIER
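+    // Baker fast path: test the read barrier state bit of the lock word and take the
+    // runtime slow path only when it is set.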
+    ldr \wTemp, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tbnz \wTemp, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, .Lrb_slowpath\number
+    // False dependency to avoid needing load/load fence.
+    add \xObj, \xObj, \xTemp, lsr #32
+    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
+    UNPOISON_HEAP_REF \wDest
+    b .Lrb_exit\number
+#endif
+.Lrb_slowpath\number:
     // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
     stp x0, x1, [sp, #-48]!
     .cfi_adjust_cfa_offset 48
@@ -1295,6 +1321,7 @@
     .cfi_restore x30
     add sp, sp, #48
     .cfi_adjust_cfa_offset -48
+.Lrb_exit\number:
 #else
     ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
     UNPOISON_HEAP_REF \wDest
@@ -1333,12 +1360,12 @@
 #endif
 ENTRY art_quick_aput_obj
     cbz x2, .Ldo_aput_null
-    READ_BARRIER x3, w3, x0, MIRROR_OBJECT_CLASS_OFFSET     // Heap reference = 32b
-                                                         // This also zero-extends to x3
-    READ_BARRIER x4, w4, x2, MIRROR_OBJECT_CLASS_OFFSET     // Heap reference = 32b
-                                                         // This also zero-extends to x4
-    READ_BARRIER x3, w3, x3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET // Heap reference = 32b
-                                                         // This also zero-extends to x3
+    READ_BARRIER x3, w3, x0, x3, w3, MIRROR_OBJECT_CLASS_OFFSET, 0  // Heap reference = 32b
+                                                                    // This also zero-extends to x3
+    READ_BARRIER x3, w3, x3, x4, w4, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, 1 // Heap reference = 32b
+    // This also zero-extends to x3
+    READ_BARRIER x4, w4, x2, x4, w4, MIRROR_OBJECT_CLASS_OFFSET, 2  // Heap reference = 32b
+                                                                    // This also zero-extends to x4
     cmp w3, w4  // value's type == array's component type - trivial assignability
     bne .Lcheck_assignability
 .Ldo_aput:
@@ -1633,6 +1660,18 @@
 #endif
     POISON_HEAP_REF w2
     str    w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
+                                                              // Fence. This is "ish" not "ishst" so
+                                                              // that it also ensures ordering of
+                                                              // the class status load with respect
+                                                              // to later accesses to the class
+                                                              // object. Alternatively we could use
+                                                              // "ishst" if we use load-acquire for
+                                                              // the class status load.)
+                                                              // Needs to be done before pushing on
+                                                              // allocation since Heap::VisitObjects
+                                                              // relies on seeing the class pointer.
+                                                              // b/28790624
+    dmb    ish
                                                               // Push the new object onto the thread
                                                               // local allocation stack and
                                                               // increment the thread local
@@ -1647,14 +1686,7 @@
                                                               // and the list head store above using
                                                               // strd.
     str    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
-                                                              // Fence. This is "ish" not "ishst" so
-                                                              // that the code after this allocation
-                                                              // site will see the right values in
-                                                              // the fields of the class.
-                                                              // Alternatively we could use "ishst"
-                                                              // if we use load-acquire for the
-                                                              // class status load.)
-    dmb    ish
+
     mov    x0, x3                                             // Set the return value and return.
     ret
 .Lart_quick_alloc_object_rosalloc_slow_path:
@@ -2214,3 +2246,151 @@
     asr   x0, x0, #1
     ret
 END art_quick_indexof
+
+    /*
+     * Create a function `name` calling the ReadBarrier::Mark routine,
+     * getting its argument and returning its result through W register
+     * `wreg` (corresponding to X register `xreg`), saving and restoring
+     * all caller-save registers.
+     *
+     * If `wreg` is different from `w0`, the generated function follows a
+     * non-standard runtime calling convention:
+     * - register `wreg` is used to pass the (sole) argument of this
+     *   function (instead of W0);
+     * - register `wreg` is used to return the result of this function
+     *   (instead of W0);
+     * - W0 is treated like a normal (non-argument) caller-save register;
+     * - everything else is the same as in the standard runtime calling
+     *   convention (e.g. standard callee-save registers are preserved).
+     */
+.macro READ_BARRIER_MARK_REG name, wreg, xreg
+ENTRY \name
+    /*
+     * Allocate 46 stack slots * 8 = 368 bytes:
+     * - 20 slots for core registers X0-X19
+     * - 24 slots for floating-point registers D0-D7 and D16-D31
+     * -  1 slot for return address register XLR
+     * -  1 padding slot for 16-byte stack alignment
+     */
+    // Save all potentially live caller-save core registers.
+    stp   x0, x1,   [sp, #-368]!
+    .cfi_adjust_cfa_offset 368
+    .cfi_rel_offset x0, 0
+    .cfi_rel_offset x1, 8
+    stp   x2, x3,   [sp, #16]
+    .cfi_rel_offset x2, 16
+    .cfi_rel_offset x3, 24
+    stp   x4, x5,   [sp, #32]
+    .cfi_rel_offset x4, 32
+    .cfi_rel_offset x5, 40
+    stp   x6, x7,   [sp, #48]
+    .cfi_rel_offset x6, 48
+    .cfi_rel_offset x7, 56
+    stp   x8, x9,   [sp, #64]
+    .cfi_rel_offset x8, 64
+    .cfi_rel_offset x9, 72
+    stp   x10, x11, [sp, #80]
+    .cfi_rel_offset x10, 80
+    .cfi_rel_offset x11, 88
+    stp   x12, x13, [sp, #96]
+    .cfi_rel_offset x12, 96
+    .cfi_rel_offset x13, 104
+    stp   x14, x15, [sp, #112]
+    .cfi_rel_offset x14, 112
+    .cfi_rel_offset x15, 120
+    stp   x16, x17, [sp, #128]
+    .cfi_rel_offset x16, 128
+    .cfi_rel_offset x17, 136
+    stp   x18, x19, [sp, #144]
+    .cfi_rel_offset x18, 144
+    .cfi_rel_offset x19, 152
+    // Save all potentially live caller-save floating-point registers.
+    stp   d0, d1,   [sp, #160]
+    stp   d2, d3,   [sp, #176]
+    stp   d4, d5,   [sp, #192]
+    stp   d6, d7,   [sp, #208]
+    stp   d16, d17, [sp, #224]
+    stp   d18, d19, [sp, #240]
+    stp   d20, d21, [sp, #256]
+    stp   d22, d23, [sp, #272]
+    stp   d24, d25, [sp, #288]
+    stp   d26, d27, [sp, #304]
+    stp   d28, d29, [sp, #320]
+    stp   d30, d31, [sp, #336]
+    // Save return address.
+    str   xLR,      [sp, #352]
+    .cfi_rel_offset x30, 352
+    // (sp + #360 is a padding slot)
+
+    .ifnc \wreg, w0
+      mov   w0, \wreg                   // Pass arg1 - obj from `wreg`
+    .endif
+    bl    artReadBarrierMark            // artReadBarrierMark(obj)
+    .ifnc \wreg, w0
+      mov   \wreg, w0                   // Return result into `wreg`
+    .endif
+
+    // Restore core regs, except `xreg`, as `wreg` is used to return the
+    // result of this function (simply remove it from the stack instead).
+    POP_REGS_NE x0, x1,   0,   \xreg
+    POP_REGS_NE x2, x3,   16,  \xreg
+    POP_REGS_NE x4, x5,   32,  \xreg
+    POP_REGS_NE x6, x7,   48,  \xreg
+    POP_REGS_NE x8, x9,   64,  \xreg
+    POP_REGS_NE x10, x11, 80,  \xreg
+    POP_REGS_NE x12, x13, 96,  \xreg
+    POP_REGS_NE x14, x15, 112, \xreg
+    POP_REGS_NE x16, x17, 128, \xreg
+    POP_REGS_NE x18, x19, 144, \xreg
+    // Restore floating-point registers.
+    ldp   d0, d1,   [sp, #160]
+    ldp   d2, d3,   [sp, #176]
+    ldp   d4, d5,   [sp, #192]
+    ldp   d6, d7,   [sp, #208]
+    ldp   d16, d17, [sp, #224]
+    ldp   d18, d19, [sp, #240]
+    ldp   d20, d21, [sp, #256]
+    ldp   d22, d23, [sp, #272]
+    ldp   d24, d25, [sp, #288]
+    ldp   d26, d27, [sp, #304]
+    ldp   d28, d29, [sp, #320]
+    ldp   d30, d31, [sp, #336]
+    // Restore return address and remove padding.
+    ldr   xLR,      [sp, #352]
+    .cfi_restore x30
+    add sp, sp, #368
+    .cfi_adjust_cfa_offset -368
+    ret
+END \name
+.endm
+
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0,  x0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1,  x1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2,  x2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3,  x3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4,  x4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5,  x5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6,  x6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7,  x7
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8,  x8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9,  x9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
diff --git a/runtime/arch/mips/entrypoints_direct_mips.h b/runtime/arch/mips/entrypoints_direct_mips.h
index 5b74d62..937cd1e 100644
--- a/runtime/arch/mips/entrypoints_direct_mips.h
+++ b/runtime/arch/mips/entrypoints_direct_mips.h
@@ -46,7 +46,6 @@
       entrypoint == kQuickCmplDouble ||
       entrypoint == kQuickCmplFloat ||
       entrypoint == kQuickReadBarrierJni ||
-      entrypoint == kQuickReadBarrierMark ||
       entrypoint == kQuickReadBarrierSlow ||
       entrypoint == kQuickReadBarrierForRootSlow;
 }
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 6697a8d..22efd19 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -28,8 +28,8 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
 
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
@@ -284,8 +284,104 @@
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierJni), "Direct C stub not marked direct.");
-  qpoints->pReadBarrierMark = artReadBarrierMark;
-  static_assert(IsDirectEntrypoint(kQuickReadBarrierMark), "Direct C stub not marked direct.");
+  // Read barriers (and these entry points in particular) are not
+  // supported in the compiler on MIPS32.
+  qpoints->pReadBarrierMarkReg00 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg00),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg01 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg01),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg02 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg02),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg03 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg03),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg04 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg04),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg05 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg05),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg06 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg06),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg07 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg07),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg08 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg08),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg09 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg09),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg10 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg10),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg11 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg11),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg12 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg12),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg13 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg13),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg14 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg14),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg15 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg15),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg16),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg17),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg18),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg19),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg20),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg21),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg22),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg23),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg24),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg25),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg26),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg27),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg28),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg29 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg29),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg30 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg30),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg31 = nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg31),
+                "Non-direct C stub marked direct.");
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct.");
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 030c127..b02edb6 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -28,8 +28,8 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
 extern int32_t CmplDouble(double a, double b);
@@ -97,7 +97,40 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMark = artReadBarrierMark;
+  // Read barriers (and these entry points in particular) are not
+  // supported in the compiler on MIPS64.
+  qpoints->pReadBarrierMarkReg00 = nullptr;
+  qpoints->pReadBarrierMarkReg01 = nullptr;
+  qpoints->pReadBarrierMarkReg02 = nullptr;
+  qpoints->pReadBarrierMarkReg03 = nullptr;
+  qpoints->pReadBarrierMarkReg04 = nullptr;
+  qpoints->pReadBarrierMarkReg05 = nullptr;
+  qpoints->pReadBarrierMarkReg06 = nullptr;
+  qpoints->pReadBarrierMarkReg07 = nullptr;
+  qpoints->pReadBarrierMarkReg08 = nullptr;
+  qpoints->pReadBarrierMarkReg09 = nullptr;
+  qpoints->pReadBarrierMarkReg10 = nullptr;
+  qpoints->pReadBarrierMarkReg11 = nullptr;
+  qpoints->pReadBarrierMarkReg12 = nullptr;
+  qpoints->pReadBarrierMarkReg13 = nullptr;
+  qpoints->pReadBarrierMarkReg14 = nullptr;
+  qpoints->pReadBarrierMarkReg15 = nullptr;
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  qpoints->pReadBarrierMarkReg29 = nullptr;
+  qpoints->pReadBarrierMarkReg30 = nullptr;
+  qpoints->pReadBarrierMarkReg31 = nullptr;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index a7d6d6f..09af373 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -2153,6 +2153,8 @@
 #endif
 }
 
+// TODO: Exercise the ReadBarrierMarkRegX entry points.
+
 TEST_F(StubTest, ReadBarrier) {
 #if defined(ART_USE_READ_BARRIER) && (defined(__i386__) || defined(__arm__) || \
       defined(__aarch64__) || defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__)))
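
The TODO in stub_test.cc is a reminder that the new per-register stubs have no coverage yet. A hedged sketch of a first check, only asserting which slots an architecture is expected to populate; the test name and the way the entry point table is reached here are assumptions, not part of this change:

    // Hypothetical coverage sketch (not in this patch): on x86-64, registers
    // 0-15 except RSP should have a mark stub; everything else stays null.
    TEST_F(StubTest, ReadBarrierMarkRegSlots) {
    #if defined(ART_USE_READ_BARRIER) && defined(__x86_64__) && !defined(__APPLE__)
      JniEntryPoints jpoints;
      QuickEntryPoints qpoints;
      InitEntryPoints(&jpoints, &qpoints);  // Signature assumed.
      EXPECT_NE(qpoints.pReadBarrierMarkReg00, nullptr);
      EXPECT_EQ(qpoints.pReadBarrierMarkReg04, nullptr);  // RSP cannot carry the argument.
      EXPECT_EQ(qpoints.pReadBarrierMarkReg16, nullptr);  // Only 16 core registers.
    #endif
    }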
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 15a8571..4e9756c 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -25,11 +25,21 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
-                                            const mirror::Class* ref_class);
+extern "C" size_t art_quick_is_assignable(const mirror::Class* klass,
+                                          const mirror::Class* ref_class);
 
 // Read barrier entrypoints.
-extern "C" mirror::Object* art_quick_read_barrier_mark(mirror::Object*);
+// art_quick_read_barrier_mark_regX uses a non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
@@ -76,7 +86,39 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMark = art_quick_read_barrier_mark;
+  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (ESP) to pass arguments.
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  // x86 has only 8 core registers.
+  qpoints->pReadBarrierMarkReg08 = nullptr;
+  qpoints->pReadBarrierMarkReg09 = nullptr;
+  qpoints->pReadBarrierMarkReg10 = nullptr;
+  qpoints->pReadBarrierMarkReg11 = nullptr;
+  qpoints->pReadBarrierMarkReg12 = nullptr;
+  qpoints->pReadBarrierMarkReg13 = nullptr;
+  qpoints->pReadBarrierMarkReg14 = nullptr;
+  qpoints->pReadBarrierMarkReg15 = nullptr;
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  qpoints->pReadBarrierMarkReg29 = nullptr;
+  qpoints->pReadBarrierMarkReg30 = nullptr;
+  qpoints->pReadBarrierMarkReg31 = nullptr;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
   qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 };
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 6234f0f..77e04e7 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1908,15 +1908,81 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
-DEFINE_FUNCTION art_quick_read_barrier_mark
-    subl LITERAL(8), %esp            // alignment padding
-    CFI_ADJUST_CFA_OFFSET(8)
-    PUSH eax                         // pass arg1 - obj
+// Create a function `name` calling the ReadBarrier::Mark routine,
+// getting its argument and returning its result through register
+// `reg`, saving and restoring all caller-save registers.
+//
+// If `reg` is different from `eax`, the generated function follows a
+// non-standard runtime calling convention:
+// - register `reg` is used to pass the (sole) argument of this function
+//   (instead of EAX);
+// - register `reg` is used to return the result of this function
+//   (instead of EAX);
+// - EAX is treated like a normal (non-argument) caller-save register;
+// - everything else is the same as in the standard runtime calling
+//   convention (e.g. standard callee-save registers are preserved).
+MACRO2(READ_BARRIER_MARK_REG, name, reg)
+    DEFINE_FUNCTION VAR(name)
+    // Save all potentially live caller-save core registers.
+    PUSH eax
+    PUSH ecx
+    PUSH edx
+    PUSH ebx
+    // 8-byte align the stack to improve (8-byte) XMM register saving and restoring,
+    // and create space for caller-save floating-point registers.
+    subl MACRO_LITERAL(4 + 8 * 8), %esp
+    CFI_ADJUST_CFA_OFFSET(4 + 8 * 8)
+    // Save all potentially live caller-save floating-point registers.
+    movsd %xmm0, 0(%esp)
+    movsd %xmm1, 8(%esp)
+    movsd %xmm2, 16(%esp)
+    movsd %xmm3, 24(%esp)
+    movsd %xmm4, 32(%esp)
+    movsd %xmm5, 40(%esp)
+    movsd %xmm6, 48(%esp)
+    movsd %xmm7, 56(%esp)
+
+    subl LITERAL(4), %esp            // alignment padding
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH RAW_VAR(reg)                // pass arg1 - obj from `reg`
     call SYMBOL(artReadBarrierMark)  // artReadBarrierMark(obj)
-    addl LITERAL(12), %esp           // pop argument and remove padding
-    CFI_ADJUST_CFA_OFFSET(-12)
+    .ifnc RAW_VAR(reg), eax
+      movl %eax, REG_VAR(reg)        // return result into `reg`
+    .endif
+    addl LITERAL(8), %esp            // pop argument and remove padding
+    CFI_ADJUST_CFA_OFFSET(-8)
+
+    // Restore floating-point registers.
+    movsd 0(%esp), %xmm0
+    movsd 8(%esp), %xmm1
+    movsd 16(%esp), %xmm2
+    movsd 24(%esp), %xmm3
+    movsd 32(%esp), %xmm4
+    movsd 40(%esp), %xmm5
+    movsd 48(%esp), %xmm6
+    movsd 56(%esp), %xmm7
+    // Remove floating-point registers and padding.
+    addl MACRO_LITERAL(8 * 8 + 4), %esp
+    CFI_ADJUST_CFA_OFFSET(-(8 * 8 + 4))
+    // Restore core regs, except `reg`, as it is used to return the
+    // result of this function (simply remove it from the stack instead).
+    POP_REG_NE ebx, RAW_VAR(reg)
+    POP_REG_NE edx, RAW_VAR(reg)
+    POP_REG_NE ecx, RAW_VAR(reg)
+    POP_REG_NE eax, RAW_VAR(reg)
     ret
-END_FUNCTION art_quick_read_barrier_mark
+    END_FUNCTION VAR(name)
+END_MACRO
+
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, eax
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, ecx
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, edx
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, ebx
+// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (ESP)
+// cannot be used to pass arguments.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, ebp
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, esi
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, edi
 
 DEFINE_FUNCTION art_quick_read_barrier_slow
     PUSH edx                         // pass arg3 - offset
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index cf0039c..c4e723c 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -52,7 +52,7 @@
 
 #define LITERAL(value) $value
 #if defined(__APPLE__)
-    #define MACRO_LITERAL(value) $$(value)
+    #define MACRO_LITERAL(value) $(value)
 #else
     #define MACRO_LITERAL(value) $value
 #endif
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index bd6df70..c2e3023 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -28,11 +28,29 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
-                                                   const mirror::Class* ref_class);
+extern "C" size_t art_quick_assignable_from_code(const mirror::Class* klass,
+                                                 const mirror::Class* ref_class);
 
 // Read barrier entrypoints.
-extern "C" mirror::Object* art_quick_read_barrier_mark(mirror::Object*);
+// art_quick_read_barrier_mark_regX uses a non-standard calling
+// convention: it expects its input in register X and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg13(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg14(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg15(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
@@ -82,7 +100,39 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMark = art_quick_read_barrier_mark;
+  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
+  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
+  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
+  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (RSP) to pass arguments.
+  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
+  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
+  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
+  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
+  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
+  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
+  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
+  qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12;
+  qpoints->pReadBarrierMarkReg13 = art_quick_read_barrier_mark_reg13;
+  qpoints->pReadBarrierMarkReg14 = art_quick_read_barrier_mark_reg14;
+  qpoints->pReadBarrierMarkReg15 = art_quick_read_barrier_mark_reg15;
+  // x86-64 has only 16 core registers.
+  qpoints->pReadBarrierMarkReg16 = nullptr;
+  qpoints->pReadBarrierMarkReg17 = nullptr;
+  qpoints->pReadBarrierMarkReg18 = nullptr;
+  qpoints->pReadBarrierMarkReg19 = nullptr;
+  qpoints->pReadBarrierMarkReg20 = nullptr;
+  qpoints->pReadBarrierMarkReg21 = nullptr;
+  qpoints->pReadBarrierMarkReg22 = nullptr;
+  qpoints->pReadBarrierMarkReg23 = nullptr;
+  qpoints->pReadBarrierMarkReg24 = nullptr;
+  qpoints->pReadBarrierMarkReg25 = nullptr;
+  qpoints->pReadBarrierMarkReg26 = nullptr;
+  qpoints->pReadBarrierMarkReg27 = nullptr;
+  qpoints->pReadBarrierMarkReg28 = nullptr;
+  qpoints->pReadBarrierMarkReg29 = nullptr;
+  qpoints->pReadBarrierMarkReg30 = nullptr;
+  qpoints->pReadBarrierMarkReg31 = nullptr;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
   qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 #endif  // __APPLE__
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index e777e6c..784ec39 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1815,16 +1815,109 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
-DEFINE_FUNCTION art_quick_read_barrier_mark
+// Create a function `name` calling the ReadBarrier::Mark routine,
+// getting its argument and returning its result through register
+// `reg`, saving and restoring all caller-save registers.
+//
+// The generated function follows a non-standard runtime calling
+// convention:
+// - register `reg` (which may be different from RDI) is used to pass
+//   the (sole) argument of this function;
+// - register `reg` (which may be different from RAX) is used to return
+//   the result of this function (instead of RAX);
+// - if `reg` is different from `rdi`, RDI is treated like a normal
+//   (non-argument) caller-save register;
+// - if `reg` is different from `rax`, RAX is treated like a normal
+//   (non-result) caller-save register;
+// - everything else is the same as in the standard runtime calling
+//   convention (e.g. standard callee-save registers are preserved).
+MACRO2(READ_BARRIER_MARK_REG, name, reg)
+    DEFINE_FUNCTION VAR(name)
+    // Save all potentially live caller-save core registers.
+    PUSH rax
+    PUSH rcx
+    PUSH rdx
+    PUSH rsi
+    PUSH rdi
+    PUSH r8
+    PUSH r9
+    PUSH r10
+    PUSH r11
+    // Create space for caller-save floating-point registers.
+    subq MACRO_LITERAL(12 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(12 * 8)
+    // Save all potentially live caller-save floating-point registers.
+    movq %xmm0, 0(%rsp)
+    movq %xmm1, 8(%rsp)
+    movq %xmm2, 16(%rsp)
+    movq %xmm3, 24(%rsp)
+    movq %xmm4, 32(%rsp)
+    movq %xmm5, 40(%rsp)
+    movq %xmm6, 48(%rsp)
+    movq %xmm7, 56(%rsp)
+    movq %xmm8, 64(%rsp)
+    movq %xmm9, 72(%rsp)
+    movq %xmm10, 80(%rsp)
+    movq %xmm11, 88(%rsp)
     SETUP_FP_CALLEE_SAVE_FRAME
-    subq LITERAL(8), %rsp           // Alignment padding.
-    CFI_ADJUST_CFA_OFFSET(8)
+
+    .ifnc RAW_VAR(reg), rdi
+      movq REG_VAR(reg), %rdi       // Pass arg1 - obj from `reg`.
+    .endif
     call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
-    addq LITERAL(8), %rsp
-    CFI_ADJUST_CFA_OFFSET(-8)
+    .ifnc RAW_VAR(reg), rax
+      movq %rax, REG_VAR(reg)       // Return result into `reg`.
+    .endif
+
     RESTORE_FP_CALLEE_SAVE_FRAME
+    // Restore floating-point registers.
+    movq 0(%rsp), %xmm0
+    movq 8(%rsp), %xmm1
+    movq 16(%rsp), %xmm2
+    movq 24(%rsp), %xmm3
+    movq 32(%rsp), %xmm4
+    movq 40(%rsp), %xmm5
+    movq 48(%rsp), %xmm6
+    movq 56(%rsp), %xmm7
+    movq 64(%rsp), %xmm8
+    movq 72(%rsp), %xmm9
+    movq 80(%rsp), %xmm10
+    movq 88(%rsp), %xmm11
+    // Remove floating-point registers.
+    addq MACRO_LITERAL(12 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-(12 * 8))
+    // Restore core regs, except `reg`, as it is used to return the
+    // result of this function (simply remove it from the stack instead).
+    POP_REG_NE r11, RAW_VAR(reg)
+    POP_REG_NE r10, RAW_VAR(reg)
+    POP_REG_NE r9, RAW_VAR(reg)
+    POP_REG_NE r8, RAW_VAR(reg)
+    POP_REG_NE rdi, RAW_VAR(reg)
+    POP_REG_NE rsi, RAW_VAR(reg)
+    POP_REG_NE rdx, RAW_VAR(reg)
+    POP_REG_NE rcx, RAW_VAR(reg)
+    POP_REG_NE rax, RAW_VAR(reg)
     ret
-END_FUNCTION art_quick_read_barrier_slow
+    END_FUNCTION VAR(name)
+END_MACRO
+
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
+// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
+// cannot be used to pass arguments.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15
 
 DEFINE_FUNCTION art_quick_read_barrier_slow
     SETUP_FP_CALLEE_SAVE_FRAME
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 26450c4..32425d8 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -120,6 +120,10 @@
   return dex_method_index_;
 }
 
+inline uint32_t ArtMethod::GetImtIndex() {
+  return GetDexMethodIndex() % ImTable::kSize;
+}
+
 inline ArtMethod** ArtMethod::GetDexCacheResolvedMethods(size_t pointer_size) {
   return GetNativePointer<ArtMethod**>(DexCacheResolvedMethodsOffset(pointer_size),
                                        pointer_size);
@@ -503,7 +507,7 @@
       SetEntryPointFromJniPtrSize(new_native_code, pointer_size);
     }
   } else {
-    DCHECK(GetEntryPointFromJniPtrSize(pointer_size) == nullptr);
+    DCHECK(GetDataPtrSize(pointer_size) == nullptr);
   }
   const void* old_code = GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
   const void* new_code = visitor(old_code);
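
GetImtIndex gives the IMT slot computation a single home; the class_linker.cc and entrypoint_utils-inl.h hunks later in this patch switch their duplicated `% ImTable::kSize` expressions over to it. A minimal sketch of the mapping, with the table size assumed to be 64 (the default ImTable::kSize):

    // Sketch of the IMT slot mapping behind GetImtIndex: the interface
    // method's dex method index is folded onto a fixed-size table, so indices
    // that collide modulo the table size share a slot and are disambiguated
    // at runtime through an ImtConflictTable.
    #include <cstdint>

    constexpr uint32_t kImtSizeSketch = 64;  // Assumed value of ImTable::kSize.

    constexpr uint32_t ImtIndexFor(uint32_t dex_method_index) {
      return dex_method_index % kImtSizeSketch;
    }

    static_assert(ImtIndexFor(3) == 3, "small indices map directly");
    static_assert(ImtIndexFor(kImtSizeSketch + 3) == 3, "collisions wrap around");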
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index f86cb13..113827a 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -16,6 +16,8 @@
 
 #include "art_method.h"
 
+#include <cstddef>
+
 #include "arch/context.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
@@ -497,4 +499,24 @@
   hotness_count_ = 0;
 }
 
+bool ArtMethod::IsImagePointerSize(size_t pointer_size) {
+  // Hijack this function to get access to PtrSizedFieldsOffset.
+  //
+  // Ensure that PtrSizedFieldsOffset is correct. We rely here on usually having both 32-bit and
+  // 64-bit builds.
+  static_assert(std::is_standard_layout<ArtMethod>::value, "ArtMethod is not standard layout.");
+  static_assert((sizeof(void*) != 4) ||
+                    (offsetof(ArtMethod, ptr_sized_fields_) == PtrSizedFieldsOffset(4)),
+                "Unexpected 32-bit class layout.");
+  static_assert((sizeof(void*) != 8) ||
+                    (offsetof(ArtMethod, ptr_sized_fields_) == PtrSizedFieldsOffset(8)),
+                "Unexpected 64-bit class layout.");
+
+  Runtime* runtime = Runtime::Current();
+  if (runtime == nullptr) {
+    return true;
+  }
+  return runtime->GetClassLinker()->GetImagePointerSize() == pointer_size;
+}
+
 }  // namespace art
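
IsImagePointerSize doubles as the home for compile-time layout checks: it pins offsetof(ArtMethod, ptr_sized_fields_) against the constexpr PtrSizedFieldsOffset() for both pointer sizes. A worked sketch of the rounding those asserts rely on, with an illustrative end-of-field offset (the number is an assumption; only the RoundUp relationship matters):

    // Sketch of the padding rule checked above: ptr_sized_fields_ must begin
    // where hotness_count_ ends, rounded up to the image pointer size.
    #include <cstddef>

    constexpr size_t RoundUpSketch(size_t value, size_t alignment) {
      return (value + alignment - 1) & ~(alignment - 1);
    }

    constexpr size_t kEndOfHotnessCount = 22;  // Illustrative offset, not the real layout.

    static_assert(RoundUpSketch(kEndOfHotnessCount, 4) == 24, "32-bit: pad to 4 bytes");
    static_assert(RoundUpSketch(kEndOfHotnessCount, 8) == 24, "64-bit: pad to 8 bytes");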
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 90b2406..1d14203 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_ART_METHOD_H_
 #define ART_RUNTIME_ART_METHOD_H_
 
+#include <cstddef>
+
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "dex_file.h"
@@ -219,7 +221,7 @@
 class ArtMethod FINAL {
  public:
   ArtMethod() : access_flags_(0), dex_code_item_offset_(0), dex_method_index_(0),
-      method_index_(0) { }
+      method_index_(0), hotness_count_(0) { }
 
   ArtMethod(ArtMethod* src, size_t image_pointer_size) {
     CopyFrom(src, image_pointer_size);
@@ -419,6 +421,8 @@
 
   ALWAYS_INLINE uint32_t GetDexMethodIndex() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  ALWAYS_INLINE uint32_t GetImtIndex() SHARED_REQUIRES(Locks::mutator_lock_);
+
   void SetDexMethodIndex(uint32_t new_idx) {
     // Not called within a transaction.
     dex_method_index_ = new_idx;
@@ -506,9 +510,13 @@
         PtrSizedFields, dex_cache_resolved_types_) / sizeof(void*) * pointer_size);
   }
 
-  static MemberOffset EntryPointFromJniOffset(size_t pointer_size) {
+  static MemberOffset DataOffset(size_t pointer_size) {
     return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
-        PtrSizedFields, entry_point_from_jni_) / sizeof(void*) * pointer_size);
+        PtrSizedFields, data_) / sizeof(void*) * pointer_size);
+  }
+
+  static MemberOffset EntryPointFromJniOffset(size_t pointer_size) {
+    return DataOffset(pointer_size);
   }
 
   static MemberOffset EntryPointFromQuickCompiledCodeOffset(size_t pointer_size) {
@@ -516,37 +524,40 @@
         PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*) * pointer_size);
   }
 
-  ProfilingInfo* GetProfilingInfo(size_t pointer_size) {
-    return reinterpret_cast<ProfilingInfo*>(GetEntryPointFromJniPtrSize(pointer_size));
-  }
-
   ImtConflictTable* GetImtConflictTable(size_t pointer_size) {
     DCHECK(IsRuntimeMethod());
-    return reinterpret_cast<ImtConflictTable*>(GetEntryPointFromJniPtrSize(pointer_size));
+    return reinterpret_cast<ImtConflictTable*>(GetDataPtrSize(pointer_size));
   }
 
   ALWAYS_INLINE void SetImtConflictTable(ImtConflictTable* table, size_t pointer_size) {
-    SetEntryPointFromJniPtrSize(table, pointer_size);
+    DCHECK(IsRuntimeMethod());
+    SetDataPtrSize(table, pointer_size);
+  }
+
+  ProfilingInfo* GetProfilingInfo(size_t pointer_size) {
+    return reinterpret_cast<ProfilingInfo*>(GetDataPtrSize(pointer_size));
   }
 
   ALWAYS_INLINE void SetProfilingInfo(ProfilingInfo* info) {
-    SetEntryPointFromJniPtrSize(info, sizeof(void*));
+    SetDataPtrSize(info, sizeof(void*));
   }
 
   ALWAYS_INLINE void SetProfilingInfoPtrSize(ProfilingInfo* info, size_t pointer_size) {
-    SetEntryPointFromJniPtrSize(info, pointer_size);
+    SetDataPtrSize(info, pointer_size);
   }
 
   static MemberOffset ProfilingInfoOffset() {
-    return EntryPointFromJniOffset(sizeof(void*));
+    DCHECK(IsImagePointerSize(sizeof(void*)));
+    return DataOffset(sizeof(void*));
   }
 
   void* GetEntryPointFromJni() {
+    DCHECK(IsNative());
     return GetEntryPointFromJniPtrSize(sizeof(void*));
   }
 
   ALWAYS_INLINE void* GetEntryPointFromJniPtrSize(size_t pointer_size) {
-    return GetNativePointer<void*>(EntryPointFromJniOffset(pointer_size), pointer_size);
+    return GetDataPtrSize(pointer_size);
   }
 
   void SetEntryPointFromJni(const void* entrypoint) {
@@ -555,7 +566,17 @@
   }
 
   ALWAYS_INLINE void SetEntryPointFromJniPtrSize(const void* entrypoint, size_t pointer_size) {
-    SetNativePointer(EntryPointFromJniOffset(pointer_size), entrypoint, pointer_size);
+    SetDataPtrSize(entrypoint, pointer_size);
+  }
+
+  ALWAYS_INLINE void* GetDataPtrSize(size_t pointer_size) {
+    DCHECK(IsImagePointerSize(pointer_size));
+    return GetNativePointer<void*>(DataOffset(pointer_size), pointer_size);
+  }
+
+  ALWAYS_INLINE void SetDataPtrSize(const void* data, size_t pointer_size) {
+    DCHECK(IsImagePointerSize(pointer_size));
+    SetNativePointer(DataOffset(pointer_size), data, pointer_size);
   }
 
   // Is this a CalleSaveMethod or ResolutionMethod and therefore doesn't adhere to normal
@@ -640,7 +661,7 @@
 
   // Size of an instance of this native class.
   static size_t Size(size_t pointer_size) {
-    return RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size) +
+    return PtrSizedFieldsOffset(pointer_size) +
         (sizeof(PtrSizedFields) / sizeof(void*)) * pointer_size;
   }
 
@@ -727,9 +748,7 @@
   // Fake padding field gets inserted here.
 
   // Must be the last fields in the method.
-  // PACKED(4) is necessary for the correctness of
-  // RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size).
-  struct PACKED(4) PtrSizedFields {
+  struct PtrSizedFields {
     // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
     ArtMethod** dex_cache_resolved_methods_;
 
@@ -738,7 +757,7 @@
 
     // Pointer to JNI function registered to this method, or a function to resolve the JNI function,
     // or the profiling data for non-native methods, or an ImtConflictTable.
-    void* entry_point_from_jni_;
+    void* data_;
 
     // Method dispatch from quick compiled code invokes this pointer which may cause bridging into
     // the interpreter.
@@ -746,11 +765,14 @@
   } ptr_sized_fields_;
 
  private:
-  static size_t PtrSizedFieldsOffset(size_t pointer_size) {
-    // Round up to pointer size for padding field.
-    return RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size);
+  static constexpr size_t PtrSizedFieldsOffset(size_t pointer_size) {
+    // Round up to pointer size for padding field. Tested in art_method.cc.
+    return RoundUp(offsetof(ArtMethod, hotness_count_) + sizeof(hotness_count_), pointer_size);
   }
 
+  // Compare given pointer size to the image pointer size.
+  static bool IsImagePointerSize(size_t pointer_size);
+
   template<typename T>
   ALWAYS_INLINE T GetNativePointer(MemberOffset offset, size_t pointer_size) const {
     static_assert(std::is_pointer<T>::value, "T must be a pointer type");
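
The renamed data_ slot is deliberately overloaded: it carries the JNI entry point for native methods, the ImtConflictTable for runtime methods, and the ProfilingInfo for JIT-profiled methods, with GetDataPtrSize/SetDataPtrSize as the common accessors. A simplified sketch of that multiplexing over a single pointer-sized field (not the real class, and fixed to one pointer size):

    // One pointer-sized slot, three interpretations; which one applies is a
    // property of the method, mirroring the accessors in the hunk above.
    struct ImtConflictTableSketch;
    struct ProfilingInfoSketch;

    class ArtMethodDataSketch {
     public:
      void SetJniEntrypoint(const void* fn) { data_ = const_cast<void*>(fn); }  // native methods
      void SetImtConflictTable(ImtConflictTableSketch* t) { data_ = t; }        // runtime methods
      void SetProfilingInfo(ProfilingInfoSketch* info) { data_ = info; }        // JIT-profiled methods

      ImtConflictTableSketch* GetImtConflictTable() const {
        return static_cast<ImtConflictTableSketch*>(data_);
      }

     private:
      void* data_ = nullptr;  // Same storage; meaning depends on the method kind.
    };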
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index da68923..50a786f 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -127,20 +127,20 @@
 ADD_TEST_EQ(THREAD_SELF_OFFSET,
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
+// Offset of field Thread::tlsPtr_.thread_local_objects.
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 199 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
+            art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 168 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
 #define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
             art::Thread::ThreadLocalEndOffset<__SIZEOF_POINTER__>().Int32Value())
-// Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__)
-ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
-            art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_current_ibase.
-#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__)
+#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET,
             art::Thread::MterpCurrentIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_default_ibase.
diff --git a/runtime/base/file_magic.cc b/runtime/base/file_magic.cc
index 9756338..de6f423 100644
--- a/runtime/base/file_magic.cc
+++ b/runtime/base/file_magic.cc
@@ -21,27 +21,28 @@
 #include <sys/types.h>
 
 #include "base/logging.h"
+#include "base/unix_file/fd_file.h"
 #include "dex_file.h"
 #include "stringprintf.h"
 
 namespace art {
 
-ScopedFd OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
+File OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
   CHECK(magic != nullptr);
-  ScopedFd fd(open(filename, O_RDONLY, 0));
-  if (fd.get() == -1) {
+  File fd(filename, O_RDONLY, /* check_usage */ false);
+  if (fd.Fd() == -1) {
     *error_msg = StringPrintf("Unable to open '%s' : %s", filename, strerror(errno));
-    return ScopedFd();
+    return File();
   }
-  int n = TEMP_FAILURE_RETRY(read(fd.get(), magic, sizeof(*magic)));
+  int n = TEMP_FAILURE_RETRY(read(fd.Fd(), magic, sizeof(*magic)));
   if (n != sizeof(*magic)) {
     *error_msg = StringPrintf("Failed to find magic in '%s'", filename);
-    return ScopedFd();
+    return File();
   }
-  if (lseek(fd.get(), 0, SEEK_SET) != 0) {
+  if (lseek(fd.Fd(), 0, SEEK_SET) != 0) {
     *error_msg = StringPrintf("Failed to seek to beginning of file '%s' : %s", filename,
                               strerror(errno));
-    return ScopedFd();
+    return File();
   }
   return fd;
 }
diff --git a/runtime/base/file_magic.h b/runtime/base/file_magic.h
index f7e4bad..4b5d2f5 100644
--- a/runtime/base/file_magic.h
+++ b/runtime/base/file_magic.h
@@ -20,12 +20,12 @@
 #include <stdint.h>
 #include <string>
 
-#include "ScopedFd.h"
+#include "os.h"
 
 namespace art {
 
 // Open file and read magic number
-ScopedFd OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg);
+File OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg);
 
 // Check whether the given magic matches a known file type.
 bool IsZipMagic(uint32_t magic);
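
With OpenAndReadMagic returning a File by value rather than a ScopedFd, callers check Fd()/IsOpened() and use Release() to hand the descriptor to whatever takes ownership next, exactly as the dex_file.cc hunk below does. A short usage sketch under those assumptions (error handling trimmed; the probe function itself is illustrative):

    // Hedged usage sketch for the new signature: the returned File owns the
    // descriptor until Release() transfers it to another owner.
    #include <cstdint>
    #include <string>
    #include <unistd.h>

    bool LooksLikeZip(const char* filename) {
      uint32_t magic = 0;
      std::string error_msg;
      art::File file = art::OpenAndReadMagic(filename, &magic, &error_msg);
      if (file.Fd() == -1) {
        return false;  // error_msg says why the open or the 4-byte read failed.
      }
      if (art::IsZipMagic(magic)) {
        int raw_fd = file.Release();  // In real code this goes to ZipArchive::OpenFromFd().
        close(raw_fd);                // The new owner is responsible for closing it.
        return true;
      }
      return false;  // File's destructor closes the descriptor here.
    }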
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index e4097dd..6f0e125 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -53,7 +53,15 @@
       fd_(fd), file_path_(path), auto_close_(true), read_only_mode_(read_only_mode) {
 }
 
-FdFile::~FdFile() {
+FdFile::FdFile(const std::string& path, int flags, mode_t mode, bool check_usage)
+    : fd_(-1), auto_close_(true) {
+  Open(path, flags, mode);
+  if (!check_usage || !IsOpened()) {
+    guard_state_ = GuardState::kNoCheck;
+  }
+}
+
+void FdFile::Destroy() {
   if (kCheckSafeUsage && (guard_state_ < GuardState::kNoCheck)) {
     if (guard_state_ < GuardState::kFlushed) {
       LOG(::art::ERROR) << "File " << file_path_ << " wasn't explicitly flushed before destruction.";
@@ -70,6 +78,28 @@
   }
 }
 
+FdFile& FdFile::operator=(FdFile&& other) {
+  if (this == &other) {
+    return *this;
+  }
+
+  if (this->fd_ != other.fd_) {
+    Destroy();  // Free old state.
+  }
+
+  guard_state_ = other.guard_state_;
+  fd_ = other.fd_;
+  file_path_ = std::move(other.file_path_);
+  auto_close_ = other.auto_close_;
+  other.Release();  // Release other.
+
+  return *this;
+}
+
+FdFile::~FdFile() {
+  Destroy();
+}
+
 void FdFile::moveTo(GuardState target, GuardState warn_threshold, const char* warning) {
   if (kCheckSafeUsage) {
     if (guard_state_ < GuardState::kNoCheck) {
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index 16cd44f..d896ee9 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -18,7 +18,9 @@
 #define ART_RUNTIME_BASE_UNIX_FILE_FD_FILE_H_
 
 #include <fcntl.h>
+
 #include <string>
+
 #include "base/unix_file/random_access_file.h"
 #include "base/macros.h"
 
@@ -39,6 +41,46 @@
   FdFile(int fd, const std::string& path, bool checkUsage);
   FdFile(int fd, const std::string& path, bool checkUsage, bool read_only_mode);
 
+  FdFile(const std::string& path, int flags, bool checkUsage)
+      : FdFile(path, flags, 0640, checkUsage) {}
+  FdFile(const std::string& path, int flags, mode_t mode, bool checkUsage);
+
+  // Move constructor.
+  FdFile(FdFile&& other)
+      : guard_state_(other.guard_state_),
+        fd_(other.fd_),
+        file_path_(std::move(other.file_path_)),
+        auto_close_(other.auto_close_),
+        read_only_mode_(other.read_only_mode_) {
+    other.Release();  // Release the src.
+  }
+
+  // Move assignment operator.
+  FdFile& operator=(FdFile&& other);
+
+  // Release the file descriptor. This will make further accesses to this FdFile invalid. Disables
+  // all further state checking.
+  int Release() {
+    int tmp_fd = fd_;
+    fd_ = -1;
+    guard_state_ = GuardState::kNoCheck;
+    auto_close_ = false;
+    return tmp_fd;
+  }
+
+  void Reset(int fd, bool check_usage) {
+    if (fd_ != -1 && fd_ != fd) {
+      Destroy();
+    }
+    fd_ = fd;
+    if (check_usage) {
+      guard_state_ = fd == -1 ? GuardState::kNoCheck : GuardState::kBase;
+    } else {
+      guard_state_ = GuardState::kNoCheck;
+    }
+    // Keep the auto_close_ state.
+  }
+
   // Destroys an FdFile, closing the file descriptor if Close hasn't already
   // been called. (If you care about the return value of Close, call it
   // yourself; this is meant to handle failure cases and read-only accesses.
@@ -46,10 +88,6 @@
   // guarantee that data actually made it to stable storage.)
   virtual ~FdFile();
 
-  // Opens file 'file_path' using 'flags' and 'mode'.
-  bool Open(const std::string& file_path, int flags);
-  bool Open(const std::string& file_path, int flags, mode_t mode);
-
   // RandomAccessFile API.
   int Close() OVERRIDE WARN_UNUSED;
   int64_t Read(char* buf, int64_t byte_count, int64_t offset) const OVERRIDE WARN_UNUSED;
@@ -119,10 +157,16 @@
 
   GuardState guard_state_;
 
+  // Opens file 'file_path' using 'flags' and 'mode'.
+  bool Open(const std::string& file_path, int flags);
+  bool Open(const std::string& file_path, int flags, mode_t mode);
+
  private:
   template <bool kUseOffset>
   bool WriteFullyGeneric(const void* buffer, size_t byte_count, size_t offset);
 
+  void Destroy();  // For ~FdFile and operator=(&&).
+
   int fd_;
   std::string file_path_;
   bool auto_close_;
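
Release() and Reset() give FdFile explicit ownership-transfer semantics on top of the new move constructor and move assignment. A minimal sketch of the intended contract (path, flags, and mode are illustrative):

    // After Release() an FdFile neither closes nor guards the descriptor;
    // after a move, only the destination object does.
    #include <fcntl.h>
    #include <unistd.h>
    #include <utility>

    void OwnershipSketch() {
      unix_file::FdFile file("/tmp/fd_file_sketch.txt", O_CREAT | O_WRONLY, 0644,
                             /* check_usage */ false);
      if (!file.IsOpened()) {
        return;
      }
      unix_file::FdFile moved = std::move(file);  // `file` is released; its fd becomes -1.
      int raw_fd = moved.Release();               // `moved` gives the descriptor up as well.
      close(raw_fd);                              // The caller now owns and closes it.
    }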
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 9bc87e5..db3a44f 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -49,29 +49,28 @@
 
 TEST_F(FdFileTest, OpenClose) {
   std::string good_path(GetTmpPath("some-file.txt"));
-  FdFile file;
-  ASSERT_TRUE(file.Open(good_path, O_CREAT | O_WRONLY));
+  FdFile file(good_path, O_CREAT | O_WRONLY, true);
+  ASSERT_TRUE(file.IsOpened());
   EXPECT_GE(file.Fd(), 0);
   EXPECT_TRUE(file.IsOpened());
   EXPECT_EQ(0, file.Flush());
   EXPECT_EQ(0, file.Close());
   EXPECT_EQ(-1, file.Fd());
   EXPECT_FALSE(file.IsOpened());
-  EXPECT_TRUE(file.Open(good_path,  O_RDONLY));
-  EXPECT_GE(file.Fd(), 0);
-  EXPECT_TRUE(file.IsOpened());
+  FdFile file2(good_path,  O_RDONLY, true);
+  EXPECT_TRUE(file2.IsOpened());
+  EXPECT_GE(file2.Fd(), 0);
 
-  ASSERT_EQ(file.Close(), 0);
+  ASSERT_EQ(file2.Close(), 0);
   ASSERT_EQ(unlink(good_path.c_str()), 0);
 }
 
 TEST_F(FdFileTest, ReadFullyEmptyFile) {
   // New scratch file, zero-length.
   art::ScratchFile tmp;
-  FdFile file;
-  ASSERT_TRUE(file.Open(tmp.GetFilename(), O_RDONLY));
+  FdFile file(tmp.GetFilename(), O_RDONLY, false);
+  ASSERT_TRUE(file.IsOpened());
   EXPECT_GE(file.Fd(), 0);
-  EXPECT_TRUE(file.IsOpened());
   uint8_t buffer[16];
   EXPECT_FALSE(file.ReadFully(&buffer, 4));
 }
@@ -84,10 +83,9 @@
 TEST_F(FdFileTest, ReadFullyWithOffset) {
   // New scratch file, zero-length.
   art::ScratchFile tmp;
-  FdFile file;
-  ASSERT_TRUE(file.Open(tmp.GetFilename(), O_RDWR));
+  FdFile file(tmp.GetFilename(), O_RDWR, false);
+  ASSERT_TRUE(file.IsOpened());
   EXPECT_GE(file.Fd(), 0);
-  EXPECT_TRUE(file.IsOpened());
 
   char ignore_prefix[20] = {'a', };
   NullTerminateCharArray(ignore_prefix);
@@ -113,9 +111,8 @@
 TEST_F(FdFileTest, ReadWriteFullyWithOffset) {
   // New scratch file, zero-length.
   art::ScratchFile tmp;
-  FdFile file;
-  ASSERT_TRUE(file.Open(tmp.GetFilename(), O_RDWR));
-  EXPECT_GE(file.Fd(), 0);
+  FdFile file(tmp.GetFilename(), O_RDWR, false);
+  ASSERT_GE(file.Fd(), 0);
   EXPECT_TRUE(file.IsOpened());
 
   const char* test_string = "This is a test string";
@@ -140,8 +137,7 @@
 
 TEST_F(FdFileTest, Copy) {
   art::ScratchFile src_tmp;
-  FdFile src;
-  ASSERT_TRUE(src.Open(src_tmp.GetFilename(), O_RDWR));
+  FdFile src(src_tmp.GetFilename(), O_RDWR, false);
   ASSERT_GE(src.Fd(), 0);
   ASSERT_TRUE(src.IsOpened());
 
@@ -151,8 +147,7 @@
   ASSERT_EQ(static_cast<int64_t>(sizeof(src_data)), src.GetLength());
 
   art::ScratchFile dest_tmp;
-  FdFile dest;
-  ASSERT_TRUE(dest.Open(src_tmp.GetFilename(), O_RDWR));
+  FdFile dest(src_tmp.GetFilename(), O_RDWR, false);
   ASSERT_GE(dest.Fd(), 0);
   ASSERT_TRUE(dest.IsOpened());
 
@@ -168,4 +163,22 @@
   ASSERT_EQ(0, src.Close());
 }
 
+TEST_F(FdFileTest, MoveConstructor) {
+  // New scratch file, zero-length.
+  art::ScratchFile tmp;
+  FdFile file(tmp.GetFilename(), O_RDWR, false);
+  ASSERT_TRUE(file.IsOpened());
+  EXPECT_GE(file.Fd(), 0);
+
+  int old_fd = file.Fd();
+
+  FdFile file2(std::move(file));
+  EXPECT_FALSE(file.IsOpened());
+  EXPECT_TRUE(file2.IsOpened());
+  EXPECT_EQ(old_fd, file2.Fd());
+
+  ASSERT_EQ(file2.Flush(), 0);
+  ASSERT_EQ(file2.Close(), 0);
+}
+
 }  // namespace unix_file
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index cb97faa..d0dad64 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -6159,11 +6159,6 @@
   }
 }
 
-static inline uint32_t GetIMTIndex(ArtMethod* interface_method)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  return interface_method->GetDexMethodIndex() % ImTable::kSize;
-}
-
 ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count,
                                                       LinearAlloc* linear_alloc,
                                                       size_t image_pointer_size) {
@@ -6215,7 +6210,7 @@
       // or interface methods in the IMT here they will not create extra conflicts since we compare
       // names and signatures in SetIMTRef.
       ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
-      const uint32_t imt_index = GetIMTIndex(interface_method);
+      const uint32_t imt_index = interface_method->GetImtIndex();
 
       // There is only any conflicts if all of the interface methods for an IMT slot don't have
       // the same implementation method, keep track of this to avoid creating a conflict table in
@@ -6269,7 +6264,7 @@
         }
         DCHECK(implementation_method != nullptr);
         ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
-        const uint32_t imt_index = GetIMTIndex(interface_method);
+        const uint32_t imt_index = interface_method->GetImtIndex();
         if (!imt[imt_index]->IsRuntimeMethod() ||
             imt[imt_index] == unimplemented_method ||
             imt[imt_index] == imt_conflict_method) {
@@ -6675,7 +6670,7 @@
         auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_);
         MethodNameAndSignatureComparator interface_name_comparator(
             interface_method->GetInterfaceMethodIfProxy(image_pointer_size_));
-        uint32_t imt_index = GetIMTIndex(interface_method);
+        uint32_t imt_index = interface_method->GetImtIndex();
         ArtMethod** imt_ptr = &out_imt[imt_index];
         // For each method listed in the interface's method list, find the
         // matching method in our class's method list.  We want to favor the
@@ -7700,7 +7695,7 @@
   }
 
   if (is_static) {
-    resolved = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx);
+    resolved = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx);
   } else {
     resolved = klass->FindInstanceField(dex_cache.Get(), field_idx);
   }
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 5a203af..5d9ae14 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -35,6 +35,7 @@
 #include "base/stl_util.h"
 #include "base/stringprintf.h"
 #include "base/systrace.h"
+#include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "dex_file_verifier.h"
@@ -54,11 +55,6 @@
 #include "well_known_classes.h"
 #include "zip_archive.h"
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wshadow"
-#include "ScopedFd.h"
-#pragma GCC diagnostic pop
-
 namespace art {
 
 const uint8_t DexFile::kDexMagic[] = { 'd', 'e', 'x', '\n' };
@@ -85,14 +81,14 @@
     DCHECK_EQ(zip_entry_name[-1], kMultiDexSeparator);
   }
 
-  ScopedFd fd(OpenAndReadMagic(file_part, &magic, error_msg));
-  if (fd.get() == -1) {
+  File fd = OpenAndReadMagic(file_part, &magic, error_msg);
+  if (fd.Fd() == -1) {
     DCHECK(!error_msg->empty());
     return false;
   }
   if (IsZipMagic(magic)) {
     std::unique_ptr<ZipArchive> zip_archive(
-        ZipArchive::OpenFromFd(fd.release(), filename, error_msg));
+        ZipArchive::OpenFromFd(fd.Release(), filename, error_msg));
     if (zip_archive.get() == nullptr) {
       *error_msg = StringPrintf("Failed to open zip archive '%s' (error msg: %s)", file_part,
                                 error_msg->c_str());
@@ -109,7 +105,7 @@
   }
   if (IsDexMagic(magic)) {
     std::unique_ptr<const DexFile> dex_file(
-        DexFile::OpenFile(fd.release(), filename, false, false, error_msg));
+        DexFile::OpenFile(fd.Release(), filename, false, false, error_msg));
     if (dex_file.get() == nullptr) {
       return false;
     }
@@ -128,16 +124,16 @@
   ScopedTrace trace(std::string("Open dex file ") + location);
   DCHECK(dex_files != nullptr) << "DexFile::Open: out-param is nullptr";
   uint32_t magic;
-  ScopedFd fd(OpenAndReadMagic(filename, &magic, error_msg));
-  if (fd.get() == -1) {
+  File fd = OpenAndReadMagic(filename, &magic, error_msg);
+  if (fd.Fd() == -1) {
     DCHECK(!error_msg->empty());
     return false;
   }
   if (IsZipMagic(magic)) {
-    return DexFile::OpenZip(fd.release(), location, verify_checksum, error_msg, dex_files);
+    return DexFile::OpenZip(fd.Release(), location, verify_checksum, error_msg, dex_files);
   }
   if (IsDexMagic(magic)) {
-    std::unique_ptr<const DexFile> dex_file(DexFile::OpenFile(fd.release(),
+    std::unique_ptr<const DexFile> dex_file(DexFile::OpenFile(fd.Release(),
                                                               location,
                                                               /* verify */ true,
                                                               verify_checksum,
@@ -166,12 +162,12 @@
 bool DexFile::MaybeDex(const char* filename) {
   uint32_t magic;
   std::string error_msg;
-  ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
-  if (fd.get() == -1) {
+  File fd = OpenAndReadMagic(filename, &magic, &error_msg);
+  if (fd.Fd() == -1) {
     return false;
   }
   if (IsZipMagic(magic)) {
-    return ContainsClassesDex(fd.release(), filename);
+    return ContainsClassesDex(fd.Release(), filename);
   } else if (IsDexMagic(magic)) {
     return true;
   }
@@ -244,7 +240,7 @@
   CHECK(location != nullptr);
   std::unique_ptr<MemMap> map;
   {
-    ScopedFd delayed_close(fd);
+    File delayed_close(fd, /* check_usage */ false);
     struct stat sbuf;
     memset(&sbuf, 0, sizeof(sbuf));
     if (fstat(fd, &sbuf) == -1) {
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index ab14655..7ecd595 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -19,7 +19,7 @@
 
 #include "entrypoint_utils.h"
 
-#include "art_method.h"
+#include "art_method-inl.h"
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "dex_file.h"
@@ -600,7 +600,7 @@
       }
     }
     case kInterface: {
-      uint32_t imt_index = resolved_method->GetDexMethodIndex() % ImTable::kSize;
+      uint32_t imt_index = resolved_method->GetImtIndex();
       size_t pointer_size = class_linker->GetImagePointerSize();
       ArtMethod* imt_method = (*this_object)->GetClass()->GetImt(pointer_size)->
           Get(imt_index, pointer_size);
diff --git a/runtime/entrypoints/quick/quick_cast_entrypoints.cc b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
index 968ac53..8db69a3 100644
--- a/runtime/entrypoints/quick/quick_cast_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
@@ -20,7 +20,7 @@
 namespace art {
 
 // Assignable test for code, won't throw.  Null and equality tests already performed
-extern "C" uint32_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class)
+extern "C" size_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(klass != nullptr);
   DCHECK(ref_class != nullptr);
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index d0dad34..86fb881 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -50,16 +50,16 @@
 extern "C" int art_quick_set64_static(uint32_t, int64_t);
 extern "C" int art_quick_set_obj_instance(uint32_t, void*, void*);
 extern "C" int art_quick_set_obj_static(uint32_t, void*);
-extern "C" int8_t art_quick_get_byte_instance(uint32_t, void*);
-extern "C" uint8_t art_quick_get_boolean_instance(uint32_t, void*);
-extern "C" int8_t art_quick_get_byte_static(uint32_t);
-extern "C" uint8_t art_quick_get_boolean_static(uint32_t);
-extern "C" int16_t art_quick_get_short_instance(uint32_t, void*);
-extern "C" uint16_t art_quick_get_char_instance(uint32_t, void*);
-extern "C" int16_t art_quick_get_short_static(uint32_t);
-extern "C" uint16_t art_quick_get_char_static(uint32_t);
-extern "C" int32_t art_quick_get32_instance(uint32_t, void*);
-extern "C" int32_t art_quick_get32_static(uint32_t);
+extern "C" ssize_t art_quick_get_byte_instance(uint32_t, void*);
+extern "C" size_t art_quick_get_boolean_instance(uint32_t, void*);
+extern "C" ssize_t art_quick_get_byte_static(uint32_t);
+extern "C" size_t art_quick_get_boolean_static(uint32_t);
+extern "C" ssize_t art_quick_get_short_instance(uint32_t, void*);
+extern "C" size_t art_quick_get_char_instance(uint32_t, void*);
+extern "C" ssize_t art_quick_get_short_static(uint32_t);
+extern "C" size_t art_quick_get_char_static(uint32_t);
+extern "C" ssize_t art_quick_get32_instance(uint32_t, void*);
+extern "C" ssize_t art_quick_get32_static(uint32_t);
 extern "C" int64_t art_quick_get64_instance(uint32_t, void*);
 extern "C" int64_t art_quick_get64_static(uint32_t);
 extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 30b639e..e0ec68e 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -33,7 +33,7 @@
   V(AllocStringFromChars, void*, int32_t, int32_t, void*) \
   V(AllocStringFromString, void*, void*) \
 \
-  V(InstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*) \
+  V(InstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*) \
   V(CheckCast, void, const mirror::Class*, const mirror::Class*) \
 \
   V(InitializeStaticStorage, void*, uint32_t) \
@@ -51,16 +51,16 @@
   V(Set64Static, int, uint32_t, int64_t) \
   V(SetObjInstance, int, uint32_t, void*, void*) \
   V(SetObjStatic, int, uint32_t, void*) \
-  V(GetByteInstance, int8_t, uint32_t, void*) \
-  V(GetBooleanInstance, uint8_t, uint32_t, void*) \
-  V(GetByteStatic, int8_t, uint32_t) \
-  V(GetBooleanStatic, uint8_t, uint32_t) \
-  V(GetShortInstance, int16_t, uint32_t, void*) \
-  V(GetCharInstance, uint16_t, uint32_t, void*) \
-  V(GetShortStatic, int16_t, uint32_t) \
-  V(GetCharStatic, uint16_t, uint32_t) \
-  V(Get32Instance, int32_t, uint32_t, void*) \
-  V(Get32Static, int32_t, uint32_t) \
+  V(GetByteInstance, ssize_t, uint32_t, void*) \
+  V(GetBooleanInstance, size_t, uint32_t, void*) \
+  V(GetByteStatic, ssize_t, uint32_t) \
+  V(GetBooleanStatic, size_t, uint32_t) \
+  V(GetShortInstance, ssize_t, uint32_t, void*) \
+  V(GetCharInstance, size_t, uint32_t, void*) \
+  V(GetShortStatic, ssize_t, uint32_t) \
+  V(GetCharStatic, size_t, uint32_t) \
+  V(Get32Instance, ssize_t, uint32_t, void*) \
+  V(Get32Static, ssize_t, uint32_t) \
   V(Get64Instance, int64_t, uint32_t, void*) \
   V(Get64Static, int64_t, uint32_t) \
   V(GetObjInstance, void*, uint32_t, void*) \
@@ -164,7 +164,38 @@
   V(NewStringFromStringBuilder, void) \
 \
   V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
-  V(ReadBarrierMark, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg00, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg01, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg02, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg03, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg04, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg05, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg06, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg07, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg08, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg09, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg10, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg11, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg12, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg13, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg14, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg15, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg16, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg17, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg18, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg19, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg20, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg21, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg22, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg23, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg24, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg25, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg26, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg27, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg28, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg29, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg30, mirror::Object*, mirror::Object*) \
+  V(ReadBarrierMarkReg31, mirror::Object*, mirror::Object*) \
   V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \
   V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*)
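
The single ReadBarrierMark entrypoint becomes one entrypoint per register so the compiler can call a variant that already knows which register holds the reference. A hedged sketch of the X-macro expansion behind this list (sketch names, not the real ART macros):

    // Every V(name, ret, args...) line expands to one function-pointer field,
    // so the 32 ReadBarrierMarkRegNN lines above become 32 adjacent entrypoint
    // slots, one per register.
    struct Object;

    #define SKETCH_ENTRYPOINT_LIST(V)           \
      V(ReadBarrierMarkReg00, Object*, Object*) \
      V(ReadBarrierMarkReg01, Object*, Object*)

    struct QuickEntryPointsSketch {
    #define ENTRYPOINT_FIELD(name, rettype, ...) rettype (*p##name)(__VA_ARGS__);
      SKETCH_ENTRYPOINT_LIST(ENTRYPOINT_FIELD)
    #undef ENTRYPOINT_FIELD
    };

    static_assert(sizeof(QuickEntryPointsSketch) ==
                      2 * sizeof(Object* (*)(Object*)),
                  "one pointer-sized slot per V() line");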
 
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index a245f18..1a12bd4 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -55,9 +55,7 @@
   return field;
 }
 
-extern "C" int8_t artGetByteStaticFromCode(uint32_t field_idx,
-                                           ArtMethod* referrer,
-                                           Thread* self)
+extern "C" ssize_t artGetByteStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int8_t));
@@ -71,9 +69,7 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint8_t artGetBooleanStaticFromCode(uint32_t field_idx,
-                                               ArtMethod* referrer,
-                                               Thread* self)
+extern "C" size_t artGetBooleanStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int8_t));
@@ -87,9 +83,7 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int16_t artGetShortStaticFromCode(uint32_t field_idx,
-                                             ArtMethod* referrer,
-                                             Thread* self)
+extern "C" ssize_t artGetShortStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int16_t));
@@ -103,9 +97,7 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint16_t artGetCharStaticFromCode(uint32_t field_idx,
-                                             ArtMethod* referrer,
-                                             Thread* self)
+extern "C" size_t artGetCharStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int16_t));
@@ -119,9 +111,7 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint32_t artGet32StaticFromCode(uint32_t field_idx,
-                                           ArtMethod* referrer,
-                                           Thread* self)
+extern "C" size_t artGet32StaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int32_t));
@@ -173,10 +163,10 @@
   return nullptr;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" int8_t artGetByteInstanceFromCode(uint32_t field_idx,
-                                             mirror::Object* obj,
-                                             ArtMethod* referrer,
-                                             Thread* self)
+extern "C" ssize_t artGetByteInstanceFromCode(uint32_t field_idx,
+                                              mirror::Object* obj,
+                                              ArtMethod* referrer,
+                                              Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
@@ -194,10 +184,10 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint8_t artGetBooleanInstanceFromCode(uint32_t field_idx,
-                                                 mirror::Object* obj,
-                                                 ArtMethod* referrer,
-                                                 Thread* self)
+extern "C" size_t artGetBooleanInstanceFromCode(uint32_t field_idx,
+                                                mirror::Object* obj,
+                                                ArtMethod* referrer,
+                                                Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
@@ -214,7 +204,7 @@
   }
   return 0;  // Will throw exception by checking with Thread::Current.
 }
-extern "C" int16_t artGetShortInstanceFromCode(uint32_t field_idx,
+extern "C" ssize_t artGetShortInstanceFromCode(uint32_t field_idx,
                                                mirror::Object* obj,
                                                ArtMethod* referrer,
                                                Thread* self)
@@ -235,10 +225,10 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint16_t artGetCharInstanceFromCode(uint32_t field_idx,
-                                               mirror::Object* obj,
-                                               ArtMethod* referrer,
-                                               Thread* self)
+extern "C" size_t artGetCharInstanceFromCode(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             ArtMethod* referrer,
+                                             Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int16_t));
@@ -256,10 +246,10 @@
   return 0;  // Will throw exception by checking with Thread::Current.
 }
 
-extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx,
-                                             mirror::Object* obj,
-                                             ArtMethod* referrer,
-                                             Thread* self)
+extern "C" size_t artGet32InstanceFromCode(uint32_t field_idx,
+                                           mirror::Object* obj,
+                                           ArtMethod* referrer,
+                                           Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int32_t));
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1152b94..49043f6 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2170,8 +2170,7 @@
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
     // If the dex cache already resolved the interface method, look whether we have
     // a match in the ImtConflictTable.
-    uint32_t imt_index = interface_method->GetDexMethodIndex();
-    ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*));
+    ArtMethod* conflict_method = imt->Get(interface_method->GetImtIndex(), sizeof(void*));
     if (LIKELY(conflict_method->IsRuntimeMethod())) {
       ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
       DCHECK(current_table != nullptr);
@@ -2222,8 +2221,8 @@
 
   // We arrive here if we have found an implementation, and it is not in the ImtConflictTable.
   // We create a new table with the new pair { interface_method, method }.
-  uint32_t imt_index = interface_method->GetDexMethodIndex();
-  ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*));
+  uint32_t imt_index = interface_method->GetImtIndex();
+  ArtMethod* conflict_method = imt->Get(imt_index, sizeof(void*));
   if (conflict_method->IsRuntimeMethod()) {
     ArtMethod* new_conflict_method = Runtime::Current()->GetClassLinker()->AddMethodToConflictTable(
         cls.Get(),
@@ -2234,7 +2233,7 @@
     if (new_conflict_method != conflict_method) {
       // Update the IMT if we create a new conflict method. No fence needed here, as the
       // data is consistent.
-      imt->Set(imt_index % ImTable::kSize,
+      imt->Set(imt_index,
                new_conflict_method,
                sizeof(void*));
     }
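
The trampoline now asks the interface method for its IMT slot instead of recomputing the modulo at every call site. A hedged sketch of that refactoring; the table size and the mapping below are illustrative assumptions, not the exact ART definition:

    #include <cstdint>

    constexpr uint32_t kImtSizeSketch = 64;  // stand-in for ImTable::kSize

    struct InterfaceMethodSketch {
      uint32_t dex_method_index;
      // The slot computation lives in one helper instead of being repeated inline.
      uint32_t GetImtIndex() const { return dex_method_index % kImtSizeSketch; }
    };

    uint32_t ConflictSlotFor(const InterfaceMethodSketch& m) {
      // Callers no longer spell out "GetDexMethodIndex() % ImTable::kSize".
      return m.GetImtIndex();
    }
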
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 7a624b2..ffe4109 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -121,10 +121,10 @@
 
     // Skip across the entrypoints structures.
 
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_objects, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, thread_local_pos, sizeof(size_t));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, mterp_current_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_alt_ibase, rosalloc_runs, sizeof(void*));
@@ -324,8 +324,70 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuilder, pReadBarrierJni,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierMark, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMark, pReadBarrierSlow, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierMarkReg00, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg00, pReadBarrierMarkReg01,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg01, pReadBarrierMarkReg02,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg02, pReadBarrierMarkReg03,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg03, pReadBarrierMarkReg04,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg04, pReadBarrierMarkReg05,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg05, pReadBarrierMarkReg06,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg06, pReadBarrierMarkReg07,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg07, pReadBarrierMarkReg08,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg08, pReadBarrierMarkReg09,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg09, pReadBarrierMarkReg10,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg10, pReadBarrierMarkReg11,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg11, pReadBarrierMarkReg12,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg12, pReadBarrierMarkReg13,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg13, pReadBarrierMarkReg14,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg14, pReadBarrierMarkReg15,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg15, pReadBarrierMarkReg16,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg16, pReadBarrierMarkReg17,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg17, pReadBarrierMarkReg18,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg18, pReadBarrierMarkReg19,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg19, pReadBarrierMarkReg20,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg20, pReadBarrierMarkReg21,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg21, pReadBarrierMarkReg22,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg22, pReadBarrierMarkReg23,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg23, pReadBarrierMarkReg24,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg24, pReadBarrierMarkReg25,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg25, pReadBarrierMarkReg26,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg26, pReadBarrierMarkReg27,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg27, pReadBarrierMarkReg28,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg28, pReadBarrierMarkReg29,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg29, pReadBarrierMarkReg30,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg30, pReadBarrierMarkReg31,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg31, pReadBarrierSlow, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow,
                          sizeof(void*));
 
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 3011112..4019a5b 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -153,6 +153,14 @@
   }
 }
 
+inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
+  // TODO: Consider removing this check when we are done investigating slow paths. b/30162165
+  if (UNLIKELY(mark_from_read_barrier_measurements_)) {
+    return MarkFromReadBarrierWithMeasurements(from_ref);
+  }
+  return Mark(from_ref);
+}
+
 inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
   DCHECK(region_space_->IsInFromSpace(from_ref));
   LockWord lw = from_ref->GetLockWord(false);
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index b7b5aa0..d2d2f23 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -17,7 +17,9 @@
 #include "concurrent_copying.h"
 
 #include "art_field-inl.h"
+#include "base/histogram-inl.h"
 #include "base/stl_util.h"
+#include "base/systrace.h"
 #include "debugger.h"
 #include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
@@ -39,7 +41,9 @@
 
 static constexpr size_t kDefaultGcMarkStackSize = 2 * MB;
 
-ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix)
+ConcurrentCopying::ConcurrentCopying(Heap* heap,
+                                     const std::string& name_prefix,
+                                     bool measure_read_barrier_slow_path)
     : GarbageCollector(heap,
                        name_prefix + (name_prefix.empty() ? "" : " ") +
                        "concurrent copying + mark sweep"),
@@ -54,6 +58,14 @@
       heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff),
       weak_ref_access_enabled_(true),
       skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
+      measure_read_barrier_slow_path_(measure_read_barrier_slow_path),
+      rb_slow_path_ns_(0),
+      rb_slow_path_count_(0),
+      rb_slow_path_count_gc_(0),
+      rb_slow_path_histogram_lock_("Read barrier histogram lock"),
+      rb_slow_path_time_histogram_("Mutator time in read barrier slow path", 500, 32),
+      rb_slow_path_count_total_(0),
+      rb_slow_path_count_gc_total_(0),
       rb_table_(heap_->GetReadBarrierTable()),
       force_evacuate_all_(false),
       immune_gray_stack_lock_("concurrent copying immune gray stack lock",
@@ -162,6 +174,14 @@
     MutexLock mu(Thread::Current(), mark_stack_lock_);
     CHECK(false_gray_stack_.empty());
   }
+
+  mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_;
+  if (measure_read_barrier_slow_path_) {
+    rb_slow_path_ns_.StoreRelaxed(0);
+    rb_slow_path_count_.StoreRelaxed(0);
+    rb_slow_path_count_gc_.StoreRelaxed(0);
+  }
+
   immune_spaces_.Reset();
   bytes_moved_.StoreRelaxed(0);
   objects_moved_.StoreRelaxed(0);
@@ -194,7 +214,7 @@
 }
 
 // Used to switch the thread roots of a thread from from-space refs to to-space refs.
-class ConcurrentCopying::ThreadFlipVisitor : public Closure {
+class ConcurrentCopying::ThreadFlipVisitor : public Closure, public RootVisitor {
  public:
   ThreadFlipVisitor(ConcurrentCopying* concurrent_copying, bool use_tlab)
       : concurrent_copying_(concurrent_copying), use_tlab_(use_tlab) {
@@ -221,10 +241,44 @@
       thread->RevokeThreadLocalAllocationStack();
     }
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    thread->VisitRoots(concurrent_copying_);
+    // We can use the non-CAS VisitRoots functions below because we update thread-local GC roots
+    // only.
+    thread->VisitRoots(this);
     concurrent_copying_->GetBarrier().Pass(self);
   }
 
+  void VisitRoots(mirror::Object*** roots,
+                  size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mirror::Object** root = roots[i];
+      mirror::Object* ref = *root;
+      if (ref != nullptr) {
+        mirror::Object* to_ref = concurrent_copying_->Mark(ref);
+        if (to_ref != ref) {
+          *root = to_ref;
+        }
+      }
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
+                  size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      mirror::CompressedReference<mirror::Object>* const root = roots[i];
+      if (!root->IsNull()) {
+        mirror::Object* ref = root->AsMirrorPtr();
+        mirror::Object* to_ref = concurrent_copying_->Mark(ref);
+        if (to_ref != ref) {
+          root->Assign(to_ref);
+        }
+      }
+    }
+  }
+
  private:
   ConcurrentCopying* const concurrent_copying_;
   const bool use_tlab_;
@@ -1996,9 +2050,17 @@
     MutexLock mu(Thread::Current(), skipped_blocks_lock_);
     skipped_blocks_map_.clear();
   }
-  ReaderMutexLock mu(self, *Locks::mutator_lock_);
-  WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
-  heap_->ClearMarkedObjects();
+  {
+    ReaderMutexLock mu(self, *Locks::mutator_lock_);
+    WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
+    heap_->ClearMarkedObjects();
+  }
+  if (measure_read_barrier_slow_path_) {
+    MutexLock mu(self, rb_slow_path_histogram_lock_);
+    rb_slow_path_time_histogram_.AdjustAndAddValue(rb_slow_path_ns_.LoadRelaxed());
+    rb_slow_path_count_total_ += rb_slow_path_count_.LoadRelaxed();
+    rb_slow_path_count_gc_total_ += rb_slow_path_count_gc_.LoadRelaxed();
+  }
 }
 
 bool ConcurrentCopying::IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) {
@@ -2036,6 +2098,37 @@
   region_space_->RevokeAllThreadLocalBuffers();
 }
 
+mirror::Object* ConcurrentCopying::MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref) {
+  if (Thread::Current() != thread_running_gc_) {
+    rb_slow_path_count_.FetchAndAddRelaxed(1u);
+  } else {
+    rb_slow_path_count_gc_.FetchAndAddRelaxed(1u);
+  }
+  ScopedTrace tr(__FUNCTION__);
+  const uint64_t start_time = measure_read_barrier_slow_path_ ? NanoTime() : 0u;
+  mirror::Object* ret = Mark(from_ref);
+  if (measure_read_barrier_slow_path_) {
+    rb_slow_path_ns_.FetchAndAddRelaxed(NanoTime() - start_time);
+  }
+  return ret;
+}
+
+void ConcurrentCopying::DumpPerformanceInfo(std::ostream& os) {
+  GarbageCollector::DumpPerformanceInfo(os);
+  MutexLock mu(Thread::Current(), rb_slow_path_histogram_lock_);
+  if (rb_slow_path_time_histogram_.SampleSize() > 0) {
+    Histogram<uint64_t>::CumulativeData cumulative_data;
+    rb_slow_path_time_histogram_.CreateHistogram(&cumulative_data);
+    rb_slow_path_time_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data);
+  }
+  if (rb_slow_path_count_total_ > 0) {
+    os << "Slow path count " << rb_slow_path_count_total_ << "\n";
+  }
+  if (rb_slow_path_count_gc_total_ > 0) {
+    os << "GC slow path count " << rb_slow_path_count_gc_total_ << "\n";
+  }
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
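
The new fields plus the FinishPhase and DumpPerformanceInfo changes implement an opt-in measurement of the read-barrier slow path. A simplified, hedged sketch of the pattern (not the ART classes):

    // Mutator threads bump relaxed atomics on every slow path; the collector
    // folds the per-cycle values into lock-guarded running totals once the
    // cycle finishes, so the report never races the mutators.
    #include <atomic>
    #include <cstdint>
    #include <mutex>

    struct RbSlowPathStats {
      std::atomic<uint64_t> ns{0};        // reset at the start of each GC cycle
      std::atomic<uint64_t> count{0};
      std::mutex histogram_lock;
      uint64_t total_ns = 0;              // guarded by histogram_lock
      uint64_t total_count = 0;           // guarded by histogram_lock

      void RecordSlowPath(uint64_t duration_ns) {  // called from mutator threads
        count.fetch_add(1, std::memory_order_relaxed);
        ns.fetch_add(duration_ns, std::memory_order_relaxed);
      }

      void FoldIntoTotals() {             // called once per cycle, like FinishPhase()
        std::lock_guard<std::mutex> lock(histogram_lock);
        total_ns += ns.load(std::memory_order_relaxed);
        total_count += count.load(std::memory_order_relaxed);
      }
    };
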
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 166a1f0..6a8d052 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -58,17 +58,24 @@
   // Enable verbose mode.
   static constexpr bool kVerboseMode = false;
 
-  ConcurrentCopying(Heap* heap, const std::string& name_prefix = "");
+  ConcurrentCopying(Heap* heap,
+                    const std::string& name_prefix = "",
+                    bool measure_read_barrier_slow_path = false);
   ~ConcurrentCopying();
 
   virtual void RunPhases() OVERRIDE
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+      REQUIRES(!immune_gray_stack_lock_,
+               !mark_stack_lock_,
+               !rb_slow_path_histogram_lock_,
+               !skipped_blocks_lock_);
   void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !immune_gray_stack_lock_);
   void MarkingPhase() SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   void ReclaimPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
-  void FinishPhase() REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  void FinishPhase() REQUIRES(!mark_stack_lock_,
+                              !rb_slow_path_histogram_lock_,
+                              !skipped_blocks_lock_);
 
   void BindBitmaps() SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::heap_bitmap_lock_);
@@ -95,7 +102,11 @@
     return IsMarked(ref) == ref;
   }
   template<bool kGrayImmuneObject = true>
-  ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
+  ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+  ALWAYS_INLINE mirror::Object* MarkFromReadBarrier(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   bool IsMarking() const {
     return is_marking_;
@@ -203,6 +214,10 @@
       REQUIRES(!mark_stack_lock_);
   void ScanImmuneObject(mirror::Object* obj)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+  mirror::Object* MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+  void DumpPerformanceInfo(std::ostream& os) OVERRIDE REQUIRES(!rb_slow_path_histogram_lock_);
 
   space::RegionSpace* region_space_;      // The underlying region space.
   std::unique_ptr<Barrier> gc_barrier_;
@@ -251,6 +266,20 @@
   Atomic<size_t> to_space_bytes_skipped_;
   Atomic<size_t> to_space_objects_skipped_;
 
+  // If measure_read_barrier_slow_path_ is true, we measure how much time is spent in
+  // MarkFromReadBarrier and log the results.
+  bool measure_read_barrier_slow_path_;
+  // mark_from_read_barrier_measurements_ is true if systrace is enabled or
+  // measure_read_barrier_slow_path_ is true.
+  bool mark_from_read_barrier_measurements_;
+  Atomic<uint64_t> rb_slow_path_ns_;
+  Atomic<uint64_t> rb_slow_path_count_;
+  Atomic<uint64_t> rb_slow_path_count_gc_;
+  mutable Mutex rb_slow_path_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  Histogram<uint64_t> rb_slow_path_time_histogram_ GUARDED_BY(rb_slow_path_histogram_lock_);
+  uint64_t rb_slow_path_count_total_ GUARDED_BY(rb_slow_path_histogram_lock_);
+  uint64_t rb_slow_path_count_gc_total_ GUARDED_BY(rb_slow_path_histogram_lock_);
+
   accounting::ReadBarrierTable* rb_table_;
   bool force_evacuate_all_;  // True if all regions are evacuated.
   Atomic<bool> updated_all_immune_objects_;
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 580486a..e0b71a7 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -181,7 +181,7 @@
   void RecordFree(const ObjectBytePair& freed);
   // Record a free of large objects.
   void RecordFreeLOS(const ObjectBytePair& freed);
-  void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
+  virtual void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
 
   // Helper functions for querying if objects are marked. These are used for processing references,
   // and will be used for reading system weaks while the GC is running.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 8da1493..6f4767e 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -121,6 +121,10 @@
 
 static constexpr size_t kNativeAllocationHistogramBuckets = 16;
 
+// Extra amount added to the heap growth multiplier. Used to adjust the GC ergonomics for the
+// read barrier config.
+static constexpr double kExtraHeapGrowthMultiplier = kUseReadBarrier ? 1.0 : 0.0;
+
 static inline bool CareAboutPauseTimes() {
   return Runtime::Current()->InJankPerceptibleProcessState();
 }
@@ -153,6 +157,7 @@
            bool verify_pre_sweeping_rosalloc,
            bool verify_post_gc_rosalloc,
            bool gc_stress_mode,
+           bool measure_gc_performance,
            bool use_homogeneous_space_compaction_for_oom,
            uint64_t min_interval_homogeneous_space_compaction_by_oom)
     : non_moving_space_(nullptr),
@@ -220,7 +225,8 @@
       min_free_(min_free),
       max_free_(max_free),
       target_utilization_(target_utilization),
-      foreground_heap_growth_multiplier_(foreground_heap_growth_multiplier),
+      foreground_heap_growth_multiplier_(
+          foreground_heap_growth_multiplier + kExtraHeapGrowthMultiplier),
       total_wait_time_(0),
       verify_object_mode_(kVerifyObjectModeDisabled),
       disable_moving_gc_count_(0),
@@ -594,7 +600,9 @@
       garbage_collectors_.push_back(semi_space_collector_);
     }
     if (MayUseCollector(kCollectorTypeCC)) {
-      concurrent_copying_collector_ = new collector::ConcurrentCopying(this);
+      concurrent_copying_collector_ = new collector::ConcurrentCopying(this,
+                                                                       "",
+                                                                       measure_gc_performance);
       garbage_collectors_.push_back(concurrent_copying_collector_);
     }
     if (MayUseCollector(kCollectorTypeMC)) {
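
The extra multiplier shifts the GC ergonomics so the concurrent copying (read barrier) configuration grows the heap more aggressively in the foreground. A hedged arithmetic sketch; the configured value of 2.0 below is only an example input, not an ART default:

    #include <cstdio>

    int main() {
      const bool use_read_barrier = true;  // kUseReadBarrier is a build-time flag
      const double extra = use_read_barrier ? 1.0 : 0.0;
      const double configured_foreground_multiplier = 2.0;
      std::printf("effective multiplier: %.1f\n",
                  configured_foreground_multiplier + extra);  // 3.0
      return 0;
    }
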
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 6fb048a..bb0d11a 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -182,6 +182,7 @@
        bool verify_pre_sweeping_rosalloc,
        bool verify_post_gc_rosalloc,
        bool gc_stress_mode,
+       bool measure_gc_performance,
        bool use_homogeneous_space_compaction,
        uint64_t min_interval_homogeneous_space_compaction_by_oom);
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 8c42b3a..f1f7f42 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -324,7 +324,7 @@
       } else {
         while (true) {
           // Mterp does not support all instrumentation/debugging.
-          if (MterpShouldSwitchInterpreters()) {
+          if (MterpShouldSwitchInterpreters() != 0) {
             return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,
                                                    false);
           }
diff --git a/runtime/interpreter/mterp/arm64/fbinop2addr.S b/runtime/interpreter/mterp/arm64/fbinop2addr.S
index 0d57cbf..04236ad 100644
--- a/runtime/interpreter/mterp/arm64/fbinop2addr.S
+++ b/runtime/interpreter/mterp/arm64/fbinop2addr.S
@@ -7,8 +7,7 @@
      */
     /* binop/2addr vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     $instr                              // s2<- op
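
Several of the following mterp fragments replace a shift-plus-mask pair with a single ubfx bit-field extract; the extracted value is unchanged. A hedged sketch of the extraction, using a hypothetical code unit:

    // In a Dalvik 16-bit code unit the opcode sits in bits [0..7], register A
    // in bits [8..11] and B in bits [12..15], so "ubfx w9, wINST, #8, #4"
    // extracts A in one instruction where the old code shifted and then masked.
    #include <cassert>
    #include <cstdint>

    static uint32_t ExtractA(uint32_t inst) {
      return (inst >> 8) & 0xF;  // equivalent of ubfx wN, wINST, #8, #4
    }

    int main() {
      const uint32_t inst = 0x5A90;   // hypothetical code unit: B=5, A=10, opcode=0x90
      assert(ExtractA(inst) == 10);
      assert((inst >> 12) == 5);      // B is the top nibble
      assert((inst & 0xFF) == 0x90);  // opcode is the low byte
      return 0;
    }
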
diff --git a/runtime/interpreter/mterp/arm64/fcmp.S b/runtime/interpreter/mterp/arm64/fcmp.S
index a45e789..cad6318 100644
--- a/runtime/interpreter/mterp/arm64/fcmp.S
+++ b/runtime/interpreter/mterp/arm64/fcmp.S
@@ -1,4 +1,4 @@
-%default {"wide":"", "r1":"s1", "r2":"s2", "default_val":"-1","cond":"le"}
+%default {"wide":"", "r1":"s1", "r2":"s2", "cond":"lt"}
     /*
      * Compare two floating-point values.  Puts 0, 1, or -1 into the
      * destination register based on the results of the comparison.
@@ -10,10 +10,9 @@
     lsr     w3, w0, #8                  // w3<- CC
     GET_VREG$wide $r1, w2
     GET_VREG$wide $r2, w3
-    mov     w0, #$default_val
     fcmp $r1, $r2
-    csneg w0, w0, w0, $cond
-    csel w0, wzr, w0, eq
+    cset w0, ne
+    cneg w0, w0, $cond
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w4                     // vAA<- w0
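
The cset/cneg pair computes the -1/0/+1 comparison result, including the NaN bias, in two instructions. A hedged C++ model of what the sequence produces for cmpl (cond=lt) and cmpg (cond=cc):

    // "cset w0, ne" yields 1 whenever the operands are not equal (NaN is never
    // equal); "cneg w0, w0, <cond>" negates it when the condition holds. After
    // fcmp, "lt" is true for less-than or unordered, while "cc" excludes the
    // unordered case, which is exactly the bias cmpl/cmpg need.
    #include <cassert>
    #include <cmath>

    static int CmplFloat(float a, float b) {
      int result = (a == b) ? 0 : 1;                         // cset w0, ne
      bool lt = (a < b) || std::isnan(a) || std::isnan(b);   // AArch64 "lt" after fcmp
      return lt ? -result : result;                          // cneg w0, w0, lt
    }

    static int CmpgFloat(float a, float b) {
      int result = (a == b) ? 0 : 1;                         // cset w0, ne
      bool cc = (a < b);                                     // "cc" excludes unordered
      return cc ? -result : result;                          // cneg w0, w0, cc
    }

    int main() {
      const float nan = std::nanf("");
      assert(CmplFloat(1.0f, 2.0f) == -1 && CmpgFloat(1.0f, 2.0f) == -1);
      assert(CmplFloat(2.0f, 1.0f) ==  1 && CmpgFloat(2.0f, 1.0f) ==  1);
      assert(CmplFloat(1.0f, 1.0f) ==  0 && CmpgFloat(1.0f, 1.0f) ==  0);
      assert(CmplFloat(nan, 1.0f)  == -1 && CmpgFloat(nan, 1.0f)  ==  1);
      return 0;
    }
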
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index 2d3a11e..7628ed3 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -234,7 +234,7 @@
 #if MTERP_LOGGING
     mov  x0, xSELF
     add  x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm x2, xINST, 0, 31
+    sxtw x2, wINST
     bl MterpLogOSR
 #endif
     mov  x0, #1                         // Signal normal return
diff --git a/runtime/interpreter/mterp/arm64/funopNarrow.S b/runtime/interpreter/mterp/arm64/funopNarrow.S
index 9f5ad1e..aed830b 100644
--- a/runtime/interpreter/mterp/arm64/funopNarrow.S
+++ b/runtime/interpreter/mterp/arm64/funopNarrow.S
@@ -8,10 +8,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG $srcreg, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG $tgtreg, w4                // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/funopNarrower.S b/runtime/interpreter/mterp/arm64/funopNarrower.S
index 411396b..6fddfea 100644
--- a/runtime/interpreter/mterp/arm64/funopNarrower.S
+++ b/runtime/interpreter/mterp/arm64/funopNarrower.S
@@ -7,10 +7,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE $srcreg, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG $tgtreg, w4                // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/funopWide.S b/runtime/interpreter/mterp/arm64/funopWide.S
index d83b39c..409e26b 100644
--- a/runtime/interpreter/mterp/arm64/funopWide.S
+++ b/runtime/interpreter/mterp/arm64/funopWide.S
@@ -7,10 +7,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE $srcreg, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE $tgtreg, w4           // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/funopWider.S b/runtime/interpreter/mterp/arm64/funopWider.S
index 50a73f1..4c91ebc 100644
--- a/runtime/interpreter/mterp/arm64/funopWider.S
+++ b/runtime/interpreter/mterp/arm64/funopWider.S
@@ -7,10 +7,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG $srcreg, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE $tgtreg, w4           // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/op_cmp_long.S b/runtime/interpreter/mterp/arm64/op_cmp_long.S
index 982e5b1..c4ad984 100644
--- a/runtime/interpreter/mterp/arm64/op_cmp_long.S
+++ b/runtime/interpreter/mterp/arm64/op_cmp_long.S
@@ -5,8 +5,8 @@
     GET_VREG_WIDE x1, w2
     GET_VREG_WIDE x2, w3
     cmp     x1, x2
-    csinc   w0, wzr, wzr, eq
-    csneg   w0, w0, w0, ge
+    cset    w0, ne
+    cneg    w0, w0, lt
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
     SET_VREG w0, w4
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_double.S b/runtime/interpreter/mterp/arm64/op_cmpg_double.S
index 14f9ff8..30cb7eb 100644
--- a/runtime/interpreter/mterp/arm64/op_cmpg_double.S
+++ b/runtime/interpreter/mterp/arm64/op_cmpg_double.S
@@ -1 +1 @@
-%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "default_val":"1", "cond":"pl"}
+%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "cond":"cc"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_float.S b/runtime/interpreter/mterp/arm64/op_cmpg_float.S
index 3a20cba..ba23f43 100644
--- a/runtime/interpreter/mterp/arm64/op_cmpg_float.S
+++ b/runtime/interpreter/mterp/arm64/op_cmpg_float.S
@@ -1 +1 @@
-%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "default_val":"1", "cond":"pl"}
+%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "cond":"cc"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_double.S b/runtime/interpreter/mterp/arm64/op_cmpl_double.S
index 06d5917..c739685 100644
--- a/runtime/interpreter/mterp/arm64/op_cmpl_double.S
+++ b/runtime/interpreter/mterp/arm64/op_cmpl_double.S
@@ -1 +1 @@
-%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "default_val":"-1", "cond":"le"}
+%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "cond":"lt"}
diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_float.S b/runtime/interpreter/mterp/arm64/op_cmpl_float.S
index d87d086..32a9319 100644
--- a/runtime/interpreter/mterp/arm64/op_cmpl_float.S
+++ b/runtime/interpreter/mterp/arm64/op_cmpl_float.S
@@ -1 +1 @@
-%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "default_val":"-1", "cond":"le"}
+%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "cond":"lt"}
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_16.S b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
index e43628b..553d481 100644
--- a/runtime/interpreter/mterp/arm64/op_const_wide_16.S
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
@@ -1,8 +1,7 @@
     /* const-wide/16 vAA, #+BBBB */
-    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended
+    FETCH_S x0, 1                       // x0<- ssssssssssssBBBB (sign-extended)
     lsr     w3, wINST, #8               // w3<- AA
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    sbfm    x0, x0, 0, 31
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w3
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_32.S b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
index 527f7d8..9dc4fc3 100644
--- a/runtime/interpreter/mterp/arm64/op_const_wide_32.S
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
@@ -1,10 +1,9 @@
     /* const-wide/32 vAA, #+BBBBbbbb */
-    FETCH w0, 1                         // w0<- 0000bbbb (low)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (low)
     lsr     w3, wINST, #8               // w3<- AA
-    FETCH_S w2, 2                       // w2<- ssssBBBB (high)
+    FETCH_S x2, 2                       // x2<- ssssssssssssBBBB (high)
     FETCH_ADVANCE_INST 3                // advance rPC, load wINST
     GET_INST_OPCODE ip                  // extract opcode from wINST
-    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
-    sbfm    x0, x0, 0, 31
+    orr     x0, x0, x2, lsl #16         // x0<- ssssssssBBBBbbbb
     SET_VREG_WIDE x0, w3
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_fill_array_data.S b/runtime/interpreter/mterp/arm64/op_fill_array_data.S
index f50d9e4..86fa6db 100644
--- a/runtime/interpreter/mterp/arm64/op_fill_array_data.S
+++ b/runtime/interpreter/mterp/arm64/op_fill_array_data.S
@@ -1,11 +1,11 @@
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
-    orr     w1, w0, w1, lsl #16         // w1<- BBBBbbbb
+    orr     x1, x0, x1, lsl #16         // x1<- ssssssssBBBBbbbb
     GET_VREG w0, w3                     // w0<- vAA (array object)
-    add     x1, xPC, w1, lsl #1         // w1<- PC + BBBBbbbb*2 (array data off.)
+    add     x1, xPC, x1, lsl #1         // x1<- PC + ssssssssBBBBbbbb*2 (array data off.)
     bl      MterpFillArrayData          // (obj, payload)
     cbz     w0, MterpPossibleException      // exception?
     FETCH_ADVANCE_INST 3                // advance rPC, load rINST
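
fill-array-data, packed-switch, and const-wide/32 all combine a zero-extended low code unit with a sign-extended high code unit; doing the fetch and the orr on 64-bit registers yields an already sign-extended value, so the trailing sbfm/sxtw is dropped. A hedged sketch of the assembled offset:

    #include <cassert>
    #include <cstdint>

    static int64_t AssembleOffset(uint16_t lo, uint16_t hi) {
      uint64_t x0 = lo;                                   // FETCH   w0, 1 (zero-extended)
      int64_t hi_signed = static_cast<int16_t>(hi);       // FETCH_S x1, 2 (sign-extended)
      uint64_t x1 = static_cast<uint64_t>(hi_signed);
      return static_cast<int64_t>(x0 | (x1 << 16));       // orr x1, x0, x1, lsl #16
    }

    int main() {
      assert(AssembleOffset(0x0004, 0x0000) == 4);    // small positive offset
      assert(AssembleOffset(0xFFFC, 0xFFFF) == -4);   // negative offset, already sign-extended
      return 0;
    }
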
diff --git a/runtime/interpreter/mterp/arm64/op_iget_quick.S b/runtime/interpreter/mterp/arm64/op_iget_quick.S
index 45c68a3..699b2c4 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_quick.S
@@ -5,8 +5,7 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     x3, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     $load   w0, [x3, x1]                // w0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     $extend
diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
index 2480d2d..30b30c2 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
@@ -3,7 +3,7 @@
     FETCH w4, 1                         // w4<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cbz     w3, common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     add     x4, x3, x4                  // create direct pointer
     ldr     x0, [x4]
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
diff --git a/runtime/interpreter/mterp/arm64/op_instance_of.S b/runtime/interpreter/mterp/arm64/op_instance_of.S
index 647bc75..a56705a 100644
--- a/runtime/interpreter/mterp/arm64/op_instance_of.S
+++ b/runtime/interpreter/mterp/arm64/op_instance_of.S
@@ -13,8 +13,7 @@
     mov       x3, xSELF                 // w3<- self
     bl        MterpInstanceOf           // (index, &obj, method, self)
     ldr       x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    lsr       w2, wINST, #8             // w2<- A+
-    and       w2, w2, #15               // w2<- A
+    ubfx      w2, wINST, #8, #4         // w2<- A
     PREFETCH_INST 2
     cbnz      x1, MterpException
     ADVANCE 2                           // advance rPC
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_long.S b/runtime/interpreter/mterp/arm64/op_int_to_long.S
index 13d2120..35830f3 100644
--- a/runtime/interpreter/mterp/arm64/op_int_to_long.S
+++ b/runtime/interpreter/mterp/arm64/op_int_to_long.S
@@ -1 +1 @@
-%include "arm64/funopWider.S" {"instr":"sbfm x0, x0, 0, 31", "srcreg":"w0", "tgtreg":"x0"}
+%include "arm64/funopWider.S" {"instr":"sxtw x0, w0", "srcreg":"w0", "tgtreg":"x0"}
diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
index 27b5dc5..566e2bf 100644
--- a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
@@ -3,8 +3,7 @@
     FETCH w3, 1                         // w3<- field byte offset
     GET_VREG w2, w2                     // w2<- fp[B], the object pointer
     ubfx    w0, wINST, #8, #4           // w0<- A
-    cmp     w2, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w2, common_errNullObject    // object was null
     GET_VREG_WIDE x0, w0                // x0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
     add     x1, x2, x3                  // create a direct pointer
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
index 1456f1a..408e030 100644
--- a/runtime/interpreter/mterp/arm64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -9,12 +9,12 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    orr     x0, x0, x1, lsl #16         // x0<- ssssssssBBBBbbbb
     GET_VREG w1, w3                     // w1<- vAA
-    add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    add     x0, xPC, x0, lsl #1         // x0<- PC + ssssssssBBBBbbbb*2
     bl      $func                       // w0<- code-unit branch offset
-    sbfm    xINST, x0, 0, 31
+    sxtw    xINST, w0
     b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
index 0b91891..95f81c5 100644
--- a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
@@ -1,12 +1,10 @@
     /* rem vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     bl  fmodf
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG s0, w9
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int.S b/runtime/interpreter/mterp/arm64/op_shl_int.S
index bd0f237..3062a3f 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int.S
@@ -1 +1 @@
-%include "arm64/binop.S" {"preinstr":"and     w1, w1, #31", "instr":"lsl     w0, w0, w1"}
+%include "arm64/binop.S" {"instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
index b4671d2..9a7e09f 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
@@ -1 +1 @@
-%include "arm64/binop2addr.S" {"preinstr":"and     w1, w1, #31", "instr":"lsl     w0, w0, w1"}
+%include "arm64/binop2addr.S" {"instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
index 4dd32e0..17f57f9 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"preinstr":"and     w1, w1, #31", "instr":"lsl     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"instr":"lsl     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int.S b/runtime/interpreter/mterp/arm64/op_shr_int.S
index c214a18..493b740 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int.S
@@ -1 +1 @@
-%include "arm64/binop.S" {"preinstr":"and     w1, w1, #31", "instr":"asr     w0, w0, w1"}
+%include "arm64/binop.S" {"instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
index 3c1484b..6efe8ee 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
@@ -1 +1 @@
-%include "arm64/binop2addr.S" {"preinstr":"and     w1, w1, #31", "instr":"asr     w0, w0, w1"}
+%include "arm64/binop2addr.S" {"instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
index 26d5024..274080c 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"preinstr":"and     w1, w1, #31", "instr":"asr     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"instr":"asr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int.S b/runtime/interpreter/mterp/arm64/op_ushr_int.S
index bb8382b..005452b 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int.S
@@ -1 +1 @@
-%include "arm64/binop.S" {"preinstr":"and     w1, w1, #31", "instr":"lsr     w0, w0, w1"}
+%include "arm64/binop.S" {"instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
index dbccb99..1cb8cb7 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
@@ -1 +1 @@
-%include "arm64/binop2addr.S" {"preinstr":"and     w1, w1, #31", "instr":"lsr     w0, w0, w1"}
+%include "arm64/binop2addr.S" {"instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
index 35090c4..ff30e1f 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"preinstr":"and     w1, w1, #31", "instr":"lsr     w0, w0, w1"}
+%include "arm64/binopLit8.S" {"instr":"lsr     w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/shiftWide.S b/runtime/interpreter/mterp/arm64/shiftWide.S
index 6306fca..dcb2fb7 100644
--- a/runtime/interpreter/mterp/arm64/shiftWide.S
+++ b/runtime/interpreter/mterp/arm64/shiftWide.S
@@ -12,8 +12,7 @@
     and      w1, w0, #255                // w1<- BB
     GET_VREG_WIDE x1, w1                // x1<- vBB
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and      x2, x2, #63                 // Mask low 6
-    $opcode  x0, x1, x2                 // Do the shift.
+    $opcode  x0, x1, x2                 // Do the shift. Only low 6 bits of x2 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w3                // vAA<- x0
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/shiftWide2addr.S b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
index 77d104a..b860dfd 100644
--- a/runtime/interpreter/mterp/arm64/shiftWide2addr.S
+++ b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
@@ -8,8 +8,7 @@
     GET_VREG w1, w1                     // x1<- vB
     GET_VREG_WIDE x0, w2                // x0<- vA
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     x1, x1, #63                 // Mask low 6 bits.
-    $opcode x0, x0, x1
+    $opcode x0, x0, x1                  // Do the shift. Only low 6 bits of x1 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w2               // vAA<- result
     GOTO_OPCODE ip                      // jump to next instruction
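
The removed and-instructions were redundant because AArch64 variable shifts use only the low six bits of the shift-amount register for 64-bit operands (and the low five bits for 32-bit operands, which also covers the shl/shr/ushr preinstr removals above). A hedged C++ model, which must mask explicitly since over-wide shifts are undefined in C++ itself:

    #include <cassert>
    #include <cstdint>

    static uint64_t ShlLongVreg(uint64_t value, uint32_t shift_vreg) {
      return value << (shift_vreg & 63u);  // what "lsl x0, x1, x2" computes
    }

    int main() {
      assert(ShlLongVreg(1, 1)  == 2);
      assert(ShlLongVreg(1, 65) == 2);  // 65 & 63 == 1, matching the hardware behaviour
      assert(ShlLongVreg(1, 64) == 1);  // 64 & 63 == 0
      return 0;
    }
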
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 8aa87b1..c25cd78 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -57,7 +57,7 @@
  * Returns 3 if we don't find a match (it's the size of the sparse-switch
  * instruction).
  */
-extern "C" int32_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal) {
+extern "C" ssize_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal) {
   const int kInstrLen = 3;
   uint16_t size;
   const int32_t* keys;
@@ -109,7 +109,7 @@
   return kInstrLen;
 }
 
-extern "C" int32_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal) {
+extern "C" ssize_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal) {
   const int kInstrLen = 3;
 
   /*
@@ -142,7 +142,7 @@
   return entries[index];
 }
 
-extern "C" bool MterpShouldSwitchInterpreters()
+extern "C" size_t MterpShouldSwitchInterpreters()
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const instrumentation::Instrumentation* const instrumentation =
       Runtime::Current()->GetInstrumentation();
@@ -150,8 +150,10 @@
 }
 
 
-extern "C" bool MterpInvokeVirtual(Thread* self, ShadowFrame* shadow_frame,
-                                   uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeVirtual(Thread* self,
+                                     ShadowFrame* shadow_frame,
+                                     uint16_t* dex_pc_ptr,
+                                     uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -159,8 +161,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeSuper(Thread* self, ShadowFrame* shadow_frame,
-                                 uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeSuper(Thread* self,
+                                   ShadowFrame* shadow_frame,
+                                   uint16_t* dex_pc_ptr,
+                                   uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -168,8 +172,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeInterface(Thread* self, ShadowFrame* shadow_frame,
-                                     uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeInterface(Thread* self,
+                                       ShadowFrame* shadow_frame,
+                                       uint16_t* dex_pc_ptr,
+                                       uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -177,8 +183,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeDirect(Thread* self, ShadowFrame* shadow_frame,
-                                  uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeDirect(Thread* self,
+                                    ShadowFrame* shadow_frame,
+                                    uint16_t* dex_pc_ptr,
+                                    uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -186,8 +194,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeStatic(Thread* self, ShadowFrame* shadow_frame,
-                                  uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeStatic(Thread* self,
+                                    ShadowFrame* shadow_frame,
+                                    uint16_t* dex_pc_ptr,
+                                    uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -195,8 +205,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeVirtualRange(Thread* self, ShadowFrame* shadow_frame,
-                                        uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeVirtualRange(Thread* self,
+                                          ShadowFrame* shadow_frame,
+                                          uint16_t* dex_pc_ptr,
+                                          uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -204,8 +216,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeSuperRange(Thread* self, ShadowFrame* shadow_frame,
-                                      uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeSuperRange(Thread* self,
+                                        ShadowFrame* shadow_frame,
+                                        uint16_t* dex_pc_ptr,
+                                        uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -213,8 +227,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeInterfaceRange(Thread* self, ShadowFrame* shadow_frame,
-                                          uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeInterfaceRange(Thread* self,
+                                            ShadowFrame* shadow_frame,
+                                            uint16_t* dex_pc_ptr,
+                                            uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -222,8 +238,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeDirectRange(Thread* self, ShadowFrame* shadow_frame,
-                                       uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeDirectRange(Thread* self,
+                                         ShadowFrame* shadow_frame,
+                                         uint16_t* dex_pc_ptr,
+                                         uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -231,8 +249,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeStaticRange(Thread* self, ShadowFrame* shadow_frame,
-                                       uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeStaticRange(Thread* self,
+                                         ShadowFrame* shadow_frame,
+                                         uint16_t* dex_pc_ptr,
+                                         uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -240,8 +260,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeVirtualQuick(Thread* self, ShadowFrame* shadow_frame,
-                                        uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeVirtualQuick(Thread* self,
+                                          ShadowFrame* shadow_frame,
+                                          uint16_t* dex_pc_ptr,
+                                          uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -249,8 +271,10 @@
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
-extern "C" bool MterpInvokeVirtualQuickRange(Thread* self, ShadowFrame* shadow_frame,
-                                             uint16_t* dex_pc_ptr,  uint16_t inst_data )
+extern "C" size_t MterpInvokeVirtualQuickRange(Thread* self,
+                                               ShadowFrame* shadow_frame,
+                                               uint16_t* dex_pc_ptr,
+                                               uint16_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -262,8 +286,10 @@
   QuasiAtomic::ThreadFenceForConstructor();
 }
 
-extern "C" bool MterpConstString(uint32_t index, uint32_t tgt_vreg, ShadowFrame* shadow_frame,
-                                 Thread* self)
+extern "C" size_t MterpConstString(uint32_t index,
+                                   uint32_t tgt_vreg,
+                                   ShadowFrame* shadow_frame,
+                                   Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   String* s = ResolveString(self, *shadow_frame,  index);
   if (UNLIKELY(s == nullptr)) {
@@ -273,8 +299,10 @@
   return false;
 }
 
-extern "C" bool MterpConstClass(uint32_t index, uint32_t tgt_vreg, ShadowFrame* shadow_frame,
-                                Thread* self)
+extern "C" size_t MterpConstClass(uint32_t index,
+                                  uint32_t tgt_vreg,
+                                  ShadowFrame* shadow_frame,
+                                  Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   Class* c = ResolveVerifyAndClinit(index, shadow_frame->GetMethod(), self, false, false);
   if (UNLIKELY(c == nullptr)) {
@@ -284,8 +312,10 @@
   return false;
 }
 
-extern "C" bool MterpCheckCast(uint32_t index, StackReference<mirror::Object>* vreg_addr,
-                               art::ArtMethod* method, Thread* self)
+extern "C" size_t MterpCheckCast(uint32_t index,
+                                 StackReference<mirror::Object>* vreg_addr,
+                                 art::ArtMethod* method,
+                                 Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
   if (UNLIKELY(c == nullptr)) {
@@ -300,8 +330,10 @@
   return false;
 }
 
-extern "C" bool MterpInstanceOf(uint32_t index, StackReference<mirror::Object>* vreg_addr,
-                                art::ArtMethod* method, Thread* self)
+extern "C" size_t MterpInstanceOf(uint32_t index,
+                                  StackReference<mirror::Object>* vreg_addr,
+                                  art::ArtMethod* method,
+                                  Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
   if (UNLIKELY(c == nullptr)) {
@@ -312,12 +344,12 @@
   return (obj != nullptr) && obj->InstanceOf(c);
 }
 
-extern "C" bool MterpFillArrayData(Object* obj, const Instruction::ArrayDataPayload* payload)
+extern "C" size_t MterpFillArrayData(Object* obj, const Instruction::ArrayDataPayload* payload)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   return FillArrayData(obj, payload);
 }
 
-extern "C" bool MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32_t inst_data)
+extern "C" size_t MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   Object* obj = nullptr;
@@ -342,7 +374,7 @@
   return true;
 }
 
-extern "C" bool MterpSputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
+extern "C" size_t MterpSputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
                                 uint32_t inst_data, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -350,23 +382,27 @@
       (self, *shadow_frame, inst, inst_data);
 }
 
-extern "C" bool MterpIputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
-                                uint32_t inst_data, Thread* self)
+extern "C" size_t MterpIputObject(ShadowFrame* shadow_frame,
+                                  uint16_t* dex_pc_ptr,
+                                  uint32_t inst_data,
+                                  Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   return DoFieldPut<InstanceObjectWrite, Primitive::kPrimNot, false, false>
       (self, *shadow_frame, inst, inst_data);
 }
 
-extern "C" bool MterpIputObjectQuick(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
-                                     uint32_t inst_data)
+extern "C" size_t MterpIputObjectQuick(ShadowFrame* shadow_frame,
+                                       uint16_t* dex_pc_ptr,
+                                       uint32_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   return DoIPutQuick<Primitive::kPrimNot, false>(*shadow_frame, inst, inst_data);
 }
 
-extern "C" bool MterpAputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
-                                uint32_t inst_data)
+extern "C" size_t MterpAputObject(ShadowFrame* shadow_frame,
+                                  uint16_t* dex_pc_ptr,
+                                  uint32_t inst_data)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   Object* a = shadow_frame->GetVRegReference(inst->VRegB_23x());
@@ -383,24 +419,27 @@
   return false;
 }
 
-extern "C" bool MterpFilledNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
-                                    Thread* self)
+extern "C" size_t MterpFilledNewArray(ShadowFrame* shadow_frame,
+                                      uint16_t* dex_pc_ptr,
+                                      Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   return DoFilledNewArray<false, false, false>(inst, *shadow_frame, self,
                                                shadow_frame->GetResultRegister());
 }
 
-extern "C" bool MterpFilledNewArrayRange(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
-                                         Thread* self)
+extern "C" size_t MterpFilledNewArrayRange(ShadowFrame* shadow_frame,
+                                           uint16_t* dex_pc_ptr,
+                                           Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   return DoFilledNewArray<true, false, false>(inst, *shadow_frame, self,
                                               shadow_frame->GetResultRegister());
 }
 
-extern "C" bool MterpNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr,
-                              uint32_t inst_data, Thread* self)
+extern "C" size_t MterpNewArray(ShadowFrame* shadow_frame,
+                                uint16_t* dex_pc_ptr,
+                                uint32_t inst_data, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   int32_t length = shadow_frame->GetVReg(inst->VRegB_22c(inst_data));
@@ -414,7 +453,7 @@
   return true;
 }
 
-extern "C" bool MterpHandleException(Thread* self, ShadowFrame* shadow_frame)
+extern "C" size_t MterpHandleException(Thread* self, ShadowFrame* shadow_frame)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(self->IsExceptionPending());
   const instrumentation::Instrumentation* const instrumentation =
@@ -526,14 +565,16 @@
   }
 }
 
-extern "C" bool MterpSuspendCheck(Thread* self)
+extern "C" size_t MterpSuspendCheck(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   self->AllowThreadSuspension();
   return MterpShouldSwitchInterpreters();
 }
 
-extern "C" int artSet64IndirectStaticFromMterp(uint32_t field_idx, ArtMethod* referrer,
-                                               uint64_t* new_value, Thread* self)
+extern "C" ssize_t artSet64IndirectStaticFromMterp(uint32_t field_idx,
+                                                   ArtMethod* referrer,
+                                                   uint64_t* new_value,
+                                                   Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int64_t));
@@ -551,8 +592,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet8InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint8_t new_value,
-                                        ArtMethod* referrer)
+extern "C" ssize_t artSet8InstanceFromMterp(uint32_t field_idx,
+                                            mirror::Object* obj,
+                                            uint8_t new_value,
+                                            ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int8_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
@@ -568,8 +611,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet16InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint16_t new_value,
-                                        ArtMethod* referrer)
+extern "C" ssize_t artSet16InstanceFromMterp(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             uint16_t new_value,
+                                             ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int16_t));
@@ -586,8 +631,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet32InstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
-                                         uint32_t new_value, ArtMethod* referrer)
+extern "C" ssize_t artSet32InstanceFromMterp(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             uint32_t new_value,
+                                             ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int32_t));
@@ -598,8 +645,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSet64InstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
-                                         uint64_t* new_value, ArtMethod* referrer)
+extern "C" ssize_t artSet64InstanceFromMterp(uint32_t field_idx,
+                                             mirror::Object* obj,
+                                             uint64_t* new_value,
+                                             ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int64_t));
@@ -610,8 +659,10 @@
   return -1;  // failure
 }
 
-extern "C" int artSetObjInstanceFromMterp(uint32_t field_idx, mirror::Object* obj,
-                                          mirror::Object* new_value, ArtMethod* referrer)
+extern "C" ssize_t artSetObjInstanceFromMterp(uint32_t field_idx,
+                                              mirror::Object* obj,
+                                              mirror::Object* new_value,
+                                              ArtMethod* referrer)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
                                           sizeof(mirror::HeapReference<mirror::Object>));
@@ -651,7 +702,7 @@
  * to the full instrumentation via MterpAddHotnessBatch.  Called once on entry to the method,
  * and regenerated following batch updates.
  */
-extern "C" int MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame)
+extern "C" ssize_t MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   uint16_t hotness_count = method->GetCounter();
   int32_t countdown_value = jit::kJitHotnessDisabled;
@@ -689,7 +740,7 @@
  * Report a batch of hotness events to the instrumentation and then return the new
  * countdown value to the next time we should report.
  */
-extern "C" int16_t MterpAddHotnessBatch(ArtMethod* method,
+extern "C" ssize_t MterpAddHotnessBatch(ArtMethod* method,
                                         ShadowFrame* shadow_frame,
                                         Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -702,7 +753,7 @@
 }
 
 // TUNING: Unused by arm/arm64/x86/x86_64.  Remove when mips/mips64 mterps support batch updates.
-extern "C" bool  MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
+extern "C" size_t MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = shadow_frame->GetMethod();
   JValue* result = shadow_frame->GetResultRegister();
@@ -719,9 +770,9 @@
   }
 }
 
-extern "C" bool MterpMaybeDoOnStackReplacement(Thread* self,
-                                               ShadowFrame* shadow_frame,
-                                               int32_t offset)
+extern "C" size_t MterpMaybeDoOnStackReplacement(Thread* self,
+                                                 ShadowFrame* shadow_frame,
+                                                 int32_t offset)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = shadow_frame->GetMethod();
   JValue* result = shadow_frame->GetResultRegister();
diff --git a/runtime/interpreter/mterp/mterp.h b/runtime/interpreter/mterp/mterp.h
index 88e17bc..45ab98b 100644
--- a/runtime/interpreter/mterp/mterp.h
+++ b/runtime/interpreter/mterp/mterp.h
@@ -30,7 +30,12 @@
 
 void InitMterpTls(Thread* self);
 void CheckMterpAsmConstants();
-extern "C" bool MterpShouldSwitchInterpreters();
+
+// The return type should be 'bool' but our assembly stubs expect 'bool'
+// to be zero-extended to the whole register and that's broken on x86-64
+// as a 'bool' is returned in 'al' and the rest of 'rax' is garbage.
+// TODO: Fix mterp and stubs and revert this workaround. http://b/30232671
+extern "C" size_t MterpShouldSwitchInterpreters();
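The size_t/ssize_t return types exist purely so the result occupies a full register. A minimal C++ sketch of the underlying ABI issue, with hypothetical names rather than ART symbols: under the x86-64 calling convention a bool result only defines AL, so an assembly caller that tests all of RAX can see stale upper bits, while a word-sized integer makes that test well-defined.

    #include <cstddef>

    // Hypothetical example: a bool return defines only the low byte (AL);
    // the remaining 56 bits of RAX are unspecified for the caller.
    extern "C" size_t ShouldSwitchWide(bool flag) {
      // Widening to size_t guarantees RAX is exactly 0 or 1, so hand-written
      // assembly may use `testq %rax, %rax` instead of `testb %al, %al`.
      return flag ? 1u : 0u;
    }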
 
 // Poison value for TestExportPC.  If we segfault with this value, it means that a mterp
 // handler for a recent opcode failed to export the Dalvik PC prior to a possible exit from
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index df0b686..e318782 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -747,10 +747,9 @@
 .L_op_const_wide_16: /* 0x16 */
 /* File: arm64/op_const_wide_16.S */
     /* const-wide/16 vAA, #+BBBB */
-    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended
+    FETCH_S x0, 1                       // x0<- ssssssssssssBBBB (sign-extended)
     lsr     w3, wINST, #8               // w3<- AA
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    sbfm    x0, x0, 0, 31
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w3
     GOTO_OPCODE ip                      // jump to next instruction
@@ -760,13 +759,12 @@
 .L_op_const_wide_32: /* 0x17 */
 /* File: arm64/op_const_wide_32.S */
     /* const-wide/32 vAA, #+BBBBbbbb */
-    FETCH w0, 1                         // w0<- 0000bbbb (low)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (low)
     lsr     w3, wINST, #8               // w3<- AA
-    FETCH_S w2, 2                       // w2<- ssssBBBB (high)
+    FETCH_S x2, 2                       // x2<- ssssssssssssBBBB (high)
     FETCH_ADVANCE_INST 3                // advance rPC, load wINST
     GET_INST_OPCODE ip                  // extract opcode from wINST
-    orr     w0, w0, w2, lsl #16         // w0<- BBBBbbbb
-    sbfm    x0, x0, 0, 31
+    orr     x0, x0, x2, lsl #16         // x0<- ssssssssBBBBbbbb
     SET_VREG_WIDE x0, w3
     GOTO_OPCODE ip                      // jump to next instruction
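The const-wide handlers above now fetch the literal directly into an X register with sign extension instead of widening afterwards with sbfm. A C++ sketch of the intended semantics (illustrative only):

    #include <cstdint>

    // const-wide/16 vAA, #+BBBB: sign-extend the 16-bit literal to 64 bits.
    int64_t ConstWide16(uint16_t bbbb) {
      return static_cast<int64_t>(static_cast<int16_t>(bbbb));
    }

    // const-wide/32 vAA, #+BBBBbbbb: assemble a signed 32-bit literal from the
    // low and high code units, then sign-extend it to 64 bits.
    int64_t ConstWide32(uint16_t lo, uint16_t hi) {
      int32_t value = static_cast<int32_t>(
          static_cast<uint32_t>(lo) | (static_cast<uint32_t>(hi) << 16));
      return static_cast<int64_t>(value);
    }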
 
@@ -934,8 +932,7 @@
     mov       x3, xSELF                 // w3<- self
     bl        MterpInstanceOf           // (index, &obj, method, self)
     ldr       x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    lsr       w2, wINST, #8             // w2<- A+
-    and       w2, w2, #15               // w2<- A
+    ubfx      w2, wINST, #8, #4         // w2<- A
     PREFETCH_INST 2
     cbnz      x1, MterpException
     ADVANCE 2                           // advance rPC
@@ -1053,12 +1050,12 @@
 /* File: arm64/op_fill_array_data.S */
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
-    orr     w1, w0, w1, lsl #16         // w1<- BBBBbbbb
+    orr     x1, x0, x1, lsl #16         // x1<- ssssssssBBBBbbbb
     GET_VREG w0, w3                     // w0<- vAA (array object)
-    add     x1, xPC, w1, lsl #1         // w1<- PC + BBBBbbbb*2 (array data off.)
+    add     x1, xPC, x1, lsl #1         // x1<- PC + ssssssssBBBBbbbb*2 (array data off.)
     bl      MterpFillArrayData          // (obj, payload)
     cbz     w0, MterpPossibleException      // exception?
     FETCH_ADVANCE_INST 3                // advance rPC, load rINST
@@ -1143,14 +1140,14 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    orr     x0, x0, x1, lsl #16         // x0<- ssssssssBBBBbbbb
     GET_VREG w1, w3                     // w1<- vAA
-    add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    add     x0, xPC, x0, lsl #1         // x0<- PC + ssssssssBBBBbbbb*2
     bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
-    sbfm    xINST, x0, 0, 31
+    sxtw    xINST, w0
     b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
@@ -1168,14 +1165,14 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-    FETCH w0, 1                         // w0<- bbbb (lo)
-    FETCH w1, 2                         // w1<- BBBB (hi)
+    FETCH   w0, 1                       // x0<- 000000000000bbbb (lo)
+    FETCH_S x1, 2                       // x1<- ssssssssssssBBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
-    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    orr     x0, x0, x1, lsl #16         // x0<- ssssssssBBBBbbbb
     GET_VREG w1, w3                     // w1<- vAA
-    add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    add     x0, xPC, x0, lsl #1         // x0<- PC + ssssssssBBBBbbbb*2
     bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
-    sbfm    xINST, x0, 0, 31
+    sxtw    xINST, w0
     b       MterpCommonTakenBranchNoFlags
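The fill-array-data and switch handlers above now build the ±BBBBbbbb offset in a 64-bit register so that negative payload offsets address backwards correctly. A C++ sketch of the address computation (illustrative only):

    #include <cstdint>

    // packed-switch, sparse-switch and fill-array-data carry a signed 32-bit
    // offset measured in 16-bit code units, split across two code units.
    const uint16_t* PayloadAddress(const uint16_t* pc, uint16_t lo, uint16_t hi) {
      int32_t offset = static_cast<int32_t>(
          static_cast<uint32_t>(lo) | (static_cast<uint32_t>(hi) << 16));
      return pc + offset;  // uint16_t* arithmetic already scales by 2 bytes
    }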
 
 
@@ -1195,10 +1192,9 @@
     lsr     w3, w0, #8                  // w3<- CC
     GET_VREG s1, w2
     GET_VREG s2, w3
-    mov     w0, #-1
     fcmp s1, s2
-    csneg w0, w0, w0, le
-    csel w0, wzr, w0, eq
+    cset w0, ne
+    cneg w0, w0, lt
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w4                     // vAA<- w0
@@ -1221,10 +1217,9 @@
     lsr     w3, w0, #8                  // w3<- CC
     GET_VREG s1, w2
     GET_VREG s2, w3
-    mov     w0, #1
     fcmp s1, s2
-    csneg w0, w0, w0, pl
-    csel w0, wzr, w0, eq
+    cset w0, ne
+    cneg w0, w0, cc
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w4                     // vAA<- w0
@@ -1247,10 +1242,9 @@
     lsr     w3, w0, #8                  // w3<- CC
     GET_VREG_WIDE d1, w2
     GET_VREG_WIDE d2, w3
-    mov     w0, #-1
     fcmp d1, d2
-    csneg w0, w0, w0, le
-    csel w0, wzr, w0, eq
+    cset w0, ne
+    cneg w0, w0, lt
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w4                     // vAA<- w0
@@ -1273,10 +1267,9 @@
     lsr     w3, w0, #8                  // w3<- CC
     GET_VREG_WIDE d1, w2
     GET_VREG_WIDE d2, w3
-    mov     w0, #1
     fcmp d1, d2
-    csneg w0, w0, w0, pl
-    csel w0, wzr, w0, eq
+    cset w0, ne
+    cneg w0, w0, cc
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w4                     // vAA<- w0
@@ -1294,8 +1287,8 @@
     GET_VREG_WIDE x1, w2
     GET_VREG_WIDE x2, w3
     cmp     x1, x2
-    csinc   w0, wzr, wzr, eq
-    csneg   w0, w0, w0, ge
+    cset    w0, ne
+    cneg    w0, w0, lt
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
     SET_VREG w0, w4
     GET_INST_OPCODE ip                  // extract opcode from wINST
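The compare handlers above replace the mov/csneg/csel sequences with cset/cneg while still producing the Dalvik -1/0/+1 result, including the NaN bias implied by the opcode name. A C++ sketch of that result for the float variants (illustrative only):

    #include <cstdint>

    // cmpl-*: an unordered (NaN) comparison yields -1; cmpg-*: it yields +1.
    int32_t CmplFloat(float a, float b) {
      if (a > b) return 1;
      if (a == b) return 0;
      return -1;  // a < b, or either operand is NaN
    }

    int32_t CmpgFloat(float a, float b) {
      if (a < b) return -1;
      if (a == b) return 0;
      return 1;   // a > b, or either operand is NaN
    }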
@@ -3345,11 +3338,10 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG w0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
-    sbfm x0, x0, 0, 31                              // d0<- op
+    sxtw x0, w0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4           // vA<- d0
     GOTO_OPCODE ip                      // jump to next instruction
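This handler, and many hunks below, replace the two-instruction lsr/and sequence with a single ubfx to extract the 4-bit vA index. A C++ sketch of what is being extracted (illustrative only):

    #include <cstdint>

    // In a 16-bit vA, vB instruction word, A sits in bits [11:8] and B in
    // bits [15:12]; ubfx wX, wINST, #8, #4 pulls out A in one instruction.
    uint32_t ExtractVRegA(uint16_t inst) { return (inst >> 8) & 0xF; }
    uint32_t ExtractVRegB(uint16_t inst) { return inst >> 12; }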
@@ -3369,10 +3361,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG w0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     scvtf s0, w0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG s0, w4                // vA<- d0
@@ -3392,10 +3383,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG w0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     scvtf d0, w0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE d0, w4           // vA<- d0
@@ -3415,10 +3405,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
                                   // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG w0, w4                // vA<- d0
@@ -3438,10 +3427,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     scvtf s0, x0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG s0, w4                // vA<- d0
@@ -3461,10 +3449,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     scvtf d0, x0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE d0, w4           // vA<- d0
@@ -3485,10 +3472,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG s0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     fcvtzs w0, s0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG w0, w4                // vA<- d0
@@ -3508,10 +3494,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG s0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     fcvtzs x0, s0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4           // vA<- d0
@@ -3531,10 +3516,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG s0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     fcvt  d0, s0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE d0, w4           // vA<- d0
@@ -3554,10 +3538,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE d0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     fcvtzs w0, d0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG w0, w4                // vA<- d0
@@ -3577,10 +3560,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE d0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     fcvtzs x0, d0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4           // vA<- d0
@@ -3600,10 +3582,9 @@
      */
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w4, wINST, #8               // w4<- A+
+    ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE d0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    and     w4, w4, #15                 // w4<- A
     fcvt s0, d0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG s0, w4                // vA<- d0
@@ -4032,7 +4013,7 @@
     cbz     w1, common_errDivideByZero  // is second operand zero?
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     lsl     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -4071,7 +4052,7 @@
     cbz     w1, common_errDivideByZero  // is second operand zero?
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     asr     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -4110,7 +4091,7 @@
     cbz     w1, common_errDivideByZero  // is second operand zero?
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     lsr     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -4424,8 +4405,7 @@
     and      w1, w0, #255                // w1<- BB
     GET_VREG_WIDE x1, w1                // x1<- vBB
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and      x2, x2, #63                 // Mask low 6
-    lsl  x0, x1, x2                 // Do the shift.
+    lsl  x0, x1, x2                 // Do the shift. Only low 6 bits of x2 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w3                // vAA<- x0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -4450,8 +4430,7 @@
     and      w1, w0, #255                // w1<- BB
     GET_VREG_WIDE x1, w1                // x1<- vBB
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and      x2, x2, #63                 // Mask low 6
-    asr  x0, x1, x2                 // Do the shift.
+    asr  x0, x1, x2                 // Do the shift. Only low 6 bits of x2 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w3                // vAA<- x0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -4476,8 +4455,7 @@
     and      w1, w0, #255                // w1<- BB
     GET_VREG_WIDE x1, w1                // x1<- vBB
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and      x2, x2, #63                 // Mask low 6
-    lsr  x0, x1, x2                 // Do the shift.
+    lsr  x0, x1, x2                 // Do the shift. Only low 6 bits of x2 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w3                // vAA<- x0
     GOTO_OPCODE ip                      // jump to next instruction
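The shift handlers above drop the explicit AND masking because AArch64 variable shifts already reduce the count modulo the register width, which matches the Dalvik semantics. A C++ sketch of those semantics (illustrative only):

    #include <cstdint>

    // shl-int and friends use only the low 5 bits of the shift count;
    // shl-long and friends use only the low 6 bits. LSL/LSR/ASR on W/X
    // registers reduce the count mod 32/64, so the explicit AND was redundant.
    int32_t ShlInt(int32_t value, int32_t count) {
      return static_cast<int32_t>(static_cast<uint32_t>(value) << (count & 31));
    }
    int64_t ShlLong(int64_t value, int32_t count) {
      return static_cast<int64_t>(static_cast<uint64_t>(value) << (count & 63));
    }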
@@ -5089,7 +5067,7 @@
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     lsl     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -5125,7 +5103,7 @@
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     asr     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -5161,7 +5139,7 @@
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     lsr     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -5463,8 +5441,7 @@
     GET_VREG w1, w1                     // x1<- vB
     GET_VREG_WIDE x0, w2                // x0<- vA
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     x1, x1, #63                 // Mask low 6 bits.
-    lsl x0, x0, x1
+    lsl x0, x0, x1                  // Do the shift. Only low 6 bits of x1 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w2               // vAA<- result
     GOTO_OPCODE ip                      // jump to next instruction
@@ -5485,8 +5462,7 @@
     GET_VREG w1, w1                     // x1<- vB
     GET_VREG_WIDE x0, w2                // x0<- vA
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     x1, x1, #63                 // Mask low 6 bits.
-    asr x0, x0, x1
+    asr x0, x0, x1                  // Do the shift. Only low 6 bits of x1 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w2               // vAA<- result
     GOTO_OPCODE ip                      // jump to next instruction
@@ -5507,8 +5483,7 @@
     GET_VREG w1, w1                     // x1<- vB
     GET_VREG_WIDE x0, w2                // x0<- vA
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    and     x1, x1, #63                 // Mask low 6 bits.
-    lsr x0, x0, x1
+    lsr x0, x0, x1                  // Do the shift. Only low 6 bits of x1 are used.
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG_WIDE x0, w2               // vAA<- result
     GOTO_OPCODE ip                      // jump to next instruction
@@ -5529,8 +5504,7 @@
      */
     /* binop/2addr vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     fadd   s2, s0, s1                              // s2<- op
@@ -5554,8 +5528,7 @@
      */
     /* binop/2addr vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     fsub   s2, s0, s1                              // s2<- op
@@ -5579,8 +5552,7 @@
      */
     /* binop/2addr vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     fmul   s2, s0, s1                              // s2<- op
@@ -5604,8 +5576,7 @@
      */
     /* binop/2addr vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     fdiv   s2, s0, s1                              // s2<- op
@@ -5621,13 +5592,11 @@
 /* File: arm64/op_rem_float_2addr.S */
     /* rem vA, vB */
     lsr     w3, wINST, #12              // w3<- B
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     GET_VREG s1, w3
     GET_VREG s0, w9
     bl  fmodf
-    lsr     w9, wINST, #8               // w9<- A+
-    and     w9, w9, #15                 // w9<- A
+    ubfx    w9, wINST, #8, #4           // w9<- A
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG s0, w9
@@ -6381,7 +6350,7 @@
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     lsl     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -6417,7 +6386,7 @@
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     asr     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -6453,7 +6422,7 @@
     cbz     w1, common_errDivideByZero
     .endif
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
-    and     w1, w1, #31                           // optional op; may set condition codes
+                               // optional op; may set condition codes
     lsr     w0, w0, w1                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                // vAA<- w0
@@ -6471,8 +6440,7 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     x3, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     ldr   w0, [x3, x1]                // w0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     
@@ -6489,7 +6457,7 @@
     FETCH w4, 1                         // w4<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cbz     w3, common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     add     x4, x3, x4                  // create direct pointer
     ldr     x0, [x4]
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
@@ -6544,8 +6512,7 @@
     FETCH w3, 1                         // w3<- field byte offset
     GET_VREG w2, w2                     // w2<- fp[B], the object pointer
     ubfx    w0, wINST, #8, #4           // w0<- A
-    cmp     w2, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w2, common_errNullObject    // object was null
     GET_VREG_WIDE x0, w0                // x0-< fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
     add     x1, x2, x3                  // create a direct pointer
@@ -6710,8 +6677,7 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     x3, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     ldrb   w0, [x3, x1]                // w0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     
@@ -6731,8 +6697,7 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     x3, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     ldrsb   w0, [x3, x1]                // w0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     
@@ -6752,8 +6717,7 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     x3, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     ldrh   w0, [x3, x1]                // w0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     
@@ -6773,8 +6737,7 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     x3, #0                      // check object for null
-    beq     common_errNullObject        // object was null
+    cbz     w3, common_errNullObject    // object was null
     ldrsh   w0, [x3, x1]                // w0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     
@@ -11521,7 +11484,7 @@
 #if MTERP_LOGGING
     mov  x0, xSELF
     add  x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm x2, xINST, 0, 31
+    sxtw x2, wINST
     bl MterpLogOSR
 #endif
     mov  x0, #1                         // Signal normal return
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index 9e2dcea..2f7b854 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -965,8 +965,8 @@
 /* File: x86_64/op_fill_array_data.S */
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC
-    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
-    leaq    (rPC,%rcx,2), OUT_ARG1          # OUT_ARG1 <- PC + BBBBbbbb*2
+    movslq  2(rPC), %rcx                    # rcx <- ssssssssBBBBbbbb
+    leaq    (rPC,%rcx,2), OUT_ARG1          # OUT_ARG1 <- PC + ssssssssBBBBbbbb*2
     GET_VREG OUT_32_ARG0, rINSTq            # OUT_ARG0 <- vAA (array object)
     call    SYMBOL(MterpFillArrayData)      # (obj, payload)
     testb   %al, %al                        # 0 means an exception is thrown
@@ -1051,8 +1051,8 @@
  * for: packed-switch, sparse-switch
  */
     /* op vAA, +BBBB */
-    movslq  2(rPC), OUT_ARG0                # rcx <- BBBBbbbb
-    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
+    movslq  2(rPC), OUT_ARG0                # rcx <- ssssssssBBBBbbbb
+    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + ssssssssBBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL(MterpDoPackedSwitch)
     testl   %eax, %eax
@@ -1074,8 +1074,8 @@
  * for: packed-switch, sparse-switch
  */
     /* op vAA, +BBBB */
-    movslq  2(rPC), OUT_ARG0                # rcx <- BBBBbbbb
-    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
+    movslq  2(rPC), OUT_ARG0                # rcx <- ssssssssBBBBbbbb
+    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + ssssssssBBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL(MterpDoSparseSwitch)
     testl   %eax, %eax
diff --git a/runtime/interpreter/mterp/x86_64/op_fill_array_data.S b/runtime/interpreter/mterp/x86_64/op_fill_array_data.S
index 626bad4..7ea36a6 100644
--- a/runtime/interpreter/mterp/x86_64/op_fill_array_data.S
+++ b/runtime/interpreter/mterp/x86_64/op_fill_array_data.S
@@ -1,7 +1,7 @@
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC
-    movl    2(rPC), %ecx                    # ecx <- BBBBbbbb
-    leaq    (rPC,%rcx,2), OUT_ARG1          # OUT_ARG1 <- PC + BBBBbbbb*2
+    movslq  2(rPC), %rcx                    # rcx <- ssssssssBBBBbbbb
+    leaq    (rPC,%rcx,2), OUT_ARG1          # OUT_ARG1 <- PC + ssssssssBBBBbbbb*2
     GET_VREG OUT_32_ARG0, rINSTq            # OUT_ARG0 <- vAA (array object)
     call    SYMBOL(MterpFillArrayData)      # (obj, payload)
     testb   %al, %al                        # 0 means an exception is thrown
diff --git a/runtime/interpreter/mterp/x86_64/op_packed_switch.S b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
index fdf5a50..148552f 100644
--- a/runtime/interpreter/mterp/x86_64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
@@ -9,8 +9,8 @@
  * for: packed-switch, sparse-switch
  */
     /* op vAA, +BBBB */
-    movslq  2(rPC), OUT_ARG0                # rcx <- BBBBbbbb
-    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
+    movslq  2(rPC), OUT_ARG0                # rcx <- ssssssssBBBBbbbb
+    leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + ssssssssBBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL($func)
     testl   %eax, %eax
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index c047ba2..bb07fcb 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -25,12 +25,8 @@
 #include <sstream>
 
 #include "base/stringprintf.h"
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wshadow"
-#include "ScopedFd.h"
-#pragma GCC diagnostic pop
-
+#include "base/unix_file/fd_file.h"
+#include "os.h"
 #include "thread-inl.h"
 #include "utils.h"
 
@@ -301,7 +297,7 @@
     flags |= MAP_FIXED;
   }
 
-  ScopedFd fd(-1);
+  File fd;
 
   if (use_ashmem) {
     if (!kIsTargetBuild) {
@@ -320,8 +316,9 @@
     // prefixed "dalvik-".
     std::string debug_friendly_name("dalvik-");
     debug_friendly_name += name;
-    fd.reset(ashmem_create_region(debug_friendly_name.c_str(), page_aligned_byte_count));
-    if (fd.get() == -1) {
+    fd.Reset(ashmem_create_region(debug_friendly_name.c_str(), page_aligned_byte_count),
+             /* check_usage */ false);
+    if (fd.Fd() == -1) {
       *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", name, strerror(errno));
       return nullptr;
     }
@@ -335,7 +332,7 @@
                              page_aligned_byte_count,
                              prot,
                              flags,
-                             fd.get(),
+                             fd.Fd(),
                              0,
                              low_4gb);
   saved_errno = errno;
@@ -352,7 +349,7 @@
                                 page_aligned_byte_count,
                                 prot,
                                 flags,
-                                fd.get(),
+                                fd.Fd(),
                                 strerror(saved_errno));
     }
     return nullptr;
@@ -558,7 +555,7 @@
       return nullptr;
     }
   }
-  ScopedFd fd(int_fd);
+  File fd(int_fd, /* check_usage */ false);
 
   MEMORY_TOOL_MAKE_UNDEFINED(tail_base_begin, tail_base_size);
   // Unmap/map the tail region.
@@ -574,12 +571,12 @@
   // region. Note this isn't perfect as there's no way to prevent
   // other threads to try to take this memory region here.
   uint8_t* actual = reinterpret_cast<uint8_t*>(mmap(tail_base_begin, tail_base_size, tail_prot,
-                                              flags, fd.get(), 0));
+                                              flags, fd.Fd(), 0));
   if (actual == MAP_FAILED) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
     *error_msg = StringPrintf("anonymous mmap(%p, %zd, 0x%x, 0x%x, %d, 0) failed. See process "
                               "maps in the log.", tail_base_begin, tail_base_size, tail_prot, flags,
-                              fd.get());
+                              fd.Fd());
     return nullptr;
   }
   return new MemMap(tail_name, actual, tail_size, actual, tail_base_size, tail_prot, false);
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index b783a01..9a9fd87 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -457,6 +457,7 @@
   Class* declaring_class = method->GetDeclaringClass();
   DCHECK(declaring_class != nullptr) << PrettyClass(this);
   DCHECK(declaring_class->IsInterface()) << PrettyMethod(method);
+  DCHECK(!method->IsCopied());
   // TODO cache to improve lookup speed
   const int32_t iftable_count = GetIfTableCount();
   IfTable* iftable = GetIfTable();
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 9c77d38..1c31c57 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -748,21 +748,24 @@
   return nullptr;
 }
 
-ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache,
+ArtField* Class::FindStaticField(Thread* self,
+                                 Class* klass,
+                                 const DexCache* dex_cache,
                                  uint32_t dex_field_idx) {
-  for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
+  for (Class* k = klass; k != nullptr; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx);
     if (f != nullptr) {
       return f;
     }
-    // Wrap k incase it moves during GetDirectInterface.
+    // Though GetDirectInterface() should not cause thread suspension when called
+    // from here, it takes a Handle as an argument, so we need to wrap `k`.
+    ScopedAssertNoThreadSuspension ants(self, __FUNCTION__);
     StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Class> h_k(hs.NewHandleWrapper(&k));
+    Handle<mirror::Class> h_k(hs.NewHandle(k));
     // Is this field in any of this class' interfaces?
     for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) {
-      StackHandleScope<1> hs2(self);
-      Handle<mirror::Class> interface(hs2.NewHandle(GetDirectInterface(self, h_k, i)));
+      mirror::Class* interface = GetDirectInterface(self, h_k, i);
       f = FindStaticField(self, interface, dex_cache, dex_field_idx);
       if (f != nullptr) {
         return f;
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index f044b59..9be9f01 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1091,7 +1091,9 @@
 
   // Finds the given static field in this class or superclass, only searches classes that
   // have the same dex cache.
-  static ArtField* FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache,
+  static ArtField* FindStaticField(Thread* self,
+                                   Class* klass,
+                                   const DexCache* dex_cache,
                                    uint32_t dex_field_idx)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 46be5e6..b2349fc 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -378,13 +378,13 @@
   // TODO: Verify the dex location is well formed, and throw an IOException if
   // not?
 
-  OatFileAssistant oat_file_assistant(filename, target_instruction_set, profile_changed, false);
+  OatFileAssistant oat_file_assistant(filename, target_instruction_set, false);
 
   // Always treat elements of the bootclasspath as up-to-date.
   if (oat_file_assistant.IsInBootClassPath()) {
     return OatFileAssistant::kNoDexOptNeeded;
   }
-  return oat_file_assistant.GetDexOptNeeded(filter);
+  return oat_file_assistant.GetDexOptNeeded(filter, profile_changed);
 }
 
 static jstring DexFile_getDexFileStatus(JNIEnv* env,
@@ -411,7 +411,6 @@
   }
 
   OatFileAssistant oat_file_assistant(filename.c_str(), target_instruction_set,
-                                      false /* profile_changed */,
                                       false /* load_executable */);
 
   std::ostringstream status;
@@ -486,7 +485,7 @@
     return JNI_FALSE;
   }
 
-  OatFileAssistant oat_file_assistant(filename, kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(filename, kRuntimeISA, false);
   return oat_file_assistant.IsUpToDate() ? JNI_FALSE : JNI_TRUE;
 }
 
@@ -553,6 +552,41 @@
   return oat_file != nullptr;
 }
 
+static jstring DexFile_getDexFileOutputPath(JNIEnv* env,
+                                            jclass,
+                                            jstring javaFilename,
+                                            jstring javaInstructionSet) {
+  ScopedUtfChars filename(env, javaFilename);
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  ScopedUtfChars instruction_set(env, javaInstructionSet);
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  const InstructionSet target_instruction_set = GetInstructionSetFromString(
+      instruction_set.c_str());
+  if (target_instruction_set == kNone) {
+    ScopedLocalRef<jclass> iae(env, env->FindClass("java/lang/IllegalArgumentException"));
+    std::string message(StringPrintf("Instruction set %s is invalid.", instruction_set.c_str()));
+    env->ThrowNew(iae.get(), message.c_str());
+    return nullptr;
+  }
+
+  OatFileAssistant oat_file_assistant(filename.c_str(),
+                                      target_instruction_set,
+                                      false /* load_executable */);
+
+  std::unique_ptr<OatFile> best_oat_file = oat_file_assistant.GetBestOatFile();
+  if (best_oat_file == nullptr) {
+    return nullptr;
+  }
+
+  return env->NewStringUTF(best_oat_file->GetLocation().c_str());
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)Z"),
   NATIVE_METHOD(DexFile,
@@ -580,6 +614,8 @@
                 "(Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isBackedByOatFile, "(Ljava/lang/Object;)Z"),
   NATIVE_METHOD(DexFile, getDexFileStatus,
+                "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(DexFile, getDexFileOutputPath,
                 "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;")
 };
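Taken together, the changes in this file move profile_changed out of the OatFileAssistant constructor and into the individual query, and add a native hook that reports where the best oat file lives. A rough sketch of the resulting call pattern; the local names (filename, filter, profile_changed) are placeholders borrowed from the functions above:

// profile_changed is now per-query state rather than constructor state.
OatFileAssistant oat_file_assistant(filename.c_str(),
                                    target_instruction_set,
                                    false /* load_executable */);
OatFileAssistant::DexOptNeeded needed =
    oat_file_assistant.GetDexOptNeeded(filter, profile_changed);

// getDexFileOutputPath simply reports the location of the best oat file.
std::unique_ptr<OatFile> best_oat_file = oat_file_assistant.GetBestOatFile();
jstring path = (best_oat_file == nullptr)
    ? nullptr
    : env->NewStringUTF(best_oat_file->GetLocation().c_str());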
 
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 79b18aa..d987f65 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -342,7 +342,7 @@
     return;
   }
   if (is_static) {
-    field = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx);
+    field = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx);
   } else {
     field = klass->FindInstanceField(dex_cache.Get(), field_idx);
   }
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 1aa789f..198a52e 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -46,6 +46,16 @@
   if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
     PLOG(ERROR) << "prctl(PR_SET_DUMPABLE) failed for pid " << getpid();
   }
+
+  // Even if Yama is on, a non-privileged native debugger should
+  // be able to attach to the debuggable app.
+  if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == -1) {
+    // If Yama is off, prctl(PR_SET_PTRACER) returns EINVAL; don't log in this
+    // case since it's expected behaviour.
+    if (errno != EINVAL) {
+      PLOG(ERROR) << "prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) failed for pid " << getpid();
+    }
+  }
 #endif
   // We don't want core dumps, though, so set the core dump size to 0.
   rlimit rl;
@@ -168,12 +178,17 @@
     // Only restart if it was streaming mode.
     // TODO: Expose buffer size, so we can also do file mode.
     if (output_mode == Trace::TraceOutputMode::kStreaming) {
-      const char* proc_name_cutils = get_process_name();
+      static constexpr size_t kMaxProcessNameLength = 100;
+      char name_buf[kMaxProcessNameLength] = {};
+      int rc = pthread_getname_np(pthread_self(), name_buf, kMaxProcessNameLength);
       std::string proc_name;
-      if (proc_name_cutils != nullptr) {
-        proc_name = proc_name_cutils;
+
+      if (rc == 0) {
+        // On success, use the pthread name.
+        proc_name = name_buf;
       }
-      if (proc_name_cutils == nullptr || proc_name == "zygote" || proc_name == "zygote64") {
+
+      if (proc_name.empty() || proc_name == "zygote" || proc_name == "zygote64") {
         // Either no process name, or the name hasn't been changed, yet. Just use pid.
         pid_t pid = getpid();
         proc_name = StringPrintf("%u", static_cast<uint32_t>(pid));
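As a standalone illustration of the get_process_name() replacement above, a self-contained sketch under the same assumptions (pthread_getname_np is the POSIX-style call returning 0 on success; Linux caps thread names well below the 100-byte buffer):

#include <pthread.h>
#include <unistd.h>
#include <string>

// Fetch the current thread's name, falling back to the pid when there is no
// usable name, mirroring the streaming-trace restart logic above.
static std::string GetTraceProcName() {
  char name_buf[100] = {};
  std::string proc_name;
  if (pthread_getname_np(pthread_self(), name_buf, sizeof(name_buf)) == 0) {
    proc_name = name_buf;
  }
  if (proc_name.empty() || proc_name == "zygote" || proc_name == "zygote64") {
    proc_name = std::to_string(static_cast<uint32_t>(getpid()));
  }
  return proc_name;
}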
diff --git a/runtime/oat.h b/runtime/oat.h
index 6243660..9b8f545 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '8', '2', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '8', '4', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 218c490..8700a90 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -39,7 +39,6 @@
 #include "os.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
-#include "ScopedFd.h"
 #include "utils.h"
 
 namespace art {
@@ -64,17 +63,15 @@
 
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const InstructionSet isa,
-                                   bool profile_changed,
                                    bool load_executable)
-    : OatFileAssistant(dex_location, nullptr, isa, profile_changed, load_executable)
+    : OatFileAssistant(dex_location, nullptr, isa, load_executable)
 { }
 
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const char* oat_location,
                                    const InstructionSet isa,
-                                   bool profile_changed,
                                    bool load_executable)
-    : isa_(isa), profile_changed_(profile_changed), load_executable_(load_executable) {
+    : isa_(isa), load_executable_(load_executable) {
   CHECK(dex_location != nullptr) << "OatFileAssistant: null dex location";
   dex_location_.assign(dex_location);
 
@@ -84,12 +81,18 @@
     load_executable_ = false;
   }
 
-  // If the user gave a target oat location, save that as the cached oat
-  // location now so we won't try to construct the default location later.
+  std::string error_msg;
+  if (!DexLocationToOdexFilename(dex_location_, isa_, &odex_file_name_, &error_msg)) {
+    LOG(WARNING) << "Failed to determine odex file name: " << error_msg;
+  }
+
   if (oat_location != nullptr) {
-    cached_oat_file_name_ = std::string(oat_location);
-    cached_oat_file_name_attempted_ = true;
-    cached_oat_file_name_found_ = true;
+    oat_file_name_ = std::string(oat_location);
+  } else {
+    if (!DexLocationToOatFilename(dex_location_, isa_, &oat_file_name_, &error_msg)) {
+      LOG(WARNING) << "Failed to determine oat file name for dex location "
+        << dex_location_ << ": " << error_msg;
+    }
   }
 }
 
@@ -134,29 +137,43 @@
   return true;
 }
 
-bool OatFileAssistant::OatFileCompilerFilterIsOkay(CompilerFilter::Filter target) {
+static bool GivenOatFileCompilerFilterIsOkay(const OatFile& oat_file,
+                                             CompilerFilter::Filter target,
+                                             bool profile_changed) {
+  CompilerFilter::Filter current = oat_file.GetCompilerFilter();
+
+  if (profile_changed && CompilerFilter::DependsOnProfile(current)) {
+    VLOG(oat) << "Compiler filter not okay because the profile changed";
+    return false;
+  }
+  return CompilerFilter::IsAsGoodAs(current, target);
+}
+
+bool OatFileAssistant::OatFileCompilerFilterIsOkay(CompilerFilter::Filter target,
+                                                   bool profile_changed) {
   const OatFile* oat_file = GetOatFile();
   if (oat_file != nullptr) {
-    CompilerFilter::Filter current = oat_file->GetCompilerFilter();
-    return CompilerFilter::IsAsGoodAs(current, target);
+    return GivenOatFileCompilerFilterIsOkay(*oat_file, target, profile_changed);
   }
   return false;
 }
 
-bool OatFileAssistant::OdexFileCompilerFilterIsOkay(CompilerFilter::Filter target) {
+bool OatFileAssistant::OdexFileCompilerFilterIsOkay(CompilerFilter::Filter target,
+                                                    bool profile_changed) {
   const OatFile* odex_file = GetOdexFile();
   if (odex_file != nullptr) {
-    CompilerFilter::Filter current = odex_file->GetCompilerFilter();
-    return CompilerFilter::IsAsGoodAs(current, target);
+    return GivenOatFileCompilerFilterIsOkay(*odex_file, target, profile_changed);
   }
   return false;
 }
 
-OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded(CompilerFilter::Filter target) {
+OatFileAssistant::DexOptNeeded
+OatFileAssistant::GetDexOptNeeded(CompilerFilter::Filter target,
+                                  bool profile_changed) {
   bool compilation_desired = CompilerFilter::IsBytecodeCompilationEnabled(target);
 
   // See if the oat file is in good shape as is.
-  bool oat_okay = OatFileCompilerFilterIsOkay(target);
+  bool oat_okay = OatFileCompilerFilterIsOkay(target, profile_changed);
   if (oat_okay) {
     if (compilation_desired) {
       if (OatFileIsUpToDate()) {
@@ -170,7 +187,7 @@
   }
 
   // See if the odex file is in good shape as is.
-  bool odex_okay = OdexFileCompilerFilterIsOkay(target);
+  bool odex_okay = OdexFileCompilerFilterIsOkay(target, profile_changed);
   if (odex_okay) {
     if (compilation_desired) {
       if (OdexFileIsUpToDate()) {
@@ -225,13 +242,13 @@
 }
 
 OatFileAssistant::ResultOfAttemptToUpdate
-OatFileAssistant::MakeUpToDate(std::string* error_msg) {
+OatFileAssistant::MakeUpToDate(bool profile_changed, std::string* error_msg) {
   CompilerFilter::Filter target;
   if (!GetRuntimeCompilerFilterOption(&target, error_msg)) {
     return kUpdateNotAttempted;
   }
 
-  switch (GetDexOptNeeded(target)) {
+  switch (GetDexOptNeeded(target, profile_changed)) {
     case kNoDexOptNeeded: return kUpdateSucceeded;
     case kDex2OatNeeded: return GenerateOatFile(error_msg);
     case kPatchOatNeeded: return RelocateOatFile(OdexFileName(), error_msg);
@@ -341,19 +358,7 @@
 }
 
 const std::string* OatFileAssistant::OdexFileName() {
-  if (!cached_odex_file_name_attempted_) {
-    cached_odex_file_name_attempted_ = true;
-
-    std::string error_msg;
-    cached_odex_file_name_found_ = DexFilenameToOdexFilename(
-        dex_location_, isa_, &cached_odex_file_name_, &error_msg);
-    if (!cached_odex_file_name_found_) {
-      // If we can't figure out the odex file, we treat it as if the odex
-      // file was inaccessible.
-      LOG(WARNING) << "Failed to determine odex file name: " << error_msg;
-    }
-  }
-  return cached_odex_file_name_found_ ? &cached_odex_file_name_ : nullptr;
+  return odex_file_name_.empty() ? nullptr : &odex_file_name_;
 }
 
 bool OatFileAssistant::OdexFileExists() {
@@ -361,26 +366,20 @@
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::OdexFileStatus() {
-  if (OdexFileIsOutOfDate()) {
-    return kOatOutOfDate;
+  if (!odex_file_status_attempted_) {
+    odex_file_status_attempted_ = true;
+    const OatFile* odex_file = GetOdexFile();
+    if (odex_file == nullptr) {
+      cached_odex_file_status_ = kOatOutOfDate;
+    } else {
+      cached_odex_file_status_ = GivenOatFileStatus(*odex_file);
+    }
   }
-  if (OdexFileIsUpToDate()) {
-    return kOatUpToDate;
-  }
-  return kOatNeedsRelocation;
+  return cached_odex_file_status_;
 }
 
 bool OatFileAssistant::OdexFileIsOutOfDate() {
-  if (!odex_file_is_out_of_date_attempted_) {
-    odex_file_is_out_of_date_attempted_ = true;
-    const OatFile* odex_file = GetOdexFile();
-    if (odex_file == nullptr) {
-      cached_odex_file_is_out_of_date_ = true;
-    } else {
-      cached_odex_file_is_out_of_date_ = GivenOatFileIsOutOfDate(*odex_file);
-    }
-  }
-  return cached_odex_file_is_out_of_date_;
+  return OdexFileStatus() == kOatOutOfDate;
 }
 
 bool OatFileAssistant::OdexFileNeedsRelocation() {
@@ -388,16 +387,7 @@
 }
 
 bool OatFileAssistant::OdexFileIsUpToDate() {
-  if (!odex_file_is_up_to_date_attempted_) {
-    odex_file_is_up_to_date_attempted_ = true;
-    const OatFile* odex_file = GetOdexFile();
-    if (odex_file == nullptr) {
-      cached_odex_file_is_up_to_date_ = false;
-    } else {
-      cached_odex_file_is_up_to_date_ = GivenOatFileIsUpToDate(*odex_file);
-    }
-  }
-  return cached_odex_file_is_up_to_date_;
+  return OdexFileStatus() == kOatUpToDate;
 }
 
 CompilerFilter::Filter OatFileAssistant::OdexFileCompilerFilter() {
@@ -406,7 +396,8 @@
 
   return odex_file->GetCompilerFilter();
 }
-std::string OatFileAssistant::ArtFileName(const OatFile* oat_file) const {
+
+static std::string ArtFileName(const OatFile* oat_file) {
   const std::string oat_file_location = oat_file->GetLocation();
   // Replace extension with .art
   const size_t last_ext = oat_file_location.find_last_of('.');
@@ -418,26 +409,7 @@
 }
 
 const std::string* OatFileAssistant::OatFileName() {
-  if (!cached_oat_file_name_attempted_) {
-    cached_oat_file_name_attempted_ = true;
-
-    // Compute the oat file name from the dex location.
-    // TODO: The oat file assistant should be the definitive place for
-    // determining the oat file name from the dex location, not
-    // GetDalvikCacheFilename.
-    std::string cache_dir = StringPrintf("%s%s",
-        DalvikCacheDirectory().c_str(), GetInstructionSetString(isa_));
-    std::string error_msg;
-    cached_oat_file_name_found_ = GetDalvikCacheFilename(dex_location_.c_str(),
-        cache_dir.c_str(), &cached_oat_file_name_, &error_msg);
-    if (!cached_oat_file_name_found_) {
-      // If we can't determine the oat file name, we treat the oat file as
-      // inaccessible.
-      LOG(WARNING) << "Failed to determine oat file name for dex location "
-        << dex_location_ << ": " << error_msg;
-    }
-  }
-  return cached_oat_file_name_found_ ? &cached_oat_file_name_ : nullptr;
+  return oat_file_name_.empty() ? nullptr : &oat_file_name_;
 }
 
 bool OatFileAssistant::OatFileExists() {
@@ -445,26 +417,20 @@
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::OatFileStatus() {
-  if (OatFileIsOutOfDate()) {
-    return kOatOutOfDate;
+  if (!oat_file_status_attempted_) {
+    oat_file_status_attempted_ = true;
+    const OatFile* oat_file = GetOatFile();
+    if (oat_file == nullptr) {
+      cached_oat_file_status_ = kOatOutOfDate;
+    } else {
+      cached_oat_file_status_ = GivenOatFileStatus(*oat_file);
+    }
   }
-  if (OatFileIsUpToDate()) {
-    return kOatUpToDate;
-  }
-  return kOatNeedsRelocation;
+  return cached_oat_file_status_;
 }
 
 bool OatFileAssistant::OatFileIsOutOfDate() {
-  if (!oat_file_is_out_of_date_attempted_) {
-    oat_file_is_out_of_date_attempted_ = true;
-    const OatFile* oat_file = GetOatFile();
-    if (oat_file == nullptr) {
-      cached_oat_file_is_out_of_date_ = true;
-    } else {
-      cached_oat_file_is_out_of_date_ = GivenOatFileIsOutOfDate(*oat_file);
-    }
-  }
-  return cached_oat_file_is_out_of_date_;
+  return OatFileStatus() == kOatOutOfDate;
 }
 
 bool OatFileAssistant::OatFileNeedsRelocation() {
@@ -472,16 +438,7 @@
 }
 
 bool OatFileAssistant::OatFileIsUpToDate() {
-  if (!oat_file_is_up_to_date_attempted_) {
-    oat_file_is_up_to_date_attempted_ = true;
-    const OatFile* oat_file = GetOatFile();
-    if (oat_file == nullptr) {
-      cached_oat_file_is_up_to_date_ = false;
-    } else {
-      cached_oat_file_is_up_to_date_ = GivenOatFileIsUpToDate(*oat_file);
-    }
-  }
-  return cached_oat_file_is_up_to_date_;
+  return OatFileStatus() == kOatUpToDate;
 }
 
 CompilerFilter::Filter OatFileAssistant::OatFileCompilerFilter() {
@@ -492,19 +449,6 @@
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& file) {
-  // TODO: This could cause GivenOatFileIsOutOfDate to be called twice, which
-  // is more work than we need to do. If performance becomes a concern, and
-  // this method is actually called, this should be fixed.
-  if (GivenOatFileIsOutOfDate(file)) {
-    return kOatOutOfDate;
-  }
-  if (GivenOatFileIsUpToDate(file)) {
-    return kOatUpToDate;
-  }
-  return kOatNeedsRelocation;
-}
-
-bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) {
   // Verify the dex checksum.
   // Note: GetOatDexFile will return null if the dex checksum doesn't match
   // what we provide, which verifies the primary dex checksum for us.
@@ -512,7 +456,7 @@
   const OatFile::OatDexFile* oat_dex_file = file.GetOatDexFile(
       dex_location_.c_str(), dex_checksum_pointer, false);
   if (oat_dex_file == nullptr) {
-    return true;
+    return kOatOutOfDate;
   }
 
   // Verify the dex checksums for any secondary multidex files
@@ -537,7 +481,7 @@
           << secondary_dex_location
           << ". Expected: " << expected_secondary_checksum
           << ", Actual: " << actual_secondary_checksum;
-        return true;
+        return kOatOutOfDate;
       }
     } else {
       // If we can't get the checksum for the secondary location, we assume
@@ -557,7 +501,7 @@
       VLOG(oat) << "No image for oat image checksum to match against.";
 
       if (HasOriginalDexFiles()) {
-        return true;
+        return kOatOutOfDate;
       }
 
       // If there is no original dex file to fall back to, grudgingly accept
@@ -571,45 +515,18 @@
     } else if (file.GetOatHeader().GetImageFileLocationOatChecksum()
         != GetCombinedImageChecksum()) {
       VLOG(oat) << "Oat image checksum does not match image checksum.";
-      return true;
+      return kOatOutOfDate;
     }
   } else {
     VLOG(oat) << "Image checksum test skipped for compiler filter " << current_compiler_filter;
   }
 
-  // Verify the profile hasn't changed recently.
-  // TODO: Move this check to OatFileCompilerFilterIsOkay? Nothing bad should
-  // happen if we use an oat file compiled with an out-of-date profile.
-  if (CompilerFilter::DependsOnProfile(current_compiler_filter)) {
-    if (profile_changed_) {
-      VLOG(oat) << "The profile has changed recently.";
-      return true;
-    }
-  } else {
-    VLOG(oat) << "Profile check skipped for compiler filter " << current_compiler_filter;
-  }
-
-  // Everything looks good; the dex file is not out of date.
-  return false;
-}
-
-bool OatFileAssistant::GivenOatFileNeedsRelocation(const OatFile& file) {
-  return GivenOatFileStatus(file) == kOatNeedsRelocation;
-}
-
-bool OatFileAssistant::GivenOatFileIsUpToDate(const OatFile& file) {
-  if (GivenOatFileIsOutOfDate(file)) {
-    return false;
-  }
-
-  CompilerFilter::Filter current_compiler_filter = file.GetCompilerFilter();
-
   if (CompilerFilter::IsBytecodeCompilationEnabled(current_compiler_filter)) {
     if (!file.IsPic()) {
       const ImageInfo* image_info = GetImageInfo();
       if (image_info == nullptr) {
         VLOG(oat) << "No image to check oat relocation against.";
-        return false;
+        return kOatNeedsRelocation;
       }
 
       // Verify the oat_data_begin recorded for the image in the oat file matches
@@ -621,7 +538,7 @@
           ": Oat file image oat_data_begin (" << oat_data_begin << ")"
           << " does not match actual image oat_data_begin ("
           << image_info->oat_data_begin << ")";
-        return false;
+        return kOatNeedsRelocation;
       }
 
       // Verify the oat_patch_delta recorded for the image in the oat file matches
@@ -632,7 +549,7 @@
           ": Oat file image patch delta (" << oat_patch_delta << ")"
           << " does not match actual image patch delta ("
           << image_info->patch_delta << ")";
-        return false;
+        return kOatNeedsRelocation;
       }
     } else {
       // Oat files compiled in PIC mode do not require relocation.
@@ -641,7 +558,7 @@
   } else {
     VLOG(oat) << "Oat relocation test skipped for compiler filter " << current_compiler_filter;
   }
-  return true;
+  return kOatUpToDate;
 }
 
 OatFileAssistant::ResultOfAttemptToUpdate
@@ -812,8 +729,10 @@
   return Exec(argv, error_msg);
 }
 
-bool OatFileAssistant::DexFilenameToOdexFilename(const std::string& location,
-    InstructionSet isa, std::string* odex_filename, std::string* error_msg) {
+bool OatFileAssistant::DexLocationToOdexFilename(const std::string& location,
+                                                 InstructionSet isa,
+                                                 std::string* odex_filename,
+                                                 std::string* error_msg) {
   CHECK(odex_filename != nullptr);
   CHECK(error_msg != nullptr);
 
@@ -852,9 +771,12 @@
   return true;
 }
 
-std::string OatFileAssistant::DalvikCacheDirectory() {
-  // Note: We don't cache this, because it will only be called once by
-  // OatFileName.
+bool OatFileAssistant::DexLocationToOatFilename(const std::string& location,
+                                                InstructionSet isa,
+                                                std::string* oat_filename,
+                                                std::string* error_msg) {
+  CHECK(oat_filename != nullptr);
+  CHECK(error_msg != nullptr);
 
   // TODO: The work done in GetDalvikCache is overkill for what we need.
   // Ideally a new API for getting the DalvikCacheDirectory the way we want
@@ -862,12 +784,16 @@
   // of the GetDalvikCache family of functions. Until such an API is in place,
   // we use GetDalvikCache to avoid duplicating the logic for determining the
   // dalvik cache directory.
-  std::string result;
-  bool have_android_data;
-  bool dalvik_cache_exists;
-  bool is_global_cache;
-  GetDalvikCache("", false, &result, &have_android_data, &dalvik_cache_exists, &is_global_cache);
-  return result;
+  std::string dalvik_cache_dir;
+  bool ignored;
+  GetDalvikCache("", false, &dalvik_cache_dir, &ignored, &ignored, &ignored);
+
+  // TODO: The oat file assistant should be the definitive place for
+  // determining the oat file name from the dex location, not
+  // GetDalvikCacheFilename.
+  std::string cache_dir = StringPrintf("%s%s",
+      dalvik_cache_dir.c_str(), GetInstructionSetString(isa));
+  return GetDalvikCacheFilename(location.c_str(), cache_dir.c_str(), oat_filename, error_msg);
 }
 
 std::string OatFileAssistant::ImageLocation() {
@@ -946,8 +872,7 @@
 void OatFileAssistant::ClearOdexFileCache() {
   odex_file_load_attempted_ = false;
   cached_odex_file_.reset();
-  odex_file_is_out_of_date_attempted_ = false;
-  odex_file_is_up_to_date_attempted_ = false;
+  odex_file_status_attempted_ = false;
 }
 
 const OatFile* OatFileAssistant::GetOatFile() {
@@ -987,8 +912,7 @@
 void OatFileAssistant::ClearOatFileCache() {
   oat_file_load_attempted_ = false;
   cached_oat_file_.reset();
-  oat_file_is_out_of_date_attempted_ = false;
-  oat_file_is_up_to_date_attempted_ = false;
+  oat_file_status_attempted_ = false;
 }
 
 const OatFileAssistant::ImageInfo* OatFileAssistant::GetImageInfo() {
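The caching rewrite in this file follows one shape throughout: compute an OatStatus once, cache it, and derive the boolean predicates from it. A sketch of that shape with hypothetical names (ExampleStatus, GetFile, status_attempted_, cached_status_ do not exist in the tree):

// One lazily computed OatStatus replaces the former pair of
// *_is_out_of_date_ / *_is_up_to_date_ caches.
OatStatus ExampleStatus() {
  if (!status_attempted_) {
    status_attempted_ = true;
    const OatFile* file = GetFile();  // GetOdexFile() or GetOatFile()
    cached_status_ = (file == nullptr) ? kOatOutOfDate : GivenOatFileStatus(*file);
  }
  return cached_status_;
}
// IsOutOfDate()     == (ExampleStatus() == kOatOutOfDate)
// IsUpToDate()      == (ExampleStatus() == kOatUpToDate)
// NeedsRelocation() == (ExampleStatus() == kOatNeedsRelocation)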
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index d55e373..04bd20c 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -101,14 +101,10 @@
   // device. For example, on an arm device, use arm or arm64. An oat file can
   // be loaded executable only if the ISA matches the current runtime.
   //
-  // profile_changed should be true if the profile has recently changed
-  // for this dex location.
-  //
   // load_executable should be true if the caller intends to try and load
   // executable code for this dex location.
   OatFileAssistant(const char* dex_location,
                    const InstructionSet isa,
-                   bool profile_changed,
                    bool load_executable);
 
   // Constructs an OatFileAssistant, providing an explicit target oat_location
@@ -116,7 +112,6 @@
   OatFileAssistant(const char* dex_location,
                    const char* oat_location,
                    const InstructionSet isa,
-                   bool profile_changed,
                    bool load_executable);
 
   ~OatFileAssistant();
@@ -145,8 +140,10 @@
 
   // Return what action needs to be taken to produce up-to-date code for this
   // dex location that is at least as good as an oat file generated with the
-  // given compiler filter.
-  DexOptNeeded GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter);
+  // given compiler filter. profile_changed should be true to indicate the
+  // profile has recently changed for this dex location.
+  DexOptNeeded GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter,
+                               bool profile_changed = false);
 
   // Returns true if there is up-to-date code for this dex location,
   // irrespective of the compiler filter of the up-to-date code.
@@ -164,11 +161,15 @@
 
   // Attempts to generate or relocate the oat file as needed to make it up to
   // date based on the current runtime and compiler options.
+  // profile_changed should be true to indicate the profile has recently
+  // changed for this dex location.
+  //
+  // Returns the result of attempting to update the code.
   //
   // If the result is not kUpdateSucceeded, the value of error_msg will be set
   // to a string describing why there was a failure or the update was not
   // attempted. error_msg must not be null.
-  ResultOfAttemptToUpdate MakeUpToDate(std::string* error_msg);
+  ResultOfAttemptToUpdate MakeUpToDate(bool profile_changed, std::string* error_msg);
 
   // Returns an oat file that can be used for loading dex files.
   // Returns null if no suitable oat file was found.
@@ -179,7 +180,7 @@
   std::unique_ptr<OatFile> GetBestOatFile();
 
   // Open and returns an image space associated with the oat file.
-  gc::space::ImageSpace* OpenImageSpace(const OatFile* oat_file);
+  static gc::space::ImageSpace* OpenImageSpace(const OatFile* oat_file);
 
   // Loads the dex files in the given oat file for the given dex location.
   // The oat file should be up to date for the given dex location.
@@ -238,15 +239,9 @@
   // |OatFileExists() == true|.
   CompilerFilter::Filter OatFileCompilerFilter();
 
-  // Return image file name. Does not cache since it relies on the oat file.
-  std::string ArtFileName(const OatFile* oat_file) const;
-
-  // These methods return the status for a given opened oat file with respect
-  // to the dex location.
+  // Return the status for a given opened oat file with respect to the dex
+  // location.
   OatStatus GivenOatFileStatus(const OatFile& file);
-  bool GivenOatFileIsOutOfDate(const OatFile& file);
-  bool GivenOatFileNeedsRelocation(const OatFile& file);
-  bool GivenOatFileIsUpToDate(const OatFile& file);
 
   // Generates the oat file by relocation from the named input file.
   // This does not check the current status before attempting to relocate the
@@ -282,10 +277,24 @@
   // Constructs the odex file name for the given dex location.
   // Returns true on success, in which case odex_filename is set to the odex
   // file name.
-  // Returns false on error, in which case error_msg describes the error.
+  // Returns false on error, in which case error_msg describes the error and
+  // odex_filename is not changed.
   // Neither odex_filename nor error_msg may be null.
-  static bool DexFilenameToOdexFilename(const std::string& location,
-      InstructionSet isa, std::string* odex_filename, std::string* error_msg);
+  static bool DexLocationToOdexFilename(const std::string& location,
+                                        InstructionSet isa,
+                                        std::string* odex_filename,
+                                        std::string* error_msg);
+
+  // Constructs the oat file name for the given dex location.
+  // Returns true on success, in which case oat_filename is set to the oat
+  // file name.
+  // Returns false on error, in which case error_msg describes the error and
+  // oat_filename is not changed.
+  // Neither oat_filename nor error_msg may be null.
+  static bool DexLocationToOatFilename(const std::string& location,
+                                       InstructionSet isa,
+                                       std::string* oat_filename,
+                                       std::string* error_msg);
 
   static uint32_t CalculateCombinedImageChecksum(InstructionSet isa = kRuntimeISA);
 
@@ -297,11 +306,6 @@
     std::string location;
   };
 
-  // Returns the path to the dalvik cache directory.
-  // Does not check existence of the cache or try to create it.
-  // Includes the trailing slash.
-  // Returns an empty string if we can't get the dalvik cache directory path.
-  std::string DalvikCacheDirectory();
 
   // Returns the current image location.
   // Returns an empty string if the image location could not be retrieved.
@@ -324,8 +328,9 @@
   const OatFile* GetOdexFile();
 
   // Returns true if the compiler filter used to generate the odex file is at
-  // least as good as the given target filter.
-  bool OdexFileCompilerFilterIsOkay(CompilerFilter::Filter target);
+  // least as good as the given target filter. profile_changed should be true
+  // to indicate the profile has recently changed for this dex location.
+  bool OdexFileCompilerFilterIsOkay(CompilerFilter::Filter target, bool profile_changed);
 
   // Returns true if the odex file is opened executable.
   bool OdexFileIsExecutable();
@@ -343,8 +348,9 @@
   const OatFile* GetOatFile();
 
   // Returns true if the compiler filter used to generate the oat file is at
-  // least as good as the given target filter.
-  bool OatFileCompilerFilterIsOkay(CompilerFilter::Filter target);
+  // least as good as the given target filter. profile_changed should be true
+  // to indicate the profile has recently changed for this dex location.
+  bool OatFileCompilerFilterIsOkay(CompilerFilter::Filter target, bool profile_changed);
 
   // Returns true if the oat file is opened executable.
   bool OatFileIsExecutable();
@@ -375,9 +381,6 @@
   // the 32 or 64 bit variant for the current device.
   const InstructionSet isa_ = kNone;
 
-  // Whether the profile has recently changed.
-  bool profile_changed_ = false;
-
   // Whether we will attempt to load oat files executable.
   bool load_executable_ = false;
 
@@ -388,11 +391,9 @@
   bool required_dex_checksum_found_;
   bool has_original_dex_files_;
 
-  // Cached value of the odex file name.
-  // This should be accessed only by the OdexFileName() method.
-  bool cached_odex_file_name_attempted_ = false;
-  bool cached_odex_file_name_found_;
-  std::string cached_odex_file_name_;
+  // The sentinel value "" is used if the odex file name could not be
+  // determined.
+  std::string odex_file_name_;
 
   // Cached value of the loaded odex file.
   // Use the GetOdexFile method rather than accessing this directly, unless you
@@ -400,19 +401,13 @@
   bool odex_file_load_attempted_ = false;
   std::unique_ptr<OatFile> cached_odex_file_;
 
-  // Cached results for OdexFileIsOutOfDate
-  bool odex_file_is_out_of_date_attempted_ = false;
-  bool cached_odex_file_is_out_of_date_;
+  // Cached results for OdexFileStatus
+  bool odex_file_status_attempted_ = false;
+  OatStatus cached_odex_file_status_;
 
-  // Cached results for OdexFileIsUpToDate
-  bool odex_file_is_up_to_date_attempted_ = false;
-  bool cached_odex_file_is_up_to_date_;
-
-  // Cached value of the oat file name.
-  // This should be accessed only by the OatFileName() method.
-  bool cached_oat_file_name_attempted_ = false;
-  bool cached_oat_file_name_found_;
-  std::string cached_oat_file_name_;
+  // The sentinel value "" is used if the oat file name could not be
+  // determined.
+  std::string oat_file_name_;
 
   // Cached value of the loaded oat file.
   // Use the GetOatFile method rather than accessing this directly, unless you
@@ -420,13 +415,9 @@
   bool oat_file_load_attempted_ = false;
   std::unique_ptr<OatFile> cached_oat_file_;
 
-  // Cached results for OatFileIsOutOfDate
-  bool oat_file_is_out_of_date_attempted_ = false;
-  bool cached_oat_file_is_out_of_date_;
-
-  // Cached results for OatFileIsUpToDate
-  bool oat_file_is_up_to_date_attempted_ = false;
-  bool cached_oat_file_is_up_to_date_;
+  // Cached results for OatFileStatus
+  bool oat_file_status_attempted_ = false;
+  OatStatus cached_oat_file_status_;
 
   // Cached value of the image info.
   // Use the GetImageInfo method rather than accessing these directly.
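Putting the header changes together, a caller written against the new interface might look roughly like this; a sketch only, with a placeholder dex path and compiler filter:

std::string error_msg;
std::string oat_path;
if (OatFileAssistant::DexLocationToOatFilename("/data/app/example.jar",
                                               kRuntimeISA,
                                               &oat_path,
                                               &error_msg)) {
  OatFileAssistant assistant("/data/app/example.jar",
                             kRuntimeISA,
                             true /* load_executable */);
  if (assistant.GetDexOptNeeded(CompilerFilter::kSpeed,
                                false /* profile_changed */) !=
      OatFileAssistant::kNoDexOptNeeded) {
    assistant.MakeUpToDate(false /* profile_changed */, &error_msg);
  }
}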
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index a1d3ed9..39848b4 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -213,22 +213,22 @@
 // generation of oat files.
 static void GenerateOatForTest(const char* dex_location, CompilerFilter::Filter filter) {
   // Use an oat file assistant to find the proper oat location.
-  OatFileAssistant ofa(dex_location, kRuntimeISA, false, false);
-  const std::string* oat_location = ofa.OatFileName();
-  ASSERT_TRUE(oat_location != nullptr);
+  std::string oat_location;
+  std::string error_msg;
+  ASSERT_TRUE(OatFileAssistant::DexLocationToOatFilename(
+        dex_location, kRuntimeISA, &oat_location, &error_msg)) << error_msg;
 
   std::vector<std::string> args;
   args.push_back("--dex-file=" + std::string(dex_location));
-  args.push_back("--oat-file=" + *oat_location);
+  args.push_back("--oat-file=" + oat_location);
   args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
   args.push_back("--runtime-arg");
   args.push_back("-Xnorelocate");
-  std::string error_msg;
   ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
 
   // Verify the oat file was generated as expected.
-  std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location->c_str(),
-                                                  oat_location->c_str(),
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location.c_str(),
+                                                  oat_location.c_str(),
                                                   nullptr,
                                                   nullptr,
                                                   false,
@@ -245,7 +245,7 @@
   std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
   Copy(GetDexSrc1(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
@@ -275,7 +275,7 @@
 TEST_F(OatFileAssistantTest, NoDexNoOat) {
   std::string dex_location = GetScratchDir() + "/NoDexNoOat.jar";
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -283,7 +283,7 @@
 
   // Trying to make the oat file up to date should not fail or crash.
   std::string error_msg;
-  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, oat_file_assistant.MakeUpToDate(&error_msg));
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, oat_file_assistant.MakeUpToDate(false, &error_msg));
 
   // Trying to get the best oat file should fail, but not crash.
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
@@ -297,7 +297,7 @@
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -321,18 +321,23 @@
 }
 
 // Case: We have a DEX file and speed-profile OAT file for it.
-// Expect: The status is kNoDexOptNeeded if the profile hasn't changed.
+// Expect: The status is kNoDexOptNeeded if the profile hasn't changed, but
+// kDex2Oat if the profile has changed.
 TEST_F(OatFileAssistantTest, ProfileOatUpToDate) {
   std::string dex_location = GetScratchDir() + "/ProfileOatUpToDate.jar";
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, false));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, false));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, true));
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, true));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -346,32 +351,6 @@
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
-// Case: We have a DEX file and speed-profile OAT file for it.
-// Expect: The status is kNoDex2OatNeeded if the profile has changed.
-TEST_F(OatFileAssistantTest, ProfileOatOutOfDate) {
-  std::string dex_location = GetScratchDir() + "/ProfileOatOutOfDate.jar";
-  Copy(GetDexSrc1(), dex_location);
-  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile);
-
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true, false);
-
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
-
-  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
-  EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OatFileStatus());
-  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
-}
-
 // Case: We have a MultiDEX file and up-to-date OAT file for it.
 // Expect: The status is kNoDexOptNeeded and we load all dex files.
 TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) {
@@ -379,9 +358,9 @@
   Copy(GetMultiDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 
   // Verify we can load both dex files.
@@ -406,9 +385,9 @@
   // is out of date.
   Copy(GetMultiDexSrc2(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
@@ -435,7 +414,7 @@
   // Verify we can load both dex files.
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
                                       oat_location.c_str(),
-                                      kRuntimeISA, false, true);
+                                      kRuntimeISA, true);
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
   EXPECT_TRUE(oat_file->IsExecutable());
@@ -455,7 +434,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
   Copy(GetDexSrc2(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
@@ -482,7 +461,7 @@
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
@@ -518,7 +497,7 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -536,7 +515,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -577,7 +556,7 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
@@ -600,7 +579,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -635,7 +614,7 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -658,7 +637,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -686,7 +665,7 @@
   GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, false, true);
+      oat_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
@@ -710,7 +689,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -746,7 +725,7 @@
   GenerateNoPatchOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, false, true);
+      oat_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -755,7 +734,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
@@ -785,7 +764,7 @@
 
   // Verify things don't go bad.
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, false, true);
+      oat_location.c_str(), kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -820,7 +799,7 @@
   GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -848,7 +827,7 @@
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kVerifyAtRuntime);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
@@ -874,7 +853,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   // Load the oat using an oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -893,7 +872,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kInterpretOnly);
 
   // Load the oat using an oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -912,7 +891,7 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   // Load the oat using an oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -932,11 +911,11 @@
   Copy(GetDexSrc1(), dex_location);
 
   OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, false, true);
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -948,7 +927,7 @@
   EXPECT_TRUE(OS::FileExists(oat_location.c_str()));
 
   // Verify it didn't create an oat in the default location.
-  OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false);
   EXPECT_FALSE(ofm.OatFileExists());
 }
 
@@ -964,11 +943,11 @@
   Copy(GetDexSrc1(), dex_location);
 
   OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, false, true);
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   ASSERT_EQ(OatFileAssistant::kUpdateNotAttempted,
-      oat_file_assistant.MakeUpToDate(&error_msg));
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() == nullptr);
@@ -981,7 +960,7 @@
   std::string oat_location = GetScratchDir() + "/GenNoDex.oat";
 
   OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, false, true);
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
@@ -1031,7 +1010,7 @@
   Copy(GetDexSrc1(), abs_dex_location);
 
   std::string dex_location = MakePathRelative(abs_dex_location);
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
@@ -1049,7 +1028,7 @@
 TEST_F(OatFileAssistantTest, ShortDexLocation) {
   std::string dex_location = "/xx";
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
@@ -1066,7 +1045,7 @@
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg));
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
   EXPECT_TRUE(error_msg.empty());
 }
 
@@ -1076,7 +1055,7 @@
   std::string dex_location = GetScratchDir() + "/LongDexExtension.jarx";
   Copy(GetDexSrc1(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -1173,7 +1152,7 @@
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Load the oat using an executable oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -1195,7 +1174,7 @@
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
 
   // Load the oat using an executable oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -1209,12 +1188,12 @@
   std::string dex_location = GetScratchDir() + "/RuntimeCompilerFilterOptionUsed.jar";
   Copy(GetDexSrc1(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=interpret-only");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
@@ -1222,7 +1201,7 @@
 
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
@@ -1230,24 +1209,24 @@
 
   Runtime::Current()->AddCompilerOption("--compiler-filter=bogus");
   EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
-      oat_file_assistant.MakeUpToDate(&error_msg));
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
 }
 
-TEST(OatFileAssistantUtilsTest, DexFilenameToOdexFilename) {
+TEST(OatFileAssistantUtilsTest, DexLocationToOdexFilename) {
   std::string error_msg;
   std::string odex_file;
 
-  EXPECT_TRUE(OatFileAssistant::DexFilenameToOdexFilename(
+  EXPECT_TRUE(OatFileAssistant::DexLocationToOdexFilename(
         "/foo/bar/baz.jar", kArm, &odex_file, &error_msg)) << error_msg;
   EXPECT_EQ("/foo/bar/oat/arm/baz.odex", odex_file);
 
-  EXPECT_TRUE(OatFileAssistant::DexFilenameToOdexFilename(
+  EXPECT_TRUE(OatFileAssistant::DexLocationToOdexFilename(
         "/foo/bar/baz.funnyext", kArm, &odex_file, &error_msg)) << error_msg;
   EXPECT_EQ("/foo/bar/oat/arm/baz.odex", odex_file);
 
-  EXPECT_FALSE(OatFileAssistant::DexFilenameToOdexFilename(
+  EXPECT_FALSE(OatFileAssistant::DexLocationToOdexFilename(
         "nopath.jar", kArm, &odex_file, &error_msg));
-  EXPECT_FALSE(OatFileAssistant::DexFilenameToOdexFilename(
+  EXPECT_FALSE(OatFileAssistant::DexLocationToOdexFilename(
         "/foo/bar/baz_noext", kArm, &odex_file, &error_msg));
 }
 
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index b7e6040..7680517 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -558,7 +558,6 @@
   OatFileAssistant oat_file_assistant(dex_location,
                                       oat_location,
                                       kRuntimeISA,
-                                      /*profile_changed*/false,
                                       !runtime->IsAotCompiler());
 
   // Lock the target oat location to avoid races generating and loading the
@@ -576,7 +575,7 @@
     // Update the oat file on disk if we can, based on the --compiler-filter
     // option derived from the current runtime options.
     // This may fail, but that's okay. Best effort is all that matters here.
-    switch (oat_file_assistant.MakeUpToDate(/*out*/ &error_msg)) {
+    switch (oat_file_assistant.MakeUpToDate(/*profile_changed*/false, /*out*/ &error_msg)) {
       case OatFileAssistant::kUpdateFailed:
         LOG(WARNING) << error_msg;
         break;
diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc
index f45e9f6..1d1413b 100644
--- a/runtime/os_linux.cc
+++ b/runtime/os_linux.cc
@@ -53,8 +53,9 @@
 
 File* OS::OpenFileWithFlags(const char* name, int flags) {
   CHECK(name != nullptr);
-  std::unique_ptr<File> file(new File);
-  if (!file->Open(name, flags, 0666)) {
+  bool read_only = (flags == O_RDONLY);
+  std::unique_ptr<File> file(new File(name, flags, 0666, !read_only));
+  if (!file->IsOpened()) {
     return nullptr;
   }
   return file.release();
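
The hunk above replaces the default-construct-then-Open() sequence with a constructor that opens the file immediately, followed by an IsOpened() check; the extra boolean it passes appears to toggle usage checking, disabled for read-only opens. A rough, self-contained analogue of that construct-then-check shape (not ART's File class):

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

// Stand-in for ART's File: open in the constructor, verify with IsOpened().
// The usage-checking flag from the hunk above is omitted here.
class ScopedFile {
 public:
  ScopedFile(const char* name, int flags, mode_t mode) : fd_(open(name, flags, mode)) {}
  ~ScopedFile() { if (fd_ >= 0) close(fd_); }
  bool IsOpened() const { return fd_ >= 0; }
 private:
  int fd_;
};

bool CanOpenReadOnly(const char* name) {
  ScopedFile file(name, O_RDONLY, 0666);
  return file.IsOpened();  // mirrors the early nullptr return above
}
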
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 0c3eb3b..92efa21 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -220,7 +220,7 @@
 }
 
 inline mirror::Object* ReadBarrier::Mark(mirror::Object* obj) {
-  return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->Mark(obj);
+  return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->MarkFromReadBarrier(obj);
 }
 
 inline bool ReadBarrier::HasGrayReadBarrierPointer(mirror::Object* obj,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 21cd2aa..079c079 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -989,6 +989,7 @@
                        xgc_option.verify_pre_sweeping_rosalloc_,
                        xgc_option.verify_post_gc_rosalloc_,
                        xgc_option.gcstress_,
+                       xgc_option.measure_,
                        runtime_options.GetOrDefault(Opt::EnableHSpaceCompactForOOM),
                        runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs));
 
diff --git a/runtime/simulator/Android.mk b/runtime/simulator/Android.mk
index ad91cde..953a377 100644
--- a/runtime/simulator/Android.mk
+++ b/runtime/simulator/Android.mk
@@ -88,9 +88,9 @@
   LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
   # For simulator_arm64.
   ifeq ($$(art_ndebug_or_debug),debug)
-     LOCAL_SHARED_LIBRARIES += libvixl
+     LOCAL_SHARED_LIBRARIES += libvixl-arm64
   else
-     LOCAL_SHARED_LIBRARIES += libvixl
+     LOCAL_SHARED_LIBRARIES += libvixl-arm64
   endif
   ifeq ($$(art_target_or_host),target)
     include $(BUILD_SHARED_LIBRARY)
diff --git a/runtime/simulator/code_simulator_arm64.cc b/runtime/simulator/code_simulator_arm64.cc
index 39dfa6d..897d4f5 100644
--- a/runtime/simulator/code_simulator_arm64.cc
+++ b/runtime/simulator/code_simulator_arm64.cc
@@ -16,13 +16,15 @@
 
 #include "simulator/code_simulator_arm64.h"
 
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
+
 namespace art {
 namespace arm64 {
 
-// VIXL has not been tested on 32bit architectures, so vixl::Simulator is not always
+// VIXL has not been tested on 32bit architectures, so Simulator is not always
 // available. To avoid linker error on these architectures, we check if we can simulate
 // in the beginning of following methods, with compile time constant `kCanSimulate`.
-// TODO: when vixl::Simulator is always available, remove the these checks.
+// TODO: when Simulator is always available, remove these checks.
 
 CodeSimulatorArm64* CodeSimulatorArm64::CreateCodeSimulatorArm64() {
   if (kCanSimulate) {
@@ -35,8 +37,8 @@
 CodeSimulatorArm64::CodeSimulatorArm64()
     : CodeSimulator(), decoder_(nullptr), simulator_(nullptr) {
   DCHECK(kCanSimulate);
-  decoder_ = new vixl::Decoder();
-  simulator_ = new vixl::Simulator(decoder_);
+  decoder_ = new Decoder();
+  simulator_ = new Simulator(decoder_);
 }
 
 CodeSimulatorArm64::~CodeSimulatorArm64() {
@@ -47,22 +49,22 @@
 
 void CodeSimulatorArm64::RunFrom(intptr_t code_buffer) {
   DCHECK(kCanSimulate);
-  simulator_->RunFrom(reinterpret_cast<const vixl::Instruction*>(code_buffer));
+  simulator_->RunFrom(reinterpret_cast<const Instruction*>(code_buffer));
 }
 
 bool CodeSimulatorArm64::GetCReturnBool() const {
   DCHECK(kCanSimulate);
-  return simulator_->wreg(0);
+  return simulator_->ReadWRegister(0);
 }
 
 int32_t CodeSimulatorArm64::GetCReturnInt32() const {
   DCHECK(kCanSimulate);
-  return simulator_->wreg(0);
+  return simulator_->ReadWRegister(0);
 }
 
 int64_t CodeSimulatorArm64::GetCReturnInt64() const {
   DCHECK(kCanSimulate);
-  return simulator_->xreg(0);
+  return simulator_->ReadXRegister(0);
 }
 
 }  // namespace arm64
diff --git a/runtime/simulator/code_simulator_arm64.h b/runtime/simulator/code_simulator_arm64.h
index 10fceb9..69388b1 100644
--- a/runtime/simulator/code_simulator_arm64.h
+++ b/runtime/simulator/code_simulator_arm64.h
@@ -19,10 +19,11 @@
 
 #include "memory"
 #include "simulator/code_simulator.h"
+
 // TODO: make vixl clean wrt -Wshadow.
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wshadow"
-#include "vixl/a64/simulator-a64.h"
+#include "a64/simulator-a64.h"
 #pragma GCC diagnostic pop
 
 namespace art {
@@ -42,10 +43,10 @@
  private:
   CodeSimulatorArm64();
 
-  vixl::Decoder* decoder_;
-  vixl::Simulator* simulator_;
+  vixl::aarch64::Decoder* decoder_;
+  vixl::aarch64::Simulator* simulator_;
 
-  // TODO: Enable CodeSimulatorArm64 for more host ISAs once vixl::Simulator supports them.
+  // TODO: Enable CodeSimulatorArm64 for more host ISAs once Simulator supports them.
   static constexpr bool kCanSimulate = (kRuntimeISA == kX86_64);
 
   DISALLOW_COPY_AND_ASSIGN(CodeSimulatorArm64);
diff --git a/compiler/utils/string_reference.h b/runtime/string_reference.h
similarity index 84%
rename from compiler/utils/string_reference.h
rename to runtime/string_reference.h
index e4c34ca..c75c218 100644
--- a/compiler/utils/string_reference.h
+++ b/runtime/string_reference.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_UTILS_STRING_REFERENCE_H_
-#define ART_COMPILER_UTILS_STRING_REFERENCE_H_
+#ifndef ART_RUNTIME_STRING_REFERENCE_H_
+#define ART_RUNTIME_STRING_REFERENCE_H_
 
 #include <stdint.h>
 
@@ -37,6 +37,16 @@
   uint32_t string_index;
 };
 
+// Compare only the reference and not the string contents.
+struct StringReferenceComparator {
+  bool operator()(const StringReference& a, const StringReference& b) {
+    if (a.dex_file != b.dex_file) {
+      return a.dex_file < b.dex_file;
+    }
+    return a.string_index < b.string_index;
+  }
+};
+
 // Compare the actual referenced string values. Used for string reference deduplication.
 struct StringReferenceValueComparator {
   bool operator()(StringReference sr1, StringReference sr2) const {
@@ -62,4 +72,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_UTILS_STRING_REFERENCE_H_
+#endif  // ART_RUNTIME_STRING_REFERENCE_H_
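
The new StringReferenceComparator orders by dex file pointer first and string index second, never looking at the string contents, so it suits identity-keyed containers where the value-based comparator below it would be too expensive. A self-contained sketch of that ordering with a stand-in struct (the real StringReference holds a const DexFile*):

#include <cstdint>
#include <map>

struct Ref {
  const void* dex_file;   // stand-in for const DexFile*
  uint32_t string_index;
};

// Same shape as StringReferenceComparator: compare the reference, not the contents.
struct RefComparator {
  bool operator()(const Ref& a, const Ref& b) const {
    if (a.dex_file != b.dex_file) {
      return a.dex_file < b.dex_file;
    }
    return a.string_index < b.string_index;
  }
};

// Usable directly as the ordering of an identity-keyed map.
using RefMap = std::map<Ref, uint32_t, RefComparator>;
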
diff --git a/runtime/thread.cc b/runtime/thread.cc
index b9ee442..50f76da 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2574,7 +2574,38 @@
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuffer)
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
   QUICK_ENTRY_POINT_INFO(pReadBarrierJni)
-  QUICK_ENTRY_POINT_INFO(pReadBarrierMark)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg00)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg01)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg02)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg03)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg04)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg05)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg06)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg07)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg08)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg09)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg10)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg11)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg12)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg13)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg14)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg15)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg16)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg17)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg18)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg19)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg20)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg21)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg22)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg23)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg24)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg25)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg26)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg27)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg28)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg29)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg30)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg31)
   QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
   QUICK_ENTRY_POINT_INFO(pReadBarrierForRootSlow)
 #undef QUICK_ENTRY_POINT_INFO
diff --git a/runtime/thread.h b/runtime/thread.h
index ab24625..a3a4005 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1352,7 +1352,7 @@
       stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
-      thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
+      thread_local_objects(0), thread_local_pos(nullptr), thread_local_end(nullptr),
       mterp_current_ibase(nullptr), mterp_default_ibase(nullptr), mterp_alt_ibase(nullptr),
       thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr),
       nested_signal_state(nullptr), flip_function(nullptr), method_verifier(nullptr),
@@ -1468,11 +1468,11 @@
 
     // Thread-local allocation pointer.
     uint8_t* thread_local_start;
+    size_t thread_local_objects;
     // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for
     // potentially better performance.
     uint8_t* thread_local_pos;
     uint8_t* thread_local_end;
-    size_t thread_local_objects;
 
     // Mterp jump table bases.
     void* mterp_current_ibase;
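
The field reorder above is a layout requirement rather than a cosmetic change: per the comment in the hunk, thread_local_pos and thread_local_end must be consecutive so a single paired load (ldrd/ldp) can fetch both on the allocation fast path. A sketch of that property with a stand-in struct, not the real tls_ptr_sized_values:

#include <cstddef>
#include <cstdint>

struct TlabSketch {
  uint8_t* thread_local_start;
  size_t thread_local_objects;   // moved before pos/end, as in the hunk above
  uint8_t* thread_local_pos;
  uint8_t* thread_local_end;
};

static_assert(offsetof(TlabSketch, thread_local_end) ==
                  offsetof(TlabSketch, thread_local_pos) + sizeof(uint8_t*),
              "pos and end must be consecutive for a single paired load");
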
diff --git a/runtime/trace.cc b/runtime/trace.cc
index b879355..1e15960 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -645,31 +645,11 @@
   }
 }
 
-static void GetVisitedMethodsFromBitSets(
-    const std::map<const DexFile*, DexIndexBitSet*>& seen_methods,
-    std::set<ArtMethod*>* visited_methods) SHARED_REQUIRES(Locks::mutator_lock_) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  Thread* const self = Thread::Current();
-  for (auto& e : seen_methods) {
-    DexIndexBitSet* bit_set = e.second;
-    // TODO: Visit trace methods as roots.
-    mirror::DexCache* dex_cache = class_linker->FindDexCache(self, *e.first, false);
-    for (uint32_t i = 0; i < bit_set->size(); ++i) {
-      if ((*bit_set)[i]) {
-        visited_methods->insert(dex_cache->GetResolvedMethod(i, sizeof(void*)));
-      }
-    }
-  }
-}
-
 void Trace::FinishTracing() {
   size_t final_offset = 0;
 
   std::set<ArtMethod*> visited_methods;
   if (trace_output_mode_ == TraceOutputMode::kStreaming) {
-    // Write the secondary file with all the method names.
-    GetVisitedMethodsFromBitSets(seen_methods_, &visited_methods);
-
     // Clean up.
     STLDeleteValues(&seen_methods_);
   } else {
@@ -715,8 +695,8 @@
   std::string header(os.str());
 
   if (trace_output_mode_ == TraceOutputMode::kStreaming) {
-    File file;
-    if (!file.Open(streaming_file_name_ + ".sec", O_CREAT | O_WRONLY)) {
+    File file(streaming_file_name_ + ".sec", O_CREAT | O_WRONLY, true);
+    if (!file.IsOpened()) {
       LOG(WARNING) << "Could not open secondary trace file!";
       return;
     }
@@ -850,11 +830,6 @@
 bool Trace::RegisterMethod(ArtMethod* method) {
   mirror::DexCache* dex_cache = method->GetDexCache();
   const DexFile* dex_file = dex_cache->GetDexFile();
-  auto* resolved_method = dex_cache->GetResolvedMethod(method->GetDexMethodIndex(), sizeof(void*));
-  if (resolved_method != method) {
-    DCHECK(resolved_method == nullptr);
-    dex_cache->SetResolvedMethod(method->GetDexMethodIndex(), method, sizeof(void*));
-  }
   if (seen_methods_.find(dex_file) == seen_methods_.end()) {
     seen_methods_.insert(std::make_pair(dex_file, new DexIndexBitSet()));
   }
@@ -869,7 +844,7 @@
 bool Trace::RegisterThread(Thread* thread) {
   pid_t tid = thread->GetTid();
   CHECK_LT(0U, static_cast<uint32_t>(tid));
-  CHECK_LT(static_cast<uint32_t>(tid), 65536U);
+  CHECK_LT(static_cast<uint32_t>(tid), kMaxThreadIdNumber);
 
   if (!(*seen_threads_)[tid]) {
     seen_threads_->set(tid);
@@ -880,8 +855,7 @@
 
 std::string Trace::GetMethodLine(ArtMethod* method) {
   method = method->GetInterfaceMethodIfProxy(sizeof(void*));
-  return StringPrintf("%p\t%s\t%s\t%s\t%s\n",
-                      reinterpret_cast<void*>((EncodeTraceMethod(method) << TraceActionBits)),
+  return StringPrintf("%#x\t%s\t%s\t%s\t%s\n", (EncodeTraceMethod(method) << TraceActionBits),
       PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(), method->GetName(),
       method->GetSignature().ToString().c_str(), method->GetDeclaringClassSourceFile());
 }
diff --git a/runtime/trace.h b/runtime/trace.h
index 80f1a4c..9b29fb9 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -41,7 +41,9 @@
 class Thread;
 
 using DexIndexBitSet = std::bitset<65536>;
-using ThreadIDBitSet = std::bitset<65536>;
+
+constexpr size_t kMaxThreadIdNumber = kIsTargetBuild ? 65536U : 1048576U;
+using ThreadIDBitSet = std::bitset<kMaxThreadIdNumber>;
 
 enum TracingMode {
   kTracingInactive,
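
With the constant above, host builds accept thread ids up to 1048576 while target builds keep the original 65536 bound, and RegisterThread (in the trace.cc hunk) now checks against kMaxThreadIdNumber instead of a literal. A self-contained sketch of that bounded first-seen check; the real code CHECK-fails on out-of-range ids rather than returning:

#include <bitset>
#include <cstddef>

constexpr size_t kMaxThreadIdNumber = 1048576;   // host-side value from the hunk above
using ThreadIDBitSet = std::bitset<kMaxThreadIdNumber>;

// Returns true only the first time a given tid is seen, mirroring RegisterThread.
bool RegisterThreadId(ThreadIDBitSet* seen_threads, size_t tid) {
  if (tid >= kMaxThreadIdNumber) {
    return false;
  }
  if (!(*seen_threads)[tid]) {
    seen_threads->set(tid);
    return true;
  }
  return false;
}
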
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 6a50b8e..3f779df 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -136,8 +136,8 @@
 }
 
 bool ReadFileToString(const std::string& file_name, std::string* result) {
-  File file;
-  if (!file.Open(file_name, O_RDONLY)) {
+  File file(file_name, O_RDONLY, false);
+  if (!file.IsOpened()) {
     return false;
   }
 
@@ -155,8 +155,8 @@
 }
 
 bool PrintFileToLog(const std::string& file_name, LogSeverity level) {
-  File file;
-  if (!file.Open(file_name, O_RDONLY)) {
+  File file(file_name, O_RDONLY, false);
+  if (!file.IsOpened()) {
     return false;
   }
 
diff --git a/runtime/utils.h b/runtime/utils.h
index c1e88a4..b2746ee 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -382,13 +382,19 @@
 #endif
 
 template <typename T>
-T GetRandomNumber(T min, T max) {
+static T GetRandomNumber(T min, T max) {
   CHECK_LT(min, max);
   std::uniform_int_distribution<T> dist(min, max);
   RNG rng;
   return dist(rng);
 }
 
+// Add all of the elements from one container to another.
+template <typename Dest, typename Src>
+static void AddAll(Dest& dest, const Src& src) {
+  dest.insert(src.begin(), src.end());
+}
+
 // Return the file size in bytes or -1 if the file does not exists.
 int64_t GetFileSizeBytes(const std::string& filename);
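
The AddAll helper added above simply forwards to the destination's range insert, so any destination exposing insert(first, last) works. A quick usage sketch; the template is repeated verbatim so the snippet stands alone:

#include <set>
#include <vector>

template <typename Dest, typename Src>
static void AddAll(Dest& dest, const Src& src) {
  dest.insert(src.begin(), src.end());
}

int main() {
  std::vector<int> batch = {3, 1, 2, 3};
  std::set<int> all;
  AddAll(all, batch);   // all == {1, 2, 3}
  return 0;
}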
 
diff --git a/test/412-new-array/info.txt b/test/412-new-array/info.txt
index cb388b6..b5f834a 100644
--- a/test/412-new-array/info.txt
+++ b/test/412-new-array/info.txt
@@ -1 +1,3 @@
 Simple tests for new-array, filled-new-array and fill-array-data.
+Regression test for the arm64 mterp miscalculating the fill-array-data-payload
+address, zero-extending a register instead of sign-extending.
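
This info.txt and the packed-switch one further below describe the same class of arm64 mterp bug: the offset to out-of-line payload data is negative when the data precedes the referencing instruction, and zero-extending that offset instead of sign-extending it computes an address far beyond the method. Illustrative arithmetic only, not mterp code:

#include <cstdint>
#include <cstdio>

int main() {
  int32_t offset = -8;   // payload placed before the referencing instruction
  uint64_t pc = 0x1000;
  uint64_t zero_extended = pc + static_cast<uint32_t>(offset);  // 0x100000ff8: wrong
  uint64_t sign_extended = pc + static_cast<int64_t>(offset);   // 0xff8: correct
  std::printf("zero-extended %#llx, sign-extended %#llx\n",
              static_cast<unsigned long long>(zero_extended),
              static_cast<unsigned long long>(sign_extended));
  return 0;
}
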
diff --git a/test/412-new-array/smali/fill_array_data.smali b/test/412-new-array/smali/fill_array_data.smali
index 34776db..2b24e56 100644
--- a/test/412-new-array/smali/fill_array_data.smali
+++ b/test/412-new-array/smali/fill_array_data.smali
@@ -15,6 +15,21 @@
 
 .end method
 
+.method public static intArrayFillInstructionAfterData([I)V
+   .registers 1
+   goto :FillInstruction
+
+:ArrayData
+    .array-data 4
+        1 2 3 4 5
+    .end array-data
+
+:FillInstruction
+   fill-array-data v0, :ArrayData
+   return-void
+
+.end method
+
 .method public static shortArray([S)V
    .registers 1
 
diff --git a/test/412-new-array/src/Main.java b/test/412-new-array/src/Main.java
index b9c2a05..d95d2c5 100644
--- a/test/412-new-array/src/Main.java
+++ b/test/412-new-array/src/Main.java
@@ -259,6 +259,45 @@
     }
 
     {
+      Method m = c.getMethod("intArrayFillInstructionAfterData", int[].class);
+      int[] array = new int[7];
+      Object[] args = { array };
+      m.invoke(null, args);
+      assertEquals(7, array.length);
+      assertEquals(1, array[0]);
+      assertEquals(2, array[1]);
+      assertEquals(3, array[2]);
+      assertEquals(4, array[3]);
+      assertEquals(5, array[4]);
+      assertEquals(0, array[5]);
+      assertEquals(0, array[6]);
+
+      array = new int[2];
+      args[0] = array;
+      Throwable exception = null;
+      try {
+        m.invoke(null, args);
+      } catch (InvocationTargetException e) {
+        exception = e.getCause();
+        assertTrue(exception instanceof IndexOutOfBoundsException);
+      }
+      assertNotNull(exception);
+      exception = null;
+      // Test that nothing has been written to the array.
+      assertEquals(0, array[0]);
+      assertEquals(0, array[1]);
+
+      args[0] = null;
+      try {
+        m.invoke(null, args);
+      } catch (InvocationTargetException e) {
+        exception = e.getCause();
+        assertTrue(exception instanceof NullPointerException);
+      }
+      assertNotNull(exception);
+    }
+
+    {
       Method m = c.getMethod("shortArray", short[].class);
       short[] array = new short[7];
       Object[] args = { array };
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 41771b5..c125e33 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -1204,9 +1204,6 @@
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
   /// CHECK: Goto
@@ -1217,7 +1214,7 @@
     for (int i = array.length - 1 ; i >= 0; i--) {
       array[i] = 1;
     }
-    // Several HDeoptimize will be added. Two for each index.
+    // Three HDeoptimize instructions will be added for the bounds.
     // The null check is not necessary.
     for (int i = end - 2 ; i > 0; i--) {
       if (expectInterpreter) {
@@ -1266,20 +1263,12 @@
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
   /// CHECK: Goto
   /// CHECK: Goto
 
   void foo6(int[] array, int start, int end, boolean expectInterpreter) {
-    // Several HDeoptimize will be added.
     for (int i = end; i >= start; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -1398,8 +1387,8 @@
   /// CHECK-NOT: Deoptimize
 
   void foo9(int[] array, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. Two for the index
-    // and one for null check on array.
+    // Three HDeoptimize instructions will be added: two for the index and one for the null
+    // check on the array. Then simplification removes one redundant HDeoptimize.
     for (int i = 0 ; i < 10; i++) {
       if (expectInterpreter) {
         assertIsInterpreted();
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index c717eaa..040479e 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -1142,7 +1142,13 @@
 
   public static boolean $noinline$EqualBoolVsIntConst(boolean arg) {
     if (doThrow) { throw new Error(); }
-    return (arg ? 0 : 1) != 2;
+    // Make calls that will be inlined to make sure the instruction simplifier
+    // sees the simplification (dead code elimination will also try to simplify it).
+    return (arg ? $inline$ReturnArg(0) : $inline$ReturnArg(1)) != 2;
+  }
+
+  public static int $inline$ReturnArg(int arg) {
+    return arg;
   }
 
   /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
@@ -1161,7 +1167,9 @@
 
   public static boolean $noinline$NotEqualBoolVsIntConst(boolean arg) {
     if (doThrow) { throw new Error(); }
-    return (arg ? 0 : 1) == 2;
+    // Make calls that will be inlined to make sure the instruction simplifier
+    // sees the simplification (dead code elimination will also try to simplify it).
+    return (arg ? $inline$ReturnArg(0) : $inline$ReturnArg(1)) == 2;
   }
 
   /*
@@ -1971,8 +1979,165 @@
     return (value >> temp) + temp;
   }
 
-public static void main(String[] args) {
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg1(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Sub [<<Sum>>,<<X>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg1(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Y>>]
+
+  public static int $noinline$intAddSubSimplifyArg1(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sum = x + y;
+    return sum - x;
+  }
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg2(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Sub [<<Sum>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg2(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:i\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<X>>]
+
+  public static int $noinline$intAddSubSimplifyArg2(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sum = x + y;
+    return sum - y;
+  }
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyLeft(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Add [<<Sub>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyLeft(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<X>>]
+
+  public static int $noinline$intSubAddSimplifyLeft(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sub = x - y;
+    return sub + y;
+  }
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyRight(int, int) instruction_simplifier (before)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:i\d+>>      Add [<<Y>>,<<Sub>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: int Main.$noinline$intSubAddSimplifyRight(int, int) instruction_simplifier (after)
+  /// CHECK:          <<X:i\d+>>        ParameterValue
+  /// CHECK:          <<Y:i\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<X>>]
+
+  public static int $noinline$intSubAddSimplifyRight(int x, int y) {
+    if (doThrow) { throw new Error(); }
+    int sub = x - y;
+    return y + sub;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg1(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<X>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg1(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<X>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatAddSubSimplifyArg1(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sum = x + y;
+    return sum - x;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg2(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatAddSubSimplifyArg2(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sum:f\d+>>      Add [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Sub [<<Sum>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatAddSubSimplifyArg2(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sum = x + y;
+    return sum - y;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyLeft(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Sub>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyLeft(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Sub>>,<<Y>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatSubAddSimplifyLeft(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sub = x - y;
+    return sub + y;
+  }
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyRight(float, float) instruction_simplifier (before)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Y>>,<<Sub>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  /// CHECK-START: float Main.$noinline$floatSubAddSimplifyRight(float, float) instruction_simplifier (after)
+  /// CHECK:          <<X:f\d+>>        ParameterValue
+  /// CHECK:          <<Y:f\d+>>        ParameterValue
+  /// CHECK-DAG:      <<Sub:f\d+>>      Sub [<<X>>,<<Y>>]
+  /// CHECK-DAG:      <<Res:f\d+>>      Add [<<Y>>,<<Sub>>]
+  /// CHECK-DAG:                        Return [<<Res>>]
+
+  public static float $noinline$floatSubAddSimplifyRight(float x, float y) {
+    if (doThrow) { throw new Error(); }
+    float sub = x - y;
+    return y + sub;
+  }
+
+  public static void main(String[] args) {
     int arg = 123456;
+    float floatArg = 123456.125f;
 
     assertLongEquals(arg, $noinline$Add0(arg));
     assertIntEquals(5, $noinline$AddAddSubAddConst(1));
@@ -2143,6 +2308,15 @@
     assertLongEquals(0xaf37bc048d159e24L, $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2 + 256));
     assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13));
     assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13 + 512));
+
+    assertIntEquals(654321, $noinline$intAddSubSimplifyArg1(arg, 654321));
+    assertIntEquals(arg, $noinline$intAddSubSimplifyArg2(arg, 654321));
+    assertIntEquals(arg, $noinline$intSubAddSimplifyLeft(arg, 654321));
+    assertIntEquals(arg, $noinline$intSubAddSimplifyRight(arg, 654321));
+    assertFloatEquals(654321.125f, $noinline$floatAddSubSimplifyArg1(floatArg, 654321.125f));
+    assertFloatEquals(floatArg, $noinline$floatAddSubSimplifyArg2(floatArg, 654321.125f));
+    assertFloatEquals(floatArg, $noinline$floatSubAddSimplifyLeft(floatArg, 654321.125f));
+    assertFloatEquals(floatArg, $noinline$floatSubAddSimplifyRight(floatArg, 654321.125f));
   }
 
   private static boolean $inline$true() { return true; }
diff --git a/test/501-regression-packed-switch/info.txt b/test/501-regression-packed-switch/info.txt
index fbd93fa..988b220 100644
--- a/test/501-regression-packed-switch/info.txt
+++ b/test/501-regression-packed-switch/info.txt
@@ -1,2 +1,4 @@
 Regression test for the interpreter and optimizing's builder which used
 to trip when compiled code contained a packed switch with no targets.
+Regression test for the arm64 mterp miscalculating the switch table
+address, zero-extending a register instead of sign-extending.
diff --git a/test/501-regression-packed-switch/smali/Test.smali b/test/501-regression-packed-switch/smali/Test.smali
index 8756ed5..5a760c7 100644
--- a/test/501-regression-packed-switch/smali/Test.smali
+++ b/test/501-regression-packed-switch/smali/Test.smali
@@ -27,3 +27,28 @@
   .packed-switch 0x0
   .end packed-switch
 .end method
+
+.method public static PackedSwitchAfterData(I)I
+  .registers 1
+  goto :pswitch_instr
+
+  :case0
+  const/4 v0, 0x1
+  return v0
+
+  :pswitch_data
+  .packed-switch 0x0
+    :case0
+    :case1
+  .end packed-switch
+
+  :pswitch_instr
+  packed-switch v0, :pswitch_data
+  const/4 v0, 0x7
+  return v0
+
+  :case1
+  const/4 v0, 0x4
+  return v0
+
+.end method
diff --git a/test/501-regression-packed-switch/src/Main.java b/test/501-regression-packed-switch/src/Main.java
index b80bc62..12bc1a8 100644
--- a/test/501-regression-packed-switch/src/Main.java
+++ b/test/501-regression-packed-switch/src/Main.java
@@ -29,5 +29,10 @@
     if (result != 5) {
       throw new Error("Expected 5, got " + result);
     }
+    m = c.getMethod("PackedSwitchAfterData", new Class[] { int.class });
+    result = (Integer) m.invoke(null, new Integer(0));
+    if (result != 1) {
+      throw new Error("Expected 1, got " + result);
+    }
   }
 }
diff --git a/test/527-checker-array-access-split/src/Main.java b/test/527-checker-array-access-split/src/Main.java
index ead9446..3366f20 100644
--- a/test/527-checker-array-access-split/src/Main.java
+++ b/test/527-checker-array-access-split/src/Main.java
@@ -34,9 +34,21 @@
   /// CHECK-START-ARM64: int Main.constantIndexGet(int[]) instruction_simplifier_arm64 (after)
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
   /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
 
+
+  /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (before)
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
+
+  /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (after)
+  /// CHECK:           <<Array:l\d+>>         NullCheck
+  /// CHECK:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-NOT:                              IntermediateAddress
+  /// CHECK:                                  ArrayGet [<<Array>>,<<Index>>]
+
   public static int constantIndexGet(int array[]) {
     return array[1];
   }
@@ -55,10 +67,23 @@
   /// CHECK:             <<Const2:i\d+>>        IntConstant 2
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
   /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Const2>>]
 
 
+  /// CHECK-START-ARM:   void Main.constantIndexSet(int[]) instruction_simplifier_arm (before)
+  /// CHECK:             <<Const2:i\d+>>        IntConstant 2
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+
+  /// CHECK-START-ARM:   void Main.constantIndexSet(int[]) instruction_simplifier_arm (after)
+  /// CHECK:             <<Const2:i\d+>>        IntConstant 2
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK-NOT:                                IntermediateAddress
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+
   public static void constantIndexSet(int array[]) {
     array[1] = 2;
   }
@@ -76,7 +101,20 @@
   /// CHECK:             <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArrayGet [<<Address>>,<<Index>>]
+
+
+  /// CHECK-START-ARM:   int Main.get(int[], int) instruction_simplifier_arm (before)
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
+
+  /// CHECK-START-ARM:   int Main.get(int[], int) instruction_simplifier_arm (after)
+  /// CHECK:             <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArrayGet [<<Address>>,<<Index>>]
 
   public static int get(int array[], int index) {
@@ -102,7 +140,26 @@
   /// CHECK:             <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address>>,<<Index>>,<<Arg>>]
+
+
+  /// CHECK-START-ARM:   void Main.set(int[], int, int) instruction_simplifier_arm (before)
+  /// CHECK:                                    ParameterValue
+  /// CHECK:                                    ParameterValue
+  /// CHECK:             <<Arg:i\d+>>           ParameterValue
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Arg>>]
+
+  /// CHECK-START-ARM:   void Main.set(int[], int, int) instruction_simplifier_arm (after)
+  /// CHECK:                                    ParameterValue
+  /// CHECK:                                    ParameterValue
+  /// CHECK:             <<Arg:i\d+>>           ParameterValue
+  /// CHECK:             <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address>>,<<Index>>,<<Arg>>]
 
   public static void set(int array[], int index, int value) {
@@ -126,10 +183,10 @@
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
   /// CHECK-START-ARM64: void Main.getSet(int[], int) GVN_after_arch (after)
@@ -137,12 +194,42 @@
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
   /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
 
+
+  /// CHECK-START-ARM:   void Main.getSet(int[], int) instruction_simplifier_arm (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   void Main.getSet(int[], int) instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   void Main.getSet(int[], int) GVN_after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK-NOT:                                IntermediateAddress
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
   public static void getSet(int array[], int index) {
     array[index] = array[index] + 1;
   }
@@ -166,11 +253,11 @@
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
   /// CHECK:                                    NewArray
-  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
   /// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) GVN_after_arch (after)
@@ -178,11 +265,45 @@
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
   /// CHECK:             <<Array:l\d+>>         NullCheck
   /// CHECK:             <<Index:i\d+>>         BoundsCheck
-  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
   /// CHECK:                                    NewArray
-  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:                                    ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+
+  /// CHECK-START-ARM:   int[] Main.accrossGC(int[], int) instruction_simplifier_arm (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    NewArray
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int[] Main.accrossGC(int[], int) instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    NewArray
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int[] Main.accrossGC(int[], int) GVN_after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    NewArray
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:                                    ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
   public static int[] accrossGC(int array[], int index) {
@@ -196,6 +317,14 @@
    * Test that the intermediate address is shared between array accesses after
    * the bounds check have been removed by BCE.
    */
+  // For checker tests `instruction_simplifier_<arch> (after)` below, by the time we reach
+  // the architecture-specific instruction simplifier, BCE has removed the bounds checks in
+  // the loop.
+
+  // Note that we do not care that the `DataOffset` is `12`. But if we do not
+  // specify it and any other `IntConstant` appears before that instruction,
+  // checker will match the previous `IntConstant`, and we will thus fail the
+  // check.
 
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (before)
   /// CHECK:             <<Const1:i\d+>>        IntConstant 1
@@ -207,14 +336,6 @@
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
   /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
 
-  // By the time we reach the architecture-specific instruction simplifier, BCE
-  // has removed the bounds checks in the loop.
-
-  // Note that we do not care that the `DataOffset` is `12`. But if we do not
-  // specify it and any other `IntConstant` appears before that instruction,
-  // checker will match the previous `IntConstant`, and we will thus fail the
-  // check.
-
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
@@ -222,10 +343,10 @@
   /// CHECK:             <<Index:i\d+>>         Phi
   /// CHECK:                                    If
   //  -------------- Loop
-  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
 
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN_after_arch (after)
@@ -235,10 +356,47 @@
   /// CHECK:             <<Index:i\d+>>         Phi
   /// CHECK:                                    If
   //  -------------- Loop
-  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
+
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() instruction_simplifier_arm (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK:             <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:             <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() GVN_after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK:             <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK-NOT:                                IntermediateAddress
   /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
 
   public static int canMergeAfterBCE1() {
@@ -279,12 +437,12 @@
   /// CHECK:                                    If
   //  -------------- Loop
   /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
-  /// CHECK-DAG:         <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address1>>,<<Index>>]
-  /// CHECK-DAG:         <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address2>>,<<Index1>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
-  /// CHECK:             <<Address3:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<Address3:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
 
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
@@ -295,7 +453,7 @@
   /// CHECK:                                    If
   //  -------------- Loop
   /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
-  /// CHECK-DAG:         <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address>>,<<Index>>]
   /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address>>,<<Index1>>]
   /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
@@ -304,8 +462,55 @@
   // There should be only one intermediate address computation in the loop.
 
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
-  /// CHECK:                                    Arm64IntermediateAddress
-  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK:                                    IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
+
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() instruction_simplifier_arm (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
+  /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Array>>,<<Index1>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index1>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
+  /// CHECK-DAG:         <<Address1:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:         <<Address2:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address2>>,<<Index1>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:             <<Address3:l\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() GVN_after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
+  /// CHECK-DAG:         <<Address:l\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address>>,<<Index>>]
+  /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address>>,<<Index1>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index1>>,<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() GVN_after_arch (after)
+  /// CHECK:                                    IntermediateAddress
+  /// CHECK-NOT:                                IntermediateAddress
 
   public static int canMergeAfterBCE2() {
     int[] array = {0, 1, 2, 3};
@@ -315,6 +520,37 @@
     return array[array.length - 1];
   }
 
+  /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (before)
+  /// CHECK-DAG:         <<Array1:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Array2:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Array3:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                ArrayGet [<<Array1>>,<<Index>>]
+  /// CHECK-DAG:                                ArrayGet [<<Array2>>,<<Index>>]
+  /// CHECK-DAG:                                ArrayGet [<<Array3>>,<<Index>>]
+
+  /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after)
+  /// CHECK-DAG:         <<Array1:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Array2:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Array3:l\d+>>        NewArray
+  /// CHECK-DAG:         <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                ArrayGet [<<Array1>>,<<Index>>]
+  /// CHECK-DAG:                                ArrayGet [<<Array2>>,<<Index>>]
+  /// CHECK-DAG:                                ArrayGet [<<Array3>>,<<Index>>]
+
+  /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after)
+  /// CHECK-NOT:                                IntermediateAddress
+  public static int checkLongFloatDouble() {
+    long[] array_long = {0, 1, 2, 3};
+    float[] array_float = {(float)0.0, (float)1.0, (float)2.0, (float)3.0};
+    double[] array_double = {0.0, 1.0, 2.0, 3.0};
+    double s = 0.0;
+
+    for (int i = 0; i < 4; i++) {
+      s += (double)array_long[i] + (double)array_float[i] + array_double[i];
+    }
+    return (int)s;
+  }
 
   public static void main(String[] args) {
     int[] array = {123, 456, 789};
@@ -337,5 +573,7 @@
 
     assertIntEquals(4, canMergeAfterBCE1());
     assertIntEquals(6, canMergeAfterBCE2());
+
+    assertIntEquals(18, checkLongFloatDouble());
   }
 }
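
As a quick sanity check of the expected value asserted above: each iteration of checkLongFloatDouble adds i three times (once from each array), so the total is 3 * (0 + 1 + 2 + 3) = 18. A minimal standalone check of the same arithmetic, kept outside the test itself:

public class SumCheck {
  public static void main(String[] args) {
    double s = 0.0;
    for (int i = 0; i < 4; i++) {
      s += i + i + i;  // the long, float and double arrays all hold 0..3
    }
    System.out.println((int) s);  // prints 18
  }
}
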
diff --git a/test/529-checker-unresolved/src/Main.java b/test/529-checker-unresolved/src/Main.java
index 5a36ba5..7b5cbc1 100644
--- a/test/529-checker-unresolved/src/Main.java
+++ b/test/529-checker-unresolved/src/Main.java
@@ -77,6 +77,16 @@
     expectEquals(123456789123456789f, UnresolvedClass.staticFloat);
     expectEquals(123456789123456789d, UnresolvedClass.staticDouble);
     expectEquals(o, UnresolvedClass.staticObject);
+
+    // Check "large" values.
+
+    UnresolvedClass.staticByte = (byte)-1;
+    UnresolvedClass.staticChar = (char)32768;
+    UnresolvedClass.staticInt = -1;
+
+    expectEquals((byte)-1, UnresolvedClass.staticByte);
+    expectEquals((char)32768, UnresolvedClass.staticChar);
+    expectEquals(-1, UnresolvedClass.staticInt);
   }
 
   /// CHECK-START: void Main.callUnresolvedInstanceFieldAccess(UnresolvedClass) register (before)
diff --git a/test/530-checker-loops3/expected.txt b/test/530-checker-loops3/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/530-checker-loops3/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/530-checker-loops3/info.txt b/test/530-checker-loops3/info.txt
new file mode 100644
index 0000000..07d99a3
--- /dev/null
+++ b/test/530-checker-loops3/info.txt
@@ -0,0 +1 @@
+Test on loop optimizations, in particular loop-based dynamic bce.
diff --git a/test/530-checker-loops3/src/Main.java b/test/530-checker-loops3/src/Main.java
new file mode 100644
index 0000000..5ffcbe9
--- /dev/null
+++ b/test/530-checker-loops3/src/Main.java
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on loop optimizations, in particular dynamic BCE. In all cases,
+// the bounds check on a[] is resolved statically. The bounds checks on b[]
+// exercise various scenarios. In all cases, loop-based dynamic BCE is
+// better than dominator-based BCE, since it generates the tests outside
+// the loop.
+//
+public class Main {
+
+  /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void oneConstantIndex(int[] a, int[] b) {
+    // Dynamic bce on b requires two deopts: one null and one bound.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[1];
+    }
+  }
+
+  /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleConstantIndices(int[] a, int[] b) {
+    // Dynamic bce on b requires two deopts: one null and one bound.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[0] + b[1] + b[2];
+    }
+  }
+
+  /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void oneInvariantIndex(int[] a, int[] b, int c) {
+    // Dynamic bce on b requires two deopts: one null and one bound.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[c];
+    }
+  }
+
+  /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleInvariantIndices(int[] a, int[] b, int c) {
+    // Dynamic bce on b requires three deopts: one null and two bounds.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[c-1] + b[c] + b[c+1];
+    }
+  }
+
+  /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void oneUnitStride(int[] a, int[] b) {
+    // Dynamic bce on b requires three deopts: one null and two bounds.
+    for (int i = 0; i < a.length; i++) {
+      a[i] = b[i];
+    }
+  }
+
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) instruction_simplifier_after_bce (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleUnitStrides(int[] a, int[] b) {
+    // Dynamic bce on b requires four deopts: one null and three bounds.
+    // One redundant deopt is removed by simplifier.
+    // TODO: range information could remove another
+    for (int i = 1; i < a.length - 1; i++) {
+      a[i] = b[i-1] + b[i] + b[i+1];
+    }
+  }
+
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) instruction_simplifier_after_bce (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void multipleUnitStridesConditional(int[] a, int[] b) {
+    // Dynamic bce on b requires four deopts: one null and three bounds.
+    // The two conditional references may be included, since they are in range.
+    // One redundant deopt is removed by simplifier.
+    for (int i = 2; i < a.length - 2; i++) {
+      int t = b[i-2] + b[i] + b[i+2] + (((i & 1) == 0) ? b[i+1] : b[i-1]);
+      a[i] = t;
+    }
+  }
+
+  /// CHECK-START: void Main.shifter(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.shifter(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.shifter(int[]) instruction_simplifier_after_bce (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.shifter(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void shifter(int[] x) {
+    // Real-life example: should have four deopts: one null and three bounds.
+    // Two redundant deopts are removed by simplifier.
+    for (int i = 16; i < 80; i++) {
+      int t = x[i - 3] ^ x[i - 8] ^ x[i - 14] ^ x[i - 16];
+      x[i] = t << 1 | t >>> 31;
+    }
+  }
+
+  /// CHECK-START: void Main.stencil(int[], int, int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.stencil(int[], int, int) BCE (after)
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-DAG: Deoptimize loop:none
+  /// CHECK-NOT: Deoptimize
+  //
+  /// CHECK-START: void Main.stencil(int[], int, int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static void stencil(int[] array, int start, int end) {
+    // Real-life example: should have four deopts: one null and three bounds.
+    for (int i = end; i >= start; i--) {
+      array[i] = (array[i-2] + array[i-1] + array[i] + array[i+1] + array[i+2]) / 5;
+    }
+  }
+
+  //
+  // Verifier.
+  //
+
+  public static void main(String[] args) {
+    int[] a = new int[10];
+    int b[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int b1[] = { 100 };
+
+    oneConstantIndex(a, b);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(2, a[i]);
+    }
+    try {
+      oneConstantIndex(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    multipleConstantIndices(a, b);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(6, a[i]);
+    }
+    try {
+      multipleConstantIndices(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    oneInvariantIndex(a, b, 1);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(2, a[i]);
+    }
+    try {
+      oneInvariantIndex(a, b1, 1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    multipleInvariantIndices(a, b, 1);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(6, a[i]);
+    }
+    try {
+      multipleInvariantIndices(a, b1, 1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    oneUnitStride(a, b);
+    for (int i = 0; i < a.length; i++) {
+      expectEquals(i + 1, a[i]);
+    }
+    try {
+      oneUnitStride(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+      expectEquals(100, a[0]);
+    }
+
+    multipleUnitStrides(a, b);
+    for (int i = 1; i < a.length - 1; i++) {
+      expectEquals(3 * i + 3, a[i]);
+    }
+    try {
+      multipleUnitStrides(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    multipleUnitStridesConditional(a, b);
+    for (int i = 2; i < a.length - 2; i++) {
+      int e = 3 * i + 3 + (((i & 1) == 0) ? i + 2 : i);
+      expectEquals(e, a[i]);
+    }
+    try {
+      multipleUnitStridesConditional(a, b1);
+      throw new Error("Should throw AIOOBE");
+    } catch (ArrayIndexOutOfBoundsException e) {
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
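
For readers unfamiliar with loop-based dynamic BCE, the shape the checker assertions above expect can be pictured with a hand-written equivalent of oneUnitStride: the null test and the bound test on b are performed once before the loop (Deoptimize guards in the generated code), leaving a check-free loop body. This is only an illustrative sketch with made-up helper names, not the code the compiler actually emits:

public class DynamicBceSketch {
  // Hand-hoisted version of oneUnitStride(): the per-iteration null and
  // bounds checks on b are replaced by tests performed once, up front.
  static void oneUnitStrideHoisted(int[] a, int[] b) {
    if (b == null || b.length < a.length) {
      // In compiled code this corresponds to a Deoptimize back to the
      // interpreter, which then throws NPE or AIOOBE at the right index.
      oneUnitStrideSlow(a, b);
      return;
    }
    for (int i = 0; i < a.length; i++) {
      a[i] = b[i];  // no checks needed inside the loop
    }
  }

  static void oneUnitStrideSlow(int[] a, int[] b) {
    for (int i = 0; i < a.length; i++) {
      a[i] = b[i];  // slow path with the usual implicit checks
    }
  }

  public static void main(String[] args) {
    int[] a = new int[5];
    int[] b = { 1, 2, 3, 4, 5 };
    oneUnitStrideHoisted(a, b);
    System.out.println(a[4]);  // 5
  }
}
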
diff --git a/test/566-polymorphic-inlining/polymorphic_inline.cc b/test/566-polymorphic-inlining/polymorphic_inline.cc
index c0d93dd..9f4c6c9 100644
--- a/test/566-polymorphic-inlining/polymorphic_inline.cc
+++ b/test/566-polymorphic-inlining/polymorphic_inline.cc
@@ -81,6 +81,7 @@
 
   do_checks(cls, "testInvokeVirtual");
   do_checks(cls, "testInvokeInterface");
+  do_checks(cls, "testInvokeInterface2");
   do_checks(cls, "$noinline$testInlineToSameTarget");
 }
 
diff --git a/test/566-polymorphic-inlining/src/Main.java b/test/566-polymorphic-inlining/src/Main.java
index d39e6ed..53852a4 100644
--- a/test/566-polymorphic-inlining/src/Main.java
+++ b/test/566-polymorphic-inlining/src/Main.java
@@ -16,6 +16,8 @@
 
 interface Itf {
   public Class sameInvokeInterface();
+  public Class sameInvokeInterface2();
+  public Class sameInvokeInterface3();
 }
 
 public class Main implements Itf {
@@ -50,6 +52,8 @@
       testInvokeVirtual(mains[1]);
       testInvokeInterface(itfs[0]);
       testInvokeInterface(itfs[1]);
+      testInvokeInterface2(itfs[0]);
+      testInvokeInterface2(itfs[1]);
       $noinline$testInlineToSameTarget(mains[0]);
       $noinline$testInlineToSameTarget(mains[1]);
     }
@@ -64,9 +68,13 @@
     assertEquals(Itf.class, testInvokeInterface(itfs[0]));
     assertEquals(Itf.class, testInvokeInterface(itfs[1]));
 
+    assertEquals(Itf.class, testInvokeInterface2(itfs[0]));
+    assertEquals(Itf.class, testInvokeInterface2(itfs[1]));
+
     // This will trigger a deoptimization of the compiled code.
     assertEquals(OtherSubclass.class, testInvokeVirtual(mains[2]));
     assertEquals(OtherSubclass.class, testInvokeInterface(itfs[2]));
+    assertEquals(null, testInvokeInterface2(itfs[2]));
 
     // Run this once to make sure we execute the JITted code.
     $noinline$testInlineToSameTarget(mains[0]);
@@ -83,10 +91,28 @@
     return Itf.class;
   }
 
+  public Class sameInvokeInterface2() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
+    return Itf.class;
+  }
+
+  public Class sameInvokeInterface3() {
+    field.getClass(); // null check to ensure we get an inlined frame in the CodeInfo.
+    return Itf.class;
+  }
+
   public static Class testInvokeInterface(Itf i) {
     return i.sameInvokeInterface();
   }
 
+  public static Class testInvokeInterface2(Itf i) {
+    // Make three interface calls that each perform a ClassTableGet, so that bogus
+    // code generation for ClassTableGet would crash here.
+    i.sameInvokeInterface();
+    i.sameInvokeInterface2();
+    return i.sameInvokeInterface3();
+  }
+
   public static Class testInvokeVirtual(Main m) {
     return m.sameInvokeVirtual();
   }
@@ -120,4 +146,11 @@
   public Class sameInvokeInterface() {
     return OtherSubclass.class;
   }
+
+  public Class sameInvokeInterface2() {
+    return null;
+  }
+  public Class sameInvokeInterface3() {
+    return null;
+  }
 }
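
The structure the JIT gives such a call site after polymorphic inlining, and the deoptimization that itfs[2] triggers above, can be sketched roughly as below. This is an illustration of the guard-then-deoptimize shape with invented class names, not the actual generated code:

interface Shape { int area(); }

class Square implements Shape {
  int side = 3;
  public int area() { return side * side; }
}

class Circle implements Shape {
  public int area() { return 12; }
}

public class InlineCacheSketch {
  static int callSite(Shape s) {
    // Guarded inlining: if the receiver matches a class recorded in the
    // inline cache, the corresponding body is inlined at the call site.
    if (s.getClass() == Square.class) {
      Square sq = (Square) s;
      return sq.side * sq.side;   // inlined Square.area()
    } else if (s.getClass() == Circle.class) {
      return 12;                  // inlined Circle.area()
    }
    // Any other receiver corresponds to the deoptimization path in the
    // compiled code; here we simply fall back to a virtual dispatch.
    return s.area();
  }

  public static void main(String[] args) {
    System.out.println(callSite(new Square()) + callSite(new Circle()));  // 21
  }
}
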
diff --git a/test/604-hot-static-interface/hot_static_interface.cc b/test/604-hot-static-interface/hot_static_interface.cc
deleted file mode 100644
index 475a11d..0000000
--- a/test/604-hot-static-interface/hot_static_interface.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "art_method.h"
-#include "jit/jit.h"
-#include "jit/jit_code_cache.h"
-#include "jit/profiling_info.h"
-#include "oat_quick_method_header.h"
-#include "scoped_thread_state_change.h"
-#include "ScopedUtfChars.h"
-#include "stack_map.h"
-
-namespace art {
-
-extern "C" JNIEXPORT void JNICALL Java_Main_waitUntilJitted(JNIEnv* env,
-                                                            jclass,
-                                                            jclass itf,
-                                                            jstring method_name) {
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  if (jit == nullptr) {
-    return;
-  }
-
-  ScopedObjectAccess soa(Thread::Current());
-
-  ScopedUtfChars chars(env, method_name);
-  CHECK(chars.c_str() != nullptr);
-
-  mirror::Class* klass = soa.Decode<mirror::Class*>(itf);
-  ArtMethod* method = klass->FindDeclaredDirectMethodByName(chars.c_str(), sizeof(void*));
-
-  jit::JitCodeCache* code_cache = jit->GetCodeCache();
-  OatQuickMethodHeader* header = nullptr;
-  // Make sure there is a profiling info, required by the compiler.
-  ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
-  while (true) {
-    header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode());
-    if (code_cache->ContainsPc(header->GetCode())) {
-      break;
-    } else {
-      // Sleep to yield to the compiler thread.
-      usleep(1000);
-      // Will either ensure it's compiled or do the compilation itself.
-      jit->CompileMethod(method, soa.Self(), /* osr */ false);
-    }
-  }
-}
-
-}  // namespace art
diff --git a/test/604-hot-static-interface/src/Main.java b/test/604-hot-static-interface/src/Main.java
index 559f15d..04d7cd6 100644
--- a/test/604-hot-static-interface/src/Main.java
+++ b/test/604-hot-static-interface/src/Main.java
@@ -22,14 +22,14 @@
       Itf.foo(new Object());
     }
 
-    waitUntilJitted(Itf.class, "foo");
+    ensureJitCompiled(Itf.class, "foo");
 
     if (!Itf.foo(new Object())) {
       throw new Error("Unexpected result");
     }
   }
 
-  private static native void waitUntilJitted(Class itf, String method_name);
+  private static native void ensureJitCompiled(Class itf, String method_name);
 }
 
 interface Itf {
diff --git a/test/609-checker-x86-bounds-check/expected.txt b/test/609-checker-x86-bounds-check/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/609-checker-x86-bounds-check/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/609-checker-x86-bounds-check/info.txt b/test/609-checker-x86-bounds-check/info.txt
new file mode 100644
index 0000000..c0f26d0
--- /dev/null
+++ b/test/609-checker-x86-bounds-check/info.txt
@@ -0,0 +1 @@
+Checker test that we combine ArrayLength and BoundsCheck on x86/x86_64.
diff --git a/test/609-checker-x86-bounds-check/src/Main.java b/test/609-checker-x86-bounds-check/src/Main.java
new file mode 100644
index 0000000..bfc2be8
--- /dev/null
+++ b/test/609-checker-x86-bounds-check/src/Main.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String args[]) {
+    int[] array = new int[51];
+    testArrayLengthBoundsCheckX86(array, 10);
+
+    System.out.println("passed");
+  }
+
+  /// CHECK-START-X86: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (before)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  /// CHECK-START-X86: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  /// CHECK-START-X86: void Main.testArrayLengthBoundsCheckX86(int[], int) disassembly (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK:         <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-NEXT:    <<Length:i\d+>>        ArrayLength [<<Array>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-NEXT:    <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-NEXT:                           cmp [<<BaseReg:\w+>> + 8], <<IndexReg:\w+>>
+  /// CHECK:         <<ArraySet:v\d+>>      ArraySet [<<Array>>,<<Index>>,<<Value>>]
+  /// CHECK-NEXT:                           mov [<<BaseReg>> + <<IndexReg>> * 4 + 12], 9
+
+  /// CHECK-START-X86_64: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (before)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  /// CHECK-START-X86_64: void Main.testArrayLengthBoundsCheckX86(int[], int) x86_memory_operand_generation (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK-DAG:     <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-DAG:     <<Length:i\d+>>        ArrayLength [<<CheckedArray>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-DAG:     <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-DAG:     <<ArraySet:v\d+>>      ArraySet [<<CheckedArray>>,<<CheckedIndex>>,<<Value>>]
+
+  // Test assumes the index parameter is in one of the lower 8 registers (it is passed in edx).
+  /// CHECK-START-X86_64: void Main.testArrayLengthBoundsCheckX86(int[], int) disassembly (after)
+  /// CHECK-DAG:     <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Index:i\d+>>         ParameterValue
+  /// CHECK-DAG:     <<Value:i\d+>>         IntConstant 9
+  /// CHECK:         <<CheckedArray:l\d+>>  NullCheck [<<Array>>]
+  /// CHECK-NEXT:    <<Length:i\d+>>        ArrayLength [<<Array>>] is_string_length:false emitted_at_use:true loop:none
+  /// CHECK-NEXT:    <<CheckedIndex:i\d+>>  BoundsCheck [<<Index>>,<<Length>>]
+  /// CHECK-NEXT:                           cmp [<<BaseReg:\w+>> + 8], e<<IndexReg:\w+>>
+  /// CHECK:         <<ArraySet:v\d+>>      ArraySet [<<Array>>,<<Index>>,<<Value>>]
+  /// CHECK-NEXT:                           mov [<<BaseReg>> + r<<IndexReg>> * 4 + 12], 9
+
+  static void testArrayLengthBoundsCheckX86(int[] array, int index) {
+    array[index] = 9;
+  }
+}
diff --git a/test/611-checker-simplify-if/expected.txt b/test/611-checker-simplify-if/expected.txt
new file mode 100644
index 0000000..3083c4c
--- /dev/null
+++ b/test/611-checker-simplify-if/expected.txt
@@ -0,0 +1,7 @@
+54
+54
+54
+12
+12
+12
+33
diff --git a/test/611-checker-simplify-if/info.txt b/test/611-checker-simplify-if/info.txt
new file mode 100644
index 0000000..b090db8
--- /dev/null
+++ b/test/611-checker-simplify-if/info.txt
@@ -0,0 +1 @@
+Checker tests for the 'if' simplification in the compiler.
diff --git a/test/611-checker-simplify-if/src/Main.java b/test/611-checker-simplify-if/src/Main.java
new file mode 100644
index 0000000..21f4115
--- /dev/null
+++ b/test/611-checker-simplify-if/src/Main.java
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    testNoInline(args);
+    System.out.println(staticField);
+    testInline(args);
+    System.out.println(staticField);
+    testNonConstantInputs(args);
+    System.out.println(staticField);
+    testNonConstantEqual(args);
+    System.out.println(staticField);
+    testGreaterCondition(args);
+    System.out.println(staticField);
+    testSwitch(args);
+    System.out.println(staticField);
+    testFP(args);
+    System.out.println(staticField);
+  }
+
+  // Test when a condition is the input of the if.
+
+  /// CHECK-START: void Main.testNoInline(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK: <<Const0:i\d+>>   IntConstant 0
+  /// CHECK:                   If
+  /// CHECK: <<Phi:i\d+>>      Phi
+  /// CHECK: <<Equal:z\d+>>    Equal [<<Phi>>,<<Const0>>]
+  /// CHECK:                   If [<<Equal>>]
+
+  /// CHECK-START: void Main.testNoInline(java.lang.String[]) dead_code_elimination (after)
+  /// CHECK:      If
+  /// CHECK-NOT:  Phi
+  /// CHECK-NOT:  Equal
+  /// CHECK-NOT:  If
+  public static void testNoInline(String[] args) {
+    boolean myVar = false;
+    if (args.length == 42) {
+      myVar = true;
+    } else {
+      staticField = 32;
+      myVar = false;
+    }
+    if (myVar) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  // Test when the phi is the input of the if.
+
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination_final (before)
+  /// CHECK-DAG: <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:                   If
+  /// CHECK-DAG: <<Phi:i\d+>>      Phi
+  /// CHECK-DAG:                   If [<<Phi>>]
+
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination_final (after)
+  /// CHECK:      If
+  /// CHECK-NOT:  Phi
+  /// CHECK-NOT:  If
+  public static void testInline(String[] args) {
+    boolean myVar = $inline$doTest(args);
+    if (myVar) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  public static boolean $inline$doTest(String[] args) {
+    boolean myVar;
+    if (args.length == 42) {
+      myVar = true;
+    } else {
+      staticField = 32;
+      myVar = false;
+    }
+    return myVar;
+  }
+
+  // Test when one input is not a constant. We can only optimize the constant input.
+
+  /// CHECK-START: void Main.testNonConstantInputs(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
+  /// CHECK-DAG: <<Const42:i\d+>>         IntConstant 42
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
+  /// CHECK-DAG: <<Phi:i\d+>>             Phi [<<Const34>>,<<StaticFieldGet>>]
+  /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<Phi>>,<<Const42>>]
+  /// CHECK-DAG:                          If [<<NotEqual>>]
+
+  /// CHECK-START: void Main.testNonConstantInputs(java.lang.String[]) dead_code_elimination (after)
+  /// CHECK-DAG: <<Const42:i\d+>>         IntConstant 42
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
+  /// CHECK-NOT:                          Phi
+  /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<StaticFieldGet>>,<<Const42>>]
+  /// CHECK-DAG:                          If [<<NotEqual>>]
+  public static void testNonConstantInputs(String[] args) {
+    int a = 42;
+    if (args.length == 42) {
+      a = 34;
+    } else {
+      staticField = 32;
+      a = otherStaticField;
+    }
+    if (a == 42) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  // Test with a condition.
+
+  /// CHECK-START: void Main.testGreaterCondition(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
+  /// CHECK-DAG: <<Const22:i\d+>>         IntConstant 22
+  /// CHECK-DAG: <<Const25:i\d+>>         IntConstant 25
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<Phi:i\d+>>             Phi [<<Const34>>,<<Const22>>]
+  /// CHECK-DAG: <<GE:z\d+>>              GreaterThanOrEqual [<<Phi>>,<<Const25>>]
+  /// CHECK-DAG:                          If [<<GE>>]
+
+  /// CHECK-START: void Main.testGreaterCondition(java.lang.String[]) dead_code_elimination (after)
+  /// CHECK-DAG:                          If
+  /// CHECK-NOT:                          Phi
+  /// CHECK-NOT:                          GreaterThanOrEqual
+  /// CHECK-NOT:                          If
+  public static void testGreaterCondition(String[] args) {
+    int a = 42;
+    if (args.length == 42) {
+      a = 34;
+    } else {
+      staticField = 32;
+      a = 22;
+    }
+    if (a < 25) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  // Test when comparing non constants.
+
+  /// CHECK-START: void Main.testNonConstantEqual(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
+  /// CHECK-DAG: <<Const42:i\d+>>         IntConstant 42
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
+  /// CHECK-DAG: <<Phi:i\d+>>             Phi [<<Const34>>,<<StaticFieldGet>>]
+  /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<Phi>>,<<StaticFieldGet>>]
+  /// CHECK-DAG:                          If [<<NotEqual>>]
+
+  /// CHECK-START: void Main.testNonConstantEqual(java.lang.String[]) dead_code_elimination (after)
+  /// CHECK-DAG: <<Const34:i\d+>>         IntConstant 34
+  /// CHECK-DAG:                          If
+  /// CHECK-DAG: <<StaticFieldGet:i\d+>>  StaticFieldGet
+  /// CHECK-NOT:                          Phi
+  /// CHECK-DAG: <<NotEqual:z\d+>>        NotEqual [<<Const34>>,<<StaticFieldGet>>]
+  /// CHECK-DAG:                          If [<<NotEqual>>]
+  public static void testNonConstantEqual(String[] args) {
+    int a = 42;
+    int b = otherStaticField;
+    if (args.length == 42) {
+      a = 34;
+    } else {
+      staticField = 32;
+      a = b;
+    }
+    if (a == b) {
+      staticField = 12;
+    } else {
+      staticField = 54;
+    }
+  }
+
+  // Make sure we don't "simplify" a loop and potentially turn it into
+  // an irreducible loop. The suspend check at the loop header prevents
+  // us from doing the simplification.
+
+  /// CHECK-START: void Main.testLoop(boolean) disassembly (after)
+  /// CHECK-DAG: SuspendCheck
+  /// CHECK:     irreducible:false
+  /// CHECK-NOT: irreducible:true
+  public static void testLoop(boolean c) {
+    while (true) {
+      if (c) {
+        if ($noinline$foo()) return;
+        c = false;
+      } else {
+        $noinline$foo();
+        c = true;
+      }
+    }
+  }
+
+  static boolean $noinline$foo() {
+    if (doThrow) throw new Error("");
+    return true;
+  }
+
+  /// CHECK-START: void Main.testSwitch(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK:      If
+  /// CHECK:      If
+  /// CHECK:      If
+
+  /// CHECK-START: void Main.testSwitch(java.lang.String[]) dead_code_elimination (after)
+  /// CHECK:      If
+  /// CHECK:      If
+  /// CHECK-NOT:  If
+  public static void testSwitch(String[] args) {
+    boolean cond = false;
+    switch (args.length) {
+      case 42:
+        staticField = 11;
+        cond = true;
+        break;
+      case 43:
+        staticField = 33;
+        cond = true;
+        break;
+      default:
+        cond = false;
+        break;
+    }
+    if (cond) {
+      // Redirect cases 42 and 43 here.
+      staticField = 2;
+    }
+    // Redirect default here.
+  }
+
+  /// CHECK-START: void Main.testFP(java.lang.String[]) dead_code_elimination (before)
+  /// CHECK:      If
+  /// CHECK:      If
+
+  /// CHECK-START: void Main.testFP(java.lang.String[]) dead_code_elimination (after)
+  /// CHECK:      If
+  /// CHECK:      If
+  public static void testFP(String[] args) {
+    float f = 2.2f;
+    float nan = $noinline$getNaN();
+    if (args.length == 42) {
+      f = 4.3f;
+    } else {
+      staticField = 33;
+      f = nan;
+    }
+    if (f == nan) {
+      staticField = 5;
+    }
+  }
+
+  // Not inlined, so the compiler cannot see that the value is a NaN.
+  static float $noinline$getNaN() {
+    if (doThrow) throw new Error("");
+    return Float.NaN;
+  }
+
+  static boolean doThrow;
+  static int staticField;
+  static int otherStaticField;
+}
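
The testFP case relies on the IEEE 754 rule that a NaN compares unequal to every value, including itself, so f == nan is false on both paths and the compiler must not fold the second If away. A standalone illustration of that comparison behaviour in plain Java:

public class NanCompare {
  public static void main(String[] args) {
    float nan = Float.NaN;
    float f = nan;
    System.out.println(f == nan);        // false: NaN is never equal, even to itself
    System.out.println(Float.isNaN(f));  // true: the only reliable NaN test
  }
}
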
diff --git a/test/612-jit-dex-cache/expected.txt b/test/612-jit-dex-cache/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/612-jit-dex-cache/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/612-jit-dex-cache/info.txt b/test/612-jit-dex-cache/info.txt
new file mode 100644
index 0000000..e80f642
--- /dev/null
+++ b/test/612-jit-dex-cache/info.txt
@@ -0,0 +1,2 @@
+Regression test for the JIT compiler, which used to
+wrongly update the dex cache of a class loader.
diff --git a/test/612-jit-dex-cache/src-ex/B.java b/test/612-jit-dex-cache/src-ex/B.java
new file mode 100644
index 0000000..4da9a1d
--- /dev/null
+++ b/test/612-jit-dex-cache/src-ex/B.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class B {
+}
diff --git a/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java b/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java
new file mode 100644
index 0000000..1d6158a
--- /dev/null
+++ b/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class LoadedByAppClassLoader {
+  public static void letMeInlineYou(A a) {
+    a.foo();
+  }
+
+  public static ClassLoader areYouB() {
+    // Ensure letMeInlineYou is JITted and tries to do inlining of A.foo.
+    // The compiler used to wrongly update the dex cache of letMeInlineYou's
+    // class loader.
+    Main.ensureJitCompiled(LoadedByAppClassLoader.class, "letMeInlineYou");
+    return OtherClass.getB().getClassLoader();
+  }
+}
+
+class OtherClass {
+  public static Class getB() {
+    // This used to return the B class of another class loader.
+    return B.class;
+  }
+}
diff --git a/test/612-jit-dex-cache/src/A.java b/test/612-jit-dex-cache/src/A.java
new file mode 100644
index 0000000..415c712
--- /dev/null
+++ b/test/612-jit-dex-cache/src/A.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+  public int foo() {
+    return 42;
+  }
+}
diff --git a/test/612-jit-dex-cache/src/B.java b/test/612-jit-dex-cache/src/B.java
new file mode 100644
index 0000000..46c878b
--- /dev/null
+++ b/test/612-jit-dex-cache/src/B.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class B extends A {
+}
diff --git a/test/612-jit-dex-cache/src/Main.java b/test/612-jit-dex-cache/src/Main.java
new file mode 100644
index 0000000..0e4bd22
--- /dev/null
+++ b/test/612-jit-dex-cache/src/Main.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+import dalvik.system.PathClassLoader;
+
+// A class loader that does not delegate to its parent for classes outside the java.* packages.
+class DelegateLastPathClassLoader extends PathClassLoader {
+
+  public DelegateLastPathClassLoader(String dexPath, ClassLoader parent) {
+    super(dexPath, parent);
+  }
+
+  @Override
+  protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundException {
+    if (!name.startsWith("java.")) {
+      try {
+        return findClass(name);
+      } catch (ClassNotFoundException ignore) {
+        // Ignore and fall through to parent class loader.
+      }
+    }
+    return super.loadClass(name, resolve);
+  }
+}
+
+public class Main {
+
+  private static Class classFromDifferentLoader() throws Exception {
+    final String DEX_FILE = System.getenv("DEX_LOCATION") + "/612-jit-dex-cache-ex.jar";
+    ClassLoader loader = new DelegateLastPathClassLoader(DEX_FILE, Main.class.getClassLoader());
+    return loader.loadClass("LoadedByAppClassLoader");
+  }
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    Class cls = classFromDifferentLoader();
+    Method m = cls.getDeclaredMethod("letMeInlineYou", A.class);
+    B b = new B();
+    // Invoke the method enough times to get an inline cache and get JITted.
+    for (int i = 0; i < 10000; ++i) {
+      m.invoke(null, b);
+    }
+    m = cls.getDeclaredMethod("areYouB", null);
+    ClassLoader loader = (ClassLoader) m.invoke(null);
+    if (loader != cls.getClassLoader()) {
+      throw new Error("Wrong class loader");
+    }
+  }
+
+  public static native void ensureJitCompiled(Class cls, String method_name);
+}
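
The regression guarded against here hinges on class identity being per class loader: the same class name loaded through two different loaders yields two distinct Class objects, so resolving B through the wrong loader's dex cache silently changes which class callers observe. A small sketch of that Java-level fact, assuming a trivial class B has been compiled onto the classpath (the loader below is an illustrative stand-in, not the test's DelegateLastPathClassLoader):

import java.io.ByteArrayOutputStream;
import java.io.InputStream;

// Defines "B" itself instead of delegating, so every instance produces its
// own, distinct Class object from the same bytecode.
class IsolatingLoader extends ClassLoader {
  @Override
  protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundException {
    if (!name.equals("B")) {
      return super.loadClass(name, resolve);
    }
    try (InputStream in = getResourceAsStream("B.class")) {
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      int b;
      while ((b = in.read()) != -1) {
        out.write(b);
      }
      byte[] bytes = out.toByteArray();
      return defineClass(name, bytes, 0, bytes.length);
    } catch (Exception e) {
      throw new ClassNotFoundException(name, e);
    }
  }
}

public class LoaderIdentity {
  public static void main(String[] args) throws Exception {
    Class<?> b1 = new IsolatingLoader().loadClass("B");
    Class<?> b2 = new IsolatingLoader().loadClass("B");
    System.out.println(b1 == b2);                                    // false
    System.out.println(b1.getClassLoader() == b2.getClassLoader());  // false
  }
}
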
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 75e74ec..7813d16 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -47,8 +47,7 @@
   570-checker-osr/osr.cc \
   595-profile-saving/profile-saving.cc \
   596-app-images/app_images.cc \
-  597-deopt-new-string/deopt.cc \
-  604-hot-static-interface/hot_static_interface.cc
+  597-deopt-new-string/deopt.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index dd6b6f3..8f8b667 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -527,7 +527,7 @@
 # Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT).
 # 484: Baker's fast path based read barrier compiler instrumentation generates code containing
 #      more parallel moves on x86, thus some Checker assertions may fail.
-# 527: On ARM64, the read barrier instrumentation does not support the HArm64IntermediateAddress
+# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress
 #      instruction yet (b/26601270).
 # 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are
 #      not yet handled in the read barrier configuration.
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index fd41fd2..e70a95c 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -18,10 +18,14 @@
 
 #include "base/logging.h"
 #include "dex_file-inl.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "mirror/class-inl.h"
 #include "nth_caller_visitor.h"
+#include "oat_quick_method_header.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
+#include "ScopedUtfChars.h"
 #include "stack.h"
 #include "thread-inl.h"
 
@@ -116,4 +120,38 @@
   return JNI_TRUE;
 }
 
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureJitCompiled(JNIEnv* env,
+                                                             jclass,
+                                                             jclass cls,
+                                                             jstring method_name) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    return;
+  }
+
+  ScopedObjectAccess soa(Thread::Current());
+
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+
+  mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
+  ArtMethod* method = klass->FindDeclaredDirectMethodByName(chars.c_str(), sizeof(void*));
+
+  jit::JitCodeCache* code_cache = jit->GetCodeCache();
+  OatQuickMethodHeader* header = nullptr;
+  // Make sure there is a profiling info, required by the compiler.
+  ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
+  while (true) {
+    header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode());
+    if (code_cache->ContainsPc(header->GetCode())) {
+      break;
+    } else {
+      // Sleep to yield to the compiler thread.
+      usleep(1000);
+      // Will either ensure it's compiled or do the compilation itself.
+      jit->CompileMethod(method, soa.Self(), /* osr */ false);
+    }
+  }
+}
+
 }  // namespace art
diff --git a/test/run-test b/test/run-test
index 3ae063a..1ef5428 100755
--- a/test/run-test
+++ b/test/run-test
@@ -37,7 +37,7 @@
 if [ -z "$TMPDIR" ]; then
   tmp_dir="/tmp/$USER/${test_dir}"
 else
-  tmp_dir="${TMPDIR}/$USER/${test_dir}"
+  tmp_dir="${TMPDIR}/${test_dir}"
 fi
 checker="${progdir}/../tools/checker/checker.py"
 export JAVA="java"
@@ -78,9 +78,14 @@
     export ANDROID_BUILD_TOP=$oldwd
 fi
 
+# ANDROID_HOST_OUT is not set in a build environment.
+if [ -z "$ANDROID_HOST_OUT" ]; then
+    export ANDROID_HOST_OUT=${OUT_DIR:-$ANDROID_BUILD_TOP/out/}host/linux-x86
+fi
+
 # If JACK_CLASSPATH is not set, assume it only contains core-libart.
 if [ -z "$JACK_CLASSPATH" ]; then
-  export JACK_CLASSPATH="${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack:${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-oj-hostdex_intermediates/classes.jack"
+  export JACK_CLASSPATH="${ANDROID_HOST_OUT}/../common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack:${ANDROID_HOST_OUT}/../common/obj/JAVA_LIBRARIES/core-oj-hostdex_intermediates/classes.jack"
 fi
 
 export JACK="$JACK -g -cp $JACK_CLASSPATH"
@@ -462,10 +467,6 @@
     fi
 elif [ "$runtime" = "art" ]; then
     if [ "$target_mode" = "no" ]; then
-        # ANDROID_HOST_OUT is not set in a build environment.
-        if [ -z "$ANDROID_HOST_OUT" ]; then
-            export ANDROID_HOST_OUT=${OUT_DIR:-$ANDROID_BUILD_TOP/out/}host/linux-x86
-        fi
         guess_host_arch_name
         run_args="${run_args} --boot ${ANDROID_HOST_OUT}/framework/core${image_suffix}${pic_image_suffix}${multi_image_suffix}.art"
         run_args="${run_args} --runtime-option -Djava.library.path=${ANDROID_HOST_OUT}/lib${suffix64}"
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index f25fb98..996f2f8 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -253,5 +253,12 @@
   names: ["jsr166.CollectionTest#testEmptyMeansEmpty",
           "jsr166.Collection8Test#testForEach",
           "jsr166.Collection8Test#testForEachConcurrentStressTest"]
+},
+{
+  description: "Flaky test",
+  result: EXEC_FAILED,
+  bug: 30107038,
+  modes: [device],
+  names: ["org.apache.harmony.tests.java.lang.ProcessTest#test_destroyForcibly"]
 }
 ]
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 976e1d8..bdb2d4b 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -19,8 +19,12 @@
   exit 1
 fi
 
+if [ -z "$ANDROID_HOST_OUT" ] ; then
+  ANDROID_HOST_OUT=${OUT_DIR-$ANDROID_BUILD_TOP/out}/host/linux-x86
+fi
+
 # Jar containing all the tests.
-test_jack=${OUT_DIR-out}/host/common/obj/JAVA_LIBRARIES/apache-harmony-jdwp-tests-hostdex_intermediates/classes.jack
+test_jack=${ANDROID_HOST_OUT}/../common/obj/JAVA_LIBRARIES/apache-harmony-jdwp-tests-hostdex_intermediates/classes.jack
 
 if [ ! -f $test_jack ]; then
   echo "Before running, you must build jdwp tests and vogar:" \
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 3e2a512..3605aa0 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -19,12 +19,17 @@
   exit 1
 fi
 
+if [ -z "$ANDROID_PRODUCT_OUT" ] ; then
+  JAVA_LIBRARIES=out/target/common/obj/JAVA_LIBRARIES
+else
+  JAVA_LIBRARIES=${ANDROID_PRODUCT_OUT}/../../common/obj/JAVA_LIBRARIES
+fi
+
 # Jar containing jsr166 tests.
-jsr166_test_jack=${OUT_DIR-out}/target/common/obj/JAVA_LIBRARIES/jsr166-tests_intermediates/classes.jack
+jsr166_test_jack=${JAVA_LIBRARIES}/jsr166-tests_intermediates/classes.jack
 
 # Jar containing all the other tests.
-test_jack=${OUT_DIR-out}/target/common/obj/JAVA_LIBRARIES/core-tests_intermediates/classes.jack
-
+test_jack=${JAVA_LIBRARIES}/core-tests_intermediates/classes.jack
 
 if [ ! -f $test_jack ]; then
   echo "Before running, you must build core-tests, jsr166-tests and vogar: \