Merge "optimizing: Build HConstructorFence for HNewArray/HNewInstance nodes"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index c4374f7..0a465c4 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -83,16 +83,16 @@
 ART_TEST_TARGET_GTEST_VerifierDepsMulti_DEX := $(dir $(ART_TEST_TARGET_GTEST_Main_DEX))$(subst Main,VerifierDepsMulti,$(basename $(notdir $(ART_TEST_TARGET_GTEST_Main_DEX))))$(suffix $(ART_TEST_TARGET_GTEST_Main_DEX))
 
 $(ART_TEST_HOST_GTEST_VerifierDeps_DEX): $(ART_TEST_GTEST_VerifierDeps_SRC) $(HOST_OUT_EXECUTABLES)/smali
-	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+	 $(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^)
 
 $(ART_TEST_TARGET_GTEST_VerifierDeps_DEX): $(ART_TEST_GTEST_VerifierDeps_SRC) $(HOST_OUT_EXECUTABLES)/smali
-	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+	 $(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^)
 
 $(ART_TEST_HOST_GTEST_VerifierDepsMulti_DEX): $(ART_TEST_GTEST_VerifierDepsMulti_SRC) $(HOST_OUT_EXECUTABLES)/smali
-	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+	 $(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^)
 
 $(ART_TEST_TARGET_GTEST_VerifierDepsMulti_DEX): $(ART_TEST_GTEST_VerifierDepsMulti_SRC) $(HOST_OUT_EXECUTABLES)/smali
-	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+	 $(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^)
 
 # Dex file dependencies for each gtest.
 ART_GTEST_dex2oat_environment_tests_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
@@ -171,6 +171,12 @@
 # TODO: document why this is needed.
 ART_GTEST_proxy_test_HOST_DEPS := $(HOST_CORE_IMAGE_DEFAULT_64) $(HOST_CORE_IMAGE_DEFAULT_32)
 
+# The dexdiag test requires the dexdiag utility.
+ART_GTEST_dexdiag_test_HOST_DEPS := \
+  $(HOST_OUT_EXECUTABLES)/dexdiag
+ART_GTEST_dexdiag_test_TARGET_DEPS := \
+  dexdiag
+
 # The dexdump test requires an image and the dexdump utility.
 # TODO: rename into dexdump when migration completes
 ART_GTEST_dexdump_test_HOST_DEPS := \
@@ -244,6 +250,7 @@
     art_compiler_tests \
     art_compiler_host_tests \
     art_dex2oat_tests \
+    art_dexdiag_tests \
     art_dexdump_tests \
     art_dexlayout_tests \
     art_dexlist_tests \
diff --git a/compiler/Android.bp b/compiler/Android.bp
index a2b07af..df896dc 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -115,6 +115,7 @@
                 "optimizing/intrinsics_arm.cc",
                 "optimizing/intrinsics_arm_vixl.cc",
                 "optimizing/nodes_shared.cc",
+                "optimizing/scheduler_arm.cc",
                 "utils/arm/assembler_arm.cc",
                 "utils/arm/assembler_arm_vixl.cc",
                 "utils/arm/assembler_thumb2.cc",
diff --git a/compiler/compiled_class.h b/compiler/compiled_class.h
deleted file mode 100644
index 06ce946..0000000
--- a/compiler/compiled_class.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_COMPILED_CLASS_H_
-#define ART_COMPILER_COMPILED_CLASS_H_
-
-#include "mirror/class.h"
-
-namespace art {
-
-class CompiledClass {
- public:
-  explicit CompiledClass(mirror::Class::Status status) : status_(status) {}
-  ~CompiledClass() {}
-  mirror::Class::Status GetStatus() const {
-    return status_;
-  }
-  void SetStatus(mirror::Class::Status status) {
-    status_ = status;
-  }
- private:
-  mirror::Class::Status status_;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_COMPILED_CLASS_H_
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index d0f66e2..912c964 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -122,10 +122,8 @@
     kMethod,
     kCall,
     kCallRelative,            // NOTE: Actual patching is instruction_set-dependent.
-    kType,
     kTypeRelative,            // NOTE: Actual patching is instruction_set-dependent.
     kTypeBssEntry,            // NOTE: Actual patching is instruction_set-dependent.
-    kString,
     kStringRelative,          // NOTE: Actual patching is instruction_set-dependent.
     kStringBssEntry,          // NOTE: Actual patching is instruction_set-dependent.
     kDexCacheArray,           // NOTE: Actual patching is instruction_set-dependent.
@@ -156,14 +154,6 @@
     return patch;
   }
 
-  static LinkerPatch TypePatch(size_t literal_offset,
-                               const DexFile* target_dex_file,
-                               uint32_t target_type_idx) {
-    LinkerPatch patch(literal_offset, Type::kType, target_dex_file);
-    patch.type_idx_ = target_type_idx;
-    return patch;
-  }
-
   static LinkerPatch RelativeTypePatch(size_t literal_offset,
                                        const DexFile* target_dex_file,
                                        uint32_t pc_insn_offset,
@@ -184,14 +174,6 @@
     return patch;
   }
 
-  static LinkerPatch StringPatch(size_t literal_offset,
-                                 const DexFile* target_dex_file,
-                                 uint32_t target_string_idx) {
-    LinkerPatch patch(literal_offset, Type::kString, target_dex_file);
-    patch.string_idx_ = target_string_idx;
-    return patch;
-  }
-
   static LinkerPatch RelativeStringPatch(size_t literal_offset,
                                          const DexFile* target_dex_file,
                                          uint32_t pc_insn_offset,
@@ -265,29 +247,25 @@
   }
 
   const DexFile* TargetTypeDexFile() const {
-    DCHECK(patch_type_ == Type::kType ||
-           patch_type_ == Type::kTypeRelative ||
+    DCHECK(patch_type_ == Type::kTypeRelative ||
            patch_type_ == Type::kTypeBssEntry);
     return target_dex_file_;
   }
 
   dex::TypeIndex TargetTypeIndex() const {
-    DCHECK(patch_type_ == Type::kType ||
-           patch_type_ == Type::kTypeRelative ||
+    DCHECK(patch_type_ == Type::kTypeRelative ||
            patch_type_ == Type::kTypeBssEntry);
     return dex::TypeIndex(type_idx_);
   }
 
   const DexFile* TargetStringDexFile() const {
-    DCHECK(patch_type_ == Type::kString ||
-           patch_type_ == Type::kStringRelative ||
+    DCHECK(patch_type_ == Type::kStringRelative ||
            patch_type_ == Type::kStringBssEntry);
     return target_dex_file_;
   }
 
   dex::StringIndex TargetStringIndex() const {
-    DCHECK(patch_type_ == Type::kString ||
-           patch_type_ == Type::kStringRelative ||
+    DCHECK(patch_type_ == Type::kStringRelative ||
            patch_type_ == Type::kStringBssEntry);
     return dex::StringIndex(string_idx_);
   }
diff --git a/compiler/compiled_method_test.cc b/compiler/compiled_method_test.cc
index 99ee875..81a2cde 100644
--- a/compiler/compiled_method_test.cc
+++ b/compiler/compiled_method_test.cc
@@ -62,10 +62,38 @@
       LinkerPatch::RelativeCodePatch(16u, dex_file1, 1001u),
       LinkerPatch::RelativeCodePatch(16u, dex_file2, 1000u),
       LinkerPatch::RelativeCodePatch(16u, dex_file2, 1001u),
-      LinkerPatch::TypePatch(16u, dex_file1, 1000u),
-      LinkerPatch::TypePatch(16u, dex_file1, 1001u),
-      LinkerPatch::TypePatch(16u, dex_file2, 1000u),
-      LinkerPatch::TypePatch(16u, dex_file2, 1001u),
+      LinkerPatch::RelativeTypePatch(16u, dex_file1, 3000u, 1000u),
+      LinkerPatch::RelativeTypePatch(16u, dex_file1, 3001u, 1000u),
+      LinkerPatch::RelativeTypePatch(16u, dex_file1, 3000u, 1001u),
+      LinkerPatch::RelativeTypePatch(16u, dex_file1, 3001u, 1001u),
+      LinkerPatch::RelativeTypePatch(16u, dex_file2, 3000u, 1000u),
+      LinkerPatch::RelativeTypePatch(16u, dex_file2, 3001u, 1000u),
+      LinkerPatch::RelativeTypePatch(16u, dex_file2, 3000u, 1001u),
+      LinkerPatch::RelativeTypePatch(16u, dex_file2, 3001u, 1001u),
+      LinkerPatch::TypeBssEntryPatch(16u, dex_file1, 3000u, 1000u),
+      LinkerPatch::TypeBssEntryPatch(16u, dex_file1, 3001u, 1000u),
+      LinkerPatch::TypeBssEntryPatch(16u, dex_file1, 3000u, 1001u),
+      LinkerPatch::TypeBssEntryPatch(16u, dex_file1, 3001u, 1001u),
+      LinkerPatch::TypeBssEntryPatch(16u, dex_file2, 3000u, 1000u),
+      LinkerPatch::TypeBssEntryPatch(16u, dex_file2, 3001u, 1000u),
+      LinkerPatch::TypeBssEntryPatch(16u, dex_file2, 3000u, 1001u),
+      LinkerPatch::TypeBssEntryPatch(16u, dex_file2, 3001u, 1001u),
+      LinkerPatch::RelativeStringPatch(16u, dex_file1, 3000u, 1000u),
+      LinkerPatch::RelativeStringPatch(16u, dex_file1, 3001u, 1000u),
+      LinkerPatch::RelativeStringPatch(16u, dex_file1, 3000u, 1001u),
+      LinkerPatch::RelativeStringPatch(16u, dex_file1, 3001u, 1001u),
+      LinkerPatch::RelativeStringPatch(16u, dex_file2, 3000u, 1000u),
+      LinkerPatch::RelativeStringPatch(16u, dex_file2, 3001u, 1000u),
+      LinkerPatch::RelativeStringPatch(16u, dex_file2, 3000u, 1001u),
+      LinkerPatch::RelativeStringPatch(16u, dex_file2, 3001u, 1001u),
+      LinkerPatch::StringBssEntryPatch(16u, dex_file1, 3000u, 1000u),
+      LinkerPatch::StringBssEntryPatch(16u, dex_file1, 3001u, 1000u),
+      LinkerPatch::StringBssEntryPatch(16u, dex_file1, 3000u, 1001u),
+      LinkerPatch::StringBssEntryPatch(16u, dex_file1, 3001u, 1001u),
+      LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3000u, 1000u),
+      LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3001u, 1000u),
+      LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3000u, 1001u),
+      LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3001u, 1001u),
       LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2000u),
       LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3001u, 2000u),
       LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2001u),
@@ -74,10 +102,15 @@
       LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2000u),
       LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3000u, 2001u),
       LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2001u),
+      LinkerPatch::BakerReadBarrierBranchPatch(16u, 0u, 0u),
+      LinkerPatch::BakerReadBarrierBranchPatch(16u, 0u, 1u),
+      LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 0u),
+      LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 1u),
+
       LinkerPatch::MethodPatch(32u, dex_file1, 1000u),
       LinkerPatch::MethodPatch(32u, dex_file1, 1001u),
       LinkerPatch::MethodPatch(32u, dex_file2, 1000u),
       LinkerPatch::MethodPatch(32u, dex_file2, 1001u),
       LinkerPatch::CodePatch(32u, dex_file1, 1000u),
       LinkerPatch::CodePatch(32u, dex_file1, 1001u),
       LinkerPatch::CodePatch(32u, dex_file2, 1000u),
@@ -86,10 +119,38 @@
       LinkerPatch::RelativeCodePatch(32u, dex_file1, 1001u),
       LinkerPatch::RelativeCodePatch(32u, dex_file2, 1000u),
       LinkerPatch::RelativeCodePatch(32u, dex_file2, 1001u),
-      LinkerPatch::TypePatch(32u, dex_file1, 1000u),
-      LinkerPatch::TypePatch(32u, dex_file1, 1001u),
-      LinkerPatch::TypePatch(32u, dex_file2, 1000u),
-      LinkerPatch::TypePatch(32u, dex_file2, 1001u),
+      LinkerPatch::RelativeTypePatch(32u, dex_file1, 3000u, 1000u),
+      LinkerPatch::RelativeTypePatch(32u, dex_file1, 3001u, 1000u),
+      LinkerPatch::RelativeTypePatch(32u, dex_file1, 3000u, 1001u),
+      LinkerPatch::RelativeTypePatch(32u, dex_file1, 3001u, 1001u),
+      LinkerPatch::RelativeTypePatch(32u, dex_file2, 3000u, 1000u),
+      LinkerPatch::RelativeTypePatch(32u, dex_file2, 3001u, 1000u),
+      LinkerPatch::RelativeTypePatch(32u, dex_file2, 3000u, 1001u),
+      LinkerPatch::RelativeTypePatch(32u, dex_file2, 3001u, 1001u),
+      LinkerPatch::TypeBssEntryPatch(32u, dex_file1, 3000u, 1000u),
+      LinkerPatch::TypeBssEntryPatch(32u, dex_file1, 3001u, 1000u),
+      LinkerPatch::TypeBssEntryPatch(32u, dex_file1, 3000u, 1001u),
+      LinkerPatch::TypeBssEntryPatch(32u, dex_file1, 3001u, 1001u),
+      LinkerPatch::TypeBssEntryPatch(32u, dex_file2, 3000u, 1000u),
+      LinkerPatch::TypeBssEntryPatch(32u, dex_file2, 3001u, 1000u),
+      LinkerPatch::TypeBssEntryPatch(32u, dex_file2, 3000u, 1001u),
+      LinkerPatch::TypeBssEntryPatch(32u, dex_file2, 3001u, 1001u),
+      LinkerPatch::RelativeStringPatch(32u, dex_file1, 3000u, 1000u),
+      LinkerPatch::RelativeStringPatch(32u, dex_file1, 3001u, 1000u),
+      LinkerPatch::RelativeStringPatch(32u, dex_file1, 3000u, 1001u),
+      LinkerPatch::RelativeStringPatch(32u, dex_file1, 3001u, 1001u),
+      LinkerPatch::RelativeStringPatch(32u, dex_file2, 3000u, 1000u),
+      LinkerPatch::RelativeStringPatch(32u, dex_file2, 3001u, 1000u),
+      LinkerPatch::RelativeStringPatch(32u, dex_file2, 3000u, 1001u),
+      LinkerPatch::RelativeStringPatch(32u, dex_file2, 3001u, 1001u),
+      LinkerPatch::StringBssEntryPatch(32u, dex_file1, 3000u, 1000u),
+      LinkerPatch::StringBssEntryPatch(32u, dex_file1, 3001u, 1000u),
+      LinkerPatch::StringBssEntryPatch(32u, dex_file1, 3000u, 1001u),
+      LinkerPatch::StringBssEntryPatch(32u, dex_file1, 3001u, 1001u),
+      LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3000u, 1000u),
+      LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3001u, 1000u),
+      LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3000u, 1001u),
+      LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3001u, 1001u),
       LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2000u),
       LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3001u, 2000u),
       LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2001u),
@@ -98,7 +159,12 @@
       LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2000u),
       LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3000u, 2001u),
       LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2001u),
-      LinkerPatch::MethodPatch(16u, dex_file2, 1001u),  // identical with patch as index 3.
+      LinkerPatch::BakerReadBarrierBranchPatch(32u, 0u, 0u),
+      LinkerPatch::BakerReadBarrierBranchPatch(32u, 0u, 1u),
+      LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 0u),
+      LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 1u),
+
+      LinkerPatch::MethodPatch(16u, dex_file2, 1001u),  // identical with patch at index 3.
   };
   constexpr size_t last_index = arraysize(patches) - 1u;
 
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 3f0df3b..0338cfd 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -82,7 +82,12 @@
     // TODO: Investigate why we are doing the work again for this method and try to avoid it.
     LOG(WARNING) << "Method processed more than once: " << ref.PrettyMethod();
     if (!Runtime::Current()->UseJitCompilation()) {
-      DCHECK_EQ(existing->GetSafeCastSet().size(), verified_method->GetSafeCastSet().size());
+      if (kIsDebugBuild) {
+        auto ex_set = existing->GetSafeCastSet();
+        auto ve_set = verified_method->GetSafeCastSet();
+        CHECK_EQ(ex_set == nullptr, ve_set == nullptr);
+        CHECK((ex_set == nullptr) || (ex_set->size() == ve_set->size()));
+      }
     }
     // Let the unique_ptr delete the new verified method since there was already an existing one
     // registered. It is unsafe to replace the existing one since the JIT may be using it to
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 608a18a..e46dc59 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -49,7 +49,10 @@
 }
 
 bool VerifiedMethod::IsSafeCast(uint32_t pc) const {
-  return std::binary_search(safe_cast_set_.begin(), safe_cast_set_.end(), pc);
+  if (safe_cast_set_ == nullptr) {
+    return false;
+  }
+  return std::binary_search(safe_cast_set_->begin(), safe_cast_set_->end(), pc);
 }
 
 void VerifiedMethod::GenerateSafeCastSet(verifier::MethodVerifier* method_verifier) {
@@ -94,12 +97,16 @@
                                                            /* strict */ true,
                                                            /* assignable */ true);
         }
+        if (safe_cast_set_ == nullptr) {
+          safe_cast_set_.reset(new SafeCastSet());
+        }
         // Verify ordering for push_back() to the sorted vector.
-        DCHECK(safe_cast_set_.empty() || safe_cast_set_.back() < dex_pc);
-        safe_cast_set_.push_back(dex_pc);
+        DCHECK(safe_cast_set_->empty() || safe_cast_set_->back() < dex_pc);
+        safe_cast_set_->push_back(dex_pc);
       }
     }
   }
+  DCHECK(safe_cast_set_ == nullptr || !safe_cast_set_->empty());
 }
 
 }  // namespace art
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index 439e69e..64b3f44 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -43,8 +43,8 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
   ~VerifiedMethod() = default;
 
-  const SafeCastSet& GetSafeCastSet() const {
-    return safe_cast_set_;
+  const SafeCastSet* GetSafeCastSet() const {
+    return safe_cast_set_.get();
   }
 
   // Returns true if the cast can statically be verified to be redundant
@@ -69,7 +69,7 @@
   void GenerateSafeCastSet(verifier::MethodVerifier* method_verifier)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  SafeCastSet safe_cast_set_;
+  std::unique_ptr<SafeCastSet> safe_cast_set_;
 
   const uint32_t encountered_error_types_;
   const bool has_runtime_throw_;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index fbfa087..c2d792d 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -36,7 +36,6 @@
 #include "base/time_utils.h"
 #include "base/timing_logger.h"
 #include "class_linker-inl.h"
-#include "compiled_class.h"
 #include "compiled_method.h"
 #include "compiler.h"
 #include "compiler_callbacks.h"
@@ -317,11 +316,6 @@
 }
 
 CompilerDriver::~CompilerDriver() {
-  Thread* self = Thread::Current();
-  {
-    MutexLock mu(self, compiled_classes_lock_);
-    STLDeleteValues(&compiled_classes_);
-  }
   compiled_methods_.Visit([this](const MethodReference& ref ATTRIBUTE_UNUSED,
                                  CompiledMethod* method) {
     if (method != nullptr) {
@@ -1978,8 +1972,7 @@
         if (compiler_only_verifies) {
           // Just update the compiled_classes_ map. The compiler doesn't need to resolve
           // the type.
-          compiled_classes_.Overwrite(
-              ClassReference(dex_file, i), new CompiledClass(mirror::Class::kStatusVerified));
+          compiled_classes_.Overwrite(ClassReference(dex_file, i), mirror::Class::kStatusVerified);
         } else {
           // Update the class status, so later compilation stages know they don't need to verify
           // the class.
@@ -2030,16 +2023,18 @@
     }
   }
 
-  // Note: verification should not be pulling in classes anymore when compiling the boot image,
-  //       as all should have been resolved before. As such, doing this in parallel should still
-  //       be deterministic.
+  // Verification updates VerifierDeps and needs to run single-threaded to be deterministic.
+  bool force_determinism = GetCompilerOptions().IsForceDeterminism();
+  ThreadPool* verify_thread_pool =
+      force_determinism ? single_thread_pool_.get() : parallel_thread_pool_.get();
+  size_t verify_thread_count = force_determinism ? 1U : parallel_thread_count_;
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
     VerifyDexFile(jclass_loader,
                   *dex_file,
                   dex_files,
-                  parallel_thread_pool_.get(),
-                  parallel_thread_count_,
+                  verify_thread_pool,
+                  verify_thread_count,
                   timings);
   }
 
@@ -2688,14 +2683,15 @@
       << method_ref.dex_file->PrettyMethod(method_ref.dex_method_index);
 }
 
-CompiledClass* CompilerDriver::GetCompiledClass(ClassReference ref) const {
+bool CompilerDriver::GetCompiledClass(ClassReference ref, mirror::Class::Status* status) const {
+  DCHECK(status != nullptr);
   MutexLock mu(Thread::Current(), compiled_classes_lock_);
-  ClassTable::const_iterator it = compiled_classes_.find(ref);
+  ClassStateTable::const_iterator it = compiled_classes_.find(ref);
   if (it == compiled_classes_.end()) {
-    return nullptr;
+    return false;
   }
-  CHECK(it->second != nullptr);
-  return it->second;
+  *status = it->second;
+  return true;
 }
 
 void CompilerDriver::RecordClassStatus(ClassReference ref, mirror::Class::Status status) {
@@ -2717,12 +2713,11 @@
   MutexLock mu(Thread::Current(), compiled_classes_lock_);
   auto it = compiled_classes_.find(ref);
   if (it == compiled_classes_.end()) {
-    CompiledClass* compiled_class = new CompiledClass(status);
-    compiled_classes_.Overwrite(ref, compiled_class);
-  } else if (status > it->second->GetStatus()) {
+    compiled_classes_.Overwrite(ref, status);
+  } else if (status > it->second) {
     // Update the status if we now have a greater one. This happens with vdex,
     // which records a class is verified, but does not resolve it.
-    it->second->SetStatus(status);
+    it->second = status;
   }
 }
 
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index fbab9df..e0d97b7 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -56,7 +56,6 @@
 }  // namespace verifier
 
 class BitVector;
-class CompiledClass;
 class CompiledMethod;
 class CompilerOptions;
 class DexCompilationUnit;
@@ -164,7 +163,7 @@
   std::unique_ptr<const std::vector<uint8_t>> CreateQuickResolutionTrampoline() const;
   std::unique_ptr<const std::vector<uint8_t>> CreateQuickToInterpreterBridge() const;
 
-  CompiledClass* GetCompiledClass(ClassReference ref) const
+  bool GetCompiledClass(ClassReference ref, mirror::Class::Status* status) const
       REQUIRES(!compiled_classes_lock_);
 
   CompiledMethod* GetCompiledMethod(MethodReference ref) const;
@@ -505,10 +504,10 @@
   std::map<ClassReference, bool> requires_constructor_barrier_
       GUARDED_BY(requires_constructor_barrier_lock_);
 
-  typedef SafeMap<const ClassReference, CompiledClass*> ClassTable;
+  using ClassStateTable = SafeMap<const ClassReference, mirror::Class::Status>;
   // All class references that this compiler has compiled.
   mutable Mutex compiled_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  ClassTable compiled_classes_ GUARDED_BY(compiled_classes_lock_);
+  ClassStateTable compiled_classes_ GUARDED_BY(compiled_classes_lock_);
 
   typedef AtomicMethodRefMap<CompiledMethod*> MethodTable;
 
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 17854fd..26ea39f 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -23,7 +23,6 @@
 #include "art_method-inl.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
-#include "compiled_class.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
 #include "gc/heap.h"
@@ -339,10 +338,11 @@
     ASSERT_NE(klass, nullptr);
     EXPECT_TRUE(klass->IsVerified());
 
-    CompiledClass* compiled_class = compiler_driver_->GetCompiledClass(
-        ClassReference(&klass->GetDexFile(), klass->GetDexTypeIndex().index_));
-    ASSERT_NE(compiled_class, nullptr);
-    EXPECT_EQ(compiled_class->GetStatus(), mirror::Class::kStatusVerified);
+    mirror::Class::Status status;
+    bool found = compiler_driver_->GetCompiledClass(
+        ClassReference(&klass->GetDexFile(), klass->GetDexTypeIndex().index_), &status);
+    ASSERT_TRUE(found);
+    EXPECT_EQ(status, mirror::Class::kStatusVerified);
   }
 };
 
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 957ea99..5a82021 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -237,6 +237,10 @@
 
   bool ParseCompilerOption(const StringPiece& option, UsageFn Usage);
 
+  void SetNonPic() {
+    compile_pic_ = false;
+  }
+
   const std::string& GetDumpCfgFileName() const {
     return dump_cfg_file_name_;
   }
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 952a7c6..4d6db47 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -48,6 +48,7 @@
 #include "image.h"
 #include "imt_conflict_table.h"
 #include "intern_table.h"
+#include "jni_internal.h"
 #include "linear_alloc.h"
 #include "lock_word.h"
 #include "mirror/array-inl.h"
@@ -114,15 +115,19 @@
   return false;
 }
 
-static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED)
+static void ClearDexFileCookieCallback(Object* obj, void* arg ATTRIBUTE_UNUSED)
     REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(obj != nullptr);
   Class* klass = obj->GetClass();
-  CHECK_NE(Class::PrettyClass(klass), "com.android.dex.Dex");
+  if (klass == WellKnownClasses::ToClass(WellKnownClasses::dalvik_system_DexFile)) {
+    ArtField* field = jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
+    // Null out the cookie to enable determinism. b/34090128
+    field->SetObject</*kTransactionActive*/false>(obj, nullptr);
+  }
 }
 
-static void CheckNoDexObjects() {
-  ScopedObjectAccess soa(Thread::Current());
-  Runtime::Current()->GetHeap()->VisitObjects(CheckNoDexObjectsCallback, nullptr);
+static void ClearDexFileCookies() REQUIRES_SHARED(Locks::mutator_lock_) {
+  Runtime::Current()->GetHeap()->VisitObjects(ClearDexFileCookieCallback, nullptr);
 }
 
 bool ImageWriter::PrepareImageAddressSpace() {
@@ -131,23 +136,18 @@
   {
     ScopedObjectAccess soa(Thread::Current());
     PruneNonImageClasses();  // Remove junk
-    if (!compile_app_image_) {
+    if (compile_app_image_) {
+      // Clear dex file cookies for app images to enable app image determinism. This is required
+      // since the cookie field contains long pointers to DexFiles which are not deterministic.
+      // b/34090128
+      ClearDexFileCookies();
+    } else {
       // Avoid for app image since this may increase RAM and image size.
       ComputeLazyFieldsForImageClasses();  // Add useful information
     }
   }
   heap->CollectGarbage(false);  // Remove garbage.
 
-  // Dex caches must not have their dex fields set in the image. These are memory buffers of mapped
-  // dex files.
-  //
-  // We may open them in the unstarted-runtime code for class metadata. Their fields should all be
-  // reset in PruneNonImageClasses and the objects reclaimed in the GC. Make sure that's actually
-  // true.
-  if (kIsDebugBuild) {
-    CheckNoDexObjects();
-  }
-
   if (kIsDebugBuild) {
     ScopedObjectAccess soa(Thread::Current());
     CheckNonImageClassesRemoved();
diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h
index 63c23cb..c8a0119 100644
--- a/compiler/intrinsics_list.h
+++ b/compiler/intrinsics_list.h
@@ -28,6 +28,9 @@
 // The kNoThrow should be renamed to kNoVisibleThrow, as it is ok to GVN Integer.valueOf
 // (kNoSideEffects), and it is also OK to remove it if it's unused.
 
+// Note: Thread.interrupted is marked with kAllSideEffects due to the lack of a finer-grained
+// side-effect representation.
+
 #define INTRINSICS_LIST(V) \
   V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToRawLongBits", "(D)J") \
   V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToLongBits", "(D)J") \
@@ -154,7 +157,8 @@
   V(UnsafeStoreFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "storeFence", "()V") \
   V(UnsafeFullFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "fullFence", "()V") \
   V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;") \
-  V(IntegerValueOf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Integer;", "valueOf", "(I)Ljava/lang/Integer;")
+  V(IntegerValueOf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Integer;", "valueOf", "(I)Ljava/lang/Integer;") \
+  V(ThreadInterrupted, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kNoThrow, "Ljava/lang/Thread;", "interrupted", "()Z")
 
 #endif  // ART_COMPILER_INTRINSICS_LIST_H_
 #undef ART_COMPILER_INTRINSICS_LIST_H_   // #define is only for lint.
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index ad951bc..fed1f48 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -105,7 +105,7 @@
       /* implicit_null_checks */ true,
       /* implicit_so_checks */ true,
       /* implicit_suspend_checks */ false,
-      /* pic */ true,  // TODO: Support non-PIC in optimizing.
+      /* pic */ false,
       /* verbose_methods */ nullptr,
       /* init_failure_output */ nullptr,
       /* abort_on_hard_verifier_failure */ false,
@@ -117,6 +117,9 @@
   for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
     compiler_options_->ParseCompilerOption(argument, Usage);
   }
+  // JIT is never PIC, no matter what the runtime compiler options specify.
+  compiler_options_->SetNonPic();
+
   const InstructionSet instruction_set = kRuntimeISA;
   for (const StringPiece option : Runtime::Current()->GetCompilerOptions()) {
     VLOG(compiler) << "JIT compiler option " << option;
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index f55d5a6..c1ac230 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -249,7 +249,7 @@
       // All remaining method call patches will be handled by this thunk.
       DCHECK(!unprocessed_method_call_patches_.empty());
       DCHECK_LE(thunk_offset - unprocessed_method_call_patches_.front().GetPatchOffset(),
-                MaxPositiveDisplacement(ThunkType::kMethodCall));
+                MaxPositiveDisplacement(GetMethodCallKey()));
       unprocessed_method_call_patches_.clear();
     }
   }
@@ -271,8 +271,8 @@
   DCHECK(method_call_thunk_ != nullptr);
   // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
   uint32_t displacement = target_offset - patch_offset;
-  uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall);
-  uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall);
+  uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey());
+  uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey());
   // NOTE: With unsigned arithmetic we do mean to use && rather than || below.
   if (displacement > max_positive_displacement && displacement < -max_negative_displacement) {
     // Unwritten thunks have higher offsets, check if it's within range.
@@ -299,29 +299,40 @@
   if (data.HasWrittenOffset()) {
     uint32_t offset = data.LastWrittenOffset();
     DCHECK_LT(offset, patch_offset);
-    if (patch_offset - offset <= MaxNegativeDisplacement(key.GetType())) {
+    if (patch_offset - offset <= MaxNegativeDisplacement(key)) {
       return offset;
     }
   }
   DCHECK(data.HasPendingOffset());
   uint32_t offset = data.GetPendingOffset();
   DCHECK_GT(offset, patch_offset);
-  DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key.GetType()));
+  DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key));
   return offset;
 }
 
+ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetMethodCallKey() {
+  return ThunkKey(ThunkType::kMethodCall);
+}
+
+ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetBakerThunkKey(
+    const LinkerPatch& patch) {
+  DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch);
+  return ThunkKey(ThunkType::kBakerReadBarrier,
+                  patch.GetBakerCustomValue1(),
+                  patch.GetBakerCustomValue2());
+}
+
 void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_method,
                                             uint32_t code_offset) {
   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
     uint32_t patch_offset = code_offset + patch.LiteralOffset();
-    ThunkType key_type = static_cast<ThunkType>(-1);
+    ThunkKey key(static_cast<ThunkType>(-1));
     ThunkData* old_data = nullptr;
     if (patch.GetType() == LinkerPatch::Type::kCallRelative) {
-      key_type = ThunkType::kMethodCall;
+      key = GetMethodCallKey();
       unprocessed_method_call_patches_.emplace_back(patch_offset, patch.TargetMethod());
       if (method_call_thunk_ == nullptr) {
-        ThunkKey key(key_type, ThunkParams{{ 0u, 0u }});  // NOLINT(whitespace/braces)
-        uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type);
+        uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key);
         auto it = thunks_.Put(key, ThunkData(CompileThunk(key), max_next_offset));
         method_call_thunk_ = &it->second;
         AddUnreservedThunk(method_call_thunk_);
@@ -329,11 +340,10 @@
         old_data = method_call_thunk_;
       }
     } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) {
-      ThunkKey key = GetBakerReadBarrierKey(patch);
-      key_type = key.GetType();
+      key = GetBakerThunkKey(patch);
       auto lb = thunks_.lower_bound(key);
       if (lb == thunks_.end() || thunks_.key_comp()(key, lb->first)) {
-        uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type);
+        uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key);
         auto it = thunks_.PutBefore(lb, key, ThunkData(CompileThunk(key), max_next_offset));
         AddUnreservedThunk(&it->second);
       } else {
@@ -342,16 +352,16 @@
     }
     if (old_data != nullptr) {
       // Shared path where an old thunk may need an update.
-      DCHECK(key_type != static_cast<ThunkType>(-1));
+      DCHECK(key.GetType() != static_cast<ThunkType>(-1));
       DCHECK(!old_data->HasReservedOffset() || old_data->LastReservedOffset() < patch_offset);
       if (old_data->NeedsNextThunk()) {
         // Patches for a method are ordered by literal offset, so if we still need to place
         // this thunk for a previous patch, that thunk shall be in range for this patch.
-        DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key_type));
+        DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key));
       } else {
         if (!old_data->HasReservedOffset() ||
-            patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key_type)) {
-          old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key_type));
+            patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key)) {
+          old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key));
           AddUnreservedThunk(old_data);
         }
       }
@@ -385,8 +395,8 @@
   DCHECK(!unreserved_thunks_.empty());
   DCHECK(!unprocessed_method_call_patches_.empty());
   DCHECK(method_call_thunk_ != nullptr);
-  uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall);
-  uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall);
+  uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey());
+  uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey());
   // Process as many patches as possible, stop only on unresolved targets or calls too far back.
   while (!unprocessed_method_call_patches_.empty()) {
     MethodReference target_method = unprocessed_method_call_patches_.front().GetTargetMethod();
@@ -439,8 +449,8 @@
 }
 
 inline uint32_t ArmBaseRelativePatcher::CalculateMaxNextOffset(uint32_t patch_offset,
-                                                               ThunkType type) {
-  return RoundDown(patch_offset + MaxPositiveDisplacement(type),
+                                                               const ThunkKey& key) {
+  return RoundDown(patch_offset + MaxPositiveDisplacement(key),
                    GetInstructionSetAlignment(instruction_set_));
 }
 
diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h
index 47f840f..5197ce2 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.h
+++ b/compiler/linker/arm/relative_patcher_arm_base.h
@@ -42,76 +42,30 @@
 
   enum class ThunkType {
     kMethodCall,              // Method call thunk.
-    kBakerReadBarrierField,   // Baker read barrier, load field or array element at known offset.
-    kBakerReadBarrierArray,   // Baker read barrier, array load with index in register.
-    kBakerReadBarrierRoot,    // Baker read barrier, GC root load.
-  };
-
-  struct BakerReadBarrierFieldParams {
-    uint32_t holder_reg;      // Holder object for reading lock word.
-    uint32_t base_reg;        // Base register, different from holder for large offset.
-                              // If base differs from holder, it should be a pre-defined
-                              // register to limit the number of thunks we need to emit.
-                              // The offset is retrieved using introspection.
-  };
-
-  struct BakerReadBarrierArrayParams {
-    uint32_t base_reg;        // Reference to the start of the data.
-    uint32_t dummy;           // Dummy field.
-                              // The index register is retrieved using introspection
-                              // to limit the number of thunks we need to emit.
-  };
-
-  struct BakerReadBarrierRootParams {
-    uint32_t root_reg;        // The register holding the GC root.
-    uint32_t dummy;           // Dummy field.
-  };
-
-  struct RawThunkParams {
-    uint32_t first;
-    uint32_t second;
-  };
-
-  union ThunkParams {
-    RawThunkParams raw_params;
-    BakerReadBarrierFieldParams field_params;
-    BakerReadBarrierArrayParams array_params;
-    BakerReadBarrierRootParams root_params;
-    static_assert(sizeof(raw_params) == sizeof(field_params), "field_params size check");
-    static_assert(sizeof(raw_params) == sizeof(array_params), "array_params size check");
-    static_assert(sizeof(raw_params) == sizeof(root_params), "root_params size check");
+    kBakerReadBarrier,        // Baker read barrier.
   };
 
   class ThunkKey {
    public:
-    ThunkKey(ThunkType type, ThunkParams params) : type_(type), params_(params) { }
+    explicit ThunkKey(ThunkType type, uint32_t custom_value1 = 0u, uint32_t custom_value2 = 0u)
+        : type_(type), custom_value1_(custom_value1), custom_value2_(custom_value2) { }
 
     ThunkType GetType() const {
       return type_;
     }
 
-    BakerReadBarrierFieldParams GetFieldParams() const {
-      DCHECK(type_ == ThunkType::kBakerReadBarrierField);
-      return params_.field_params;
+    uint32_t GetCustomValue1() const {
+      return custom_value1_;
     }
 
-    BakerReadBarrierArrayParams GetArrayParams() const {
-      DCHECK(type_ == ThunkType::kBakerReadBarrierArray);
-      return params_.array_params;
-    }
-
-    BakerReadBarrierRootParams GetRootParams() const {
-      DCHECK(type_ == ThunkType::kBakerReadBarrierRoot);
-      return params_.root_params;
-    }
-
-    RawThunkParams GetRawParams() const {
-      return params_.raw_params;
+    uint32_t GetCustomValue2() const {
+      return custom_value2_;
     }
 
    private:
     ThunkType type_;
-    ThunkParams params_;
+    uint32_t custom_value1_;
+    uint32_t custom_value2_;
   };
 
   class ThunkKeyCompare {
@@ -120,13 +74,16 @@
       if (lhs.GetType() != rhs.GetType()) {
         return lhs.GetType() < rhs.GetType();
       }
-      if (lhs.GetRawParams().first != rhs.GetRawParams().first) {
-        return lhs.GetRawParams().first < rhs.GetRawParams().first;
+      if (lhs.GetCustomValue1() != rhs.GetCustomValue1()) {
+        return lhs.GetCustomValue1() < rhs.GetCustomValue1();
       }
-      return lhs.GetRawParams().second < rhs.GetRawParams().second;
+      return lhs.GetCustomValue2() < rhs.GetCustomValue2();
     }
   };
 
+  static ThunkKey GetMethodCallKey();
+  static ThunkKey GetBakerThunkKey(const LinkerPatch& patch);
+
   uint32_t ReserveSpaceInternal(uint32_t offset,
                                 const CompiledMethod* compiled_method,
                                 MethodReference method_ref,
@@ -136,10 +93,9 @@
   uint32_t CalculateMethodCallDisplacement(uint32_t patch_offset,
                                            uint32_t target_offset);
 
-  virtual ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) = 0;
   virtual std::vector<uint8_t> CompileThunk(const ThunkKey& key) = 0;
-  virtual uint32_t MaxPositiveDisplacement(ThunkType type) = 0;
-  virtual uint32_t MaxNegativeDisplacement(ThunkType type) = 0;
+  virtual uint32_t MaxPositiveDisplacement(const ThunkKey& key) = 0;
+  virtual uint32_t MaxNegativeDisplacement(const ThunkKey& key) = 0;
 
  private:
   class ThunkData;
@@ -149,7 +105,7 @@
 
   void ResolveMethodCalls(uint32_t quick_code_offset, MethodReference method_ref);
 
-  uint32_t CalculateMaxNextOffset(uint32_t patch_offset, ThunkType type);
+  uint32_t CalculateMaxNextOffset(uint32_t patch_offset, const ThunkKey& key);
 
   RelativePatcherTargetProvider* const provider_;
   const InstructionSet instruction_set_;
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index 1a5d79c..aa5a945 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -16,9 +16,16 @@
 
 #include "linker/arm/relative_patcher_thumb2.h"
 
+#include "arch/arm/asm_support_arm.h"
 #include "art_method.h"
+#include "base/bit_utils.h"
 #include "compiled_method.h"
-#include "utils/arm/assembler_thumb2.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "lock_word.h"
+#include "mirror/object.h"
+#include "mirror/array-inl.h"
+#include "read_barrier.h"
+#include "utils/arm/assembler_arm_vixl.h"
 
 namespace art {
 namespace linker {
@@ -32,6 +39,12 @@
 constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement;
 constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement;
 
+// Maximum positive and negative displacement for a conditional branch measured from the patch
+// location. (A signed 21-bit displacement with the lowest bit 0 has range [-2^20, 2^20-2] measured
+// from the Thumb2 PC pointing right after the B.cond, i.e. 4 bytes later than the patch location.)
+constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u + kPcDisplacement;
+constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20) - kPcDisplacement;
+
 Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider)
     : ArmBaseRelativePatcher(provider, kThumb2) {
 }
@@ -84,29 +97,259 @@
   SetInsn32(code, literal_offset, insn);
 }
 
-void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
-                                                        const LinkerPatch& patch ATTRIBUTE_UNUSED,
-                                                        uint32_t patch_offset ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "UNIMPLEMENTED";
+void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+                                                        const LinkerPatch& patch,
+                                                        uint32_t patch_offset) {
+  DCHECK_ALIGNED(patch_offset, 2u);
+  uint32_t literal_offset = patch.LiteralOffset();
+  DCHECK_ALIGNED(literal_offset, 2u);
+  DCHECK_LT(literal_offset, code->size());
+  uint32_t insn = GetInsn32(code, literal_offset);
+  DCHECK_EQ(insn, 0xf0408000);  // BNE +0 (unpatched)
+  ThunkKey key = GetBakerThunkKey(patch);
+  if (kIsDebugBuild) {
+    const uint32_t encoded_data = key.GetCustomValue1();
+    BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+    // Check that the next instruction matches the expected LDR.
+    switch (kind) {
+      case BakerReadBarrierKind::kField: {
+        BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+        if (width == BakerReadBarrierWidth::kWide) {
+          DCHECK_GE(code->size() - literal_offset, 8u);
+          uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
+          // LDR (immediate), encoding T3, with correct base_reg.
+          CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
+          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+          CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+        } else {
+          DCHECK_GE(code->size() - literal_offset, 6u);
+          uint32_t next_insn = GetInsn16(code, literal_offset + 4u);
+          // LDR (immediate), encoding T1, with correct base_reg.
+          CheckValidReg(next_insn & 0x7u);  // Check destination register.
+          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+          CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
+        }
+        break;
+      }
+      case BakerReadBarrierKind::kArray: {
+        DCHECK_GE(code->size() - literal_offset, 8u);
+        uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
+        // LDR (register), encoding T2, with correct base_reg and LSL #2 (LDR Rt, [Rn, Rm, LSL #2]).
+        CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
+        const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
+        CheckValidReg(next_insn & 0xf);  // Check index register
+        break;
+      }
+      case BakerReadBarrierKind::kGcRoot: {
+        BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+        if (width == BakerReadBarrierWidth::kWide) {
+          DCHECK_GE(literal_offset, 4u);
+          uint32_t prev_insn = GetInsn32(code, literal_offset - 4u);
+          // LDR (immediate), encoding T3, with correct root_reg.
+          const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+          CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+        } else {
+          DCHECK_GE(literal_offset, 2u);
+          uint32_t prev_insn = GetInsn16(code, literal_offset - 2u);
+          // LDR (immediate), encoding T1, with correct root_reg.
+          const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+          CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
+        }
+        break;
+      }
+      default:
+        LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType());
+        UNREACHABLE();
+    }
+  }
+  uint32_t target_offset = GetThunkTargetOffset(key, patch_offset);
+  DCHECK_ALIGNED(target_offset, 4u);
+  uint32_t disp = target_offset - (patch_offset + kPcDisplacement);
+  DCHECK((disp >> 20) == 0u || (disp >> 20) == 0xfffu);   // 21-bit signed.
+  insn |= ((disp << (26 - 20)) & 0x04000000u) |           // Shift bit 20 to 26, "S".
+          ((disp >> (19 - 11)) & 0x00000800u) |           // Shift bit 19 to 11, "J2".
+          ((disp >> (18 - 13)) & 0x00002000u) |           // Shift bit 18 to 13, "J1".
+          ((disp << (16 - 12)) & 0x003f0000u) |           // Shift bits 12-17 to 16-21, "imm6".
+          ((disp >> (1 - 0)) & 0x000007ffu);              // Shift bits 1-11 to 0-10, "imm11".
+  SetInsn32(code, literal_offset, insn);
 }
 
-ArmBaseRelativePatcher::ThunkKey Thumb2RelativePatcher::GetBakerReadBarrierKey(
-    const LinkerPatch& patch ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "UNIMPLEMENTED";
-  UNREACHABLE();
+#define __ assembler.GetVIXLAssembler()->
+
+static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler,
+                                     vixl::aarch32::Register base_reg,
+                                     vixl::aarch32::MemOperand& lock_word,
+                                     vixl::aarch32::Label* slow_path,
+                                     int32_t raw_ldr_offset) {
+  using namespace vixl::aarch32;  // NOLINT(build/namespaces)
+  // Load the lock word containing the rb_state.
+  __ Ldr(ip, lock_word);
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+  __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
+  __ B(ne, slow_path, /* is_far_target */ false);
+  __ Add(lr, lr, raw_ldr_offset);
+  // Introduce a dependency on the lock_word including rb_state,
+  // to prevent load-load reordering, and without using
+  // a memory barrier (which would be more expensive).
+  __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
+  __ Bx(lr);          // And return to the function.
+  // Note: The fake dependency is unnecessary for the slow path.
+}
+
+void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler,
+                                                         uint32_t encoded_data) {
+  using namespace vixl::aarch32;  // NOLINT(build/namespaces)
+  BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+  switch (kind) {
+    case BakerReadBarrierKind::kField: {
+      // Check if the holder is gray and, if not, add fake dependency to the base register
+      // and return to the LDR instruction to load the reference. Otherwise, use introspection
+      // to load the reference and call the entrypoint (in kBakerCcEntrypointRegister)
+      // that performs further checks on the reference and marks it if needed.
+      Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
+      CheckValidReg(holder_reg.GetCode());
+      BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip);
+      // If base_reg differs from holder_reg, the offset was too large and we must have
+      // emitted an explicit null check before the load. Otherwise, we need to null-check
+      // the holder as we do not necessarily do that check before going to the thunk.
+      vixl::aarch32::Label throw_npe;
+      if (holder_reg.Is(base_reg)) {
+        __ CompareAndBranchIfZero(holder_reg, &throw_npe, /* is_far_target */ false);
+      }
+      vixl::aarch32::Label slow_path;
+      MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
+      const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
+          ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+          : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
+      __ Bind(&slow_path);
+      const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+                                 raw_ldr_offset;
+      Register ep_reg(kBakerCcEntrypointRegister);
+      if (width == BakerReadBarrierWidth::kWide) {
+        MemOperand ldr_half_address(lr, ldr_offset + 2);
+        __ Ldrh(ip, ldr_half_address);        // Load the LDR immediate half-word with "Rt | imm12".
+        __ Ubfx(ip, ip, 0, 12);               // Extract the offset imm12.
+        __ Ldr(ip, MemOperand(base_reg, ip));   // Load the reference.
+      } else {
+        MemOperand ldr_address(lr, ldr_offset);
+        __ Ldrh(ip, ldr_address);             // Load the LDR immediate, encoding T1.
+        __ Add(ep_reg,                        // Adjust the entrypoint address to the entrypoint
+               ep_reg,                        // for narrow LDR.
+               Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
+        __ Ubfx(ip, ip, 6, 5);                // Extract the imm5, i.e. offset / 4.
+        __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2));   // Load the reference.
+      }
+      // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
+      __ Bx(ep_reg);                          // Jump to the entrypoint.
+      if (holder_reg.Is(base_reg)) {
+        // Add null check slow path. The stack map is at the address pointed to by LR.
+        __ Bind(&throw_npe);
+        int32_t offset = GetThreadOffset<kArmPointerSize>(kQuickThrowNullPointer).Int32Value();
+        __ Ldr(ip, MemOperand(/* Thread* */ vixl::aarch32::r9, offset));
+        __ Bx(ip);
+      }
+      break;
+    }
+    case BakerReadBarrierKind::kArray: {
+      Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+      DCHECK(BakerReadBarrierWidth::kWide == BakerReadBarrierWidthField::Decode(encoded_data));
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip);
+      vixl::aarch32::Label slow_path;
+      int32_t data_offset =
+          mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+      MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+      DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
+      const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
+      __ Bind(&slow_path);
+      const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+                                 raw_ldr_offset;
+      MemOperand ldr_address(lr, ldr_offset + 2);
+      __ Ldrb(ip, ldr_address);               // Load the LDR (register) byte with "00 | imm2 | Rm",
+                                              // i.e. Rm+32 because the scale in imm2 is 2.
+      Register ep_reg(kBakerCcEntrypointRegister);  // Insert ip to the entrypoint address to create
+      __ Bfi(ep_reg, ip, 3, 6);               // a switch case target based on the index register.
+      __ Mov(ip, base_reg);                   // Move the base register to ip.
+      __ Bx(ep_reg);                          // Jump to the entrypoint's array switch case.
+      break;
+    }
+    case BakerReadBarrierKind::kGcRoot: {
+      // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
+      // and it does not have a forwarding address), call the correct introspection entrypoint;
+      // otherwise return the reference (or the extracted forwarding address).
+      // There is no gray bit check for GC roots.
+      Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(root_reg.GetCode());
+      DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+      BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip);
+      vixl::aarch32::Label return_label, not_marked, forwarding_address;
+      __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false);
+      MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
+      __ Ldr(ip, lock_word);
+      __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
+      __ B(eq, &not_marked);
+      __ Bind(&return_label);
+      __ Bx(lr);
+      __ Bind(&not_marked);
+      static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
+                    "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
+                    " the highest bits and the 'forwarding address' state to have all bits set");
+      __ Cmp(ip, Operand(0xc0000000));
+      __ B(hs, &forwarding_address);
+      // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
+      // to art_quick_read_barrier_mark_introspection_gc_roots.
+      Register ep_reg(kBakerCcEntrypointRegister);
+      int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)
+          ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
+          : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET;
+      __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
+      __ Mov(ip, root_reg);
+      __ Bx(ep_reg);
+      __ Bind(&forwarding_address);
+      __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
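+      // The lock word stores the forwarding address shifted right by
+      // kForwardingAddressShift, so the LSL above recovers the actual address.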
+      __ Bx(lr);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+      UNREACHABLE();
+  }
 }
 
 std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) {
-  DCHECK(key.GetType() == ThunkType::kMethodCall);
-  // The thunk just uses the entry point in the ArtMethod. This works even for calls
-  // to the generic JNI and interpreter trampolines.
   ArenaPool pool;
   ArenaAllocator arena(&pool);
-  arm::Thumb2Assembler assembler(&arena);
-  assembler.LoadFromOffset(
-      arm::kLoadWord, arm::PC, arm::R0,
-      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
-  assembler.bkpt(0);
+  arm::ArmVIXLAssembler assembler(&arena);
+
+  switch (key.GetType()) {
+    case ThunkType::kMethodCall:
+      // The thunk just uses the entry point in the ArtMethod. This works even for calls
+      // to the generic JNI and interpreter trampolines.
+      assembler.LoadFromOffset(
+          arm::kLoadWord,
+          vixl::aarch32::pc,
+          vixl::aarch32::r0,
+          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
+      __ Bkpt(0);
+      break;
+    case ThunkType::kBakerReadBarrier:
+      CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1());
+      break;
+  }
+
   assembler.FinalizeCode();
   std::vector<uint8_t> thunk_code(assembler.CodeSize());
   MemoryRegion code(thunk_code.data(), thunk_code.size());
@@ -114,19 +357,29 @@
   return thunk_code;
 }
 
-uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(ThunkType type) {
-  DCHECK(type == ThunkType::kMethodCall);
-  return kMaxMethodCallPositiveDisplacement;
+#undef __
+
+uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) {
+  switch (key.GetType()) {
+    case ThunkType::kMethodCall:
+      return kMaxMethodCallPositiveDisplacement;
+    case ThunkType::kBakerReadBarrier:
+      return kMaxBcondPositiveDisplacement;
+  }
 }
 
-uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(ThunkType type) {
-  DCHECK(type == ThunkType::kMethodCall);
-  return kMaxMethodCallNegativeDisplacement;
+uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) {
+  switch (key.GetType()) {
+    case ThunkType::kMethodCall:
+      return kMaxMethodCallNegativeDisplacement;
+    case ThunkType::kBakerReadBarrier:
+      return kMaxBcondNegativeDisplacement;
+  }
 }
 
 void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
   DCHECK_LE(offset + 4u, code->size());
-  DCHECK_EQ(offset & 1u, 0u);
+  DCHECK_ALIGNED(offset, 2u);
   uint8_t* addr = &(*code)[offset];
   addr[0] = (value >> 16) & 0xff;
   addr[1] = (value >> 24) & 0xff;
@@ -136,7 +389,7 @@
 
 uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) {
   DCHECK_LE(offset + 4u, code.size());
-  DCHECK_EQ(offset & 1u, 0u);
+  DCHECK_ALIGNED(offset, 2u);
   const uint8_t* addr = &code[offset];
   return
       (static_cast<uint32_t>(addr[0]) << 16) +
@@ -151,5 +404,18 @@
   return GetInsn32(ArrayRef<const uint8_t>(*code), offset);
 }
 
+uint32_t Thumb2RelativePatcher::GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset) {
+  DCHECK_LE(offset + 2u, code.size());
+  DCHECK_ALIGNED(offset, 2u);
+  const uint8_t* addr = &code[offset];
+  return (static_cast<uint32_t>(addr[0]) << 0) + (static_cast<uint32_t>(addr[1]) << 8);
+}
+
+template <typename Vector>
+uint32_t Thumb2RelativePatcher::GetInsn16(Vector* code, uint32_t offset) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+  return GetInsn16(ArrayRef<const uint8_t>(*code), offset);
+}
+
 }  // namespace linker
 }  // namespace art
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index ab37802..183e5e6 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -17,13 +17,57 @@
 #ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_
 #define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_
 
+#include "arch/arm/registers_arm.h"
+#include "base/array_ref.h"
+#include "base/bit_field.h"
+#include "base/bit_utils.h"
 #include "linker/arm/relative_patcher_arm_base.h"
 
 namespace art {
+
+namespace arm {
+class ArmVIXLAssembler;
+}  // namespace arm
+
 namespace linker {
 
 class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher {
  public:
+  static constexpr uint32_t kBakerCcEntrypointRegister = 4u;
+
+  static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg,
+                                                  uint32_t holder_reg,
+                                                  bool narrow) {
+    CheckValidReg(base_reg);
+    CheckValidReg(holder_reg);
+    DCHECK(!narrow || base_reg < 8u) << base_reg;
+    BakerReadBarrierWidth width =
+        narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
+           BakerReadBarrierFirstRegField::Encode(base_reg) |
+           BakerReadBarrierSecondRegField::Encode(holder_reg) |
+           BakerReadBarrierWidthField::Encode(width);
+  }
+
+  static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+    CheckValidReg(base_reg);
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+           BakerReadBarrierFirstRegField::Encode(base_reg) |
+           BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) |
+           BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide);
+  }
+
+  static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) {
+    CheckValidReg(root_reg);
+    DCHECK(!narrow || root_reg < 8u) << root_reg;
+    BakerReadBarrierWidth width =
+        narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
+           BakerReadBarrierFirstRegField::Encode(root_reg) |
+           BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) |
+           BakerReadBarrierWidthField::Encode(width);
+  }
+
   explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider);
 
   void PatchCall(std::vector<uint8_t>* code,
@@ -39,18 +83,58 @@
                                    uint32_t patch_offset) OVERRIDE;
 
  protected:
-  ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE;
   std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE;
-  uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE;
-  uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE;
+  uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE;
+  uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE;
 
  private:
+  static constexpr uint32_t kInvalidEncodedReg = /* pc is invalid */ 15u;
+
+  enum class BakerReadBarrierKind : uint8_t {
+    kField,   // Field get or array get with constant offset (i.e. constant index).
+    kArray,   // Array get with index in register.
+    kGcRoot,  // GC root load.
+    kLast = kGcRoot
+  };
+
+  enum class BakerReadBarrierWidth : uint8_t {
+    kWide,          // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled).
+    kNarrow,        // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled).
+    kLast = kNarrow
+  };
+
+  static constexpr size_t kBitsForBakerReadBarrierKind =
+      MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
+  static constexpr size_t kBitsForRegister = 4u;
+  using BakerReadBarrierKindField =
+      BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
+  using BakerReadBarrierFirstRegField =
+      BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>;
+  using BakerReadBarrierSecondRegField =
+      BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>;
+  static constexpr size_t kBitsForBakerReadBarrierWidth =
+      MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast));
+  using BakerReadBarrierWidthField = BitField<BakerReadBarrierWidth,
+                                              kBitsForBakerReadBarrierKind + 2 * kBitsForRegister,
+                                              kBitsForBakerReadBarrierWidth>;
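+  // Note: with kLast == kGcRoot == 2 the kind takes 2 bits, so the encoded data is
+  // laid out as [10] width, [9:6] second reg, [5:2] first reg, [1:0] kind; e.g.
+  // EncodeBakerReadBarrierFieldData(5, 6, /* narrow */ false) yields 0x194.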
+
+  static void CheckValidReg(uint32_t reg) {
+    DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister) << reg;
+  }
+
+  void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data);
+
   void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
   static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset);
 
   template <typename Vector>
   static uint32_t GetInsn32(Vector* code, uint32_t offset);
 
+  static uint32_t GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset);
+
+  template <typename Vector>
+  static uint32_t GetInsn16(Vector* code, uint32_t offset);
+
   friend class Thumb2RelativePatcherTest;
 
   DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher);
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index f08270d..af5fa40 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -14,8 +14,12 @@
  * limitations under the License.
  */
 
+#include "base/casts.h"
 #include "linker/relative_patcher_test.h"
 #include "linker/arm/relative_patcher_thumb2.h"
+#include "lock_word.h"
+#include "mirror/array-inl.h"
+#include "mirror/object.h"
 #include "oat_quick_method_header.h"
 
 namespace art {
@@ -34,13 +38,102 @@
   static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode;
   static const uint32_t kPcInsnOffset;
 
+  // The PC in Thumb mode is 4 bytes after the instruction location.
+  static constexpr uint32_t kPcAdjustment = 4u;
+
   // Branches within range [-256, 256) can be created from these by adding the low 8 bits.
-  static constexpr uint32_t kBlPlus0 = 0xf000f800;
-  static constexpr uint32_t kBlMinus256 = 0xf7ffff00;
+  static constexpr uint32_t kBlPlus0 = 0xf000f800u;
+  static constexpr uint32_t kBlMinus256 = 0xf7ffff00u;
 
   // Special BL values.
-  static constexpr uint32_t kBlPlusMax = 0xf3ffd7ff;
-  static constexpr uint32_t kBlMinusMax = 0xf400d000;
+  static constexpr uint32_t kBlPlusMax = 0xf3ffd7ffu;
+  static constexpr uint32_t kBlMinusMax = 0xf400d000u;
+
+  // BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset.
+  static constexpr uint32_t kBneWPlus0 = 0xf0408000u;
+
+  // LDR immediate, 16-bit, encoding T1. Bits 6-10 are imm5, 0-2 are Rt, 3-5 are Rn.
+  static constexpr uint32_t kLdrInsn = 0x6800u;
+
+  // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn.
+  static constexpr uint32_t kLdrWInsn = 0xf8d00000u;
+
+  // LDR immediate, negative offset, encoding T4. Bits 0-7 are the offset to subtract.
+  static constexpr uint32_t kLdrNegativeOffset = 0xf8500c00u;
+
+  // LDR register, lsl #2. Bits 4-5 are the imm2, i.e. the lsl shift.
+  static constexpr uint32_t kLdrRegLsl2 = 0xf8500020u;
+
+  // NOP instructions.
+  static constexpr uint32_t kNopInsn = 0xbf00u;
+  static constexpr uint32_t kNopWInsn = 0xf3af8000u;
+
+  void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
+    CHECK_LE(pos, code->size());
+    if (IsUint<16>(insn)) {
+      const uint8_t insn_code[] = {
+          static_cast<uint8_t>(insn),
+          static_cast<uint8_t>(insn >> 8),
+      };
+      static_assert(sizeof(insn_code) == 2u, "Invalid sizeof(insn_code).");
+      code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
+    } else {
+      const uint8_t insn_code[] = {
+          static_cast<uint8_t>(insn >> 16),
+          static_cast<uint8_t>(insn >> 24),
+          static_cast<uint8_t>(insn),
+          static_cast<uint8_t>(insn >> 8),
+      };
+      static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code).");
+      code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
+    }
+  }
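+  // 32-bit Thumb2 instructions are stored as two little-endian half-words with the high
+  // half-word first, e.g. kBneWPlus0 (0xf0408000u) becomes the byte sequence 40 f0 00 80.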
+
+  void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) {
+    InsertInsn(code, code->size(), insn);
+  }
+
+  std::vector<uint8_t> GenNops(size_t num_nops) {
+    std::vector<uint8_t> result;
+    result.reserve(num_nops * 2u);
+    for (size_t i = 0; i != num_nops; ++i) {
+      PushBackInsn(&result, kNopInsn);
+    }
+    return result;
+  }
+
+  std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) {
+    std::vector<uint8_t> raw_code;
+    size_t number_of_16_bit_insns =
+        std::count_if(insns.begin(), insns.end(), [](uint32_t x) { return IsUint<16>(x); });
+    raw_code.reserve(insns.size() * 4u - number_of_16_bit_insns * 2u);
+    for (uint32_t insn : insns) {
+      PushBackInsn(&raw_code, insn);
+    }
+    return raw_code;
+  }
+
+  uint32_t BneWWithOffset(uint32_t bne_offset, uint32_t target_offset) {
+    if (!IsAligned<2u>(bne_offset)) {
+      LOG(ERROR) << "Unaligned bne_offset: " << bne_offset;
+      return 0xffffffffu;  // Fails code diff later.
+    }
+    if (!IsAligned<2u>(target_offset)) {
+      LOG(ERROR) << "Unaligned target_offset: " << target_offset;
+      return 0xffffffffu;  // Fails code diff later.
+    }
+    uint32_t diff = target_offset - bne_offset - kPcAdjustment;
+    DCHECK_ALIGNED(diff, 2u);
+    if ((diff >> 20) != 0 && (diff >> 20) != 0xfffu) {
+      LOG(ERROR) << "Target out of range: " << diff;
+      return 0xffffffffu;  // Fails code diff later.
+    }
+    return kBneWPlus0 | ((diff >> 1) & 0x7ffu)          // imm11
+                      | (((diff >> 12) & 0x3fu) << 16)  // imm6
+                      | (((diff >> 18) & 1) << 13)      // J1
+                      | (((diff >> 19) & 1) << 11)      // J2
+                      | (((diff >> 20) & 1) << 26);     // S
+  }
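+  // For example, BneWWithOffset(0u, 8u) gives diff = 8 - 0 - 4 = 4 and encodes to
+  // kBneWPlus0 | 2u = 0xf0408002u (only imm11 is set for such a small offset).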
 
   bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code,
                              const ArrayRef<const LinkerPatch>& method1_patches,
@@ -95,9 +188,7 @@
   }
 
   std::vector<uint8_t> CompileMethodCallThunk() {
-    ArmBaseRelativePatcher::ThunkKey key(
-        ArmBaseRelativePatcher::ThunkType::kMethodCall,
-        ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }});  // NOLINT(whitespace/braces)
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey();
     return static_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
   }
 
@@ -125,19 +216,57 @@
     std::vector<uint8_t> result;
     result.reserve(num_nops * 2u + 4u);
     for (size_t i = 0; i != num_nops; ++i) {
-      result.push_back(0x00);
-      result.push_back(0xbf);
+      PushBackInsn(&result, kNopInsn);
     }
-    result.push_back(static_cast<uint8_t>(bl >> 16));
-    result.push_back(static_cast<uint8_t>(bl >> 24));
-    result.push_back(static_cast<uint8_t>(bl));
-    result.push_back(static_cast<uint8_t>(bl >> 8));
+    PushBackInsn(&result, bl);
     return result;
   }
 
   void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
   void TestStringReference(uint32_t string_offset);
   void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
+
+  std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg,
+                                               uint32_t holder_reg,
+                                               bool narrow) {
+    const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+        0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg, narrow));
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+    return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
+  }
+
+  std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) {
+    LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+        0u, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg));
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+    return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
+  }
+
+  std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg, bool narrow) {
+    LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+        0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow));
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+    return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
+  }
+
+  uint32_t GetOutputInsn32(uint32_t offset) {
+    CHECK_LE(offset, output_.size());
+    CHECK_GE(output_.size() - offset, 4u);
+    return (static_cast<uint32_t>(output_[offset]) << 16) |
+           (static_cast<uint32_t>(output_[offset + 1]) << 24) |
+           (static_cast<uint32_t>(output_[offset + 2]) << 0) |
+           (static_cast<uint32_t>(output_[offset + 3]) << 8);
+  }
+
+  uint16_t GetOutputInsn16(uint32_t offset) {
+    CHECK_LE(offset, output_.size());
+    CHECK_GE(output_.size() - offset, 2u);
+    return (static_cast<uint32_t>(output_[offset]) << 0) |
+           (static_cast<uint32_t>(output_[offset + 1]) << 8);
+  }
+
+  void TestBakerFieldWide(uint32_t offset, uint32_t ref_reg);
+  void TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg);
 };
 
 const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = {
@@ -164,7 +293,7 @@
 void Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
                                                       uint32_t element_offset) {
   dex_cache_arrays_begin_ = dex_cache_arrays_begin;
-  LinkerPatch patches[] = {
+  const LinkerPatch patches[] = {
       LinkerPatch::DexCacheArrayPatch(0u, nullptr, kPcInsnOffset, element_offset),
       LinkerPatch::DexCacheArrayPatch(4u, nullptr, kPcInsnOffset, element_offset),
   };
@@ -175,7 +304,7 @@
 void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) {
   constexpr uint32_t kStringIndex = 1u;
   string_index_to_offset_map_.Put(kStringIndex, string_offset);
-  LinkerPatch patches[] = {
+  const LinkerPatch patches[] = {
       LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex),
       LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex),
   };
@@ -214,7 +343,7 @@
 }
 
 TEST_F(Thumb2RelativePatcherTest, CallSelf) {
-  LinkerPatch patches[] = {
+  const LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
   };
   AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
@@ -227,11 +356,11 @@
 }
 
 TEST_F(Thumb2RelativePatcherTest, CallOther) {
-  LinkerPatch method1_patches[] = {
+  const LinkerPatch method1_patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
   };
   AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
-  LinkerPatch method2_patches[] = {
+  const LinkerPatch method2_patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
   };
   AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
@@ -254,7 +383,7 @@
 }
 
 TEST_F(Thumb2RelativePatcherTest, CallTrampoline) {
-  LinkerPatch patches[] = {
+  const LinkerPatch patches[] = {
       LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
   };
   AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
@@ -274,7 +403,7 @@
   constexpr uint32_t bl_offset_in_method3 = 3u * 2u;  // After NOPs.
   ArrayRef<const uint8_t> method3_code(method3_raw_code);
   ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
-  LinkerPatch method3_patches[] = {
+  const LinkerPatch method3_patches[] = {
       LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index),
   };
 
@@ -303,7 +432,7 @@
   constexpr uint32_t bl_offset_in_method1 = 3u * 2u;  // After NOPs.
   ArrayRef<const uint8_t> method1_code(method1_raw_code);
   ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
-  LinkerPatch method1_patches[] = {
+  const LinkerPatch method1_patches[] = {
       LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u),
   };
 
@@ -325,7 +454,7 @@
   constexpr uint32_t bl_offset_in_method3 = 2u * 2u;  // After NOPs.
   ArrayRef<const uint8_t> method3_code(method3_raw_code);
   ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
-  LinkerPatch method3_patches[] = {
+  const LinkerPatch method3_patches[] = {
       LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u),
   };
 
@@ -347,7 +476,7 @@
   constexpr uint32_t bl_offset_in_method1 = 2u * 2u;  // After NOPs.
   ArrayRef<const uint8_t> method1_code(method1_raw_code);
   ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
-  LinkerPatch method1_patches[] = {
+  const LinkerPatch method1_patches[] = {
       LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u),
   };
 
@@ -382,7 +511,7 @@
   constexpr uint32_t bl_offset_in_method3 = 3u * 2u;  // After NOPs.
   ArrayRef<const uint8_t> method3_code(method3_raw_code);
   ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
-  LinkerPatch method3_patches[] = {
+  const LinkerPatch method3_patches[] = {
       LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u),
   };
 
@@ -445,5 +574,710 @@
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
+void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) {
+  uint32_t valid_regs[] = {
+      0,  1,  2,  3,      5,  6,  7,  // R4 is reserved for entrypoint address.
+      8,  9, 10, 11,                  // IP, SP, LR and PC are reserved.
+  };
+  DCHECK_ALIGNED(offset, 4u);
+  DCHECK_LT(offset, 4 * KB);
+  constexpr size_t kMethodCodeSize = 8u;
+  constexpr size_t kLiteralOffset = 0u;
+  uint32_t method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    for (uint32_t holder_reg : valid_regs) {
+      uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12);
+      const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
+      ASSERT_EQ(kMethodCodeSize, raw_code.size());
+      ArrayRef<const uint8_t> code(raw_code);
+      uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+          base_reg, holder_reg, /* narrow */ false);
+      const LinkerPatch patches[] = {
+          LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
+      };
+      ++method_idx;
+      AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+    }
+  }
+  Link();
+
+  // All thunks are at the end.
+  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+  method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    for (uint32_t holder_reg : valid_regs) {
+      ++method_idx;
+      uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+      uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12);
+      const std::vector<uint8_t> expected_code = RawCode({bne, ldr});
+      ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne;
+      ASSERT_TRUE(
+          CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+      std::vector<uint8_t> expected_thunk =
+          CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ false);
+      ASSERT_GT(output_.size(), thunk_offset);
+      ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+      ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+                                             expected_thunk.size());
+      if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+        DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+        ASSERT_TRUE(false);
+      }
+
+      size_t gray_check_offset = thunk_offset;
+      if (holder_reg == base_reg) {
+        // Verify that the null-check uses the correct register, i.e. holder_reg.
+        if (holder_reg < 8) {
+          ASSERT_GE(output_.size() - gray_check_offset, 2u);
+          ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+          gray_check_offset += 2u;
+        } else {
+          ASSERT_GE(output_.size() - gray_check_offset, 6u);
+          ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+          ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u);  // BEQ
+          gray_check_offset += 6u;
+        }
+      }
+      // Verify that the lock word for gray bit check is loaded from the holder address.
+      ASSERT_GE(output_.size() - gray_check_offset,
+                4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+      const uint32_t load_lock_word =
+          kLdrWInsn |
+          (holder_reg << 16) |
+          (/* IP */ 12 << 12) |
+          mirror::Object::MonitorOffset().Uint32Value();
+      ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset));
+      // Verify the gray bit check.
+      DCHECK_GE(LockWord::kReadBarrierStateShift, 8u);  // ROR modified immediate.
+      uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
+      const uint32_t tst_gray_bit_without_offset =
+          0xf0100f00 | (/* IP */ 12 << 16)
+                     | (((ror_shift >> 4) & 1) << 26)   // i
+                     | (((ror_shift >> 1) & 7) << 12)   // imm3
+                     | ((ror_shift & 1) << 7);          // imm8, ROR('1':imm8<7:0>, ror_shift).
+      EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u));
+      EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u);  // BNE
+      // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset").
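+      // (The ADD below computes base_reg + (IP LSR #32), i.e. base_reg + 0; it exists
+      // only to make the reference load depend on the lock word loaded into IP.)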
+      const uint32_t fake_dependency =
+          0xeb000010 |              // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+          (/* IP */ 12) |           // Rm = IP
+          (base_reg << 16) |        // Rn = base_reg
+          (base_reg << 8);          // Rd = base_reg
+      EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u));
+      // Do not check the rest of the implementation.
+
+      // The next thunk follows on the next aligned offset.
+      thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+    }
+  }
+}
+
+void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) {
+  uint32_t valid_regs[] = {
+      0,  1,  2,  3,      5,  6,  7,  // R4 is reserved for entrypoint address.
+      8,  9, 10, 11,                  // IP, SP, LR and PC are reserved.
+  };
+  DCHECK_ALIGNED(offset, 4u);
+  DCHECK_LT(offset, 32u);
+  constexpr size_t kMethodCodeSize = 6u;
+  constexpr size_t kLiteralOffset = 0u;
+  uint32_t method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    if (base_reg >= 8u) {
+      continue;
+    }
+    for (uint32_t holder_reg : valid_regs) {
+      uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
+      const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
+      ASSERT_EQ(kMethodCodeSize, raw_code.size());
+      ArrayRef<const uint8_t> code(raw_code);
+      uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+          base_reg, holder_reg, /* narrow */ true);
+      const LinkerPatch patches[] = {
+          LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
+      };
+      ++method_idx;
+      AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+    }
+  }
+  Link();
+
+  // All thunks are at the end.
+  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+  method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    if (base_reg >= 8u) {
+      continue;
+    }
+    for (uint32_t holder_reg : valid_regs) {
+      ++method_idx;
+      uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+      uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
+      const std::vector<uint8_t> expected_code = RawCode({bne, ldr});
+      ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne;
+      ASSERT_TRUE(
+          CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+      std::vector<uint8_t> expected_thunk =
+          CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ true);
+      ASSERT_GT(output_.size(), thunk_offset);
+      ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+      ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+                                             expected_thunk.size());
+      if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+        DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+        ASSERT_TRUE(false);
+      }
+
+      size_t gray_check_offset = thunk_offset;
+      if (holder_reg == base_reg) {
+        // Verify that the null-check uses the correct register, i.e. holder_reg.
+        if (holder_reg < 8) {
+          ASSERT_GE(output_.size() - gray_check_offset, 2u);
+          ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+          gray_check_offset += 2u;
+        } else {
+          ASSERT_GE(output_.size() - gray_check_offset, 6u);
+          ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+          ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u);  // BEQ
+          gray_check_offset += 6u;
+        }
+      }
+      // Verify that the lock word for gray bit check is loaded from the holder address.
+      ASSERT_GE(output_.size() - gray_check_offset,
+                4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+      const uint32_t load_lock_word =
+          kLdrWInsn |
+          (holder_reg << 16) |
+          (/* IP */ 12 << 12) |
+          mirror::Object::MonitorOffset().Uint32Value();
+      ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset));
+      // Verify the gray bit check.
+      DCHECK_GE(LockWord::kReadBarrierStateShift, 8u);  // ROR modified immediate.
+      uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
+      const uint32_t tst_gray_bit_without_offset =
+          0xf0100f00 | (/* IP */ 12 << 16)
+                     | (((ror_shift >> 4) & 1) << 26)   // i
+                     | (((ror_shift >> 1) & 7) << 12)   // imm3
+                     | ((ror_shift & 1) << 7);          // imm8, ROR('1':imm8<7:0>, ror_shift).
+      EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u));
+      EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u);  // BNE
+      // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset").
+      const uint32_t fake_dependency =
+          0xeb000010 |              // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+          (/* IP */ 12) |           // Rm = IP
+          (base_reg << 16) |        // Rn = base_reg
+          (base_reg << 8);          // Rd = base_reg
+      EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u));
+      // Do not check the rest of the implementation.
+
+      // The next thunk follows on the next aligned offset.
+      thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+    }
+  }
+}
+
+#define TEST_BAKER_FIELD_WIDE(offset, ref_reg)    \
+  TEST_F(Thumb2RelativePatcherTest,               \
+    BakerOffsetWide##offset##_##ref_reg) {        \
+    TestBakerFieldWide(offset, ref_reg);          \
+  }
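+// Note: the /* ... */ comments in the invocations below are stripped before token pasting,
+// so, for example, the (offset 8, ref_reg 3) invocation defines a test named BakerOffsetWide8_3.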
+
+TEST_BAKER_FIELD_WIDE(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD_WIDE(/* offset */ 8, /* ref_reg */ 3)
+TEST_BAKER_FIELD_WIDE(/* offset */ 28, /* ref_reg */ 7)
+TEST_BAKER_FIELD_WIDE(/* offset */ 0xffc, /* ref_reg */ 11)
+
+#define TEST_BAKER_FIELD_NARROW(offset, ref_reg)  \
+  TEST_F(Thumb2RelativePatcherTest,               \
+    BakerOffsetNarrow##offset##_##ref_reg) {      \
+    TestBakerFieldNarrow(offset, ref_reg);        \
+  }
+
+TEST_BAKER_FIELD_NARROW(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD_NARROW(/* offset */ 8, /* ref_reg */ 3)
+TEST_BAKER_FIELD_NARROW(/* offset */ 28, /* ref_reg */ 7)
+
+TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) {
+  // One thunk in the middle with maximum distance branches to it from both sides.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
+  constexpr uint32_t kLiteralOffset1 = 6u;
+  const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
+  ArrayRef<const uint8_t> code1(raw_code1);
+  uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+      /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
+  const LinkerPatch patches1[] = {
+      LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+  };
+  AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+  constexpr uint32_t expected_thunk_offset =
+      kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u);
+  static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned.");
+  size_t filler1_size = expected_thunk_offset -
+                        RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment);
+  std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u);
+  ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+  AddCompiledMethod(MethodRef(2u), filler1_code);
+
+  // Enforce thunk reservation with a tiny method.
+  AddCompiledMethod(MethodRef(3u), kNopCode);
+
+  constexpr uint32_t kLiteralOffset2 = 4;
+  static_assert(IsAligned<kArmAlignment>(kLiteralOffset2 + kPcAdjustment),
+                "PC for BNE must be aligned.");
+
+  // Allow reaching the thunk from the very beginning of a method almost 1MiB away. Backward branch
+  // reaches the full 1MiB but we need to take PC adjustment into account. Things to subtract:
+  //   - thunk size and method 3 pre-header, rounded up (padding in between if needed)
+  //   - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
+  //   - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
+  size_t thunk_size =
+      CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size();
+  size_t filler2_size =
+      1 * MB - (kLiteralOffset2 + kPcAdjustment)
+             - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
+             - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+             - sizeof(OatQuickMethodHeader);
+  std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u);
+  ArrayRef<const uint8_t> filler2_code(raw_filler2_code);
+  AddCompiledMethod(MethodRef(4u), filler2_code);
+
+  const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn});
+  ArrayRef<const uint8_t> code2(raw_code2);
+  const LinkerPatch patches2[] = {
+      LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data),
+  };
+  AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2));
+
+  Link();
+
+  uint32_t first_method_offset = GetMethodOffset(1u);
+  uint32_t last_method_offset = GetMethodOffset(5u);
+  EXPECT_EQ(2 * MB, last_method_offset - first_method_offset);
+
+  const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff;
+  const uint32_t bne_max_backward = kBneWPlus0 | 0x04000000;
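+  // These are the extreme B<cond>.W displacements: +((1 << 20) - 2) forward and -(1 << 20) back.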
+  const std::vector<uint8_t> expected_code1 =
+      RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn});
+  const std::vector<uint8_t> expected_code2 = RawCode({kNopWInsn, bne_max_backward, kLdrWInsn});
+  ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+  ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) {
+  // Based on the first part of BakerOffsetThunkInTheMiddle but the BNE is one instruction
+  // earlier, so the thunk is emitted before the filler.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
+  constexpr uint32_t kLiteralOffset1 = 4u;
+  const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn});
+  ArrayRef<const uint8_t> code1(raw_code1);
+  uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+      /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
+  const LinkerPatch patches1[] = {
+      LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+  };
+  AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+  constexpr uint32_t expected_thunk_offset =
+      kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement + 2 */ (1u << 20);
+  static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned.");
+  size_t filler1_size = expected_thunk_offset -
+                        RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment);
+  std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u);
+  ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+  AddCompiledMethod(MethodRef(2u), filler1_code);
+
+  Link();
+
+  const uint32_t bne = BneWWithOffset(kLiteralOffset1, RoundUp(raw_code1.size(), kArmAlignment));
+  const std::vector<uint8_t> expected_code1 = RawCode({kNopWInsn, bne, kLdrWInsn, kNopInsn});
+  ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast) {
+  // Based on BakerOffsetThunkInTheMiddle, but the BNE in the last method is preceded
+  // by a NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
+  constexpr uint32_t kLiteralOffset1 = 6u;
+  const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
+  ArrayRef<const uint8_t> code1(raw_code1);
+  uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+      /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
+  const LinkerPatch patches1[] = {
+      LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+  };
+  AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+  constexpr uint32_t expected_thunk_offset =
+      kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u);
+  static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned.");
+  size_t filler1_size = expected_thunk_offset -
+                        RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment);
+  std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u);
+  ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+  AddCompiledMethod(MethodRef(2u), filler1_code);
+
+  // Enforce thunk reservation with a tiny method.
+  AddCompiledMethod(MethodRef(3u), kNopCode);
+
+  constexpr uint32_t kReachableFromOffset2 = 4;
+  constexpr uint32_t kLiteralOffset2 = kReachableFromOffset2 + 2;
+  static_assert(IsAligned<kArmAlignment>(kReachableFromOffset2 + kPcAdjustment),
+                "PC for BNE must be aligned.");
+
+  // If not for the extra NOP, this would allow reaching the thunk from the BNE
+  // of a method 1MiB away. The backward branch reaches the full 1MiB, but we need to take
+  // PC adjustment into account. Things to subtract:
+  //   - thunk size and method 3 pre-header, rounded up (padding in between if needed)
+  //   - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
+  //   - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
+  size_t thunk_size =
+      CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size();
+  size_t filler2_size =
+      1 * MB - (kReachableFromOffset2 + kPcAdjustment)
+             - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
+             - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+             - sizeof(OatQuickMethodHeader);
+  std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u);
+  ArrayRef<const uint8_t> filler2_code(raw_filler2_code);
+  AddCompiledMethod(MethodRef(4u), filler2_code);
+
+  // Extra 16-bit NOP compared to BakerOffsetThunkInTheMiddle.
+  const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
+  ArrayRef<const uint8_t> code2(raw_code2);
+  const LinkerPatch patches2[] = {
+      LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data),
+  };
+  AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2));
+
+  Link();
+
+  uint32_t first_method_offset = GetMethodOffset(1u);
+  uint32_t last_method_offset = GetMethodOffset(5u);
+  EXPECT_EQ(2 * MB, last_method_offset - first_method_offset);
+
+  const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff;
+  const uint32_t bne_last =
+      BneWWithOffset(kLiteralOffset2, RoundUp(raw_code2.size(), kArmAlignment));
+  const std::vector<uint8_t> expected_code1 =
+      RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn});
+  const std::vector<uint8_t> expected_code2 =
+      RawCode({kNopWInsn, kNopInsn, bne_last, kLdrWInsn});
+  ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+  ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerArray) {
+  uint32_t valid_regs[] = {
+      0,  1,  2,  3,      5,  6,  7,  // R4 is reserved for entrypoint address.
+      8,  9, 10, 11,                  // IP, SP, LR and PC are reserved.
+  };
+  auto ldr = [](uint32_t base_reg) {
+    uint32_t index_reg = (base_reg == 0u) ? 1u : 0u;
+    uint32_t ref_reg = (base_reg == 2) ? 3u : 2u;
+    return kLdrRegLsl2 | index_reg | (base_reg << 16) | (ref_reg << 12);
+  };
+  constexpr size_t kMethodCodeSize = 8u;
+  constexpr size_t kLiteralOffset = 0u;
+  uint32_t method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    ++method_idx;
+    const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)});
+    ASSERT_EQ(kMethodCodeSize, raw_code.size());
+    ArrayRef<const uint8_t> code(raw_code);
+    const LinkerPatch patches[] = {
+        LinkerPatch::BakerReadBarrierBranchPatch(
+            kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)),
+    };
+    AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+  }
+  Link();
+
+  // All thunks are at the end.
+  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+  method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    ++method_idx;
+    uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+    const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)});
+    ASSERT_EQ(kMethodCodeSize, expected_code.size());
+    EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+    std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg);
+    ASSERT_GT(output_.size(), thunk_offset);
+    ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+    ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+                                           expected_thunk.size());
+    if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+      DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+      ASSERT_TRUE(false);
+    }
+
+    // Verify that the lock word for gray bit check is loaded from the correct address
+    // before the base_reg which points to the array data.
+    ASSERT_GE(output_.size() - thunk_offset,
+              4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+    int32_t data_offset =
+        mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+    int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset;
+    ASSERT_LT(offset, 0);
+    ASSERT_GT(offset, -256);
+    const uint32_t load_lock_word =
+        kLdrNegativeOffset |
+        (-offset & 0xffu) |
+        (base_reg << 16) |
+        (/* IP */ 12 << 12);
+    EXPECT_EQ(load_lock_word, GetOutputInsn32(thunk_offset));
+    // Verify the gray bit check.
+    DCHECK_GE(LockWord::kReadBarrierStateShift, 8u);  // ROR modified immediate.
+    uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
+    const uint32_t tst_gray_bit_without_offset =
+        0xf0100f00 | (/* IP */ 12 << 16)
+                   | (((ror_shift >> 4) & 1) << 26)   // i
+                   | (((ror_shift >> 1) & 7) << 12)   // imm3
+                   | ((ror_shift & 1) << 7);          // imm8, ROR('1':imm8<7:0>, ror_shift).
+    EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(thunk_offset + 4u));
+    EXPECT_EQ(0xd100u, GetOutputInsn16(thunk_offset + 8u) & 0xff00u);  // BNE
+    // Verify the fake dependency.
+    const uint32_t fake_dependency =
+        0xeb000010 |              // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+        (/* IP */ 12) |           // Rm = IP
+        (base_reg << 16) |        // Rn = base_reg
+        (base_reg << 8);          // Rd = base_reg
+    EXPECT_EQ(fake_dependency, GetOutputInsn32(thunk_offset + 14u));
+    // Do not check the rest of the implementation.
+
+    // The next thunk follows on the next aligned offset.
+    thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+  }
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) {
+  uint32_t valid_regs[] = {
+      0,  1,  2,  3,      5,  6,  7,  // R4 is reserved for entrypoint address.
+      8,  9, 10, 11,                  // IP, SP, LR and PC are reserved.
+  };
+  constexpr size_t kMethodCodeSize = 8u;
+  constexpr size_t kLiteralOffset = 4u;
+  uint32_t method_idx = 0u;
+  for (uint32_t root_reg : valid_regs) {
+    ++method_idx;
+    uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
+    const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
+    ASSERT_EQ(kMethodCodeSize, raw_code.size());
+    ArrayRef<const uint8_t> code(raw_code);
+    const LinkerPatch patches[] = {
+        LinkerPatch::BakerReadBarrierBranchPatch(
+            kLiteralOffset,
+            Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ false)),
+    };
+    AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+  }
+  Link();
+
+  // All thunks are at the end.
+  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+  method_idx = 0u;
+  for (uint32_t root_reg : valid_regs) {
+    ++method_idx;
+    uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+    uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
+    const std::vector<uint8_t> expected_code = RawCode({ldr, bne});
+    ASSERT_EQ(kMethodCodeSize, expected_code.size());
+    EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+    std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ false);
+    ASSERT_GT(output_.size(), thunk_offset);
+    ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+    ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+                                           expected_thunk.size());
+    if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+      DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+      ASSERT_TRUE(false);
+    }
+
+    // Verify that the fast-path null-check uses the correct register, i.e. root_reg.
+    if (root_reg < 8) {
+      ASSERT_GE(output_.size() - thunk_offset, 2u);
+      ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+    } else {
+      ASSERT_GE(output_.size() - thunk_offset, 6u);
+      ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+      ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u);  // BEQ
+    }
+    // Do not check the rest of the implementation.
+
+    // The next thunk follows on the next aligned offset.
+    thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+  }
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) {
+  uint32_t valid_regs[] = {
+      0,  1,  2,  3,      5,  6,  7,  // R4 is reserved for entrypoint address.
+                                      // Not applicable to high registers.
+  };
+  constexpr size_t kMethodCodeSize = 6u;
+  constexpr size_t kLiteralOffset = 2u;
+  uint32_t method_idx = 0u;
+  for (uint32_t root_reg : valid_regs) {
+    ++method_idx;
+    uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
+    const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
+    ASSERT_EQ(kMethodCodeSize, raw_code.size());
+    ArrayRef<const uint8_t> code(raw_code);
+    const LinkerPatch patches[] = {
+        LinkerPatch::BakerReadBarrierBranchPatch(
+            kLiteralOffset,
+            Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ true)),
+    };
+    AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+  }
+  Link();
+
+  // All thunks are at the end.
+  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+  method_idx = 0u;
+  for (uint32_t root_reg : valid_regs) {
+    ++method_idx;
+    uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+    uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
+    const std::vector<uint8_t> expected_code = RawCode({ldr, bne});
+    ASSERT_EQ(kMethodCodeSize, expected_code.size());
+    EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+    std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ true);
+    ASSERT_GT(output_.size(), thunk_offset);
+    ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+    ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+                                           expected_thunk.size());
+    if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+      DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+      ASSERT_TRUE(false);
+    }
+
+    // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
+    ASSERT_GE(output_.size() - thunk_offset, 2u);
+    ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+    // Do not check the rest of the implementation.
+
+    // The next thunk follows on the next aligned offset.
+    thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+  }
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) {
+  // Test 1MiB of patches to the same thunk to stress-test different large offsets.
+  // (The low bits are not that important but the location of the high bits is easy to get wrong.)
+  std::vector<uint8_t> code;
+  code.reserve(1 * MB);
+  const size_t num_patches = 1 * MB / 8u;
+  std::vector<LinkerPatch> patches;
+  patches.reserve(num_patches);
+  const uint32_t ldr =
+      kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12);
+  uint32_t encoded_data =
+      Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0, /* narrow */ false);
+  for (size_t i = 0; i != num_patches; ++i) {
+    PushBackInsn(&code, ldr);
+    PushBackInsn(&code, kBneWPlus0);
+    patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data));
+  }
+  ASSERT_EQ(1 * MB, code.size());
+  ASSERT_EQ(num_patches, patches.size());
+  AddCompiledMethod(MethodRef(1u),
+                    ArrayRef<const uint8_t>(code),
+                    ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  // The thunk is right after the method code.
+  DCHECK_ALIGNED(1 * MB, kArmAlignment);
+  std::vector<uint8_t> expected_code;
+  for (size_t i = 0; i != num_patches; ++i) {
+    PushBackInsn(&expected_code, ldr);
+    PushBackInsn(&expected_code, BneWWithOffset(8u * i + 4u, 1 * MB));
+  }
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) {
+  // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());`
+  // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily
+  // hold when we're reserving thunks of different sizes. This test exposes the situation
+  // by using Baker thunks and a method call thunk.
+
+  // Add a method call patch that can reach up to method 1 offset + 16MiB.
+  uint32_t method_idx = 0u;
+  constexpr size_t kMethodCallLiteralOffset = 2u;
+  constexpr uint32_t kMissingMethodIdx = 2u;
+  const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0});
+  const LinkerPatch method1_patches[] = {
+      LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u),
+  };
+  ArrayRef<const uint8_t> code1(raw_code1);
+  ++method_idx;
+  AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches));
+
+  // Skip kMissingMethodIdx.
+  ++method_idx;
+  ASSERT_EQ(kMissingMethodIdx, method_idx);
+  // Add a method with the right size so that the code for the next method starts 1MiB
+  // after the code for method 1.
+  size_t filler_size =
+      1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+             - sizeof(OatQuickMethodHeader);
+  std::vector<uint8_t> filler_code = GenNops(filler_size / 2u);
+  ++method_idx;
+  AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
+  // Add 14 methods with 1MiB code+header, making the code for the next method start 1MiB
+  // before the currently scheduled MaxNextOffset() for the method call thunk.
+  for (uint32_t i = 0; i != 14; ++i) {
+    filler_size = 1 * MB - sizeof(OatQuickMethodHeader);
+    filler_code = GenNops(filler_size / 2u);
+    ++method_idx;
+    AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
+  }
+
+  // Add 2 Baker GC root patches to the last method, one that would allow the thunk at
+  // 1MiB + kArmAlignment, i.e. kArmAlignment after the method call thunk, and the
+  // second that needs it kArmAlignment after that. Given the size of the GC root thunk
+  // is more than the space required by the method call thunk plus kArmAlignment,
+  // this pushes the first GC root thunk's pending MaxNextOffset() before the method call
+  // thunk's pending MaxNextOffset() which needs to be adjusted.
+  ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment,
+            CompileBakerGcRootThunk(/* root_reg */ 0, /* narrow */ false).size());
+  static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8");
+  constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment;
+  constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment;
+  // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | (root_reg << 12)`.
+  const uint32_t ldr1 = kLdrWInsn | (/* root_reg */ 1 << 12);
+  const uint32_t ldr2 = kLdrWInsn | (/* root_reg */ 2 << 12);
+  const std::vector<uint8_t> last_method_raw_code = RawCode({
+      kNopInsn,                                 // Padding before first GC root read barrier.
+      ldr1, kBneWPlus0,                         // First GC root LDR with read barrier.
+      ldr2, kBneWPlus0,                         // Second GC root LDR with read barrier.
+  });
+  uint32_t encoded_data1 =
+      Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1, /* narrow */ false);
+  uint32_t encoded_data2 =
+      Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2, /* narrow */ false);
+  const LinkerPatch last_method_patches[] = {
+      LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1),
+      LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2),
+  };
+  ++method_idx;
+  AddCompiledMethod(MethodRef(method_idx),
+                    ArrayRef<const uint8_t>(last_method_raw_code),
+                    ArrayRef<const LinkerPatch>(last_method_patches));
+
+  // The main purpose of the test is to check that Link() does not cause a crash.
+  Link();
+
+  ASSERT_EQ(15 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u));
+}
+
 }  // namespace linker
 }  // namespace art
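
Note: the BakerAndMethodCallInteraction test above depends on filler sizes that place successive methods exactly 1MiB apart. Below is a minimal standalone sketch (not part of this patch) of that arithmetic; kMethodHeaderSize and kRawCode1Size are assumed stand-ins for sizeof(OatQuickMethodHeader) and the size of the NOP+BL code, respectively.

#include <cstddef>
#include <cstdio>

int main() {
  constexpr size_t MB = 1024 * 1024;
  constexpr size_t kArmAlignment = 8;       // matches the static_assert in the test
  constexpr size_t kMethodHeaderSize = 24;  // assumed header size, for illustration only
  constexpr size_t kRawCode1Size = 8;       // two 32-bit instructions: NOP + BL

  auto RoundUp = [](size_t value, size_t alignment) {
    return (value + alignment - 1u) / alignment * alignment;
  };

  // Filler that makes the next method's code start exactly 1MiB after method 1's code.
  size_t first_filler =
      1 * MB - RoundUp(kRawCode1Size + kMethodHeaderSize, kArmAlignment) - kMethodHeaderSize;
  // Fillers that keep every following method exactly 1MiB apart (code + header == 1MiB).
  size_t other_fillers = 1 * MB - kMethodHeaderSize;

  // One first filler plus fourteen 1MiB methods put the last method 15MiB after method 1.
  std::printf("first filler: %zu bytes, other fillers: %zu bytes\n", first_filler, other_fillers);
  return 0;
}
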
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 5c6fb50..c033c2d 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -57,8 +57,6 @@
     case LinkerPatch::Type::kMethod:
     case LinkerPatch::Type::kCall:
     case LinkerPatch::Type::kCallRelative:
-    case LinkerPatch::Type::kType:
-    case LinkerPatch::Type::kString:
     case LinkerPatch::Type::kBakerReadBarrierBranch:
       return false;
     case LinkerPatch::Type::kTypeRelative:
@@ -305,37 +303,42 @@
   DCHECK_LT(literal_offset, code->size());
   uint32_t insn = GetInsn(code, literal_offset);
   DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000);  // CBNZ Xt, +0 (unpatched)
-  ThunkKey key = GetBakerReadBarrierKey(patch);
+  ThunkKey key = GetBakerThunkKey(patch);
   if (kIsDebugBuild) {
+    const uint32_t encoded_data = key.GetCustomValue1();
+    BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
     // Check that the next instruction matches the expected LDR.
-    switch (key.GetType()) {
-      case ThunkType::kBakerReadBarrierField: {
+    switch (kind) {
+      case BakerReadBarrierKind::kField: {
         DCHECK_GE(code->size() - literal_offset, 8u);
         uint32_t next_insn = GetInsn(code, literal_offset + 4u);
         // LDR (immediate) with correct base_reg.
         CheckValidReg(next_insn & 0x1fu);  // Check destination register.
-        CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetFieldParams().base_reg << 5));
+        const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
         break;
       }
-      case ThunkType::kBakerReadBarrierArray: {
+      case BakerReadBarrierKind::kArray: {
         DCHECK_GE(code->size() - literal_offset, 8u);
         uint32_t next_insn = GetInsn(code, literal_offset + 4u);
         // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
         // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
         CheckValidReg(next_insn & 0x1fu);  // Check destination register.
-        CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (key.GetArrayParams().base_reg << 5));
+        const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
         CheckValidReg((next_insn >> 16) & 0x1f);  // Check index register
         break;
       }
-      case ThunkType::kBakerReadBarrierRoot: {
+      case BakerReadBarrierKind::kGcRoot: {
         DCHECK_GE(literal_offset, 4u);
         uint32_t prev_insn = GetInsn(code, literal_offset - 4u);
         // LDR (immediate) with correct root_reg.
-        CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | key.GetRootParams().root_reg);
+        const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);
         break;
       }
       default:
-        LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType());
+        LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
         UNREACHABLE();
     }
   }
@@ -347,49 +350,6 @@
   SetInsn(code, literal_offset, insn);
 }
 
-ArmBaseRelativePatcher::ThunkKey Arm64RelativePatcher::GetBakerReadBarrierKey(
-    const LinkerPatch& patch) {
-  DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch);
-  uint32_t value = patch.GetBakerCustomValue1();
-  BakerReadBarrierKind type = BakerReadBarrierKindField::Decode(value);
-  ThunkParams params;
-  switch (type) {
-    case BakerReadBarrierKind::kField:
-      params.field_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
-      CheckValidReg(params.field_params.base_reg);
-      params.field_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value);
-      CheckValidReg(params.field_params.holder_reg);
-      break;
-    case BakerReadBarrierKind::kArray:
-      params.array_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
-      CheckValidReg(params.array_params.base_reg);
-      params.array_params.dummy = 0u;
-      DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg);
-      break;
-    case BakerReadBarrierKind::kGcRoot:
-      params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value);
-      CheckValidReg(params.root_params.root_reg);
-      params.root_params.dummy = 0u;
-      DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg);
-      break;
-    default:
-      LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(type);
-      UNREACHABLE();
-  }
-  constexpr uint8_t kTypeTranslationOffset = 1u;
-  static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset ==
-                static_cast<uint32_t>(ThunkType::kBakerReadBarrierField),
-                "Thunk type translation check.");
-  static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kArray) + kTypeTranslationOffset ==
-                static_cast<uint32_t>(ThunkType::kBakerReadBarrierArray),
-                "Thunk type translation check.");
-  static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset ==
-                static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot),
-                "Thunk type translation check.");
-  return ThunkKey(static_cast<ThunkType>(static_cast<uint32_t>(type) + kTypeTranslationOffset),
-                  params);
-}
-
 #define __ assembler.GetVIXLAssembler()->
 
 static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
@@ -419,28 +379,22 @@
   // Note: The fake dependency is unnecessary for the slow path.
 }
 
-std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
+void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler,
+                                                        uint32_t encoded_data) {
   using namespace vixl::aarch64;  // NOLINT(build/namespaces)
-  ArenaPool pool;
-  ArenaAllocator arena(&pool);
-  arm64::Arm64Assembler assembler(&arena);
-
-  switch (key.GetType()) {
-    case ThunkType::kMethodCall: {
-      // The thunk just uses the entry point in the ArtMethod. This works even for calls
-      // to the generic JNI and interpreter trampolines.
-      Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-          kArm64PointerSize).Int32Value());
-      assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
-      break;
-    }
-    case ThunkType::kBakerReadBarrierField: {
+  BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+  switch (kind) {
+    case BakerReadBarrierKind::kField: {
       // Check if the holder is gray and, if not, add fake dependency to the base register
       // and return to the LDR instruction to load the reference. Otherwise, use introspection
       // to load the reference and call the entrypoint (in IP1) that performs further checks
       // on the reference and marks it if needed.
-      auto holder_reg = Register::GetXRegFromCode(key.GetFieldParams().holder_reg);
-      auto base_reg = Register::GetXRegFromCode(key.GetFieldParams().base_reg);
+      auto base_reg =
+          Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      auto holder_reg =
+          Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data));
+      CheckValidReg(holder_reg.GetCode());
       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
       temps.Exclude(ip0, ip1);
       // If base_reg differs from holder_reg, the offset was too large and we must have
@@ -469,8 +423,11 @@
       }
       break;
     }
-    case ThunkType::kBakerReadBarrierArray: {
-      auto base_reg = Register::GetXRegFromCode(key.GetArrayParams().base_reg);
+    case BakerReadBarrierKind::kArray: {
+      auto base_reg =
+          Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
       temps.Exclude(ip0, ip1);
       vixl::aarch64::Label slow_path;
@@ -489,12 +446,15 @@
       __ Br(ip1);                           // Jump to the entrypoint's array switch case.
       break;
     }
-    case ThunkType::kBakerReadBarrierRoot: {
+    case BakerReadBarrierKind::kGcRoot: {
       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
       // and it does not have a forwarding address), call the correct introspection entrypoint;
       // otherwise return the reference (or the extracted forwarding address).
       // There is no gray bit check for GC roots.
-      auto root_reg = Register::GetWRegFromCode(key.GetRootParams().root_reg);
+      auto root_reg =
+          Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(root_reg.GetCode());
+      DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
       temps.Exclude(ip0, ip1);
       vixl::aarch64::Label return_label, not_marked, forwarding_address;
@@ -517,6 +477,30 @@
       __ Br(lr);
       break;
     }
+    default:
+      LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+      UNREACHABLE();
+  }
+}
+
+std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  arm64::Arm64Assembler assembler(&arena);
+
+  switch (key.GetType()) {
+    case ThunkType::kMethodCall: {
+      // The thunk just uses the entry point in the ArtMethod. This works even for calls
+      // to the generic JNI and interpreter trampolines.
+      Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+          kArm64PointerSize).Int32Value());
+      assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+      break;
+    }
+    case ThunkType::kBakerReadBarrier: {
+      CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1());
+      break;
+    }
   }
 
   // Ensure we emit the literal pool.
@@ -529,24 +513,20 @@
 
 #undef __
 
-uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(ThunkType type) {
-  switch (type) {
+uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) {
+  switch (key.GetType()) {
     case ThunkType::kMethodCall:
       return kMaxMethodCallPositiveDisplacement;
-    case ThunkType::kBakerReadBarrierField:
-    case ThunkType::kBakerReadBarrierArray:
-    case ThunkType::kBakerReadBarrierRoot:
+    case ThunkType::kBakerReadBarrier:
       return kMaxBcondPositiveDisplacement;
   }
 }
 
-uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(ThunkType type) {
-  switch (type) {
+uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) {
+  switch (key.GetType()) {
     case ThunkType::kMethodCall:
       return kMaxMethodCallNegativeDisplacement;
-    case ThunkType::kBakerReadBarrierField:
-    case ThunkType::kBakerReadBarrierArray:
-    case ThunkType::kBakerReadBarrierRoot:
+    case ThunkType::kBakerReadBarrier:
       return kMaxBcondNegativeDisplacement;
   }
 }
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index 71ab70e..b00dd08 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -23,17 +23,15 @@
 #include "linker/arm/relative_patcher_arm_base.h"
 
 namespace art {
+
+namespace arm64 {
+class Arm64Assembler;
+}  // namespace arm64
+
 namespace linker {
 
 class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher {
  public:
-  enum class BakerReadBarrierKind : uint8_t {
-    kField,   // Field get or array get with constant offset (i.e. constant index).
-    kArray,   // Array get with index in register.
-    kGcRoot,  // GC root load.
-    kLast
-  };
-
   static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
     CheckValidReg(base_reg);
     CheckValidReg(holder_reg);
@@ -77,14 +75,20 @@
                                    uint32_t patch_offset) OVERRIDE;
 
  protected:
-  ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE;
   std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE;
-  uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE;
-  uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE;
+  uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE;
+  uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE;
 
  private:
   static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u;
 
+  enum class BakerReadBarrierKind : uint8_t {
+    kField,   // Field get or array get with constant offset (i.e. constant index).
+    kArray,   // Array get with index in register.
+    kGcRoot,  // GC root load.
+    kLast = kGcRoot
+  };
+
   static constexpr size_t kBitsForBakerReadBarrierKind =
       MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
   static constexpr size_t kBitsForRegister = 5u;
@@ -96,9 +100,11 @@
       BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>;
 
   static void CheckValidReg(uint32_t reg) {
-    DCHECK(reg < 30u && reg != 16u && reg != 17u);
+    DCHECK(reg < 30u && reg != 16u && reg != 17u) << reg;
   }
 
+  void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data);
+
   static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp);
 
   static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset,
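
The header above packs all Baker read barrier thunk parameters into a single 32-bit custom value: the BakerReadBarrierKind in the low bits, followed by two 5-bit register fields (the second one set to the invalid register 31 when unused). A minimal standalone sketch of that layout, using hand-rolled shifts rather than ART's BitField helper:

#include <cassert>
#include <cstdint>

enum class Kind : uint8_t { kField, kArray, kGcRoot };

constexpr uint32_t kKindBits = 2u;     // enough to store the three kinds
constexpr uint32_t kRegBits = 5u;
constexpr uint32_t kInvalidReg = 31u;  // sp/zr cannot be used as a base/holder/root register

constexpr uint32_t Encode(Kind kind, uint32_t reg1, uint32_t reg2 = kInvalidReg) {
  return static_cast<uint32_t>(kind) | (reg1 << kKindBits) | (reg2 << (kKindBits + kRegBits));
}

constexpr Kind DecodeKind(uint32_t data) {
  return static_cast<Kind>(data & ((1u << kKindBits) - 1u));
}
constexpr uint32_t DecodeReg1(uint32_t data) {
  return (data >> kKindBits) & ((1u << kRegBits) - 1u);
}
constexpr uint32_t DecodeReg2(uint32_t data) {
  return (data >> (kKindBits + kRegBits)) & ((1u << kRegBits) - 1u);
}

int main() {
  uint32_t field_data = Encode(Kind::kField, /* base_reg */ 5u, /* holder_reg */ 0u);
  assert(DecodeKind(field_data) == Kind::kField);
  assert(DecodeReg1(field_data) == 5u);
  assert(DecodeReg2(field_data) == 0u);

  uint32_t root_data = Encode(Kind::kGcRoot, /* root_reg */ 1u);
  assert(DecodeKind(root_data) == Kind::kGcRoot);
  assert(DecodeReg2(root_data) == kInvalidReg);
  return 0;
}
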
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index 57ea886..b6549ee 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -167,9 +167,7 @@
   }
 
   std::vector<uint8_t> CompileMethodCallThunk() {
-    ArmBaseRelativePatcher::ThunkKey key(
-        ArmBaseRelativePatcher::ThunkType::kMethodCall,
-        ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }});  // NOLINT(whitespace/braces)
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey();
     return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
   }
 
@@ -473,25 +471,22 @@
   std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) {
     const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
         0u, Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg));
-    auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get());
-    ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
-    return patcher->CompileThunk(key);
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+    return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
   }
 
   std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) {
     LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
         0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg));
-    auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get());
-    ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
-    return patcher->CompileThunk(key);
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+    return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
   }
 
   std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) {
     LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
         0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg));
-    auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get());
-    ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
-    return patcher->CompileThunk(key);
+    ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+    return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
   }
 
   uint32_t GetOutputInsn(uint32_t offset) {
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 6b5387a..5091c0b 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -28,7 +28,6 @@
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
-#include "compiled_class.h"
 #include "compiled_method.h"
 #include "debug/method_debug_info.h"
 #include "dex/verification_results.h"
@@ -712,17 +711,17 @@
 
   bool EndClass() {
     ClassReference class_ref(dex_file_, class_def_index_);
-    CompiledClass* compiled_class = writer_->compiler_driver_->GetCompiledClass(class_ref);
     mirror::Class::Status status;
-    if (compiled_class != nullptr) {
-      status = compiled_class->GetStatus();
-    } else if (writer_->compiler_driver_->GetVerificationResults()->IsClassRejected(class_ref)) {
-      // The oat class status is used only for verification of resolved classes,
-      // so use kStatusErrorResolved whether the class was resolved or unresolved
-      // during compile-time verification.
-      status = mirror::Class::kStatusErrorResolved;
-    } else {
-      status = mirror::Class::kStatusNotReady;
+    bool found = writer_->compiler_driver_->GetCompiledClass(class_ref, &status);
+    if (!found) {
+      if (writer_->compiler_driver_->GetVerificationResults()->IsClassRejected(class_ref)) {
+        // The oat class status is used only for verification of resolved classes,
+        // so use kStatusErrorResolved whether the class was resolved or unresolved
+        // during compile-time verification.
+        status = mirror::Class::kStatusErrorResolved;
+      } else {
+        status = mirror::Class::kStatusNotReady;
+      }
     }
 
     writer_->oat_classes_.emplace_back(offset_,
@@ -1337,16 +1336,6 @@
                 PatchMethodAddress(&patched_code_, literal_offset, method);
                 break;
               }
-              case LinkerPatch::Type::kString: {
-                mirror::String* string = GetTargetString(patch);
-                PatchObjectAddress(&patched_code_, literal_offset, string);
-                break;
-              }
-              case LinkerPatch::Type::kType: {
-                mirror::Class* type = GetTargetType(patch);
-                PatchObjectAddress(&patched_code_, literal_offset, type);
-                break;
-              }
               case LinkerPatch::Type::kBakerReadBarrierBranch: {
                 writer_->relative_patcher_->PatchBakerReadBarrierBranch(&patched_code_,
                                                                         patch,
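
The EndClass() change above replaces the CompiledClass* getter with a lookup that returns a found/not-found flag and writes the class status through an out-parameter. A minimal standalone sketch of that pattern, with illustrative names that are not the actual CompilerDriver API:

#include <cstdio>
#include <map>

enum class ClassStatus { kNotReady, kErrorResolved, kVerified };

class CompiledClassTable {
 public:
  void SetStatus(int class_ref, ClassStatus status) { statuses_[class_ref] = status; }

  // Returns true and writes *status if the class was compiled, false otherwise.
  bool GetCompiledClass(int class_ref, ClassStatus* status) const {
    auto it = statuses_.find(class_ref);
    if (it == statuses_.end()) {
      return false;
    }
    *status = it->second;
    return true;
  }

 private:
  std::map<int, ClassStatus> statuses_;
};

int main() {
  CompiledClassTable table;
  table.SetStatus(/* class_ref */ 1, ClassStatus::kVerified);

  ClassStatus status;
  if (!table.GetCompiledClass(/* class_ref */ 2, &status)) {
    status = ClassStatus::kNotReady;  // fall back for classes without a recorded status
  }
  std::printf("status = %d\n", static_cast<int>(status));
  return 0;
}
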
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index 5e70a82..1e75f10 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -310,16 +310,18 @@
   // least one predecessor is not covered by the same TryItem as the try block.
   // We do not split each edge separately, but rather create one boundary block
   // that all predecessors are relinked to. This preserves loop headers (b/23895756).
-  for (auto entry : try_block_info) {
-    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+  for (const auto& entry : try_block_info) {
+    uint32_t block_id = entry.first;
+    const DexFile::TryItem* try_item = entry.second;
+    HBasicBlock* try_block = graph_->GetBlocks()[block_id];
     for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
-      if (GetTryItem(predecessor, try_block_info) != entry.second) {
+      if (GetTryItem(predecessor, try_block_info) != try_item) {
         // Found a predecessor not covered by the same TryItem. Insert entering
         // boundary block.
         HTryBoundary* try_entry =
             new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc());
         try_block->CreateImmediateDominator()->AddInstruction(try_entry);
-        LinkToCatchBlocks(try_entry, code_item_, entry.second, catch_blocks);
+        LinkToCatchBlocks(try_entry, code_item_, try_item, catch_blocks);
         break;
       }
     }
@@ -327,8 +329,10 @@
 
   // Do a second pass over the try blocks and insert exit TryBoundaries where
   // the successor is not in the same TryItem.
-  for (auto entry : try_block_info) {
-    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+  for (const auto& entry : try_block_info) {
+    uint32_t block_id = entry.first;
+    const DexFile::TryItem* try_item = entry.second;
+    HBasicBlock* try_block = graph_->GetBlocks()[block_id];
     // NOTE: Do not use iterators because SplitEdge would invalidate them.
     for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
       HBasicBlock* successor = try_block->GetSuccessors()[i];
@@ -337,7 +341,7 @@
       // covered by the same TryItem. Otherwise the previous pass would have
       // created a non-throwing boundary block.
       if (GetTryItem(successor, try_block_info) != nullptr) {
-        DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info));
+        DCHECK_EQ(try_item, GetTryItem(successor, try_block_info));
         continue;
       }
 
@@ -345,7 +349,7 @@
       HTryBoundary* try_exit =
           new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc());
       graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
-      LinkToCatchBlocks(try_exit, code_item_, entry.second, catch_blocks);
+      LinkToCatchBlocks(try_exit, code_item_, try_item, catch_blocks);
     }
   }
 }
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index d38d5f8..f3ecdf0 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1550,7 +1550,7 @@
         HBasicBlock* block = GetPreHeader(loop, check);
         HInstruction* cond =
             new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
-        InsertDeoptInLoop(loop, block, cond);
+        InsertDeoptInLoop(loop, block, cond, /* is_null_check */ true);
         ReplaceInstruction(check, array);
         return true;
       }
@@ -1616,11 +1616,16 @@
   }
 
   /** Inserts a deoptimization test in a loop preheader. */
-  void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
+  void InsertDeoptInLoop(HLoopInformation* loop,
+                         HBasicBlock* block,
+                         HInstruction* condition,
+                         bool is_null_check = false) {
     HInstruction* suspend = loop->GetSuspendCheck();
     block->InsertInstructionBefore(condition, block->GetLastInstruction());
+    DeoptimizationKind kind =
+        is_null_check ? DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE;
     HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
-        GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc());
+        GetGraph()->GetArena(), condition, kind, suspend->GetDexPc());
     block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
     if (suspend->HasEnvironment()) {
       deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
@@ -1633,7 +1638,7 @@
     HBasicBlock* block = bounds_check->GetBlock();
     block->InsertInstructionBefore(condition, bounds_check);
     HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
-        GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc());
+        GetGraph()->GetArena(), condition, DeoptimizationKind::kBlockBCE, bounds_check->GetDexPc());
     block->InsertInstructionBefore(deoptimize, bounds_check);
     deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
   }
@@ -1729,8 +1734,8 @@
    */
   void InsertPhiNodes() {
     // Scan all new deoptimization blocks.
-    for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) {
-      HBasicBlock* true_block = it1->second;
+    for (const auto& entry : taken_test_loop_) {
+      HBasicBlock* true_block = entry.second;
       HBasicBlock* new_preheader = true_block->GetSingleSuccessor();
       // Scan all instructions in a new deoptimization block.
       for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) {
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
index 048073e..c806dbf 100644
--- a/compiler/optimizing/cha_guard_optimization.cc
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -203,7 +203,7 @@
     // Need a new deoptimize instruction that copies the environment
     // of the suspend instruction for the loop.
     HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
-        GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc());
+        GetGraph()->GetArena(), compare, DeoptimizationKind::kCHA, suspend->GetDexPc());
     pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
     deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
         suspend->GetEnvironment(), loop_info->GetHeader());
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 5136d7d..65f3c72 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -145,7 +145,7 @@
 }
 
 size_t CodeGenerator::GetCachePointerOffset(uint32_t index) {
-  auto pointer_size = InstructionSetPointerSize(GetInstructionSet());
+  PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet());
   return static_cast<size_t>(pointer_size) * index;
 }
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ea463ee..9ef692a 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -842,7 +842,7 @@
     const uint32_t dex_pc = instruction->GetDexPc();
     auto iter = slow_path_map_.find(dex_pc);
     if (iter != slow_path_map_.end()) {
-      auto candidates = iter->second;
+      const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
       for (const auto& it : candidates) {
         InstructionType* other_instruction = it.first;
         SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
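
The hunk above spells out the reference type because `auto candidates = iter->second;` deduces a value type and silently copies the whole candidate vector on every lookup. A minimal standalone sketch of the difference, using std containers as stand-ins for the arena containers:

#include <cstdint>
#include <cstdio>
#include <map>
#include <utility>
#include <vector>

int main() {
  std::map<uint32_t, std::vector<std::pair<int, int>>> slow_path_map;
  slow_path_map[42u] = {{1, 10}, {2, 20}};

  auto iter = slow_path_map.find(42u);

  auto copied = iter->second;          // deduces std::vector<...>: a full copy of the candidates
  const auto& aliased = iter->second;  // binds a reference: no copy

  copied.clear();                      // only affects the copy, not the map entry
  std::printf("map entry keeps %zu candidates, the copy now has %zu\n",
              aliased.size(), copied.size());
  return 0;
}
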
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ebd578c..713d370 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -16,6 +16,7 @@
 
 #include "code_generator_arm.h"
 
+#include "arch/arm/asm_support_arm.h"
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
 #include "code_generator_utils.h"
@@ -25,6 +26,7 @@
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
 #include "intrinsics_arm.h"
+#include "linker/arm/relative_patcher_thumb2.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "thread.h"
@@ -60,10 +62,45 @@
 
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
+// A reference load (except an object array load) uses LDR Rt, [Rn, #offset], which can handle
+// offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-time generated thunks, we need to split
+// the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// The reserved entrypoint register for link-time generated thunks.
+const Register kBakerCcEntrypointRegister = R4;
+
 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->  // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
 
+static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instruction) {
+  DCHECK_EQ(static_cast<uint32_t>(kBakerCcEntrypointRegister),
+            linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
+  DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
+  DCHECK_EQ(kBakerCcEntrypointRegister,
+            instruction->GetLocations()->GetTemp(
+                instruction->GetLocations()->GetTempCount() - 1u).AsRegister<Register>());
+}
+
+static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) {
+  ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(codegen->GetAssembler()));
+  __ BindTrackedLabel(bne_label);
+  Label placeholder_label;
+  __ b(&placeholder_label, NE);  // Placeholder, patched at link-time.
+  __ Bind(&placeholder_label);
+}
+
+static inline bool CanEmitNarrowLdr(Register rt, Register rn, uint32_t offset) {
+  return ArmAssembler::IsLowRegister(rt) && ArmAssembler::IsLowRegister(rn) && offset < 32u;
+}
+
 static constexpr int kRegListThreshold = 4;
 
 // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
@@ -585,8 +622,13 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
+    LocationSummary* locations = instruction_->GetLocations();
+    SaveLiveRegisters(codegen, locations);
+    InvokeRuntimeCallingConvention calling_convention;
+    __ LoadImmediate(calling_convention.GetRegisterAt(0),
+                     static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
     arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
-    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
@@ -819,7 +861,7 @@
     // Baker's read barriers, we need to perform the load of
     // mirror::Object::monitor_ *before* the original reference load.
     // This load-load ordering is required by the read barrier.
-    // The fast path/slow path (for Baker's algorithm) should look like:
+    // The slow path (for Baker's algorithm) should look like:
     //
     //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
     //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
@@ -954,6 +996,18 @@
 
     __ Bind(GetEntryLabel());
 
+    // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM's:
+    //
+    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
+    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+    //   if (is_gray) {
+    //     old_ref = ref;
+    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+    //     compareAndSwapObject(obj, field_offset, old_ref, ref);
+    //   }
+
     // /* int32_t */ monitor = obj->monitor_
     uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
     __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
@@ -1955,13 +2009,10 @@
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_string_patches_(StringReferenceValueComparator(),
-                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_type_patches_(TypeReferenceValueComparator(),
-                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -2672,7 +2723,10 @@
 void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  InvokeRuntimeCallingConvention calling_convention;
+  RegisterSet caller_saves = RegisterSet::Empty();
+  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetCustomSlowPathCallerSaves(caller_saves);
   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
@@ -5281,7 +5335,18 @@
   } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
     // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
-    locations->AddTemp(Location::RequiresRegister());
+    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+        !Runtime::Current()->UseJitCompilation()) {
+      // If link-time thunks for the Baker read barrier are enabled, for AOT
+      // loads we need a temporary only if the offset is too big.
+      if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      // And we always need the reserved entrypoint register.
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+    } else {
+      locations->AddTemp(Location::RequiresRegister());
+    }
   }
 }
 
@@ -5747,11 +5812,35 @@
         Location::RequiresRegister(),
         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
-  // We need a temporary register for the read barrier marking slow
-  // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
-  // Also need for String compression feature.
-  if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
-      || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+        !Runtime::Current()->UseJitCompilation() &&
+        instruction->GetIndex()->IsConstant()) {
+      // Array loads with constant index are treated as field loads.
+      // If link-time thunks for the Baker read barrier are enabled, for AOT
+      // constant index loads we need a temporary only if the offset is too big.
+      uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
+      uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
+      offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
+      if (offset >= kReferenceLoadMinFarOffset) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      // And we always need the reserved entrypoint register.
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+    } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+               !Runtime::Current()->UseJitCompilation() &&
+               !instruction->GetIndex()->IsConstant()) {
+      // We need a non-scratch temporary for the array data pointer.
+      locations->AddTemp(Location::RequiresRegister());
+      // And we always need the reserved entrypoint register.
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+    } else {
+      locations->AddTemp(Location::RequiresRegister());
+    }
+  } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    // Also need a temporary for String compression feature.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -5863,8 +5952,20 @@
         Location temp = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call.
-        codegen_->GenerateArrayLoadWithBakerReadBarrier(
-            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+        DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+        if (index.IsConstant()) {
+          // Array load with a constant index can be treated as a field load.
+          data_offset += helpers::Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+          codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                          out_loc,
+                                                          obj,
+                                                          data_offset,
+                                                          locations->GetTemp(0),
+                                                          /* needs_null_check */ false);
+        } else {
+          codegen_->GenerateArrayLoadWithBakerReadBarrier(
+              instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+        }
       } else {
         Register out = out_loc.AsRegister<Register>();
         if (index.IsConstant()) {
@@ -6269,6 +6370,15 @@
   }
 }
 
+void LocationsBuilderARM::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
+  LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitIntermediateAddressIndex(
+    HIntermediateAddressIndex* instruction) {
+  LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
 void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
   RegisterSet caller_saves = RegisterSet::Empty();
   InvokeRuntimeCallingConvention calling_convention;
@@ -6639,20 +6749,14 @@
       UNREACHABLE();
     case HLoadClass::LoadKind::kReferrersClass:
       break;
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      break;
-    case HLoadClass::LoadKind::kBootImageAddress:
-      break;
     case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadClass::LoadKind::kBootImageAddress:
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -6701,6 +6805,13 @@
       // For non-Baker read barrier we have a temp-clobbering call.
     }
   }
+  if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+    if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+        (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+            !Runtime::Current()->UseJitCompilation())) {
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+    }
+  }
 }
 
 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -6734,13 +6845,6 @@
                               read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
-      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
-      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
-      __ LoadLiteral(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
-                                                                    cls->GetTypeIndex()));
-      break;
-    }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
@@ -6840,20 +6944,14 @@
 HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      break;
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      break;
-    case HLoadString::LoadKind::kBootImageAddress:
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadString::LoadKind::kBootImageAddress:
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -6880,6 +6978,9 @@
         // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
         // that the kPrimNot result register is the same as the first argument register.
         locations->SetCustomSlowPathCallerSaves(caller_saves);
+        if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+          locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+        }
       } else {
         // For non-Baker read barrier we have a temp-clobbering call.
       }
@@ -6896,12 +6997,6 @@
   HLoadString::LoadKind load_kind = load->GetLoadKind();
 
   switch (load_kind) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
-      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
-      __ LoadLiteral(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
-                                                                      load->GetStringIndex()));
-      return;  // No dex cache slow path.
-    }
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorARM::PcRelativePatchInfo* labels =
@@ -7050,6 +7145,9 @@
   // Note that TypeCheckSlowPathARM uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
+  }
 }
 
 void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7923,48 +8021,96 @@
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barrier are used.
-      //
-      // Note that we do not actually check the value of
-      // `GetIsGcMarking()` to decide whether to mark the loaded GC
-      // root or not.  Instead, we load into `temp` the read barrier
-      // mark entry point corresponding to register `root`. If `temp`
-      // is null, it means that `GetIsGcMarking()` is false, and vice
-      // versa.
-      //
-      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
-      //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
-      //     // Slow path.
-      //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
-      //   }
+      if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+          !Runtime::Current()->UseJitCompilation()) {
+        // Note that we do not actually check the value of `GetIsGcMarking()`
+        // to decide whether to mark the loaded GC root or not.  Instead, we
+        // load into `temp` (actually kBakerCcEntrypointRegister) the read
+        // barrier mark introspection entrypoint. If `temp` is null, it means
+        // that `GetIsGcMarking()` is false, and vice versa.
+        //
+        // We use link-time generated thunks for the slow path. That thunk
+        // checks the reference and jumps to the entrypoint if needed.
+        //
+        //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+        //     lr = &return_address;
+        //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+        //     if (temp != nullptr) {
+        //        goto gc_root_thunk<root_reg>(lr)
+        //     }
+        //   return_address:
 
-      // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
-      Location temp = Location::RegisterLocation(LR);
-      SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
-          instruction, root, /* entrypoint */ temp);
-      codegen_->AddSlowPath(slow_path);
+        CheckLastTempIsBakerCcEntrypointRegister(instruction);
+        bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+        uint32_t custom_data =
+            linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow);
+        Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
 
-      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      const int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
-      // Loading the entrypoint does not require a load acquire since it is only changed when
-      // threads are suspended or running a checkpoint.
-      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+        // entrypoint_reg =
+        //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+        DCHECK_EQ(IP, 12);
+        const int32_t entry_point_offset =
+            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+        __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
 
-      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-      __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
-      static_assert(
-          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
-          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
-          "have different sizes.");
-      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
-                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
-                    "have different sizes.");
+        Label return_address;
+        __ AdrCode(LR, &return_address);
+        __ CmpConstant(kBakerCcEntrypointRegister, 0);
+        // Currently the offset is always within range. If that changes,
+        // we shall have to split the load the same way as for fields.
+        DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+        DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+        ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+        int old_position = GetAssembler()->GetBuffer()->GetPosition();
+        __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+        EmitPlaceholderBne(codegen_, bne_label);
+        __ Bind(&return_address);
+        DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+                  narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+                         : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
+      } else {
+        // Note that we do not actually check the value of
+        // `GetIsGcMarking()` to decide whether to mark the loaded GC
+        // root or not.  Instead, we load into `temp` the read barrier
+        // mark entry point corresponding to register `root`. If `temp`
+        // is null, it means that `GetIsGcMarking()` is false, and vice
+        // versa.
+        //
+        //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+        //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+        //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+        //     // Slow path.
+        //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
+        //   }
 
-      // The entrypoint is null when the GC is not marking, this prevents one load compared to
-      // checking GetIsGcMarking.
-      __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
-      __ Bind(slow_path->GetExitLabel());
+        // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+        Location temp = Location::RegisterLocation(LR);
+        SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
+            instruction, root, /* entrypoint */ temp);
+        codegen_->AddSlowPath(slow_path);
+
+        // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+        const int32_t entry_point_offset =
+            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+        // Loading the entrypoint does not require a load acquire since it is only changed when
+        // threads are suspended or running a checkpoint.
+        __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+
+        // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+        __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+        static_assert(
+            sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+            "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+            "have different sizes.");
+        static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                      "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                      "have different sizes.");
+
+        // The entrypoint is null when the GC is not marking, this prevents one load compared to
+        // checking GetIsGcMarking.
+        __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
+        __ Bind(slow_path->GetExitLabel());
+      }
     } else {
       // GC root loaded through a slow path for read barriers other
       // than Baker's.
@@ -7982,6 +8128,16 @@
   }
 }
 
+void CodeGeneratorARM::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+  if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
+    if (!Runtime::Current()->UseJitCompilation()) {
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+    }
+  }
+}
+
 void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                              Location ref,
                                                              Register obj,
@@ -7991,6 +8147,76 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+      !Runtime::Current()->UseJitCompilation()) {
+    // Note that we do not actually check the value of `GetIsGcMarking()`
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually kBakerCcEntrypointRegister) the read
+    // barrier mark introspection entrypoint. If `temp` is null, it means
+    // that `GetIsGcMarking()` is false, and vice versa.
+    //
+    // We use link-time generated thunks for the slow path. That thunk checks
+    // the holder and jumps to the entrypoint if needed. If the holder is not
+    // gray, it creates a fake dependency and returns to the LDR instruction.
+    //
+    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+    //     lr = &gray_return_address;
+    //     if (temp != nullptr) {
+    //        goto field_thunk<holder_reg, base_reg>(lr)
+    //     }
+    //   not_gray_return_address:
+    //     // Original reference load. If the offset is too large to fit
+    //     // into LDR, we use an adjusted base register here.
+    //     HeapReference<mirror::Object> reference = *(obj+offset);
+    //   gray_return_address:
+
+    DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+    Register ref_reg = ref.AsRegister<Register>();
+    bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
+    Register base = obj;
+    if (offset >= kReferenceLoadMinFarOffset) {
+      base = temp.AsRegister<Register>();
+      DCHECK_NE(base, kBakerCcEntrypointRegister);
+      static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+      __ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u));
+      offset &= (kReferenceLoadMinFarOffset - 1u);
+      // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+      // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+      // increase the overall code size when taking the generated thunks into account.
+      DCHECK(!narrow);
+    }
+    CheckLastTempIsBakerCcEntrypointRegister(instruction);
+    uint32_t custom_data =
+        linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj, narrow);
+    Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+    // entrypoint_reg =
+    //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+    DCHECK_EQ(IP, 12);
+    const int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+    __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+
+    Label return_address;
+    __ AdrCode(LR, &return_address);
+    __ CmpConstant(kBakerCcEntrypointRegister, 0);
+    EmitPlaceholderBne(this, bne_label);
+    DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+    DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+    ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+    int old_position = GetAssembler()->GetBuffer()->GetPosition();
+    __ LoadFromOffset(kLoadWord, ref_reg, base, offset);
+    if (needs_null_check) {
+      MaybeRecordImplicitNullCheck(instruction);
+    }
+    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+    __ Bind(&return_address);
+    DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+              narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+                     : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
+    return;
+  }
+
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Location no_index = Location::NoLocation();
   ScaleFactor no_scale_factor = TIMES_1;
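
The far-offset handling in the field-load fast path above is easy to misread: only the low bits of a large field offset stay in the LDR immediate, while the 4 KiB-aligned high part is folded into an ADD on a temporary base register. A minimal, standalone sketch of that arithmetic (the helper name SplitFarOffset is illustrative, not part of the patch; it mirrors the AddConstant/LoadFromOffset pair emitted above):

    // Split a field offset so the final LDR immediate stays below 4 KiB.
    constexpr uint32_t kReferenceLoadMinFarOffset = 4u * 1024u;  // 4 KiB, as in the patch
    struct SplitOffset {
      uint32_t base_adjustment;  // folded into ADD base, obj, #base_adjustment
      uint32_t ldr_offset;       // stays in LDR ref, [base, #ldr_offset]
    };
    constexpr SplitOffset SplitFarOffset(uint32_t offset) {
      return { offset & ~(kReferenceLoadMinFarOffset - 1u),
               offset & (kReferenceLoadMinFarOffset - 1u) };
    }
    // Example: offset 0x1234 -> ADD base, obj, #0x1000; LDR ref, [base, #0x234].
    static_assert(SplitFarOffset(0x1234u).base_adjustment == 0x1000u, "");
    static_assert(SplitFarOffset(0x1234u).ldr_offset == 0x234u, "");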
@@ -8011,9 +8237,67 @@
   static_assert(
       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  ScaleFactor scale_factor = TIMES_4;
+
+  if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+      !Runtime::Current()->UseJitCompilation()) {
+    // Note that we do not actually check the value of `GetIsGcMarking()`
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually kBakerCcEntrypointRegister) the read
+    // barrier mark introspection entrypoint. If `temp` is null, it means
+    // that `GetIsGcMarking()` is false, and vice versa.
+    //
+    // We use link-time generated thunks for the slow path. That thunk checks
+    // the holder and jumps to the entrypoint if needed. If the holder is not
+    // gray, it creates a fake dependency and returns to the LDR instruction.
+    //
+    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+    //     lr = &gray_return_address;
+    //     if (temp != nullptr) {
+    //        goto array_thunk<base_reg>(lr)
+    //     }
+    //   not_gray_return_address:
+    //     // Original reference load. If the offset is too large to fit
+    //     // into LDR, we use an adjusted base register here.
+    //     HeapReference<mirror::Object> reference = data[index];
+    //   gray_return_address:
+
+    DCHECK(index.IsValid());
+    Register index_reg = index.AsRegister<Register>();
+    Register ref_reg = ref.AsRegister<Register>();
+    Register data_reg = temp.AsRegister<Register>();
+    DCHECK_NE(data_reg, kBakerCcEntrypointRegister);
+
+    CheckLastTempIsBakerCcEntrypointRegister(instruction);
+    uint32_t custom_data =
+        linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg);
+    Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+    // entrypoint_reg =
+    //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+    DCHECK_EQ(IP, 12);
+    const int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+    __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+    __ AddConstant(data_reg, obj, data_offset);
+
+    Label return_address;
+    __ AdrCode(LR, &return_address);
+    __ CmpConstant(kBakerCcEntrypointRegister, 0);
+    EmitPlaceholderBne(this, bne_label);
+    ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+    int old_position = GetAssembler()->GetBuffer()->GetPosition();
+    __ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor));
+    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+    __ Bind(&return_address);
+    DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+              BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+    return;
+  }
+
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  ScaleFactor scale_factor = TIMES_4;
   GenerateReferenceLoadWithBakerReadBarrier(
       instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
 }
@@ -8025,9 +8309,7 @@
                                                                  Location index,
                                                                  ScaleFactor scale_factor,
                                                                  Location temp,
-                                                                 bool needs_null_check,
-                                                                 bool always_update_field,
-                                                                 Register* temp2) {
+                                                                 bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
@@ -8038,6 +8320,73 @@
   // not.
   //
   // Note that we do not actually check the value of `GetIsGcMarking()`;
+  // instead, we load into `temp2` the read barrier mark entry point
+  // corresponding to register `ref`. If `temp2` is null, it means
+  // that `GetIsGcMarking()` is false, and vice versa.
+  //
+  //   temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  //   if (temp2 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+  //     // Slow path.
+  //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //     if (is_gray) {
+  //       ref = temp2(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //     }
+  //   } else {
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //   }
+
+  Register temp_reg = temp.AsRegister<Register>();
+
+  // Slow path marking the object `ref` when the GC is marking. The
+  // entrypoint will already be loaded in `temp2`.
+  Location temp2 = Location::RegisterLocation(LR);
+  SlowPathCodeARM* slow_path =
+      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
+          instruction,
+          ref,
+          obj,
+          offset,
+          index,
+          scale_factor,
+          needs_null_check,
+          temp_reg,
+          /* entrypoint */ temp2);
+  AddSlowPath(slow_path);
+
+  // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  const int32_t entry_point_offset =
+      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+  // Loading the entrypoint does not require a load acquire since it is only changed when
+  // threads are suspended or running a checkpoint.
+  __ LoadFromOffset(kLoadWord, temp2.AsRegister<Register>(), TR, entry_point_offset);
+  // The entrypoint is null when the GC is not marking, this prevents one load compared to
+  // checking GetIsGcMarking.
+  __ CompareAndBranchIfNonZero(temp2.AsRegister<Register>(), slow_path->GetEntryLabel());
+  // Fast path: the GC is not marking: just load the reference.
+  GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                Register obj,
+                                                                Location field_offset,
+                                                                Location temp,
+                                                                bool needs_null_check,
+                                                                Register temp2) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+  // whether we need to enter the slow path to update the reference
+  // field within `obj`.  Then, in the slow path, check the gray bit
+  // in the lock word of the reference's holder (`obj`) to decide
+  // whether to mark `ref` and update the field or not.
+  //
+  // Note that we do not actually check the value of `GetIsGcMarking()`;
   // instead, we load into `temp3` the read barrier mark entry point
   // corresponding to register `ref`. If `temp3` is null, it means
   // that `GetIsGcMarking()` is false, and vice versa.
@@ -8050,52 +8399,30 @@
   //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   //     bool is_gray = (rb_state == ReadBarrier::GrayState());
   //     if (is_gray) {
+  //       old_ref = ref;
   //       ref = temp3(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //       compareAndSwapObject(obj, field_offset, old_ref, ref);
   //     }
-  //   } else {
-  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   //   }
 
   Register temp_reg = temp.AsRegister<Register>();
 
-  // Slow path marking the object `ref` when the GC is marking. The
-  // entrypoint will already be loaded in `temp3`.
+  // Slow path updating the object reference at address `obj +
+  // field_offset` when the GC is marking. The entrypoint will already
+  // be loaded in `temp3`.
   Location temp3 = Location::RegisterLocation(LR);
-  SlowPathCodeARM* slow_path;
-  if (always_update_field) {
-    DCHECK(temp2 != nullptr);
-    // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only
-    // supports address of the form `obj + field_offset`, where `obj`
-    // is a register and `field_offset` is a register pair (of which
-    // only the lower half is used). Thus `offset` and `scale_factor`
-    // above are expected to be null in this code path.
-    DCHECK_EQ(offset, 0u);
-    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
-    Location field_offset = index;
-    slow_path =
-        new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
-            instruction,
-            ref,
-            obj,
-            offset,
-            /* index */ field_offset,
-            scale_factor,
-            needs_null_check,
-            temp_reg,
-            *temp2,
-            /* entrypoint */ temp3);
-  } else {
-    slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
-        instruction,
-        ref,
-        obj,
-        offset,
-        index,
-        scale_factor,
-        needs_null_check,
-        temp_reg,
-        /* entrypoint */ temp3);
-  }
+  SlowPathCodeARM* slow_path =
+      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
+          instruction,
+          ref,
+          obj,
+          /* offset */ 0u,
+          /* index */ field_offset,
+          /* scale_factor */ ScaleFactor::TIMES_1,
+          needs_null_check,
+          temp_reg,
+          temp2,
+          /* entrypoint */ temp3);
   AddSlowPath(slow_path);
 
   // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
@@ -8107,8 +8434,8 @@
   // The entrypoint is null when the GC is not marking, this prevents one load compared to
   // checking GetIsGcMarking.
   __ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel());
-  // Fast path: just load the reference.
-  GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+  // Fast path: the GC is not marking: nothing to do (the field is
+  // up-to-date, and we don't need to load the reference).
   __ Bind(slow_path->GetExitLabel());
 }
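
Both GenerateReferenceLoadWithBakerReadBarrier and UpdateReferenceFieldWithBakerReadBarrier above hinge on the same trick spelled out in their comments: the per-register mark entrypoint is loaded instead of calling GetIsGcMarking(), and its null/non-null value doubles as the "GC is marking" flag. Roughly, in pseudocode (names such as CasField and LoadLoadFence are placeholders, not actual ART entrypoints):

    // entrypoint == nullptr  <=>  the GC is not currently marking.
    if (entrypoint == nullptr) {
      ref = *src;                       // load path: plain reference load
                                        // update path: nothing to do, field is up-to-date
    } else {                            // slow path (out of line)
      uint32_t rb_state = ReadBarrierStateOf(obj->monitor_);
      LoadLoadFence();                  // keep the monitor_ load before the reference load
      ref = *src;
      if (rb_state == GrayState()) {
        old_ref = ref;
        ref = entrypoint(ref);          // ReadBarrier::Mark(ref)
        CasField(obj, field_offset, old_ref, ref);  // update path only (UnsafeCASObject)
      }
    }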
 
@@ -8379,18 +8706,9 @@
   return &patches->back();
 }
 
-Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                                             dex::StringIndex string_index) {
-  return boot_image_string_patches_.GetOrCreate(
-      StringReference(&dex_file, string_index),
-      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
-Literal* CodeGeneratorARM::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
-                                                           dex::TypeIndex type_index) {
-  return boot_image_type_patches_.GetOrCreate(
-      TypeReference(&dex_file, type_index),
-      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+Label* CodeGeneratorARM::NewBakerReadBarrierPatch(uint32_t custom_data) {
+  baker_read_barrier_patches_.emplace_back(custom_data);
+  return &baker_read_barrier_patches_.back().label;
 }
 
 Literal* CodeGeneratorARM::DeduplicateBootImageAddressLiteral(uint32_t address) {
@@ -8441,23 +8759,13 @@
   DCHECK(linker_patches->empty());
   size_t size =
       /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
-      boot_image_string_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
-      boot_image_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
+      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+      baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
-  for (const auto& entry : boot_image_string_patches_) {
-    const StringReference& target_string = entry.first;
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = literal->GetLabel()->Position();
-    linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
-                                                       target_string.dex_file,
-                                                       target_string.string_index.index_));
-  }
   if (!GetCompilerOptions().IsBootImage()) {
     DCHECK(pc_relative_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
@@ -8470,14 +8778,9 @@
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
-  for (const auto& entry : boot_image_type_patches_) {
-    const TypeReference& target_type = entry.first;
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = literal->GetLabel()->Position();
-    linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
-                                                     target_type.dex_file,
-                                                     target_type.type_index.index_));
+  for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+    linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.Position(),
+                                                                       info.custom_data));
   }
   DCHECK_EQ(size, linker_patches->size());
 }
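
For reference, the life cycle of one of the new Baker read barrier patches, pieced together from the hunks above and the header changes below, is roughly: record an unbound label plus the custom data at code-generation time, bind the label at the placeholder BNE, then convert each record into a linker patch here. A condensed sketch with simplified stand-in types (not the ART declarations):

    struct Label { uint32_t position; };                // bound at the placeholder BNE
    struct BakerReadBarrierPatchInfo {
      explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) {}
      Label label;
      uint32_t custom_data;                              // register/kind encoding for the thunk
    };
    // Codegen:   Label* lbl = NewBakerReadBarrierPatch(custom_data);
    //            ... CMP entrypoint, #0 ...; EmitPlaceholderBne(codegen, lbl);
    // Emission:  LinkerPatch::BakerReadBarrierBranchPatch(lbl->position, custom_data);
    // The linker later rewrites that BNE to branch to the matching generated thunk.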
@@ -8488,13 +8791,6 @@
       [this, value]() { return __ NewLiteral<uint32_t>(value); });
 }
 
-Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method,
-                                                    MethodToLiteralMap* map) {
-  return map->GetOrCreate(
-      target_method,
-      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
 void LocationsBuilderARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
@@ -8710,14 +9006,20 @@
 
 void CodeGeneratorARM::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const auto& entry : jit_string_patches_) {
-    const auto& it = jit_string_roots_.find(entry.first);
+    const StringReference& string_reference = entry.first;
+    Literal* table_entry_literal = entry.second;
+    const auto it = jit_string_roots_.find(string_reference);
     DCHECK(it != jit_string_roots_.end());
-    PatchJitRootUse(code, roots_data, entry.second, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
   }
   for (const auto& entry : jit_class_patches_) {
-    const auto& it = jit_class_roots_.find(entry.first);
+    const TypeReference& type_reference = entry.first;
+    Literal* table_entry_literal = entry.second;
+    const auto it = jit_class_roots_.find(type_reference);
     DCHECK(it != jit_class_roots_.end());
-    PatchJitRootUse(code, roots_data, entry.second, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
   }
 }
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 86f2f21..47e6be5 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -488,9 +488,11 @@
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
-  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                             dex::StringIndex string_index);
-  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
+
+  // Add a new baker read barrier patch and return the label to be bound
+  // before the BNE instruction.
+  Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
   Literal* DeduplicateJitStringLiteral(const DexFile& dex_file,
                                        dex::StringIndex string_index,
@@ -503,6 +505,10 @@
 
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
+  // Maybe add the reserved entrypoint register as a temporary for field loads. This temp
+  // is added only for AOT compilation if link-time generated thunks for fields are enabled.
+  void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -526,11 +532,6 @@
   // Load the object reference located at the address
   // `obj + offset + (index << scale_factor)`, held by object `obj`, into
   // `ref`, and mark it if needed.
-  //
-  // If `always_update_field` is true, the value of the reference is
-  // atomically updated in the holder (`obj`).  This operation
-  // requires an extra temporary register, which must be provided as a
-  // non-null pointer (`temp2`).
   void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                  Location ref,
                                                  Register obj,
@@ -538,9 +539,27 @@
                                                  Location index,
                                                  ScaleFactor scale_factor,
                                                  Location temp,
-                                                 bool needs_null_check,
-                                                 bool always_update_field = false,
-                                                 Register* temp2 = nullptr);
+                                                 bool needs_null_check);
+
+  // Generate code checking whether the reference field at the
+  // address `obj + field_offset`, held by object `obj`, needs to be
+  // marked, and if so, marking it and updating the field within `obj`
+  // with the marked value.
+  //
+  // This routine is used for the implementation of the
+  // UnsafeCASObject intrinsic with Baker read barriers.
+  //
+  // This method has a structure similar to
+  // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+  // `ref` is only used as a temporary here, and thus its value should not
+  // be used afterwards.
+  void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+                                                Location ref,
+                                                Register obj,
+                                                Location field_offset,
+                                                Location temp,
+                                                bool needs_null_check,
+                                                Register temp2);
 
   // Generate a heap reference load (with no read barrier).
   void GenerateRawReferenceLoad(HInstruction* instruction,
@@ -608,7 +627,6 @@
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
-  using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
   using StringToLiteralMap = ArenaSafeMap<StringReference,
                                           Literal*,
                                           StringReferenceValueComparator>;
@@ -616,8 +634,14 @@
                                         Literal*,
                                         TypeReferenceValueComparator>;
 
+  struct BakerReadBarrierPatchInfo {
+    explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+    Label label;
+    uint32_t custom_data;
+  };
+
   Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
-  Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
                                           uint32_t offset_or_index,
                                           ArenaDeque<PcRelativePatchInfo>* patches);
@@ -638,16 +662,14 @@
   Uint32ToLiteralMap uint32_literals_;
   // PC-relative patch info for each HArmDexCacheArraysBase.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
-  StringToLiteralMap boot_image_string_patches_;
   // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
-  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
-  TypeToLiteralMap boot_image_type_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // Baker read barrier patch info.
+  ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 78b627a..7ff100d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -587,8 +587,13 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
+    LocationSummary* locations = instruction_->GetLocations();
+    SaveLiveRegisters(codegen, locations);
+    InvokeRuntimeCallingConvention calling_convention;
+    __ Mov(calling_convention.GetRegisterAt(0),
+           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
     arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
-    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
@@ -851,7 +856,7 @@
     // Baker's read barriers, we need to perform the load of
     // mirror::Object::monitor_ *before* the original reference load.
     // This load-load ordering is required by the read barrier.
-    // The fast path/slow path (for Baker's algorithm) should look like:
+    // The slow path (for Baker's algorithm) should look like:
     //
     //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
     //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
@@ -1002,6 +1007,18 @@
 
     __ Bind(GetEntryLabel());
 
+    // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's:
+    //
+    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
+    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+    //   if (is_gray) {
+    //     old_ref = ref;
+    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+    //     compareAndSwapObject(obj, field_offset, old_ref, ref);
+    //   }
+
     // /* int32_t */ monitor = obj->monitor_
     uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
     __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
@@ -1432,11 +1449,7 @@
       uint64_literals_(std::less<uint64_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_string_patches_(StringReferenceValueComparator(),
-                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_type_patches_(TypeReferenceValueComparator(),
-                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -1498,7 +1511,7 @@
   if (kind == Location::kRegister) {
     scratch = LocationFrom(vixl_temps_.AcquireX());
   } else {
-    DCHECK(kind == Location::kFpuRegister);
+    DCHECK_EQ(kind, Location::kFpuRegister);
     scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
         ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
         : vixl_temps_.AcquireD());
@@ -1726,9 +1739,9 @@
          (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
 }
 
-// Allocate a scratch register from the VIXL pool, querying first into
-// the floating-point register pool, and then the the core register
-// pool.  This is essentially a reimplementation of
+// Allocate a scratch register from the VIXL pool, querying first
+// the floating-point register pool, and then the core register
+// pool. This is essentially a reimplementation of
 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
 // using a different allocation strategy.
 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
@@ -1876,7 +1889,7 @@
       // ask for a scratch register of any type (core or FP).
       //
       // Also, we start by asking for a FP scratch register first, as the
-      // demand of scratch core registers is higher.  This is why we
+      // demand of scratch core registers is higher. This is why we
       // use AcquireFPOrCoreCPURegisterOfSize instead of
       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
       // allocates core scratch registers first.
@@ -2644,6 +2657,38 @@
          Operand(InputOperandAt(instruction, 1)));
 }
 
+void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+  HIntConstant* shift = instruction->GetShift()->AsIntConstant();
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  // For the byte case we don't need to shift the index variable, so we can encode the data offset
+  // into the ADD instruction. For other cases we prefer the data_offset to be in a register; that
+  // will hoist the data offset constant generation out of the loop and reduce the critical path
+  // length in the loop.
+  locations->SetInAt(1, shift->GetValue() == 0
+                        ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
+                        : Location::RequiresRegister());
+  locations->SetInAt(2, Location::ConstantLocation(shift));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
+    HIntermediateAddressIndex* instruction) {
+  Register index_reg = InputRegisterAt(instruction, 0);
+  uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2));
+  uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
+
+  if (shift == 0) {
+    __ Add(OutputRegister(instruction), index_reg, offset);
+  } else {
+    Register offset_reg = InputRegisterAt(instruction, 1);
+    __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
+  }
+}
+
 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
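
The two code shapes emitted by VisitIntermediateAddressIndex above compute the same address component; they differ only in whether the array data offset is folded into the ADD as an immediate (byte arrays, shift == 0) or kept in a register so its materialization can be hoisted out of the loop. A small sketch of the computed value (the concrete offsets are illustrative, not ART's actual array data offsets):

    // out = data_offset + (index << shift); for shift == 0 this degenerates to index + data_offset.
    constexpr uint32_t IntermediateIndexValue(uint32_t index, uint32_t data_offset, uint32_t shift) {
      return (shift == 0u) ? index + data_offset
                           : data_offset + (index << shift);
    }
    // E.g. with an assumed data_offset of 16: index 5, shift 0 -> 21; index 5, shift 2 -> 36.
    static_assert(IntermediateIndexValue(5u, 16u, 0u) == 21u, "");
    static_assert(IntermediateIndexValue(5u, 16u, 2u) == 36u, "");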
@@ -3693,7 +3738,10 @@
 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  InvokeRuntimeCallingConvention calling_convention;
+  RegisterSet caller_saves = RegisterSet::Empty();
+  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+  locations->SetCustomSlowPathCallerSaves(caller_saves);
   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
@@ -4633,20 +4681,6 @@
   return label;
 }
 
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
-    const DexFile& dex_file, dex::StringIndex string_index) {
-  return boot_image_string_patches_.GetOrCreate(
-      StringReference(&dex_file, string_index),
-      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
-}
-
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
-    const DexFile& dex_file, dex::TypeIndex type_index) {
-  return boot_image_type_patches_.GetOrCreate(
-      TypeReference(&dex_file, type_index),
-      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
-}
-
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
     uint64_t address) {
   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
@@ -4713,9 +4747,7 @@
   DCHECK(linker_patches->empty());
   size_t size =
       pc_relative_dex_cache_patches_.size() +
-      boot_image_string_patches_.size() +
       pc_relative_string_patches_.size() +
-      boot_image_type_patches_.size() +
       pc_relative_type_patches_.size() +
       type_bss_entry_patches_.size() +
       baker_read_barrier_patches_.size();
@@ -4726,13 +4758,6 @@
                                                               info.pc_insn_label->GetLocation(),
                                                               info.offset_or_index));
   }
-  for (const auto& entry : boot_image_string_patches_) {
-    const StringReference& target_string = entry.first;
-    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(),
-                                                       target_string.dex_file,
-                                                       target_string.string_index.index_));
-  }
   if (!GetCompilerOptions().IsBootImage()) {
     DCHECK(pc_relative_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
@@ -4745,13 +4770,6 @@
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
-  for (const auto& entry : boot_image_type_patches_) {
-    const TypeReference& target_type = entry.first;
-    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::TypePatch(literal->GetOffset(),
-                                                     target_type.dex_file,
-                                                     target_type.type_index.index_));
-  }
   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
     linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
                                                                        info.custom_data));
@@ -4772,14 +4790,6 @@
       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
 }
 
-vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
-    MethodReference target_method,
-    MethodToLiteralMap* map) {
-  return map->GetOrCreate(
-      target_method,
-      [this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); });
-}
-
 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   // Explicit clinit checks triggered by static invokes must have been pruned by
   // art::PrepareForRegisterAllocation.
@@ -4819,20 +4829,14 @@
       UNREACHABLE();
     case HLoadClass::LoadKind::kReferrersClass:
       break;
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      break;
-    case HLoadClass::LoadKind::kBootImageAddress:
-      break;
     case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadClass::LoadKind::kBootImageAddress:
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -4915,11 +4919,6 @@
                               read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
-      __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
-                                                            cls->GetTypeIndex()));
-      break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       // Add ADRP with its PC-relative type patch.
@@ -5021,20 +5020,14 @@
 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      break;
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      break;
-    case HLoadString::LoadKind::kBootImageAddress:
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadString::LoadKind::kBootImageAddress:
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -5074,10 +5067,6 @@
   Location out_loc = load->GetLocations()->Out();
 
   switch (load->GetLoadKind()) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
-                                                              load->GetStringIndex()));
-      return;  // No dex cache slow path.
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       // Add ADRP with its PC-relative String patch.
       const DexFile& dex_file = load->GetDexFile();
@@ -6082,7 +6071,7 @@
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
     //     // into LDR, we use an adjusted base register here.
-    //     GcRoot<mirror::Object> reference = *(obj+offset);
+    //     HeapReference<mirror::Object> reference = *(obj+offset);
     //   gray_return_address:
 
     DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6177,7 +6166,7 @@
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
     //     // into LDR, we use an adjusted base register here.
-    //     GcRoot<mirror::Object> reference = data[index];
+    //     HeapReference<mirror::Object> reference = data[index];
     //   gray_return_address:
 
     DCHECK(index.IsValid());
@@ -6239,8 +6228,7 @@
                                                                    size_t scale_factor,
                                                                    Register temp,
                                                                    bool needs_null_check,
-                                                                   bool use_load_acquire,
-                                                                   bool always_update_field) {
+                                                                   bool use_load_acquire) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
   // If we are emitting an array load, we should not be using a
@@ -6277,41 +6265,18 @@
   // entrypoint will already be loaded in `temp2`.
   Register temp2 = lr;
   Location temp2_loc = LocationFrom(temp2);
-  SlowPathCodeARM64* slow_path;
-  if (always_update_field) {
-    // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
-    // only supports address of the form `obj + field_offset`, where
-    // `obj` is a register and `field_offset` is a register. Thus
-    // `offset` and `scale_factor` above are expected to be null in
-    // this code path.
-    DCHECK_EQ(offset, 0u);
-    DCHECK_EQ(scale_factor, 0u);  /* "times 1" */
-    Location field_offset = index;
-    slow_path =
-        new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
-            instruction,
-            ref,
-            obj,
-            offset,
-            /* index */ field_offset,
-            scale_factor,
-            needs_null_check,
-            use_load_acquire,
-            temp,
-            /* entrypoint */ temp2_loc);
-  } else {
-    slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
-        instruction,
-        ref,
-        obj,
-        offset,
-        index,
-        scale_factor,
-        needs_null_check,
-        use_load_acquire,
-        temp,
-        /* entrypoint */ temp2_loc);
-  }
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
+          instruction,
+          ref,
+          obj,
+          offset,
+          index,
+          scale_factor,
+          needs_null_check,
+          use_load_acquire,
+          temp,
+          /* entrypoint */ temp2_loc);
   AddSlowPath(slow_path);
 
   // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
@@ -6323,12 +6288,83 @@
   // The entrypoint is null when the GC is not marking, this prevents one load compared to
   // checking GetIsGcMarking.
   __ Cbnz(temp2, slow_path->GetEntryLabel());
-  // Fast path: just load the reference.
+  // Fast path: the GC is not marking: just load the reference.
   GenerateRawReferenceLoad(
       instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
   __ Bind(slow_path->GetExitLabel());
 }
 
+void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+                                                                  Location ref,
+                                                                  Register obj,
+                                                                  Location field_offset,
+                                                                  Register temp,
+                                                                  bool needs_null_check,
+                                                                  bool use_load_acquire) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+  // If we are emitting an array load, we should not be using a
+  // Load Acquire instruction.  In other words:
+  // `instruction->IsArrayGet()` => `!use_load_acquire`.
+  DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
+
+  // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+  // whether we need to enter the slow path to update the reference
+  // field within `obj`.  Then, in the slow path, check the gray bit
+  // in the lock word of the reference's holder (`obj`) to decide
+  // whether to mark `ref` and update the field or not.
+  //
+  // Note that we do not actually check the value of `GetIsGcMarking()`;
+  // instead, we load into `temp2` the read barrier mark entry point
+  // corresponding to register `ref`. If `temp2` is null, it means
+  // that `GetIsGcMarking()` is false, and vice versa.
+  //
+  //   temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  //   if (temp2 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+  //     // Slow path.
+  //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //     HeapReference<mirror::Object> ref = *(obj + field_offset);  // Reference load.
+  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //     if (is_gray) {
+  //       old_ref = ref;
+  //       ref = temp2(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //       compareAndSwapObject(obj, field_offset, old_ref, ref);
+  //     }
+  //   }
+
+  // Slow path updating the object reference at address `obj + field_offset`
+  // when the GC is marking. The entrypoint will already be loaded in `temp2`.
+  Register temp2 = lr;
+  Location temp2_loc = LocationFrom(temp2);
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
+          instruction,
+          ref,
+          obj,
+          /* offset */ 0u,
+          /* index */ field_offset,
+          /* scale_factor */ 0u /* "times 1" */,
+          needs_null_check,
+          use_load_acquire,
+          temp,
+          /* entrypoint */ temp2_loc);
+  AddSlowPath(slow_path);
+
+  // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  const int32_t entry_point_offset =
+      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
+  // Loading the entrypoint does not require a load acquire since it is only changed when
+  // threads are suspended or running a checkpoint.
+  __ Ldr(temp2, MemOperand(tr, entry_point_offset));
+  // The entrypoint is null when the GC is not marking, this prevents one load compared to
+  // checking GetIsGcMarking.
+  __ Cbnz(temp2, slow_path->GetEntryLabel());
+  // Fast path: the GC is not marking: nothing to do (the field is
+  // up-to-date, and we don't need to load the reference).
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
                                                   Location ref,
                                                   Register obj,
@@ -6504,14 +6540,20 @@
 
 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const auto& entry : jit_string_patches_) {
-    const auto& it = jit_string_roots_.find(entry.first);
+    const StringReference& string_reference = entry.first;
+    vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
+    const auto it = jit_string_roots_.find(string_reference);
     DCHECK(it != jit_string_roots_.end());
-    PatchJitRootUse(code, roots_data, entry.second, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
   }
   for (const auto& entry : jit_class_patches_) {
-    const auto& it = jit_class_roots_.find(entry.first);
+    const TypeReference& type_reference = entry.first;
+    vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
+    const auto it = jit_class_roots_.find(type_reference);
     DCHECK(it != jit_class_roots_.end());
-    PatchJitRootUse(code, roots_data, entry.second, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
   }
 }
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 3ded3e4..56444dc 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -586,11 +586,6 @@
   // before the CBNZ instruction.
   vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
 
-  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral(
-      const DexFile& dex_file,
-      dex::StringIndex string_index);
-  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
-                                                                    dex::TypeIndex type_index);
   vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
   vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file,
                                                                 dex::StringIndex string_index,
@@ -635,9 +630,6 @@
   // Load the object reference located at the address
   // `obj + offset + (index << scale_factor)`, held by object `obj`, into
   // `ref`, and mark it if needed.
-  //
-  // If `always_update_field` is true, the value of the reference is
-  // atomically updated in the holder (`obj`).
   void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                  Location ref,
                                                  vixl::aarch64::Register obj,
@@ -646,8 +638,27 @@
                                                  size_t scale_factor,
                                                  vixl::aarch64::Register temp,
                                                  bool needs_null_check,
-                                                 bool use_load_acquire,
-                                                 bool always_update_field = false);
+                                                 bool use_load_acquire);
+
+  // Generate code checking whether the reference field at the
+  // address `obj + field_offset`, held by object `obj`, needs to be
+  // marked, and if so, marking it and updating the field within `obj`
+  // with the marked value.
+  //
+  // This routine is used for the implementation of the
+  // UnsafeCASObject intrinsic with Baker read barriers.
+  //
+  // This method has a structure similar to
+  // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+  // `ref` is only used as a temporary here, and thus its value should not
+  // be used afterwards.
+  void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+                                                Location ref,
+                                                vixl::aarch64::Register obj,
+                                                Location field_offset,
+                                                vixl::aarch64::Register temp,
+                                                bool needs_null_check,
+                                                bool use_load_acquire);
 
   // Generate a heap reference load (with no read barrier).
   void GenerateRawReferenceLoad(HInstruction* instruction,
@@ -715,9 +726,6 @@
  private:
   using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
   using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
-  using MethodToLiteralMap = ArenaSafeMap<MethodReference,
-                                          vixl::aarch64::Literal<uint64_t>*,
-                                          MethodReferenceComparator>;
   using StringToLiteralMap = ArenaSafeMap<StringReference,
                                           vixl::aarch64::Literal<uint32_t>*,
                                           StringReferenceValueComparator>;
@@ -728,8 +736,6 @@
   vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value,
                                                              Uint32ToLiteralMap* map);
   vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
-  vixl::aarch64::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method,
-                                                             MethodToLiteralMap* map);
 
   // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
   // and boot image strings/types. The only difference is the interpretation of the
@@ -781,12 +787,8 @@
   Uint64ToLiteralMap uint64_literals_;
   // PC-relative DexCache access info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
-  StringToLiteralMap boot_image_string_patches_;
   // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
-  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
-  TypeToLiteralMap boot_image_type_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index d65b327..015e6dd 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -16,6 +16,7 @@
 
 #include "code_generator_arm_vixl.h"
 
+#include "arch/arm/asm_support_arm.h"
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
 #include "code_generator_utils.h"
@@ -24,6 +25,7 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics_arm_vixl.h"
+#include "linker/arm/relative_patcher_thumb2.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "thread.h"
@@ -77,6 +79,20 @@
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
+// A reference load (except an object array load) uses LDR Rt, [Rn, #offset], which can handle
+// offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-time generated thunks we need to split
+// the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// The reserved entrypoint register for link-time generated thunks.
+const vixl32::Register kBakerCcEntrypointRegister = r4;
+
 #ifdef __
 #error "ARM Codegen VIXL macro-assembler macro already defined."
 #endif
@@ -88,6 +104,60 @@
 // Marker that code is yet to be, and must, be implemented.
 #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
 
+static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps,
+                                                         HInstruction* instruction) {
+  DCHECK(temps->IsAvailable(ip));
+  temps->Exclude(ip);
+  DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister));
+  DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(),
+            linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
+  DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
+  DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp(
+      instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister));
+}
+
+static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) {
+  ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes);
+  __ bind(patch_label);
+  vixl32::Label placeholder_label;
+  __ b(ne, EncodingSize(Wide), &placeholder_label);  // Placeholder, patched at link-time.
+  __ bind(&placeholder_label);
+}
+
+static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
+  return rt.IsLow() && rn.IsLow() && offset < 32u;
+}
+
+class EmitAdrCode {
+ public:
+  EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
+      : assembler_(assembler), rd_(rd), label_(label) {
+    ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes);
+    adr_location_ = assembler->GetCursorOffset();
+    assembler->adr(EncodingSize(Wide), rd, label);
+  }
+
+  ~EmitAdrCode() {
+    DCHECK(label_->IsBound());
+    // The ADR emitted by the assembler does not set the Thumb mode bit we need.
+    // TODO: Maybe extend VIXL to allow ADR for return address?
+    uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
+    // Expecting ADR encoding T3 with `(offset & 1) == 0`.
+    DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u);           // Check bits 24-31, except 26.
+    DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu);           // Check bits 16-23.
+    DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode());   // Check bits 8-11 and 15.
+    DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u);           // Check bit 0, i.e. the `offset & 1`.
+    // Add the Thumb mode bit.
+    raw_adr[2] |= 0x01u;
+  }
+
+ private:
+  ArmVIXLMacroAssembler* const assembler_;
+  vixl32::Register rd_;
+  vixl32::Label* const label_;
+  int32_t adr_location_;
+};
+
 // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
 // for each live D registers they treat two corresponding S registers as live ones.
 //
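
EmitAdrCode above exists because the VIXL-emitted ADR produces an even address, but the address placed in LR must carry the Thumb state bit for the thunk's return (e.g. via BX LR) to land back in Thumb code; the destructor therefore ORs bit 0 into the encoded immediate. The underlying rule, as a tiny sketch (the helper name is illustrative):

    // An interworking branch (BX/BLX to a register) resumes in Thumb state only
    // when bit 0 of the target address is set.
    constexpr uintptr_t ToThumbReturnAddress(uintptr_t code_address) {
      return code_address | 1u;  // what the patched ADR effectively yields in LR
    }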
@@ -608,8 +678,14 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
     __ Bind(GetEntryLabel());
+    LocationSummary* locations = instruction_->GetLocations();
+    SaveLiveRegisters(codegen, locations);
+    InvokeRuntimeCallingConventionARMVIXL calling_convention;
+    __ Mov(calling_convention.GetRegisterAt(0),
+           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
+
     arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
-    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARMVIXL"; }
@@ -845,7 +921,7 @@
     // Baker's read barriers, we need to perform the load of
     // mirror::Object::monitor_ *before* the original reference load.
     // This load-load ordering is required by the read barrier.
-    // The fast path/slow path (for Baker's algorithm) should look like:
+    // The slow path (for Baker's algorithm) should look like:
     //
     //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
     //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
@@ -987,6 +1063,18 @@
 
     __ Bind(GetEntryLabel());
 
+    // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's:
+    //
+    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
+    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+    //   if (is_gray) {
+    //     old_ref = ref;
+    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+    //     compareAndSwapObject(obj, field_offset, old_ref, ref);
+    //   }
+
     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
 
     // /* int32_t */ monitor = obj->monitor_
@@ -2005,13 +2093,10 @@
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_string_patches_(StringReferenceValueComparator(),
-                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_type_patches_(TypeReferenceValueComparator(),
-                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -2704,7 +2789,10 @@
 void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  RegisterSet caller_saves = RegisterSet::Empty();
+  caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetCustomSlowPathCallerSaves(caller_saves);
   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
@@ -5289,7 +5377,18 @@
   } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
     // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier.
-    locations->AddTemp(Location::RequiresRegister());
+    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+        !Runtime::Current()->UseJitCompilation()) {
+      // If link-time thunks for the Baker read barrier are enabled, for AOT
+      // loads we need a temporary only if the offset is too big.
+      if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      // And we always need the reserved entrypoint register.
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+    } else {
+      locations->AddTemp(Location::RequiresRegister());
+    }
   }
 }
 
@@ -5756,11 +5855,35 @@
         Location::RequiresRegister(),
         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
-  // We need a temporary register for the read barrier marking slow
-  // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
-  // Also need for String compression feature.
-  if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
-      || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
+    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+        !Runtime::Current()->UseJitCompilation() &&
+        instruction->GetIndex()->IsConstant()) {
+      // Array loads with constant index are treated as field loads.
+      // If link-time thunks for the Baker read barrier are enabled, for AOT
+      // constant index loads we need a temporary only if the offset is too big.
+      uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
+      uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
+      offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
+      if (offset >= kReferenceLoadMinFarOffset) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      // And we always need the reserved entrypoint register.
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+    } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+               !Runtime::Current()->UseJitCompilation() &&
+               !instruction->GetIndex()->IsConstant()) {
+      // We need a non-scratch temporary for the array data pointer.
+      locations->AddTemp(Location::RequiresRegister());
+      // And we always need the reserved entrypoint register.
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+    } else {
+      locations->AddTemp(Location::RequiresRegister());
+    }
+  } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    // We also need a temporary for the String compression feature.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -5871,8 +5994,20 @@
         Location temp = locations->GetTemp(0);
         // Note that no implicit null check is performed here; the calls below
         // are made with `needs_null_check` set to false.
-        codegen_->GenerateArrayLoadWithBakerReadBarrier(
-            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+        DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+        if (index.IsConstant()) {
+          // Array load with a constant index can be treated as a field load.
+          data_offset += Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+          codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                          out_loc,
+                                                          obj,
+                                                          data_offset,
+                                                          locations->GetTemp(0),
+                                                          /* needs_null_check */ false);
+        } else {
+          codegen_->GenerateArrayLoadWithBakerReadBarrier(
+              instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+        }
       } else {
         vixl32::Register out = OutputRegister(instruction);
         if (index.IsConstant()) {
@@ -6308,6 +6443,16 @@
   }
 }
 
+void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
+    HIntermediateAddressIndex* instruction) {
+  LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
+    HIntermediateAddressIndex* instruction) {
+  LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
 void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
   RegisterSet caller_saves = RegisterSet::Empty();
   InvokeRuntimeCallingConventionARMVIXL calling_convention;
@@ -6700,20 +6845,14 @@
       UNREACHABLE();
     case HLoadClass::LoadKind::kReferrersClass:
       break;
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      break;
-    case HLoadClass::LoadKind::kBootImageAddress:
-      break;
     case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadClass::LoadKind::kBootImageAddress:
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -6762,6 +6901,13 @@
       // For non-Baker read barrier we have a temp-clobbering call.
     }
   }
+  if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+    if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+        (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+            !Runtime::Current()->UseJitCompilation())) {
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+    }
+  }
 }
 
 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -6795,13 +6941,6 @@
                               read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
-      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
-      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
-      __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
-                                                            cls->GetTypeIndex()));
-      break;
-    }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
@@ -6898,20 +7037,14 @@
 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      break;
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      break;
-    case HLoadString::LoadKind::kBootImageAddress:
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadString::LoadKind::kBootImageAddress:
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -6938,6 +7071,9 @@
         // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
         // that the kPrimNot result register is the same as the first argument register.
         locations->SetCustomSlowPathCallerSaves(caller_saves);
+        if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+          locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+        }
       } else {
         // For non-Baker read barrier we have a temp-clobbering call.
       }
@@ -6954,11 +7090,6 @@
   HLoadString::LoadKind load_kind = load->GetLoadKind();
 
   switch (load_kind) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
-      __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
-                                                              load->GetStringIndex()));
-      return;  // No dex cache slow path.
-    }
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
@@ -7100,6 +7231,9 @@
   // Note that TypeCheckSlowPathARM uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
+  }
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7998,48 +8132,98 @@
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barrier are used.
-      //
-      // Note that we do not actually check the value of
-      // `GetIsGcMarking()` to decide whether to mark the loaded GC
-      // root or not.  Instead, we load into `temp` the read barrier
-      // mark entry point corresponding to register `root`. If `temp`
-      // is null, it means that `GetIsGcMarking()` is false, and vice
-      // versa.
-      //
-      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
-      //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
-      //     // Slow path.
-      //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
-      //   }
+      if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+          !Runtime::Current()->UseJitCompilation()) {
+        // Note that we do not actually check the value of `GetIsGcMarking()`
+        // to decide whether to mark the loaded GC root or not.  Instead, we
+        // load into `temp` (actually kBakerCcEntrypointRegister) the read
+        // barrier mark introspection entrypoint. If `temp` is null, it means
+        // that `GetIsGcMarking()` is false, and vice versa.
+        //
+        // We use link-time generated thunks for the slow path. That thunk
+        // checks the reference and jumps to the entrypoint if needed.
+        //
+        //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+        //     lr = &return_address;
+        //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+        //     if (temp != nullptr) {
+        //        goto gc_root_thunk<root_reg>(lr)
+        //     }
+        //   return_address:
 
-      // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
-      Location temp = LocationFrom(lr);
-      SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
-              instruction, root, /* entrypoint */ temp);
-      codegen_->AddSlowPath(slow_path);
+        UseScratchRegisterScope temps(GetVIXLAssembler());
+        ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+        bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
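+        // The custom data encodes the root register and the LDR width so that the
+        // linker can generate the matching introspection thunk for this load.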
+        uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(
+            root_reg.GetCode(), narrow);
+        vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
 
-      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-      const int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
-      // Loading the entrypoint does not require a load acquire since it is only changed when
-      // threads are suspended or running a checkpoint.
-      GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+        // entrypoint_reg =
+        //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+        DCHECK_EQ(ip.GetCode(), 12u);
+        const int32_t entry_point_offset =
+            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+        __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
 
-      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-      GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
-      static_assert(
-          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
-          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
-          "have different sizes.");
-      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
-                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
-                    "have different sizes.");
+        vixl::EmissionCheckScope guard(GetVIXLAssembler(),
+                                       4 * vixl32::kMaxInstructionSizeInBytes);
+        vixl32::Label return_address;
+        EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+        __ cmp(kBakerCcEntrypointRegister, Operand(0));
+        // Currently the offset is always within range. If that changes,
+        // we shall have to split the load the same way as for fields.
+        DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+        ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+        __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
+        EmitPlaceholderBne(codegen_, bne_label);
+        __ Bind(&return_address);
+        DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+                  narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+                         : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
+      } else {
+        // Note that we do not actually check the value of
+        // `GetIsGcMarking()` to decide whether to mark the loaded GC
+        // root or not.  Instead, we load into `temp` the read barrier
+        // mark entry point corresponding to register `root`. If `temp`
+        // is null, it means that `GetIsGcMarking()` is false, and vice
+        // versa.
+        //
+        //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+        //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+        //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+        //     // Slow path.
+        //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
+        //   }
 
-      // The entrypoint is null when the GC is not marking, this prevents one load compared to
-      // checking GetIsGcMarking.
-      __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
-      __ Bind(slow_path->GetExitLabel());
+        // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+        Location temp = LocationFrom(lr);
+        SlowPathCodeARMVIXL* slow_path =
+            new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
+                instruction, root, /* entrypoint */ temp);
+        codegen_->AddSlowPath(slow_path);
+
+        // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+        const int32_t entry_point_offset =
+            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+        // Loading the entrypoint does not require a load acquire since it is only changed when
+        // threads are suspended or running a checkpoint.
+        GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+
+        // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+        GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
+        static_assert(
+            sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+            "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+            "have different sizes.");
+        static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                      "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                      "have different sizes.");
+
+        // The entrypoint is null when the GC is not marking, this prevents one load compared to
+        // checking GetIsGcMarking.
+        __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
+        __ Bind(slow_path->GetExitLabel());
+      }
     } else {
       // GC root loaded through a slow path for read barriers other
       // than Baker's.
@@ -8057,6 +8241,16 @@
   }
 }
 
+void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+  if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
+    if (!Runtime::Current()->UseJitCompilation()) {
+      locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+    }
+  }
+}
+
 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                  Location ref,
                                                                  vixl32::Register obj,
@@ -8066,6 +8260,85 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+      !Runtime::Current()->UseJitCompilation()) {
+    // Note that we do not actually check the value of `GetIsGcMarking()`
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually kBakerCcEntrypointRegister) the read
+    // barrier mark introspection entrypoint. If `temp` is null, it means
+    // that `GetIsGcMarking()` is false, and vice versa.
+    //
+    // We use link-time generated thunks for the slow path. That thunk checks
+    // the holder and jumps to the entrypoint if needed. If the holder is not
+    // gray, it creates a fake dependency and returns to the LDR instruction.
+    //
+    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+    //     lr = &gray_return_address;
+    //     if (temp != nullptr) {
+    //        goto field_thunk<holder_reg, base_reg>(lr)
+    //     }
+    //   not_gray_return_address:
+    //     // Original reference load. If the offset is too large to fit
+    //     // into LDR, we use an adjusted base register here.
+    //     HeapReference<mirror::Object> reference = *(obj+offset);
+    //   gray_return_address:
+
+    DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+    vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+    bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
+    vixl32::Register base = obj;
+    if (offset >= kReferenceLoadMinFarOffset) {
+      base = RegisterFrom(temp);
+      DCHECK(!base.Is(kBakerCcEntrypointRegister));
+      static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+      __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+      offset &= (kReferenceLoadMinFarOffset - 1u);
+      // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+      // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+      // increase the overall code size when taking the generated thunks into account.
+      DCHECK(!narrow);
+    }
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+    uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+        base.GetCode(), obj.GetCode(), narrow);
+    vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+    // entrypoint_reg =
+    //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+    DCHECK_EQ(ip.GetCode(), 12u);
+    const int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+    __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+
+    vixl::EmissionCheckScope guard(
+        GetVIXLAssembler(),
+        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+    vixl32::Label return_address;
+    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+    __ cmp(kBakerCcEntrypointRegister, Operand(0));
+    EmitPlaceholderBne(this, bne_label);
+    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+    __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
+    if (needs_null_check) {
+      MaybeRecordImplicitNullCheck(instruction);
+    }
+    // Note: We need a specific width for the unpoisoning NEG.
+    if (kPoisonHeapReferences) {
+      if (narrow) {
+        // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+        __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+      } else {
+        __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+      }
+    }
+    __ Bind(&return_address);
+    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+              narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+                     : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
+    return;
+  }
+
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Location no_index = Location::NoLocation();
   ScaleFactor no_scale_factor = TIMES_1;
@@ -8086,9 +8359,73 @@
   static_assert(
       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  ScaleFactor scale_factor = TIMES_4;
+
+  if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+      !Runtime::Current()->UseJitCompilation()) {
+    // Note that we do not actually check the value of `GetIsGcMarking()`
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually kBakerCcEntrypointRegister) the read
+    // barrier mark introspection entrypoint. If `temp` is null, it means
+    // that `GetIsGcMarking()` is false, and vice versa.
+    //
+    // We use link-time generated thunks for the slow path. That thunk checks
+    // the holder and jumps to the entrypoint if needed. If the holder is not
+    // gray, it creates a fake dependency and returns to the LDR instruction.
+    //
+    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+    //     lr = &gray_return_address;
+    //     if (temp != nullptr) {
+    //        goto array_thunk<base_reg>(lr)
+    //     }
+    //   not_gray_return_address:
+    //     // Original reference load. The base register points to the array
+    //     // data, i.e. obj + data_offset, set up before this sequence.
+    //     HeapReference<mirror::Object> reference = data[index];
+    //   gray_return_address:
+
+    DCHECK(index.IsValid());
+    vixl32::Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
+    vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+    vixl32::Register data_reg = RegisterFrom(temp, Primitive::kPrimInt);  // Raw pointer.
+    DCHECK(!data_reg.Is(kBakerCcEntrypointRegister));
+
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+    uint32_t custom_data =
+        linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode());
+    vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+    // entrypoint_reg =
+    //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+    DCHECK_EQ(ip.GetCode(), 12u);
+    const int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+    __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
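+    // data_reg = the address of the first array element (obj + data_offset).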
+    __ Add(data_reg, obj, Operand(data_offset));
+
+    vixl::EmissionCheckScope guard(
+        GetVIXLAssembler(),
+        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+    vixl32::Label return_address;
+    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+    __ cmp(kBakerCcEntrypointRegister, Operand(0));
+    EmitPlaceholderBne(this, bne_label);
+    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+    __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+    // Note: We need a Wide NEG for the unpoisoning.
+    if (kPoisonHeapReferences) {
+      __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+    }
+    __ Bind(&return_address);
+    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+              BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+    return;
+  }
+
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  ScaleFactor scale_factor = TIMES_4;
   GenerateReferenceLoadWithBakerReadBarrier(
       instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
 }
@@ -8100,9 +8437,7 @@
                                                                      Location index,
                                                                      ScaleFactor scale_factor,
                                                                      Location temp,
-                                                                     bool needs_null_check,
-                                                                     bool always_update_field,
-                                                                     vixl32::Register* temp2) {
+                                                                     bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
@@ -8113,6 +8448,73 @@
   // not.
   //
   // Note that we do not actually check the value of `GetIsGcMarking()`;
+  // instead, we load into `temp2` the read barrier mark entry point
+  // corresponding to register `ref`. If `temp2` is null, it means
+  // that `GetIsGcMarking()` is false, and vice versa.
+  //
+  //   temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+  //   if (temp2 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+  //     // Slow path.
+  //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //     if (is_gray) {
+  //       ref = temp2(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //     }
+  //   } else {
+  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //   }
+
+  vixl32::Register temp_reg = RegisterFrom(temp);
+
+  // Slow path marking the object `ref` when the GC is marking. The
+  // entrypoint will already be loaded in `temp2`.
+  Location temp2 = LocationFrom(lr);
+  SlowPathCodeARMVIXL* slow_path =
+      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
+          instruction,
+          ref,
+          obj,
+          offset,
+          index,
+          scale_factor,
+          needs_null_check,
+          temp_reg,
+          /* entrypoint */ temp2);
+  AddSlowPath(slow_path);
+
+  // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  const int32_t entry_point_offset =
+      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+  // Loading the entrypoint does not require a load acquire since it is only changed when
+  // threads are suspended or running a checkpoint.
+  GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset);
+  // The entrypoint is null when the GC is not marking, this prevents one load compared to
+  // checking GetIsGcMarking.
+  __ CompareAndBranchIfNonZero(RegisterFrom(temp2), slow_path->GetEntryLabel());
+  // Fast path: the GC is not marking: just load the reference.
+  GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+                                                                    Location ref,
+                                                                    vixl32::Register obj,
+                                                                    Location field_offset,
+                                                                    Location temp,
+                                                                    bool needs_null_check,
+                                                                    vixl32::Register temp2) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+  // whether we need to enter the slow path to update the reference
+  // field within `obj`.  Then, in the slow path, check the gray bit
+  // in the lock word of the reference's holder (`obj`) to decide
+  // whether to mark `ref` and update the field or not.
+  //
+  // Note that we do not actually check the value of `GetIsGcMarking()`;
   // instead, we load into `temp3` the read barrier mark entry point
   // corresponding to register `ref`. If `temp3` is null, it means
   // that `GetIsGcMarking()` is false, and vice versa.
@@ -8122,55 +8524,32 @@
   //     // Slow path.
   //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
+  //     HeapReference<mirror::Object> ref = *(obj + field_offset);  // Reference load.
   //     bool is_gray = (rb_state == ReadBarrier::GrayState());
   //     if (is_gray) {
+  //       old_ref = ref;
   //       ref = temp3(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //       compareAndSwapObject(obj, field_offset, old_ref, ref);
   //     }
-  //   } else {
-  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   //   }
 
   vixl32::Register temp_reg = RegisterFrom(temp);
 
-  // Slow path marking the object `ref` when the GC is marking. The
-  // entrypoint will already be loaded in `temp3`.
+  // Slow path updating the object reference at address `obj + field_offset`
+  // when the GC is marking. The entrypoint will already be loaded in `temp3`.
   Location temp3 = LocationFrom(lr);
-  SlowPathCodeARMVIXL* slow_path;
-  if (always_update_field) {
-    DCHECK(temp2 != nullptr);
-    // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
-    // only supports address of the form `obj + field_offset`, where
-    // `obj` is a register and `field_offset` is a register pair (of
-    // which only the lower half is used). Thus `offset` and
-    // `scale_factor` above are expected to be null in this code path.
-    DCHECK_EQ(offset, 0u);
-    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
-    Location field_offset = index;
-    slow_path =
-        new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
-            instruction,
-            ref,
-            obj,
-            offset,
-            /* index */ field_offset,
-            scale_factor,
-            needs_null_check,
-            temp_reg,
-            *temp2,
-            /* entrypoint */ temp3);
-  } else {
-    slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
-        instruction,
-        ref,
-        obj,
-        offset,
-        index,
-        scale_factor,
-        needs_null_check,
-        temp_reg,
-        /* entrypoint */ temp3);
-  }
+  SlowPathCodeARMVIXL* slow_path =
+      new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
+          instruction,
+          ref,
+          obj,
+          /* offset */ 0u,
+          /* index */ field_offset,
+          /* scale_factor */ ScaleFactor::TIMES_1,
+          needs_null_check,
+          temp_reg,
+          temp2,
+          /* entrypoint */ temp3);
   AddSlowPath(slow_path);
 
   // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
@@ -8182,8 +8561,8 @@
   // The entrypoint is null when the GC is not marking, this prevents one load compared to
   // checking GetIsGcMarking.
   __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel());
-  // Fast path: just load the reference.
-  GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+  // Fast path: the GC is not marking: nothing to do (the field is
+  // up-to-date, and we don't need to load the reference).
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -8497,24 +8876,9 @@
   return &patches->back();
 }
 
-VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageStringLiteral(
-    const DexFile& dex_file,
-    dex::StringIndex string_index) {
-  return boot_image_string_patches_.GetOrCreate(
-      StringReference(&dex_file, string_index),
-      [this]() {
-        return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
-      });
-}
-
-VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageTypeLiteral(
-    const DexFile& dex_file,
-    dex::TypeIndex type_index) {
-  return boot_image_type_patches_.GetOrCreate(
-      TypeReference(&dex_file, type_index),
-      [this]() {
-        return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
-      });
+vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) {
+  baker_read_barrier_patches_.emplace_back(custom_data);
+  return &baker_read_barrier_patches_.back().label;
 }
 
 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
@@ -8570,23 +8934,13 @@
   DCHECK(linker_patches->empty());
   size_t size =
       /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
-      boot_image_string_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
-      boot_image_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
+      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+      baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
-  for (const auto& entry : boot_image_string_patches_) {
-    const StringReference& target_string = entry.first;
-    VIXLUInt32Literal* literal = entry.second;
-    DCHECK(literal->IsBound());
-    uint32_t literal_offset = literal->GetLocation();
-    linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
-                                                       target_string.dex_file,
-                                                       target_string.string_index.index_));
-  }
   if (!GetCompilerOptions().IsBootImage()) {
     DCHECK(pc_relative_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
@@ -8599,14 +8953,9 @@
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
-  for (const auto& entry : boot_image_type_patches_) {
-    const TypeReference& target_type = entry.first;
-    VIXLUInt32Literal* literal = entry.second;
-    DCHECK(literal->IsBound());
-    uint32_t literal_offset = literal->GetLocation();
-    linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
-                                                     target_type.dex_file,
-                                                     target_type.type_index.index_));
+  for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+    linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
+                                                                       info.custom_data));
   }
   DCHECK_EQ(size, linker_patches->size());
 }
@@ -8621,16 +8970,6 @@
       });
 }
 
-VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateMethodLiteral(
-    MethodReference target_method,
-    MethodToLiteralMap* map) {
-  return map->GetOrCreate(
-      target_method,
-      [this]() {
-        return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
-      });
-}
-
 void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
@@ -8844,14 +9183,20 @@
 
 void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const auto& entry : jit_string_patches_) {
-    const auto& it = jit_string_roots_.find(entry.first);
+    const StringReference& string_reference = entry.first;
+    VIXLUInt32Literal* table_entry_literal = entry.second;
+    const auto it = jit_string_roots_.find(string_reference);
     DCHECK(it != jit_string_roots_.end());
-    PatchJitRootUse(code, roots_data, entry.second, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
   }
   for (const auto& entry : jit_class_patches_) {
-    const auto& it = jit_class_roots_.find(entry.first);
+    const TypeReference& type_reference = entry.first;
+    VIXLUInt32Literal* table_entry_literal = entry.second;
+    const auto it = jit_class_roots_.find(type_reference);
     DCHECK(it != jit_class_roots_.end());
-    PatchJitRootUse(code, roots_data, entry.second, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
   }
 }
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 7281069..daba9bf 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -572,10 +572,11 @@
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
-  VIXLUInt32Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                                       dex::StringIndex string_index);
-  VIXLUInt32Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
-                                                     dex::TypeIndex type_index);
+
+  // Add a new Baker read barrier patch and return the label to be bound
+  // before the BNE instruction.
+  vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
   VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
   VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file,
                                                  dex::StringIndex string_index,
@@ -588,6 +589,10 @@
 
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
+  // Maybe add the reserved entrypoint register as a temporary for a field load. This temp
+  // is added only for AOT compilation if link-time generated thunks for fields are enabled.
+  void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -611,11 +616,6 @@
   // Load the object reference located at the address
   // `obj + offset + (index << scale_factor)`, held by object `obj`, into
   // `ref`, and mark it if needed.
-  //
-  // If `always_update_field` is true, the value of the reference is
-  // atomically updated in the holder (`obj`).  This operation
-  // requires an extra temporary register, which must be provided as a
-  // non-null pointer (`temp2`).
   void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                  Location ref,
                                                  vixl::aarch32::Register obj,
@@ -623,9 +623,27 @@
                                                  Location index,
                                                  ScaleFactor scale_factor,
                                                  Location temp,
-                                                 bool needs_null_check,
-                                                 bool always_update_field = false,
-                                                 vixl::aarch32::Register* temp2 = nullptr);
+                                                 bool needs_null_check);
+
+  // Generate code checking whether the reference field at the
+  // address `obj + field_offset`, held by object `obj`, needs to be
+  // marked, and if so, marking it and updating the field within `obj`
+  // with the marked value.
+  //
+  // This routine is used for the implementation of the
+  // UnsafeCASObject intrinsic with Baker read barriers.
+  //
+  // This method has a structure similar to
+  // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+  // `ref` is used only as a temporary here, and thus its value should not
+  // be used afterwards.
+  void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+                                                Location ref,
+                                                vixl::aarch32::Register obj,
+                                                Location field_offset,
+                                                Location temp,
+                                                bool needs_null_check,
+                                                vixl::aarch32::Register temp2);
 
   // Generate a heap reference load (with no read barrier).
   void GenerateRawReferenceLoad(HInstruction* instruction,
@@ -703,8 +721,6 @@
                                                                 vixl::aarch32::Register temp);
 
   using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, VIXLUInt32Literal*>;
-  using MethodToLiteralMap =
-      ArenaSafeMap<MethodReference, VIXLUInt32Literal*, MethodReferenceComparator>;
   using StringToLiteralMap = ArenaSafeMap<StringReference,
                                           VIXLUInt32Literal*,
                                           StringReferenceValueComparator>;
@@ -712,9 +728,14 @@
                                         VIXLUInt32Literal*,
                                         TypeReferenceValueComparator>;
 
+  struct BakerReadBarrierPatchInfo {
+    explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+    vixl::aarch32::Label label;
+    uint32_t custom_data;
+  };
+
   VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
-  VIXLUInt32Literal* DeduplicateMethodLiteral(MethodReference target_method,
-                                              MethodToLiteralMap* map);
   PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
                                           uint32_t offset_or_index,
                                           ArenaDeque<PcRelativePatchInfo>* patches);
@@ -739,16 +760,14 @@
   Uint32ToLiteralMap uint32_literals_;
   // PC-relative patch info for each HArmDexCacheArraysBase.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
-  StringToLiteralMap boot_image_string_patches_;
   // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
-  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
-  TypeToLiteralMap boot_image_type_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // Baker read barrier patch info.
+  ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 357df97..95be3d7 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -493,8 +493,13 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
     __ Bind(GetEntryLabel());
+    LocationSummary* locations = instruction_->GetLocations();
+    SaveLiveRegisters(codegen, locations);
+    InvokeRuntimeCallingConvention calling_convention;
+    __ LoadConst32(calling_convention.GetRegisterAt(0),
+                   static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
     mips_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
-    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
@@ -1056,11 +1061,7 @@
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_string_patches_(StringReferenceValueComparator(),
-                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_type_patches_(TypeReferenceValueComparator(),
-                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -1603,9 +1604,7 @@
       pc_relative_dex_cache_patches_.size() +
       pc_relative_string_patches_.size() +
       pc_relative_type_patches_.size() +
-      type_bss_entry_patches_.size() +
-      boot_image_string_patches_.size() +
-      boot_image_type_patches_.size();
+      type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -1621,24 +1620,6 @@
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
-  for (const auto& entry : boot_image_string_patches_) {
-    const StringReference& target_string = entry.first;
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
-    linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
-                                                       target_string.dex_file,
-                                                       target_string.string_index.index_));
-  }
-  for (const auto& entry : boot_image_type_patches_) {
-    const TypeReference& target_type = entry.first;
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
-    linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
-                                                     target_type.dex_file,
-                                                     target_type.type_index.index_));
-  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
@@ -1674,27 +1655,6 @@
       [this, value]() { return __ NewLiteral<uint32_t>(value); });
 }
 
-Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method,
-                                                     MethodToLiteralMap* map) {
-  return map->GetOrCreate(
-      target_method,
-      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
-Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                                              dex::StringIndex string_index) {
-  return boot_image_string_patches_.GetOrCreate(
-      StringReference(&dex_file, string_index),
-      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
-Literal* CodeGeneratorMIPS::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
-                                                            dex::TypeIndex type_index) {
-  return boot_image_type_patches_.GetOrCreate(
-      TypeReference(&dex_file, type_index),
-      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
 Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) {
   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
@@ -1775,16 +1735,18 @@
 
 void CodeGeneratorMIPS::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const JitPatchInfo& info : jit_string_patches_) {
-    const auto& it = jit_string_roots_.find(StringReference(&info.target_dex_file,
-                                                            dex::StringIndex(info.index)));
+    const auto it = jit_string_roots_.find(StringReference(&info.target_dex_file,
+                                                           dex::StringIndex(info.index)));
     DCHECK(it != jit_string_roots_.end());
-    PatchJitRootUse(code, roots_data, info, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, info, index_in_table);
   }
   for (const JitPatchInfo& info : jit_class_patches_) {
-    const auto& it = jit_class_roots_.find(TypeReference(&info.target_dex_file,
-                                                         dex::TypeIndex(info.index)));
+    const auto it = jit_class_roots_.find(TypeReference(&info.target_dex_file,
+                                                        dex::TypeIndex(info.index)));
     DCHECK(it != jit_class_roots_.end());
-    PatchJitRootUse(code, roots_data, info, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, info, index_in_table);
   }
 }
 
@@ -3482,8 +3444,6 @@
 
   Primitive::Type type = instruction->InputAt(0)->GetType();
   LocationSummary* locations = instruction->GetLocations();
-  Register dst = locations->Out().AsRegister<Register>();
-  MipsLabel true_label;
 
   switch (type) {
     default:
@@ -3492,27 +3452,14 @@
       return;
 
     case Primitive::kPrimLong:
-      // TODO: don't use branches.
-      GenerateLongCompareAndBranch(instruction->GetCondition(), locations, &true_label);
-      break;
+      GenerateLongCompare(instruction->GetCondition(), locations);
+      return;
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations);
       return;
   }
-
-  // Convert the branches into the result.
-  MipsLabel done;
-
-  // False case: result = 0.
-  __ LoadConst32(dst, 0);
-  __ B(&done);
-
-  // True case: result = 1.
-  __ Bind(&true_label);
-  __ LoadConst32(dst, 1);
-  __ Bind(&done);
 }
 
 void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
@@ -4282,6 +4229,221 @@
   }
 }
 
+void InstructionCodeGeneratorMIPS::GenerateLongCompare(IfCondition cond,
+                                                       LocationSummary* locations) {
+  Register dst = locations->Out().AsRegister<Register>();
+  Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
+  Location rhs_location = locations->InAt(1);
+  Register rhs_high = ZERO;
+  Register rhs_low = ZERO;
+  int64_t imm = 0;
+  uint32_t imm_high = 0;
+  uint32_t imm_low = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    imm = rhs_location.GetConstant()->AsLongConstant()->GetValue();
+    imm_high = High32Bits(imm);
+    imm_low = Low32Bits(imm);
+  } else {
+    rhs_high = rhs_location.AsRegisterPairHigh<Register>();
+    rhs_low = rhs_location.AsRegisterPairLow<Register>();
+  }
+  if (use_imm && imm == 0) {
+    switch (cond) {
+      case kCondEQ:
+      case kCondBE:  // unsigned <= 0 is equivalent to == 0
+        __ Or(dst, lhs_high, lhs_low);
+        __ Sltiu(dst, dst, 1);
+        break;
+      case kCondNE:
+      case kCondA:  // unsigned > 0 is equivalent to != 0
+        __ Or(dst, lhs_high, lhs_low);
+        __ Sltu(dst, ZERO, dst);
+        break;
+      case kCondLT:
+        __ Slt(dst, lhs_high, ZERO);
+        break;
+      case kCondGE:
+        __ Slt(dst, lhs_high, ZERO);
+        __ Xori(dst, dst, 1);
+        break;
+      case kCondLE:
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Sra(AT, lhs_high, 31);
+        __ Sltu(dst, AT, TMP);
+        __ Xori(dst, dst, 1);
+        break;
+      case kCondGT:
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Sra(AT, lhs_high, 31);
+        __ Sltu(dst, AT, TMP);
+        break;
+      case kCondB:  // always false
+        __ Andi(dst, dst, 0);
+        break;
+      case kCondAE:  // always true
+        __ Ori(dst, ZERO, 1);
+        break;
+    }
+  } else if (use_imm) {
+    // TODO: more efficient comparison with constants without loading them into TMP/AT.
+    switch (cond) {
+      case kCondEQ:
+        __ LoadConst32(TMP, imm_high);
+        __ Xor(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Xor(AT, AT, lhs_low);
+        __ Or(dst, TMP, AT);
+        __ Sltiu(dst, dst, 1);
+        break;
+      case kCondNE:
+        __ LoadConst32(TMP, imm_high);
+        __ Xor(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Xor(AT, AT, lhs_low);
+        __ Or(dst, TMP, AT);
+        __ Sltu(dst, ZERO, dst);
+        break;
+      case kCondLT:
+      case kCondGE:
+        if (dst == lhs_low) {
+          __ LoadConst32(TMP, imm_low);
+          __ Sltu(dst, lhs_low, TMP);
+        }
+        __ LoadConst32(TMP, imm_high);
+        __ Slt(AT, lhs_high, TMP);
+        __ Slt(TMP, TMP, lhs_high);
+        if (dst != lhs_low) {
+          __ LoadConst32(dst, imm_low);
+          __ Sltu(dst, lhs_low, dst);
+        }
+        __ Slt(dst, TMP, dst);
+        __ Or(dst, dst, AT);
+        if (cond == kCondGE) {
+          __ Xori(dst, dst, 1);
+        }
+        break;
+      case kCondGT:
+      case kCondLE:
+        if (dst == lhs_low) {
+          __ LoadConst32(TMP, imm_low);
+          __ Sltu(dst, TMP, lhs_low);
+        }
+        __ LoadConst32(TMP, imm_high);
+        __ Slt(AT, TMP, lhs_high);
+        __ Slt(TMP, lhs_high, TMP);
+        if (dst != lhs_low) {
+          __ LoadConst32(dst, imm_low);
+          __ Sltu(dst, dst, lhs_low);
+        }
+        __ Slt(dst, TMP, dst);
+        __ Or(dst, dst, AT);
+        if (cond == kCondLE) {
+          __ Xori(dst, dst, 1);
+        }
+        break;
+      case kCondB:
+      case kCondAE:
+        if (dst == lhs_low) {
+          __ LoadConst32(TMP, imm_low);
+          __ Sltu(dst, lhs_low, TMP);
+        }
+        __ LoadConst32(TMP, imm_high);
+        __ Sltu(AT, lhs_high, TMP);
+        __ Sltu(TMP, TMP, lhs_high);
+        if (dst != lhs_low) {
+          __ LoadConst32(dst, imm_low);
+          __ Sltu(dst, lhs_low, dst);
+        }
+        __ Slt(dst, TMP, dst);
+        __ Or(dst, dst, AT);
+        if (cond == kCondAE) {
+          __ Xori(dst, dst, 1);
+        }
+        break;
+      case kCondA:
+      case kCondBE:
+        if (dst == lhs_low) {
+          __ LoadConst32(TMP, imm_low);
+          __ Sltu(dst, TMP, lhs_low);
+        }
+        __ LoadConst32(TMP, imm_high);
+        __ Sltu(AT, TMP, lhs_high);
+        __ Sltu(TMP, lhs_high, TMP);
+        if (dst != lhs_low) {
+          __ LoadConst32(dst, imm_low);
+          __ Sltu(dst, dst, lhs_low);
+        }
+        __ Slt(dst, TMP, dst);
+        __ Or(dst, dst, AT);
+        if (cond == kCondBE) {
+          __ Xori(dst, dst, 1);
+        }
+        break;
+    }
+  } else {
+    switch (cond) {
+      case kCondEQ:
+        __ Xor(TMP, lhs_high, rhs_high);
+        __ Xor(AT, lhs_low, rhs_low);
+        __ Or(dst, TMP, AT);
+        __ Sltiu(dst, dst, 1);
+        break;
+      case kCondNE:
+        __ Xor(TMP, lhs_high, rhs_high);
+        __ Xor(AT, lhs_low, rhs_low);
+        __ Or(dst, TMP, AT);
+        __ Sltu(dst, ZERO, dst);
+        break;
+      case kCondLT:
+      case kCondGE:
+        __ Slt(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Slt(TMP, TMP, AT);
+        __ Slt(AT, lhs_high, rhs_high);
+        __ Or(dst, AT, TMP);
+        if (cond == kCondGE) {
+          __ Xori(dst, dst, 1);
+        }
+        break;
+      case kCondGT:
+      case kCondLE:
+        __ Slt(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Slt(TMP, TMP, AT);
+        __ Slt(AT, rhs_high, lhs_high);
+        __ Or(dst, AT, TMP);
+        if (cond == kCondLE) {
+          __ Xori(dst, dst, 1);
+        }
+        break;
+      case kCondB:
+      case kCondAE:
+        __ Sltu(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Slt(TMP, TMP, AT);
+        __ Sltu(AT, lhs_high, rhs_high);
+        __ Or(dst, AT, TMP);
+        if (cond == kCondAE) {
+          __ Xori(dst, dst, 1);
+        }
+        break;
+      case kCondA:
+      case kCondBE:
+        __ Sltu(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Slt(TMP, TMP, AT);
+        __ Sltu(AT, rhs_high, lhs_high);
+        __ Or(dst, AT, TMP);
+        if (cond == kCondBE) {
+          __ Xori(dst, dst, 1);
+        }
+        break;
+    }
+  }
+}
+
 void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond,
                                                                 LocationSummary* locations,
                                                                 MipsLabel* label) {
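The register-register cases in the new GenerateLongCompare above build a 64-bit comparison from 32-bit halves without branches: the high words decide the result unless they are equal, in which case the unsigned comparison of the low words decides. A scalar C++ sketch of the signed less-than case, assuming hypothetical names that are not part of the patch:

    #include <cstdint>

    // Mirrors the Slt/Sltu/Or sequence emitted for kCondLT with a register RHS.
    bool LongLessThan(int32_t lhs_hi, uint32_t lhs_lo, int32_t rhs_hi, uint32_t rhs_lo) {
      int hi_lt = lhs_hi < rhs_hi;       // Slt on the high words
      int hi_gt = rhs_hi < lhs_hi;       // Slt on the high words, reversed
      int lo_lt = lhs_lo < rhs_lo;       // Sltu on the low words (unsigned)
      // 1 only when the high words do not compare greater and the low words compare
      // lower; this can overlap with hi_lt, which the final Or tolerates.
      int eq_and_lo_lt = hi_gt < lo_lt;
      return (hi_lt | eq_and_lo_lt) != 0;
    }

kCondGE simply flips this result with Xori(dst, dst, 1), and the unsigned conditions (kCondB, kCondAE, kCondA, kCondBE) use Sltu instead of Slt on the high words.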
@@ -5199,7 +5361,10 @@
 void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  InvokeRuntimeCallingConvention calling_convention;
+  RegisterSet caller_saves = RegisterSet::Empty();
+  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetCustomSlowPathCallerSaves(caller_saves);
   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
@@ -6828,17 +6993,12 @@
   bool is_r6 = GetInstructionSetFeatures().IsR6();
   bool fallback_load = has_irreducible_loops && !is_r6;
   switch (desired_string_load_kind) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      break;
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      break;
-    case HLoadString::LoadKind::kBootImageAddress:
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
     case HLoadString::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       fallback_load = false;
@@ -6867,17 +7027,12 @@
     case HLoadClass::LoadKind::kReferrersClass:
       fallback_load = false;
       break;
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      break;
-    case HLoadClass::LoadKind::kBootImageAddress:
-      break;
     case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       fallback_load = false;
@@ -7116,7 +7271,6 @@
   }
   switch (load_kind) {
     // We need an extra register for PC-relative literals on R2.
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
     case HLoadClass::LoadKind::kBootImageAddress:
     case HLoadClass::LoadKind::kBssEntry:
@@ -7166,7 +7320,6 @@
   bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
   switch (load_kind) {
     // We need an extra register for PC-relative literals on R2.
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
     case HLoadClass::LoadKind::kBootImageAddress:
     case HLoadClass::LoadKind::kBssEntry:
@@ -7197,14 +7350,6 @@
                               read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
-      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
-      __ LoadLiteral(out,
-                     base_or_current_method_reg,
-                     codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
-                                                               cls->GetTypeIndex()));
-      break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
@@ -7311,7 +7456,6 @@
   const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
   switch (load_kind) {
     // We need an extra register for PC-relative literals on R2.
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
     case HLoadString::LoadKind::kBootImageAddress:
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
     case HLoadString::LoadKind::kBssEntry:
@@ -7361,7 +7505,6 @@
   bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
   switch (load_kind) {
     // We need an extra register for PC-relative literals on R2.
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
     case HLoadString::LoadKind::kBootImageAddress:
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
     case HLoadString::LoadKind::kBssEntry:
@@ -7373,13 +7516,6 @@
   }
 
   switch (load_kind) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
-      __ LoadLiteral(out,
-                     base_or_current_method_reg,
-                     codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
-                                                                 load->GetStringIndex()));
-      return;  // No dex cache slow path.
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
@@ -8205,6 +8341,23 @@
     }
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
+
+    // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+    // value of the output type if the input is outside of the range after the truncation or
+    // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+    // results. This matches the desired float/double-to-int/long conversion exactly.
+    //
+    // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+    // value when the input is either a NaN or is outside of the range of the output type
+    // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+    // the same result.
+    //
+    // The code takes care of the different behaviors by first comparing the input to the
+    // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+    // If the input is greater than or equal to the minimum, it proceeds to the truncate
+    // instruction, which will handle such an input the same way irrespective of NAN2008.
+    // Otherwise the input is compared to itself to determine whether it is a NaN or not
+    // in order to return either zero or the minimum value.
     if (result_type == Primitive::kPrimLong) {
       if (isR6) {
         // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
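As a reference for the comment block added above, the desired conversion semantics can be written out as a small scalar model (the helper name is hypothetical). The R6 truncate with NAN2008=1 implements all four branches directly; the retained R2 path only has to special-case the first two (NaN and below-range) before falling through to the truncate instruction, because the R2 truncate already yields the maximum value for inputs that are too large.

    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Desired float -> int32 semantics (illustrative scalar model, not part of the patch):
    // NaN -> 0, below range -> INT32_MIN, above range -> INT32_MAX, otherwise truncate.
    int32_t FloatToInt32(float in) {
      constexpr int32_t kMin = std::numeric_limits<int32_t>::min();
      constexpr int32_t kMax = std::numeric_limits<int32_t>::max();
      if (std::isnan(in)) {
        return 0;
      }
      if (in < static_cast<float>(kMin)) {  // -2**31 is exactly representable as a float
        return kMin;
      }
      if (in >= 2147483648.0f) {            // 2**31: at or above this, clamp to the maximum
        return kMax;
      }
      return static_cast<int32_t>(in);      // in range: plain truncation
    }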
@@ -8212,62 +8365,6 @@
         FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
         Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
         Register dst_low = locations->Out().AsRegisterPairLow<Register>();
-        MipsLabel truncate;
-        MipsLabel done;
-
-        // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
-        // value when the input is either a NaN or is outside of the range of the output type
-        // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
-        // the same result.
-        //
-        // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
-        // value of the output type if the input is outside of the range after the truncation or
-        // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
-        // results. This matches the desired float/double-to-int/long conversion exactly.
-        //
-        // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
-        //
-        // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
-        // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
-        // even though it must be NAN2008=1 on R6.
-        //
-        // The code takes care of the different behaviors by first comparing the input to the
-        // minimum output value (-2**-63 for truncating to long, -2**-31 for truncating to int).
-        // If the input is greater than or equal to the minimum, it procedes to the truncate
-        // instruction, which will handle such an input the same way irrespective of NAN2008.
-        // Otherwise the input is compared to itself to determine whether it is a NaN or not
-        // in order to return either zero or the minimum value.
-        //
-        // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
-        // truncate instruction for MIPS64R6.
-        if (input_type == Primitive::kPrimFloat) {
-          uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min());
-          __ LoadConst32(TMP, min_val);
-          __ Mtc1(TMP, FTMP);
-          __ CmpLeS(FTMP, FTMP, src);
-        } else {
-          uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min());
-          __ LoadConst32(TMP, High32Bits(min_val));
-          __ Mtc1(ZERO, FTMP);
-          __ Mthc1(TMP, FTMP);
-          __ CmpLeD(FTMP, FTMP, src);
-        }
-
-        __ Bc1nez(FTMP, &truncate);
-
-        if (input_type == Primitive::kPrimFloat) {
-          __ CmpEqS(FTMP, src, src);
-        } else {
-          __ CmpEqD(FTMP, src, src);
-        }
-        __ Move(dst_low, ZERO);
-        __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min());
-        __ Mfc1(TMP, FTMP);
-        __ And(dst_high, dst_high, TMP);
-
-        __ B(&done);
-
-        __ Bind(&truncate);
 
         if (input_type == Primitive::kPrimFloat) {
           __ TruncLS(FTMP, src);
@@ -8276,8 +8373,6 @@
         }
         __ Mfc1(dst_low, FTMP);
         __ Mfhc1(dst_high, FTMP);
-
-        __ Bind(&done);
       } else {
         QuickEntrypointEnum entrypoint = (input_type == Primitive::kPrimFloat) ? kQuickF2l
                                                                                : kQuickD2l;
@@ -8294,43 +8389,19 @@
       MipsLabel truncate;
       MipsLabel done;
 
-      // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
-      // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
-      // even though it must be NAN2008=1 on R6.
-      //
-      // For details see the large comment above for the truncation of float/double to long on R6.
-      //
-      // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
-      // truncate instruction for MIPS64R6.
-      if (input_type == Primitive::kPrimFloat) {
-        uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
-        __ LoadConst32(TMP, min_val);
-        __ Mtc1(TMP, FTMP);
-      } else {
-        uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
-        __ LoadConst32(TMP, High32Bits(min_val));
-        __ Mtc1(ZERO, FTMP);
-        __ MoveToFpuHigh(TMP, FTMP);
-      }
-
-      if (isR6) {
+      if (!isR6) {
         if (input_type == Primitive::kPrimFloat) {
-          __ CmpLeS(FTMP, FTMP, src);
+          uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+          __ LoadConst32(TMP, min_val);
+          __ Mtc1(TMP, FTMP);
         } else {
-          __ CmpLeD(FTMP, FTMP, src);
+          uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+          __ LoadConst32(TMP, High32Bits(min_val));
+          __ Mtc1(ZERO, FTMP);
+          __ MoveToFpuHigh(TMP, FTMP);
         }
-        __ Bc1nez(FTMP, &truncate);
 
         if (input_type == Primitive::kPrimFloat) {
-          __ CmpEqS(FTMP, src, src);
-        } else {
-          __ CmpEqD(FTMP, src, src);
-        }
-        __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
-        __ Mfc1(TMP, FTMP);
-        __ And(dst, dst, TMP);
-      } else {
-        if (input_type == Primitive::kPrimFloat) {
           __ ColeS(0, FTMP, src);
         } else {
           __ ColeD(0, FTMP, src);
@@ -8344,12 +8415,12 @@
         }
         __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
         __ Movf(dst, ZERO, 0);
+
+        __ B(&done);
+
+        __ Bind(&truncate);
       }
 
-      __ B(&done);
-
-      __ Bind(&truncate);
-
       if (input_type == Primitive::kPrimFloat) {
         __ TruncWS(FTMP, src);
       } else {
@@ -8357,7 +8428,9 @@
       }
       __ Mfc1(dst, FTMP);
 
-      __ Bind(&done);
+      if (!isR6) {
+        __ Bind(&done);
+      }
     }
   } else if (Primitive::IsFloatingPointType(result_type) &&
              Primitive::IsFloatingPointType(input_type)) {
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 03939e3..449cb4c 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -295,6 +295,7 @@
   void GenerateIntCompareAndBranch(IfCondition cond,
                                    LocationSummary* locations,
                                    MipsLabel* label);
+  void GenerateLongCompare(IfCondition cond, LocationSummary* locations);
   void GenerateLongCompareAndBranch(IfCondition cond,
                                     LocationSummary* locations,
                                     MipsLabel* label);
@@ -587,9 +588,6 @@
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
-  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                             dex::StringIndex string_index);
-  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
 
   void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, Register out, Register base);
@@ -623,16 +621,8 @@
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
-  using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
-  using BootStringToLiteralMap = ArenaSafeMap<StringReference,
-                                              Literal*,
-                                              StringReferenceValueComparator>;
-  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
-                                            Literal*,
-                                            TypeReferenceValueComparator>;
 
   Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
-  Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
                                           uint32_t offset_or_index,
                                           ArenaDeque<PcRelativePatchInfo>* patches);
@@ -654,12 +644,8 @@
   Uint32ToLiteralMap uint32_literals_;
   // PC-relative patch info for each HMipsDexCacheArraysBase.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
-  BootStringToLiteralMap boot_image_string_patches_;
   // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
-  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
-  BootTypeToLiteralMap boot_image_type_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index a9c4964..5cdff5a 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -302,10 +302,13 @@
       : SlowPathCodeMIPS64(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);     // Only saves live vector registers for SIMD.
     mips64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+    RestoreLiveRegisters(codegen, locations);  // Only restores live vector registers for SIMD.
     if (successor_ == nullptr) {
       __ Bc(GetReturnLabel());
     } else {
@@ -393,8 +396,13 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
+    LocationSummary* locations = instruction_->GetLocations();
+    SaveLiveRegisters(codegen, locations);
+    InvokeRuntimeCallingConvention calling_convention;
+    __ LoadConst32(calling_convention.GetRegisterAt(0),
+                   static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
     mips64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
-    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
@@ -950,11 +958,7 @@
       uint64_literals_(std::less<uint64_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_string_patches_(StringReferenceValueComparator(),
-                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_type_patches_(TypeReferenceValueComparator(),
-                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
@@ -1438,9 +1442,7 @@
       pc_relative_dex_cache_patches_.size() +
       pc_relative_string_patches_.size() +
       pc_relative_type_patches_.size() +
-      type_bss_entry_patches_.size() +
-      boot_image_string_patches_.size() +
-      boot_image_type_patches_.size();
+      type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -1456,24 +1458,6 @@
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
-  for (const auto& entry : boot_image_string_patches_) {
-    const StringReference& target_string = entry.first;
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
-    linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
-                                                       target_string.dex_file,
-                                                       target_string.string_index.index_));
-  }
-  for (const auto& entry : boot_image_type_patches_) {
-    const TypeReference& target_type = entry.first;
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
-    linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
-                                                     target_type.dex_file,
-                                                     target_type.type_index.index_));
-  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
@@ -1515,27 +1499,6 @@
       [this, value]() { return __ NewLiteral<uint64_t>(value); });
 }
 
-Literal* CodeGeneratorMIPS64::DeduplicateMethodLiteral(MethodReference target_method,
-                                                       MethodToLiteralMap* map) {
-  return map->GetOrCreate(
-      target_method,
-      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
-Literal* CodeGeneratorMIPS64::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                                                dex::StringIndex string_index) {
-  return boot_image_string_patches_.GetOrCreate(
-      StringReference(&dex_file, string_index),
-      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
-Literal* CodeGeneratorMIPS64::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
-                                                              dex::TypeIndex type_index) {
-  return boot_image_type_patches_.GetOrCreate(
-      TypeReference(&dex_file, type_index),
-      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
 Literal* CodeGeneratorMIPS64::DeduplicateBootImageAddressLiteral(uint64_t address) {
   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
@@ -1581,14 +1544,20 @@
 
 void CodeGeneratorMIPS64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const auto& entry : jit_string_patches_) {
-    const auto& it = jit_string_roots_.find(entry.first);
+    const StringReference& string_reference = entry.first;
+    Literal* table_entry_literal = entry.second;
+    const auto it = jit_string_roots_.find(string_reference);
     DCHECK(it != jit_string_roots_.end());
-    PatchJitRootUse(code, roots_data, entry.second, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
   }
   for (const auto& entry : jit_class_patches_) {
-    const auto& it = jit_class_roots_.find(entry.first);
+    const TypeReference& type_reference = entry.first;
+    Literal* table_entry_literal = entry.second;
+    const auto it = jit_class_roots_.find(type_reference);
     DCHECK(it != jit_class_roots_.end());
-    PatchJitRootUse(code, roots_data, entry.second, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
   }
 }
 
@@ -1636,13 +1605,19 @@
 }
 
 size_t CodeGeneratorMIPS64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ StoreFpuToOffset(kStoreDoubleword, FpuRegister(reg_id), SP, stack_index);
-  return kMips64DoublewordSize;
+  __ StoreFpuToOffset(GetGraph()->HasSIMD() ? kStoreQuadword : kStoreDoubleword,
+                      FpuRegister(reg_id),
+                      SP,
+                      stack_index);
+  return GetFloatingPointSpillSlotSize();
 }
 
 size_t CodeGeneratorMIPS64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ LoadFpuFromOffset(kLoadDoubleword, FpuRegister(reg_id), SP, stack_index);
-  return kMips64DoublewordSize;
+  __ LoadFpuFromOffset(GetGraph()->HasSIMD() ? kLoadQuadword : kLoadDoubleword,
+                       FpuRegister(reg_id),
+                       SP,
+                       stack_index);
+  return GetFloatingPointSpillSlotSize();
 }
 
 void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -3905,7 +3880,10 @@
 void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  InvokeRuntimeCallingConvention calling_convention;
+  RegisterSet caller_saves = RegisterSet::Empty();
+  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetCustomSlowPathCallerSaves(caller_saves);
   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
@@ -4875,22 +4853,16 @@
     HLoadString::LoadKind desired_string_load_kind) {
   bool fallback_load = false;
   switch (desired_string_load_kind) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      break;
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      break;
-    case HLoadString::LoadKind::kBootImageAddress:
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
-    case HLoadString::LoadKind::kDexCacheViaMethod:
-      break;
     case HLoadString::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadString::LoadKind::kBootImageAddress:
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
   }
   if (fallback_load) {
     desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
@@ -4907,20 +4879,14 @@
       UNREACHABLE();
     case HLoadClass::LoadKind::kReferrersClass:
       break;
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      break;
-    case HLoadClass::LoadKind::kBootImageAddress:
-      break;
     case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadClass::LoadKind::kBootImageAddress:
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -5149,14 +5115,6 @@
                               ArtMethod::DeclaringClassOffset().Int32Value(),
                               read_barrier_option);
       break;
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
-      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
-      __ LoadLiteral(out,
-                     kLoadUnsignedWord,
-                     codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
-                                                               cls->GetTypeIndex()));
-      break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
@@ -5269,13 +5227,6 @@
   GpuRegister out = out_loc.AsRegister<GpuRegister>();
 
   switch (load_kind) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
-      __ LoadLiteral(out,
-                     kLoadUnsignedWord,
-                     codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
-                                                                 load->GetStringIndex()));
-      return;  // No dex cache slow path.
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
@@ -5838,7 +5789,11 @@
 void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  // In the suspend check slow path, there are usually no caller-save registers at all.
+  // If SIMD instructions are present, however, we force spilling all live SIMD
+  // registers in full width (since the runtime only saves/restores the lower part).
+  locations->SetCustomSlowPathCallerSaves(
+      GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
 }
 
 void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5965,68 +5920,6 @@
     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
     GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
     FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>();
-    Mips64Label truncate;
-    Mips64Label done;
-
-    // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
-    // value when the input is either a NaN or is outside of the range of the output type
-    // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
-    // the same result.
-    //
-    // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
-    // value of the output type if the input is outside of the range after the truncation or
-    // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
-    // results. This matches the desired float/double-to-int/long conversion exactly.
-    //
-    // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
-    //
-    // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
-    // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
-    // even though it must be NAN2008=1 on R6.
-    //
-    // The code takes care of the different behaviors by first comparing the input to the
-    // minimum output value (-2**-63 for truncating to long, -2**-31 for truncating to int).
-    // If the input is greater than or equal to the minimum, it procedes to the truncate
-    // instruction, which will handle such an input the same way irrespective of NAN2008.
-    // Otherwise the input is compared to itself to determine whether it is a NaN or not
-    // in order to return either zero or the minimum value.
-    //
-    // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
-    // truncate instruction for MIPS64R6.
-    if (input_type == Primitive::kPrimFloat) {
-      uint32_t min_val = (result_type == Primitive::kPrimLong)
-          ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min())
-          : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
-      __ LoadConst32(TMP, min_val);
-      __ Mtc1(TMP, FTMP);
-      __ CmpLeS(FTMP, FTMP, src);
-    } else {
-      uint64_t min_val = (result_type == Primitive::kPrimLong)
-          ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min())
-          : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
-      __ LoadConst64(TMP, min_val);
-      __ Dmtc1(TMP, FTMP);
-      __ CmpLeD(FTMP, FTMP, src);
-    }
-
-    __ Bc1nez(FTMP, &truncate);
-
-    if (input_type == Primitive::kPrimFloat) {
-      __ CmpEqS(FTMP, src, src);
-    } else {
-      __ CmpEqD(FTMP, src, src);
-    }
-    if (result_type == Primitive::kPrimLong) {
-      __ LoadConst64(dst, std::numeric_limits<int64_t>::min());
-    } else {
-      __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
-    }
-    __ Mfc1(TMP, FTMP);
-    __ And(dst, dst, TMP);
-
-    __ Bc(&done);
-
-    __ Bind(&truncate);
 
     if (result_type == Primitive::kPrimLong) {
       if (input_type == Primitive::kPrimFloat) {
@@ -6043,8 +5936,6 @@
       }
       __ Mfc1(dst, FTMP);
     }
-
-    __ Bind(&done);
   } else if (Primitive::IsFloatingPointType(result_type) &&
              Primitive::IsFloatingPointType(input_type)) {
     FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 200e884..1f34ced 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -336,7 +336,11 @@
 
   size_t GetWordSize() const OVERRIDE { return kMips64DoublewordSize; }
 
-  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64DoublewordSize; }
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    return GetGraph()->HasSIMD()
+        ? 2 * kMips64DoublewordSize   // 16 bytes for each spill.
+        : 1 * kMips64DoublewordSize;  //  8 bytes for each spill.
+  }
 
   uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
     return assembler_.GetLabelLocation(GetLabelOf(block));
@@ -549,9 +553,6 @@
                                                        uint32_t element_offset);
   PcRelativePatchInfo* NewPcRelativeCallPatch(const DexFile& dex_file,
                                               uint32_t method_index);
-  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                             dex::StringIndex string_index);
-  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
   Literal* DeduplicateBootImageAddressLiteral(uint64_t address);
 
   void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, GpuRegister out);
@@ -570,23 +571,15 @@
  private:
   using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
   using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, Literal*>;
-  using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
   using StringToLiteralMap = ArenaSafeMap<StringReference,
                                           Literal*,
                                           StringReferenceValueComparator>;
   using TypeToLiteralMap = ArenaSafeMap<TypeReference,
                                         Literal*,
                                         TypeReferenceValueComparator>;
-  using BootStringToLiteralMap = ArenaSafeMap<StringReference,
-                                              Literal*,
-                                              StringReferenceValueComparator>;
-  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
-                                            Literal*,
-                                            TypeReferenceValueComparator>;
 
   Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
   Literal* DeduplicateUint64Literal(uint64_t value);
-  Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
 
   PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
                                           uint32_t offset_or_index,
@@ -612,12 +605,8 @@
   Uint64ToLiteralMap uint64_literals_;
   // PC-relative patch info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
-  BootStringToLiteralMap boot_image_string_patches_;
   // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
-  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
-  BootTypeToLiteralMap boot_image_type_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
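Tying together the MIPS64 SIMD changes above: when the graph contains SIMD, suspend-check slow paths spill live vector registers themselves (the runtime only preserves the lower 64 bits of each FPU register), the save/restore helpers switch to quadword accesses, and GetFloatingPointSpillSlotSize doubles each slot to 16 bytes. A minimal sketch of how the spill-area sizing follows from that slot size (illustrative helper; kMips64DoublewordSize is 8 bytes in this backend):

    #include <cstddef>

    constexpr size_t kMips64DoublewordSize = 8;  // doubleword = 8 bytes, as in the backend

    // Each floating-point spill slot is 16 bytes when vector values may be live,
    // 8 bytes otherwise; the total spill area scales accordingly.
    size_t FpSpillAreaBytes(bool graph_has_simd, size_t num_fp_spills) {
      const size_t slot_size = graph_has_simd ? 2 * kMips64DoublewordSize
                                              : 1 * kMips64DoublewordSize;
      return num_fp_spills * slot_size;
    }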
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 57f7e6b..a41adca 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -468,7 +468,50 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
-  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VRegister lhs = VRegisterFrom(locations->InAt(0));
+  VRegister rhs = VRegisterFrom(locations->InAt(1));
+  VRegister dst = VRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
+      } else {
+        __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
+      } else {
+        __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
+      } else {
+        __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
+      }
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
@@ -476,7 +519,50 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
-  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VRegister lhs = VRegisterFrom(locations->InAt(0));
+  VRegister rhs = VRegisterFrom(locations->InAt(1));
+  VRegister dst = VRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
+      } else {
+        __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
+      } else {
+        __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
+      } else {
+        __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
+      }
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
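The ARM64 lowering above dispatches on the packed type and on IsUnsigned() to pick Umin/Smin (or Umax/Smax), with Fmin/Fmax for the floating-point lanes. Per lane the operation is just an element-wise minimum or maximum; a scalar reference for the 16-lane unsigned-byte case, illustrative only and not part of the patch:

    #include <algorithm>
    #include <cstdint>

    // Element-wise reference for Umin(dst.V16B(), lhs.V16B(), rhs.V16B()).
    void VecMinU8(const uint8_t* lhs, const uint8_t* rhs, uint8_t* dst) {
      for (int i = 0; i < 16; ++i) {   // 16 lanes, matching the V16B arrangement
        dst[i] = std::min(lhs[i], rhs[i]);
      }
    }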
@@ -783,6 +869,12 @@
     /*out*/ Register* scratch) {
   LocationSummary* locations = instruction->GetLocations();
   Register base = InputRegisterAt(instruction, 0);
+
+  if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
+    DCHECK(!is_string_char_at);
+    return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
+  }
+
   Location index = locations->InAt(1);
   uint32_t offset = is_string_char_at
       ? mirror::String::ValueOffset().Uint32Value()
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 5bb19c1..14782d7 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -483,7 +483,51 @@
 }
 
 void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pminub(dst, src);
+      } else {
+        __ pminsb(dst, src);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pminuw(dst, src);
+      } else {
+        __ pminsw(dst, src);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pminud(dst, src);
+      } else {
+        __ pminsd(dst, src);
+      }
+      break;
+    // Next cases are sloppy wrt 0.0 vs -0.0.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ minps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ minpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
@@ -491,7 +535,51 @@
 }
 
 void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pmaxub(dst, src);
+      } else {
+        __ pmaxsb(dst, src);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pmaxuw(dst, src);
+      } else {
+        __ pmaxsw(dst, src);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pmaxud(dst, src);
+      } else {
+        __ pmaxsd(dst, src);
+      }
+      break;
+    // Next cases are sloppy wrt 0.0 vs -0.0.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ maxps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ maxpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
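The "sloppy wrt 0.0 vs -0.0" comments above flag a genuine semantic gap: minps/minpd (and maxps/maxpd) return the second operand whenever the inputs compare equal, including +0.0 versus -0.0, and whenever either input is a NaN, so the result depends on operand order, unlike Math.min/Math.max. A per-lane model of the minps behavior (hypothetical helper, not from the patch):

    #include <cmath>

    // minps per-lane semantics: only a strict 'less than' keeps the first operand;
    // equal values (including -0.0 vs +0.0) and NaN comparisons fall through to src,
    // whereas Math.min would prefer -0.0 and propagate NaN.
    float MinpsLane(float dst, float src) {
      return (dst < src) ? dst : src;
    }

ARM64's Fmin/Fmax, by contrast, order -0.0 below +0.0 and propagate NaN, which is why only the x86 lowerings carry this comment.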
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 6d4aae8..246044e 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -353,6 +353,10 @@
   DCHECK(locations->InAt(0).Equals(locations->Out()));
   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+  DCHECK(instruction->IsRounded());
+  DCHECK(instruction->IsUnsigned());
+
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimByte:
       DCHECK_EQ(16u, instruction->GetVectorLength());
@@ -472,7 +476,51 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pminub(dst, src);
+      } else {
+        __ pminsb(dst, src);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pminuw(dst, src);
+      } else {
+        __ pminsw(dst, src);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pminud(dst, src);
+      } else {
+        __ pminsd(dst, src);
+      }
+      break;
+    // Next cases are sloppy wrt 0.0 vs -0.0.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ minps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ minpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
@@ -480,7 +528,51 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pmaxub(dst, src);
+      } else {
+        __ pmaxsb(dst, src);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pmaxuw(dst, src);
+      } else {
+        __ pmaxsw(dst, src);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pmaxud(dst, src);
+      } else {
+        __ pmaxsd(dst, src);
+      }
+      break;
+    // Next cases are sloppy wrt 0.0 vs -0.0.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ maxps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ maxpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1e867dd..4a279d8 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -384,8 +384,14 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
+    LocationSummary* locations = instruction_->GetLocations();
+    SaveLiveRegisters(codegen, locations);
+    InvokeRuntimeCallingConvention calling_convention;
+    x86_codegen->Load32BitValue(
+        calling_convention.GetRegisterAt(0),
+        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
     x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
-    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
@@ -1688,7 +1694,10 @@
 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  InvokeRuntimeCallingConvention calling_convention;
+  RegisterSet caller_saves = RegisterSet::Empty();
+  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetCustomSlowPathCallerSaves(caller_saves);
   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::Any());
   }
@@ -4624,12 +4633,7 @@
 
 void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
   DCHECK(GetCompilerOptions().IsBootImage());
-  HX86ComputeBaseMethodAddress* address = nullptr;
-  if (GetCompilerOptions().GetCompilePic()) {
-    address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
-  } else {
-    DCHECK_EQ(load_string->InputCount(), 0u);
-  }
+  HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
   string_patches_.emplace_back(address,
                                load_string->GetDexFile(),
                                load_string->GetStringIndex().index_);
@@ -4637,12 +4641,7 @@
 }
 
 void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) {
-  HX86ComputeBaseMethodAddress* address = nullptr;
-  if (GetCompilerOptions().GetCompilePic()) {
-    address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
-  } else {
-    DCHECK_EQ(load_class->InputCount(), 0u);
-  }
+  HX86ComputeBaseMethodAddress* address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
   boot_image_type_patches_.emplace_back(address,
                                         load_class->GetDexFile(),
                                         load_class->GetTypeIndex().index_);
@@ -4700,23 +4699,13 @@
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
-  if (!GetCompilerOptions().IsBootImage()) {
-    DCHECK(boot_image_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
-  } else if (GetCompilerOptions().GetCompilePic()) {
+  if (GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
   } else {
-    for (const PatchInfo<Label>& info : boot_image_type_patches_) {
-      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-      linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, &info.dex_file, info.index));
-    }
-    for (const PatchInfo<Label>& info : string_patches_) {
-      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-      linker_patches->push_back(
-          LinkerPatch::StringPatch(literal_offset, &info.dex_file, info.index));
-    }
+    DCHECK(boot_image_type_patches_.empty());
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
@@ -6045,20 +6034,14 @@
       UNREACHABLE();
     case HLoadClass::LoadKind::kReferrersClass:
       break;
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      FALLTHROUGH_INTENDED;
     case HLoadClass::LoadKind::kBssEntry:
-      DCHECK(!Runtime::Current()->UseJitCompilation());  // Note: boot image is also non-JIT.
-      break;
-    case HLoadClass::LoadKind::kBootImageAddress:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadClass::LoadKind::kBootImageAddress:
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -6149,13 +6132,6 @@
           read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
-      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
-      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
-      __ movl(out, Immediate(/* placeholder */ 0));
-      codegen_->RecordBootTypePatch(cls);
-      break;
-    }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
@@ -6243,20 +6219,14 @@
 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      break;
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      FALLTHROUGH_INTENDED;
     case HLoadString::LoadKind::kBssEntry:
-      DCHECK(!Runtime::Current()->UseJitCompilation());  // Note: boot image is also non-JIT.
-      break;
-    case HLoadString::LoadKind::kBootImageAddress:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadString::LoadKind::kBootImageAddress:
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -6308,12 +6278,6 @@
   Register out = out_loc.AsRegister<Register>();
 
   switch (load->GetLoadKind()) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
-      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
-      __ movl(out, Immediate(/* placeholder */ 0));
-      codegen_->RecordBootStringPatch(load);
-      return;  // No dex cache slow path.
-    }
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       Register method_address = locations->InAt(0).AsRegister<Register>();
@@ -7694,7 +7658,7 @@
     constant_area_start_ = assembler->CodeSize();
 
     // Populate any jump tables.
-    for (auto jump_table : fixups_to_jump_tables_) {
+    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
       jump_table->CreateJumpTable();
     }
 
@@ -7833,17 +7797,19 @@
 
 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const PatchInfo<Label>& info : jit_string_patches_) {
-    const auto& it = jit_string_roots_.find(
+    const auto it = jit_string_roots_.find(
         StringReference(&info.dex_file, dex::StringIndex(info.index)));
     DCHECK(it != jit_string_roots_.end());
-    PatchJitRootUse(code, roots_data, info, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, info, index_in_table);
   }
 
   for (const PatchInfo<Label>& info : jit_class_patches_) {
-    const auto& it = jit_class_roots_.find(
+    const auto it = jit_class_roots_.find(
         TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
     DCHECK(it != jit_class_roots_.end());
-    PatchJitRootUse(code, roots_data, info, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, info, index_in_table);
   }
 }
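[Editor's note] The deoptimization slow paths above materialize the kind in the first argument register via Load32BitValue. A minimal sketch of why the explicit cast is needed; the enumerator list is deliberately partial (only the values visible in this change) and kLast here is a placeholder, not the real definition.

    #include <cstdint>

    // Scoped enums do not convert implicitly to integers, hence the
    // static_cast before materializing the immediate.
    enum class DeoptimizationKind : uint32_t {
      kAotInlineCache,
      kJitInlineCache,
      kJitSameTarget,
      kCHA,
      kLast = kCHA  // placeholder; the real enum defines its own kLast
    };

    uint32_t AsImmediate(DeoptimizationKind kind) {
      return static_cast<uint32_t>(kind);
    }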
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index ca3a9ea..f08d642 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -633,9 +633,9 @@
 
   // PC-relative DexCache access info.
   ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // String patch locations; type depends on configuration (app .bss or boot image PIC/non-PIC).
+  // String patch locations; type depends on configuration (app .bss or boot image).
   ArenaDeque<X86PcRelativePatchInfo> string_patches_;
-  // Type patch locations for boot image; type depends on configuration (boot image PIC/non-PIC).
+  // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
   // Type patch locations for kBssEntry.
   ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
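[Editor's note] For orientation, a hedged sketch of what each entry in these patch deques conceptually carries after the non-PIC boot-image kinds were dropped. Field names and types are illustrative only, not the ART declarations.

    #include <cstdint>

    // Illustrative shape of a PC-relative patch record: the method-address
    // anchor plus the dex file / index pair that the linker later resolves
    // into a RelativeString/RelativeType/BssEntry patch.
    struct PcRelativePatchSketch {
      const void* method_address_anchor;  // HX86ComputeBaseMethodAddress* in ART
      const void* dex_file;               // const DexFile* in ART
      uint32_t index;                     // string or type index
    };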
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index f413739..ac0f37b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -397,8 +397,14 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
+    LocationSummary* locations = instruction_->GetLocations();
+    SaveLiveRegisters(codegen, locations);
+    InvokeRuntimeCallingConvention calling_convention;
+    x86_64_codegen->Load32BitValue(
+        CpuRegister(calling_convention.GetRegisterAt(0)),
+        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
     x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
-    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
@@ -1128,14 +1134,13 @@
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
-  if (!GetCompilerOptions().IsBootImage()) {
-    DCHECK(boot_image_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
-  } else {
-    // These are always PC-relative, see GetSupportedLoadClassKind()/GetSupportedLoadStringKind().
+  if (GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
+  } else {
+    DCHECK(boot_image_type_patches_.empty());
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
@@ -1710,7 +1715,10 @@
 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  InvokeRuntimeCallingConvention calling_convention;
+  RegisterSet caller_saves = RegisterSet::Empty();
+  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetCustomSlowPathCallerSaves(caller_saves);
   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::Any());
   }
@@ -5449,21 +5457,14 @@
       UNREACHABLE();
     case HLoadClass::LoadKind::kReferrersClass:
       break;
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      // We prefer the always-available RIP-relative address for the x86-64 boot image.
-      return HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      break;
-    case HLoadClass::LoadKind::kBootImageAddress:
-      break;
     case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadClass::LoadKind::kBootImageAddress:
     case HLoadClass::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -5626,21 +5627,14 @@
 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!GetCompilerOptions().GetCompilePic());
-      // We prefer the always-available RIP-relative address for the x86-64 boot image.
-      return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(GetCompilerOptions().GetCompilePic());
-      break;
-    case HLoadString::LoadKind::kBootImageAddress:
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadString::LoadKind::kBootImageAddress:
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -7046,7 +7040,7 @@
     constant_area_start_ = assembler->CodeSize();
 
     // Populate any jump tables.
-    for (auto jump_table : fixups_to_jump_tables_) {
+    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
       jump_table->CreateJumpTable();
     }
 
@@ -7140,17 +7134,19 @@
 
 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const PatchInfo<Label>& info : jit_string_patches_) {
-    const auto& it = jit_string_roots_.find(
+    const auto it = jit_string_roots_.find(
         StringReference(&info.dex_file, dex::StringIndex(info.index)));
     DCHECK(it != jit_string_roots_.end());
-    PatchJitRootUse(code, roots_data, info, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, info, index_in_table);
   }
 
   for (const PatchInfo<Label>& info : jit_class_patches_) {
-    const auto& it = jit_class_roots_.find(
+    const auto it = jit_class_roots_.find(
         TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
     DCHECK(it != jit_class_roots_.end());
-    PatchJitRootUse(code, roots_data, info, it->second);
+    uint64_t index_in_table = it->second;
+    PatchJitRootUse(code, roots_data, info, index_in_table);
   }
 }
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index c8336da..d8005cc 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -603,9 +603,9 @@
 
   // PC-relative DexCache access info.
   ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_;
-  // String patch locations; type depends on configuration (app .bss or boot image PIC).
+  // String patch locations; type depends on configuration (app .bss or boot image).
   ArenaDeque<PatchInfo<Label>> string_patches_;
-  // Type patch locations for boot image (always PIC).
+  // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
   // Type patch locations for kBssEntry.
   ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 4ba5c55..fe25b76 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -64,7 +64,7 @@
 #endif
   };
 
-  for (auto test_config : test_config_candidates) {
+  for (const CodegenTargetConfig& test_config : test_config_candidates) {
     if (CanExecute(test_config.GetInstructionSet())) {
       v.push_back(test_config);
     }
@@ -76,7 +76,7 @@
 static void TestCode(const uint16_t* data,
                      bool has_result = false,
                      int32_t expected = 0) {
-  for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+  for (const CodegenTargetConfig& target_config : GetTargetConfigs()) {
     ArenaPool pool;
     ArenaAllocator arena(&pool);
     HGraph* graph = CreateCFG(&arena, data);
@@ -89,7 +89,7 @@
 static void TestCodeLong(const uint16_t* data,
                          bool has_result,
                          int64_t expected) {
-  for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+  for (const CodegenTargetConfig& target_config : GetTargetConfigs()) {
     ArenaPool pool;
     ArenaAllocator arena(&pool);
     HGraph* graph = CreateCFG(&arena, data, Primitive::kPrimLong);
@@ -754,7 +754,28 @@
   //
   //   Assertion failed (!available->IsEmpty())
   //
-  // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable.
+  // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable,
+  // because of the following situation:
+  //
+  //   1. a temp register (IP0) is allocated as a scratch register by
+  //      the parallel move resolver to solve a cycle (swap):
+  //
+  //        [ source=DS0 destination=DS257 type=PrimDouble instruction=null ]
+  //        [ source=DS257 destination=DS0 type=PrimDouble instruction=null ]
+  //
+  //   2. within CodeGeneratorARM64::MoveLocation, another temp
+  //      register (IP1) is allocated to generate the swap between two
+  //      double stack slots;
+  //
+  //   3. VIXL requires a third temp register to emit the `Ldr` or
+  //      `Str` operation from CodeGeneratorARM64::MoveLocation (as
+  //      one of the stack slots' offsets cannot be encoded as an
+  //      immediate), but the pool of (core) temp registers is now
+  //      empty.
+  //
+  // The solution used so far is to use a floating-point temp register
+  // (D31) in step #2, so that IP1 is available for step #3.
+
   HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
   move->AddMove(Location::DoubleStackSlot(0),
                 Location::DoubleStackSlot(257),
@@ -807,7 +828,6 @@
   InternalCodeAllocator code_allocator;
   codegen.Finalize(&code_allocator);
 }
-
 #endif
 
 #ifdef ART_ENABLE_CODEGEN_mips
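[Editor's note] The scenario described in the comment above reduces to a memory-to-memory double swap that needs scratch storage on top of the resolver's own temp. A minimal plain-C++ sketch under that assumption (not the ARM64 codegen):

    #include <cstddef>

    // Swapping two double stack slots needs one scratch value (the resolver's
    // IP0); on ARM64 the copy itself needs a second temp, and VIXL may need a
    // third when a slot offset cannot be encoded as an immediate. Using a
    // floating-point scratch (D31) for the copy keeps IP1 free for VIXL.
    void SwapDoubleStackSlots(double* frame, size_t a, size_t b) {
      double scratch = frame[a];
      frame[a] = frame[b];
      frame[b] = scratch;
    }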
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index 31cd204..00a16fe 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -243,7 +243,7 @@
   GraphChecker graph_checker(graph);
   graph_checker.Run();
   if (!graph_checker.IsValid()) {
-    for (const auto& error : graph_checker.GetErrors()) {
+    for (const std::string& error : graph_checker.GetErrors()) {
       std::cout << error << std::endl;
     }
   }
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 6a14045..aea901d 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -338,19 +338,21 @@
 
   // Ensure the inputs of `instruction` are defined in a block of the graph.
   for (HInstruction* input : instruction->GetInputs()) {
-    const HInstructionList& list = input->IsPhi()
-        ? input->GetBlock()->GetPhis()
-        : input->GetBlock()->GetInstructions();
     if (input->GetBlock() == nullptr) {
       AddError(StringPrintf("Input %d of instruction %d is not in any "
                             "basic block of the control-flow graph.",
                             input->GetId(),
                             instruction->GetId()));
-    } else if (!list.Contains(input)) {
-      AddError(StringPrintf("Input %d of instruction %d is not defined "
-                            "in a basic block of the control-flow graph.",
-                            input->GetId(),
-                            instruction->GetId()));
+    } else {
+      const HInstructionList& list = input->IsPhi()
+          ? input->GetBlock()->GetPhis()
+          : input->GetBlock()->GetInstructions();
+      if (!list.Contains(input)) {
+        AddError(StringPrintf("Input %d of instruction %d is not defined "
+                              "in a basic block of the control-flow graph.",
+                              input->GetId(),
+                              instruction->GetId()));
+      }
     }
   }
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index e5d94c3..02816cf 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -514,6 +514,14 @@
     StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha;
   }
 
+  void VisitVecMin(HVecMin* min) OVERRIDE {
+    StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha;
+  }
+
+  void VisitVecMax(HVecMax* max) OVERRIDE {
+    StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha;
+  }
+
   void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetOpKind();
   }
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index c93bc21..8ea312d 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -516,13 +516,13 @@
 bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
   DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));
 
-  for (auto dominated_block : block->GetDominatedBlocks()) {
+  for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
     if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
       return true;
     }
   }
 
-  for (auto successor : block->GetSuccessors()) {
+  for (const HBasicBlock* successor : block->GetSuccessors()) {
     if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
       return true;
     }
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 3b681c1..8674e72 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -783,7 +783,7 @@
   HInstruction* compare = new (graph_->GetArena()) HNotEqual(
       deopt_flag, graph_->GetIntConstant(0, dex_pc));
   HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(
-      graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc);
+      graph_->GetArena(), compare, DeoptimizationKind::kCHA, dex_pc);
 
   if (cursor != nullptr) {
     bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
@@ -860,7 +860,9 @@
         graph_->GetArena(),
         compare,
         receiver,
-        HDeoptimize::Kind::kInline,
+        Runtime::Current()->IsAotCompiler()
+            ? DeoptimizationKind::kAotInlineCache
+            : DeoptimizationKind::kJitInlineCache,
         invoke_instruction->GetDexPc());
     bb_cursor->InsertInstructionAfter(deoptimize, compare);
     deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
@@ -1147,7 +1149,7 @@
         graph_->GetArena(),
         compare,
         receiver,
-        HDeoptimize::Kind::kInline,
+        DeoptimizationKind::kJitSameTarget,
         invoke_instruction->GetDexPc());
     bb_cursor->InsertInstructionAfter(deoptimize, compare);
     deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2dcc12e..2cedde9 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -257,7 +257,8 @@
 
   if (shift_amount->IsConstant()) {
     int64_t cst = Int64FromConstant(shift_amount->AsConstant());
-    if ((cst & implicit_mask) == 0) {
+    int64_t masked_cst = cst & implicit_mask;
+    if (masked_cst == 0) {
       // Replace code looking like
       //    SHL dst, value, 0
       // with
@@ -266,6 +267,17 @@
       instruction->GetBlock()->RemoveInstruction(instruction);
       RecordSimplification();
       return;
+    } else if (masked_cst != cst) {
+      // Replace code looking like
+      //    SHL dst, value, cst
+      // where cst exceeds maximum distance with the equivalent
+      //    SHL dst, value, cst & implicit_mask
+      // (as defined by shift semantics). This ensures other
+      // optimizations do not need to special-case such situations.
+      DCHECK_EQ(shift_amount->GetType(), Primitive::kPrimInt);
+      instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index */ 1);
+      RecordSimplification();
+      return;
     }
   }
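[Editor's note] A worked example of the new masking rule, assuming ART's Java shift semantics where only the low 5 (int) or 6 (long) bits of the distance are significant:

    #include <cstdint>

    // SHL dst, value, 33 on an int is equivalent to SHL dst, value, 1 because
    // 33 & 31 == 1; the simplifier now folds the constant up front so later
    // passes never see an out-of-range distance. The unsigned round-trip just
    // keeps the C++ sketch well defined for negative inputs.
    int32_t ShiftIntBy33(int32_t value) {
      return static_cast<int32_t>(static_cast<uint32_t>(value) << (33 & 31));
    }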
 
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index f16e372..311be1f 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -216,5 +216,18 @@
   }
 }
 
+void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) {
+  if (!instruction->IsStringCharAt() &&
+      TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) {
+  if (TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
+    RecordSimplification();
+  }
+}
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index eec4e49..8596f6a 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -75,6 +75,8 @@
   void VisitUShr(HUShr* instruction) OVERRIDE;
   void VisitXor(HXor* instruction) OVERRIDE;
   void VisitVecMul(HVecMul* instruction) OVERRIDE;
+  void VisitVecLoad(HVecLoad* instruction) OVERRIDE;
+  void VisitVecStore(HVecStore* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index c39e5f4..e5a8499 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -16,6 +16,8 @@
 
 #include "instruction_simplifier_shared.h"
 
+#include "mirror/array-inl.h"
+
 namespace art {
 
 namespace {
@@ -346,4 +348,59 @@
   return false;
 }
 
+bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) {
+  if (index->IsConstant()) {
+    // If the index is constant, LDR/STR can often do the whole address calculation themselves.
+    // TODO: Treat the case with a non-embeddable constant.
+    return false;
+  }
+
+  HGraph* graph = access->GetBlock()->GetGraph();
+  ArenaAllocator* arena = graph->GetArena();
+  Primitive::Type packed_type = access->GetPackedType();
+  uint32_t data_offset = mirror::Array::DataOffset(
+      Primitive::ComponentSize(packed_type)).Uint32Value();
+  size_t component_shift = Primitive::ComponentSizeShift(packed_type);
+
+  bool is_extracting_beneficial = false;
+  // Extracting the intermediate address is only beneficial if the index has at least 2 such users.
+  for (const HUseListNode<HInstruction*>& use : index->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if (user->IsVecMemoryOperation() && user != access) {
+      HVecMemoryOperation* another_access = user->AsVecMemoryOperation();
+      Primitive::Type another_packed_type = another_access->GetPackedType();
+      uint32_t another_data_offset = mirror::Array::DataOffset(
+          Primitive::ComponentSize(another_packed_type)).Uint32Value();
+      size_t another_component_shift = Primitive::ComponentSizeShift(another_packed_type);
+      if (another_data_offset == data_offset && another_component_shift == component_shift) {
+        is_extracting_beneficial = true;
+        break;
+      }
+    } else if (user->IsIntermediateAddressIndex()) {
+      HIntermediateAddressIndex* another_access = user->AsIntermediateAddressIndex();
+      uint32_t another_data_offset = another_access->GetOffset()->AsIntConstant()->GetValue();
+      size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue();
+      if (another_data_offset == data_offset && another_component_shift == component_shift) {
+        is_extracting_beneficial = true;
+        break;
+      }
+    }
+  }
+
+  if (!is_extracting_beneficial) {
+    return false;
+  }
+
+  // Proceed to extract the index + data_offset address computation.
+  HIntConstant* offset = graph->GetIntConstant(data_offset);
+  HIntConstant* shift = graph->GetIntConstant(component_shift);
+  HIntermediateAddressIndex* address =
+      new (arena) HIntermediateAddressIndex(index, offset, shift, kNoDexPc);
+
+  access->GetBlock()->InsertInstructionBefore(address, access);
+  access->ReplaceInput(address, 1);
+
+  return true;
+}
+
 }  // namespace art
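[Editor's note] A sketch (assumption, not ART code) of the address arithmetic that HIntermediateAddressIndex factors out. Two vector accesses with the same element size can share the hoisted part and let each load/store add only the array base:

    #include <cstddef>
    #include <cstdint>

    // data_offset + (index << component_shift) is the part extracted into the
    // HIntermediateAddressIndex node; the per-access part is the final add of
    // the array base.
    uintptr_t VecElementAddress(uintptr_t array_base,
                                uint32_t data_offset,
                                size_t index,
                                size_t component_shift) {
      uintptr_t intermediate =
          data_offset + (static_cast<uintptr_t>(index) << component_shift);
      return array_base + intermediate;
    }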
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 2ea103a..371619f 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -59,6 +59,7 @@
                                   size_t data_offset);
 
 bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa);
+bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index);
 
 }  // namespace art
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 750f9cc..1df884e 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1010,17 +1010,14 @@
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
       // Need to make sure the reference stored in the field is a to-space
       // one before attempting the CAS or the CAS could fail incorrectly.
-      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+      codegen->UpdateReferenceFieldWithBakerReadBarrier(
           invoke,
           out_loc,  // Unused, used only as a "temporary" within the read barrier.
           base,
-          /* offset */ 0u,
-          /* index */ offset_loc,
-          ScaleFactor::TIMES_1,
+          /* field_offset */ offset_loc,
           tmp_ptr_loc,
           /* needs_null_check */ false,
-          /* always_update_field */ true,
-          &tmp);
+          tmp);
     }
   }
 
@@ -1648,6 +1645,8 @@
     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
     // temporary register from the register allocator.
     locations->AddTemp(Location::RequiresRegister());
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen_);
+    arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
   }
 }
 
@@ -2754,6 +2753,27 @@
   }
 }
 
+void IntrinsicLocationsBuilderARM::VisitThreadInterrupted(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitThreadInterrupted(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+  int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
+  __ LoadFromOffset(kLoadWord, out, TR, offset);
+  Label done;
+  __ CompareAndBranchIfZero(out, &done);
+  __ dmb(ISH);
+  __ LoadImmediate(IP, 0);
+  __ StoreToOffset(kStoreWord, IP, TR, offset);
+  __ dmb(ISH);
+  __ Bind(&done);
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 4d36015..b511c5a 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1154,17 +1154,14 @@
       Register temp = WRegisterFrom(locations->GetTemp(0));
       // Need to make sure the reference stored in the field is a to-space
       // one before attempting the CAS or the CAS could fail incorrectly.
-      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+      codegen->UpdateReferenceFieldWithBakerReadBarrier(
           invoke,
           out_loc,  // Unused, used only as a "temporary" within the read barrier.
           base,
-          /* offset */ 0u,
-          /* index */ offset_loc,
-          /* scale_factor */ 0u,
+          /* field_offset */ offset_loc,
           temp,
           /* needs_null_check */ false,
-          /* use_load_acquire */ false,
-          /* always_update_field */ true);
+          /* use_load_acquire */ false);
     }
   }
 
@@ -3036,6 +3033,28 @@
   }
 }
 
+void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
+  MacroAssembler* masm = GetVIXLAssembler();
+  Register out = RegisterFrom(invoke->GetLocations()->Out(), Primitive::kPrimInt);
+  UseScratchRegisterScope temps(masm);
+  Register temp = temps.AcquireX();
+
+  __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
+  __ Ldar(out.W(), MemOperand(temp));
+
+  vixl::aarch64::Label done;
+  __ Cbz(out.W(), &done);
+  __ Stlr(wzr, MemOperand(temp));
+  __ Bind(&done);
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index fd8a37a..2d9781a 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -1347,17 +1347,14 @@
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
       // Need to make sure the reference stored in the field is a to-space
       // one before attempting the CAS or the CAS could fail incorrectly.
-      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+      codegen->UpdateReferenceFieldWithBakerReadBarrier(
           invoke,
           out_loc,  // Unused, used only as a "temporary" within the read barrier.
           base,
-          /* offset */ 0u,
-          /* index */ offset_loc,
-          ScaleFactor::TIMES_1,
+          /* field_offset */ offset_loc,
           tmp_ptr_loc,
           /* needs_null_check */ false,
-          /* always_update_field */ true,
-          &tmp);
+          tmp);
     }
   }
 
@@ -2026,6 +2023,8 @@
     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
     // temporary register from the register allocator.
     locations->AddTemp(Location::RequiresRegister());
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
+    arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
   }
 }
 
@@ -3158,6 +3157,29 @@
   }
 }
 
+void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
+  int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
+  __ Ldr(out, MemOperand(tr, offset));
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  vixl32::Register temp = temps.Acquire();
+  vixl32::Label done;
+  __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+  __ Dmb(vixl32::ISH);
+  __ Mov(temp, 0);
+  assembler->StoreToOffset(kStoreWord, temp, tr, offset);
+  __ Dmb(vixl32::ISH);
+  __ Bind(&done);
+}
+
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
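[Editor's note] For reference, a hedged C++ approximation of the behavior the ThreadInterrupted intrinsics above inline. The real flag lives in the Thread object, and the ordering is enforced with dmb / ldar+stlr (or an x86 fence) rather than these exact std::atomic orderings.

    #include <atomic>
    #include <cstdint>

    // Thread.interrupted(): return the flag and clear it only when it was
    // set, so the common unset case stays a single load.
    bool ThreadInterruptedSketch(std::atomic<int32_t>* interrupted) {
      int32_t value = interrupted->load(std::memory_order_acquire);
      if (value != 0) {
        interrupted->store(0, std::memory_order_release);
      }
      return value != 0;
    }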
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index abf5b12..4731da1 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2555,101 +2555,110 @@
   Register out = locations->Out().AsRegister<Register>();
 
   MipsLabel done;
-  MipsLabel finite;
-  MipsLabel add;
 
-  // if (in.isNaN) {
-  //   return 0;
-  // }
-  //
-  // out = floor.w.s(in);
-  //
-  // /*
-  //  * This "if" statement is only needed for the pre-R6 version of floor.w.s
-  //  * which outputs Integer.MAX_VALUE for negative numbers with magnitudes
-  //  * too large to fit in a 32-bit integer.
-  //  *
-  //  * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
-  //  * numbers which are too large to be represented in a 32-bit signed
-  //  * integer will be processed by floor.w.s to output Integer.MIN_VALUE,
-  //  * and will no longer be processed by this "if" statement.
-  //  */
-  // if (out == Integer.MAX_VALUE) {
-  //   TMP = (in < 0.0f) ? 1 : 0;
-  //   /*
-  //    * If TMP is 1, then adding it to out will wrap its value from
-  //    * Integer.MAX_VALUE to Integer.MIN_VALUE.
-  //    */
-  //   return out += TMP;
-  // }
-  //
-  // /*
-  //  * For negative values not handled by the previous "if" statement the
-  //  * test here will correctly set the value of TMP.
-  //  */
-  // TMP = ((in - out) >= 0.5f) ? 1 : 0;
-  // return out += TMP;
-
-  // Test for NaN.
   if (IsR6()) {
-    __ CmpUnS(FTMP, in, in);
-  } else {
-    __ CunS(in, in);
-  }
+    // out = floor(in);
+    //
+    // if (out != MAX_VALUE && out != MIN_VALUE) {
+    //     TMP = ((in - out) >= 0.5) ? 1 : 0;
+    //     return out += TMP;
+    // }
+    // return out;
 
-  // Return zero for NaN.
-  __ Move(out, ZERO);
-  if (IsR6()) {
-    __ Bc1nez(FTMP, &done);
-  } else {
-    __ Bc1t(&done);
-  }
+    // out = floor(in);
+    __ FloorWS(FTMP, in);
+    __ Mfc1(out, FTMP);
 
-  // out = floor(in);
-  __ FloorWS(FTMP, in);
-  __ Mfc1(out, FTMP);
+    // if (out != MAX_VALUE && out != MIN_VALUE)
+    __ Addiu(TMP, out, 1);
+    __ Aui(TMP, TMP, 0x8000);  // TMP = out + 0x8000 0001
+                               // or    out - 0x7FFF FFFF.
+                               // IOW, TMP = 1 if out = Int.MIN_VALUE
+                               // or   TMP = 0 if out = Int.MAX_VALUE.
+    __ Srl(TMP, TMP, 1);       // TMP = 0 if out = Int.MIN_VALUE
+                               //         or out = Int.MAX_VALUE.
+    __ Beqz(TMP, &done);
 
-  if (!IsR6()) {
-    __ LoadConst32(TMP, -1);
-  }
+    // TMP = (0.5f <= (in - out)) ? -1 : 0;
+    __ Cvtsw(FTMP, FTMP);      // Convert output of floor.w.s back to "float".
+    __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+    __ SubS(FTMP, in, FTMP);
+    __ Mtc1(AT, half);
 
-  // TMP = (out = java.lang.Integer.MAX_VALUE) ? -1 : 0;
-  __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
-  __ Bne(AT, out, &finite);
-
-  __ Mtc1(ZERO, FTMP);
-  if (IsR6()) {
-    __ CmpLtS(FTMP, in, FTMP);
-    __ Mfc1(TMP, FTMP);
-  } else {
-    __ ColtS(in, FTMP);
-  }
-
-  __ B(&add);
-
-  __ Bind(&finite);
-
-  // TMP = (0.5f <= (in - out)) ? -1 : 0;
-  __ Cvtsw(FTMP, FTMP);  // Convert output of floor.w.s back to "float".
-  __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
-  __ SubS(FTMP, in, FTMP);
-  __ Mtc1(AT, half);
-  if (IsR6()) {
     __ CmpLeS(FTMP, half, FTMP);
     __ Mfc1(TMP, FTMP);
+
+    // Return out -= TMP.
+    __ Subu(out, out, TMP);
   } else {
+    // if (in.isNaN) {
+    //   return 0;
+    // }
+    //
+    // out = floor.w.s(in);
+    //
+    // /*
+    //  * This "if" statement is only needed for the pre-R6 version of floor.w.s
+    //  * which outputs Integer.MAX_VALUE for negative numbers with magnitudes
+    //  * too large to fit in a 32-bit integer.
+    //  */
+    // if (out == Integer.MAX_VALUE) {
+    //   TMP = (in < 0.0f) ? 1 : 0;
+    //   /*
+    //    * If TMP is 1, then adding it to out will wrap its value from
+    //    * Integer.MAX_VALUE to Integer.MIN_VALUE.
+    //    */
+    //   return out += TMP;
+    // }
+    //
+    // /*
+    //  * For negative values not handled by the previous "if" statement the
+    //  * test here will correctly set the value of TMP.
+    //  */
+    // TMP = ((in - out) >= 0.5f) ? 1 : 0;
+    // return out += TMP;
+
+    MipsLabel finite;
+    MipsLabel add;
+
+    // Test for NaN.
+    __ CunS(in, in);
+
+    // Return zero for NaN.
+    __ Move(out, ZERO);
+    __ Bc1t(&done);
+
+    // out = floor(in);
+    __ FloorWS(FTMP, in);
+    __ Mfc1(out, FTMP);
+
+    __ LoadConst32(TMP, -1);
+
+    // TMP = (out == java.lang.Integer.MAX_VALUE) ? -1 : 0;
+    __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+    __ Bne(AT, out, &finite);
+
+    __ Mtc1(ZERO, FTMP);
+    __ ColtS(in, FTMP);
+
+    __ B(&add);
+
+    __ Bind(&finite);
+
+    // TMP = (0.5f <= (in - out)) ? -1 : 0;
+    __ Cvtsw(FTMP, FTMP);  // Convert output of floor.w.s back to "float".
+    __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+    __ SubS(FTMP, in, FTMP);
+    __ Mtc1(AT, half);
     __ ColeS(half, FTMP);
-  }
 
-  __ Bind(&add);
+    __ Bind(&add);
 
-  if (!IsR6()) {
     __ Movf(TMP, ZERO);
+
+    // Return out -= TMP.
+    __ Subu(out, out, TMP);
   }
-
-  // Return out -= TMP.
-  __ Subu(out, out, TMP);
-
   __ Bind(&done);
 }
 
@@ -3248,6 +3257,8 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject)
 
+UNIMPLEMENTED_INTRINSIC(MIPS, ThreadInterrupted)
+
 UNREACHABLE_INTRINSICS(MIPS)
 
 #undef __
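[Editor's note] A scalar sketch of the R6 rounding sequence above, assuming floor.w.s yields the saturated MIN/MAX for out-of-range inputs (and 0 for NaN under FCSR.NAN2008=1), so only the in-range case needs the 0.5 adjustment. Illustration only: the C++ cast stands in for floor.w.s and does not model its saturation.

    #include <cmath>
    #include <cstdint>
    #include <limits>

    int32_t RoundFloatR6Sketch(float in) {
      int32_t out = static_cast<int32_t>(std::floor(in));  // stands in for floor.w.s
      if (out == std::numeric_limits<int32_t>::max() ||
          out == std::numeric_limits<int32_t>::min()) {
        return out;  // the saturated result is already correct
      }
      if (in - static_cast<float>(out) >= 0.5f) {
        ++out;  // the assembly computes TMP = -1 and does out -= TMP
      }
      return out;
    }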
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 9dce59b..00afbcd 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -890,54 +890,14 @@
   DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble);
 
   Mips64Label done;
-  Mips64Label finite;
-  Mips64Label add;
 
-  // if (in.isNaN) {
-  //   return 0;
-  // }
-  //
   // out = floor(in);
   //
-  // /*
-  //  * TODO: Amend this code when emulator FCSR.NAN2008=1 bug is fixed.
-  //  *
-  //  * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
-  //  * numbers which are too large to be represented in a 32-/64-bit
-  //  * signed integer will be processed by floor.X.Y to output
-  //  * Integer.MIN_VALUE/Long.MIN_VALUE, and will no longer be
-  //  * processed by this "if" statement.
-  //  *
-  //  * However, this bug in the 64-bit MIPS emulator causes the
-  //  * behavior of floor.X.Y to be the same as pre-R6 implementations
-  //  * of MIPS64.  When that bug is fixed this logic should be amended.
-  //  */
-  // if (out == MAX_VALUE) {
-  //   TMP = (in < 0.0) ? 1 : 0;
-  //   /*
-  //    * If TMP is 1, then adding it to out will wrap its value from
-  //    * MAX_VALUE to MIN_VALUE.
-  //    */
+  // if (out != MAX_VALUE && out != MIN_VALUE) {
+  //   TMP = ((in - out) >= 0.5) ? 1 : 0;
   //   return out += TMP;
   // }
-  //
-  // /*
-  //  * For negative values not handled by the previous "if" statement the
-  //  * test here will correctly set the value of TMP.
-  //  */
-  // TMP = ((in - out) >= 0.5) ? 1 : 0;
-  // return out += TMP;
-
-  // Test for NaN.
-  if (type == Primitive::kPrimDouble) {
-    __ CmpUnD(FTMP, in, in);
-  } else {
-    __ CmpUnS(FTMP, in, in);
-  }
-
-  // Return zero for NaN.
-  __ Move(out, ZERO);
-  __ Bc1nez(FTMP, &done);
+  // return out;
 
   // out = floor(in);
   if (type == Primitive::kPrimDouble) {
@@ -948,27 +908,26 @@
     __ Mfc1(out, FTMP);
   }
 
-  // TMP = (out = java.lang.Integer.MAX_VALUE) ? 1 : 0;
+  // if (out != MAX_VALUE && out != MIN_VALUE)
   if (type == Primitive::kPrimDouble) {
-    __ LoadConst64(AT, std::numeric_limits<int64_t>::max());
+    __ Daddiu(TMP, out, 1);
+    __ Dati(TMP, 0x8000);  // TMP = out + 0x8000 0000 0000 0001
+                           // or    out - 0x7FFF FFFF FFFF FFFF.
+                           // IOW, TMP = 1 if out = Long.MIN_VALUE
+                           // or   TMP = 0 if out = Long.MAX_VALUE.
+    __ Dsrl(TMP, TMP, 1);  // TMP = 0 if out = Long.MIN_VALUE
+                           //         or out = Long.MAX_VALUE.
+    __ Beqzc(TMP, &done);
   } else {
-    __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+    __ Addiu(TMP, out, 1);
+    __ Aui(TMP, TMP, 0x8000);  // TMP = out + 0x8000 0001
+                               // or    out - 0x7FFF FFFF.
+                               // IOW, TMP = 1 if out = Int.MIN_VALUE
+                               // or   TMP = 0 if out = Int.MAX_VALUE.
+    __ Srl(TMP, TMP, 1);       // TMP = 0 if out = Int.MIN_VALUE
+                               //         or out = Int.MAX_VALUE.
+    __ Beqzc(TMP, &done);
   }
-  __ Bnec(AT, out, &finite);
-
-  if (type == Primitive::kPrimDouble) {
-    __ Dmtc1(ZERO, FTMP);
-    __ CmpLtD(FTMP, in, FTMP);
-    __ Dmfc1(AT, FTMP);
-  } else {
-    __ Mtc1(ZERO, FTMP);
-    __ CmpLtS(FTMP, in, FTMP);
-    __ Mfc1(AT, FTMP);
-  }
-
-  __ Bc(&add);
-
-  __ Bind(&finite);
 
   // TMP = (0.5 <= (in - out)) ? -1 : 0;
   if (type == Primitive::kPrimDouble) {
@@ -977,23 +936,21 @@
     __ SubD(FTMP, in, FTMP);
     __ Dmtc1(AT, half);
     __ CmpLeD(FTMP, half, FTMP);
-    __ Dmfc1(AT, FTMP);
+    __ Dmfc1(TMP, FTMP);
   } else {
     __ Cvtsw(FTMP, FTMP);  // Convert output of floor.w.s back to "float".
     __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
     __ SubS(FTMP, in, FTMP);
     __ Mtc1(AT, half);
     __ CmpLeS(FTMP, half, FTMP);
-    __ Mfc1(AT, FTMP);
+    __ Mfc1(TMP, FTMP);
   }
 
-  __ Bind(&add);
-
   // Return out -= TMP.
   if (type == Primitive::kPrimDouble) {
-    __ Dsubu(out, out, AT);
+    __ Dsubu(out, out, TMP);
   } else {
-    __ Subu(out, out, AT);
+    __ Subu(out, out, TMP);
   }
 
   __ Bind(&done);
@@ -2664,6 +2621,8 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject)
 
+UNIMPLEMENTED_INTRINSIC(MIPS64, ThreadInterrupted)
+
 UNREACHABLE_INTRINSICS(MIPS64)
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 8e45747..57adcc3 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3407,6 +3407,27 @@
   }
 }
 
+void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
+  X86Assembler* assembler = GetAssembler();
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+  Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
+  NearLabel done;
+  __ fs()->movl(out, address);
+  __ testl(out, out);
+  __ j(kEqual, &done);
+  __ fs()->movl(address, Immediate(0));
+  codegen_->MemoryFence();
+  __ Bind(&done);
+}
+
+
 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 8ed2ad8..773383e 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -759,7 +759,7 @@
   // We have to ensure that the native code doesn't clobber the XMM registers which are
   // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
   // saved in the prologue and properly restored.
-  for (auto fp_reg : non_volatile_xmm_regs) {
+  for (FloatRegister fp_reg : non_volatile_xmm_regs) {
     locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
   }
 }
@@ -898,7 +898,7 @@
   // We have to ensure that the native code doesn't clobber the XMM registers which are
   // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
   // saved in the prologue and properly restored.
-  for (auto fp_reg : non_volatile_xmm_regs) {
+  for (FloatRegister fp_reg : non_volatile_xmm_regs) {
     locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
   }
 }
@@ -3085,6 +3085,27 @@
   }
 }
 
+void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
+  X86_64Assembler* assembler = GetAssembler();
+  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
+  Address address = Address::Absolute(
+      Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip */ true);
+  NearLabel done;
+  __ gs()->movl(out, address);
+  __ testl(out, out);
+  __ j(kEqual, &done);
+  __ gs()->movl(address, Immediate(0));
+  codegen_->MemoryFence();
+  __ Bind(&done);
+}
+
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index bbc55dd..4067aa3 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -71,7 +71,7 @@
   // extension when represented in the *width* of the given narrower data type
   // (the fact that char normally zero extends does not matter here).
   int64_t value = 0;
-  if (IsInt64AndGet(instruction, &value)) {
+  if (IsInt64AndGet(instruction, /*out*/ &value)) {
     switch (type) {
       case Primitive::kPrimByte:
         if (std::numeric_limits<int8_t>::min() <= value &&
@@ -119,7 +119,7 @@
   // extension when represented in the *width* of the given narrower data type
   // (the fact that byte/short normally sign extend does not matter here).
   int64_t value = 0;
-  if (IsInt64AndGet(instruction, &value)) {
+  if (IsInt64AndGet(instruction, /*out*/ &value)) {
     switch (type) {
       case Primitive::kPrimByte:
         if (std::numeric_limits<uint8_t>::min() <= value &&
@@ -833,19 +833,14 @@
     // TODO: accept symbolic, albeit loop invariant shift factors.
     HInstruction* opa = instruction->InputAt(0);
     HInstruction* opb = instruction->InputAt(1);
-    int64_t value = 0;
-    if (VectorizeUse(node, opa, generate_code, type, restrictions) && IsInt64AndGet(opb, &value)) {
-      // Make sure shift distance only looks at lower bits, as defined for sequential shifts.
-      int64_t mask = (instruction->GetType() == Primitive::kPrimLong)
-          ? kMaxLongShiftDistance
-          : kMaxIntShiftDistance;
-      int64_t distance = value & mask;
+    int64_t distance = 0;
+    if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+        IsInt64AndGet(opb, /*out*/ &distance)) {
       // Restrict shift distance to packed data type width.
       int64_t max_distance = Primitive::ComponentSize(type) * 8;
       if (0 <= distance && distance < max_distance) {
         if (generate_code) {
-          HInstruction* s = graph_->GetIntConstant(distance);
-          GenerateVecOp(instruction, vector_map_->Get(opa), s, type);
+          GenerateVecOp(instruction, vector_map_->Get(opa), opb, type);
         }
         return true;
       }
@@ -874,6 +869,32 @@
         }
         return false;
       }
+      case Intrinsics::kMathMinIntInt:
+      case Intrinsics::kMathMinLongLong:
+      case Intrinsics::kMathMinFloatFloat:
+      case Intrinsics::kMathMinDoubleDouble:
+      case Intrinsics::kMathMaxIntInt:
+      case Intrinsics::kMathMaxLongLong:
+      case Intrinsics::kMathMaxFloatFloat:
+      case Intrinsics::kMathMaxDoubleDouble: {
+        // Deal with vector restrictions.
+        if (HasVectorRestrictions(restrictions, kNoMinMax) ||
+            HasVectorRestrictions(restrictions, kNoHiBits)) {
+          // TODO: we can do better for some hibits cases.
+          return false;
+        }
+        // Accept MIN/MAX(x, y) for vectorizable operands.
+        HInstruction* opa = instruction->InputAt(0);
+        HInstruction* opb = instruction->InputAt(1);
+        if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+            VectorizeUse(node, opb, generate_code, type, restrictions)) {
+          if (generate_code) {
+            GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
+          }
+          return true;
+        }
+        return false;
+      }
       default:
         return false;
     }  // switch
@@ -903,7 +924,7 @@
           *restrictions |= kNoDiv;
           return TrySetVectorLength(4);
         case Primitive::kPrimLong:
-          *restrictions |= kNoDiv | kNoMul;
+          *restrictions |= kNoDiv | kNoMul | kNoMinMax;
           return TrySetVectorLength(2);
         case Primitive::kPrimFloat:
           return TrySetVectorLength(4);
@@ -929,11 +950,13 @@
             *restrictions |= kNoDiv;
             return TrySetVectorLength(4);
           case Primitive::kPrimLong:
-            *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs;
+            *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
             return TrySetVectorLength(2);
           case Primitive::kPrimFloat:
+            *restrictions |= kNoMinMax;  // -0.0 vs +0.0
             return TrySetVectorLength(4);
           case Primitive::kPrimDouble:
+            *restrictions |= kNoMinMax;  // -0.0 vs +0.0
             return TrySetVectorLength(2);
           default:
             break;
@@ -1113,6 +1136,24 @@
             DCHECK(opb == nullptr);
             vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_);
             break;
+          case Intrinsics::kMathMinIntInt:
+          case Intrinsics::kMathMinLongLong:
+          case Intrinsics::kMathMinFloatFloat:
+          case Intrinsics::kMathMinDoubleDouble: {
+            bool is_unsigned = false;  // TODO: detect unsigned versions
+            vector = new (global_allocator_)
+                HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
+            break;
+          }
+          case Intrinsics::kMathMaxIntInt:
+          case Intrinsics::kMathMaxLongLong:
+          case Intrinsics::kMathMaxFloatFloat:
+          case Intrinsics::kMathMaxDoubleDouble: {
+            bool is_unsigned = false;  // TODO: detect unsigned versions
+            vector = new (global_allocator_)
+                HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
+            break;
+          }
           default:
             LOG(FATAL) << "Unsupported SIMD intrinsic";
             UNREACHABLE();
@@ -1177,14 +1218,14 @@
   int64_t value = 0;
   if ((instruction->IsShr() ||
        instruction->IsUShr()) &&
-      IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) {
+      IsInt64AndGet(instruction->InputAt(1), /*out*/ &value) && value == 1) {
     //
     // TODO: make following code less sensitive to associativity and commutativity differences.
     //
     HInstruction* x = instruction->InputAt(0);
     // Test for an optional rounding part (x + 1) >> 1.
     bool is_rounded = false;
-    if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) {
+    if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), /*out*/ &value) && value == 1) {
       x = x->InputAt(0);
       is_rounded = true;
     }
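[Editor's note] A sketch of the scalar loop shape that the Min/Max recognition above can now map onto HVecMin/HVecMax, assuming the packed type carries no kNoMinMax restriction (float/double stay scalar on x86 because packed min/max does not distinguish -0.0 from +0.0 the way Math.min/Math.max must):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>

    // Element-wise Math.min(int, int) analogue; this is the pattern the loop
    // optimizer turns into a packed min when vectorization succeeds.
    void ElementwiseMin(const int32_t* a, const int32_t* b, int32_t* out, size_t n) {
      for (size_t i = 0; i < n; ++i) {
        out[i] = std::min(a[i], b[i]);
      }
    }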
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 4a7da86..6d5978d 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -71,6 +71,7 @@
     kNoSignedHAdd    = 32,   // no signed halving add
     kNoUnroundedHAdd = 64,   // no unrounded halving add
     kNoAbs           = 128,  // no absolute value
+    kNoMinMax        = 256,  // no min/max
   };
 
   /*
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 1460b26..833f32b 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1436,18 +1436,6 @@
   }
 }
 
-std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) {
-  switch (rhs) {
-    case HDeoptimize::Kind::kBCE:
-      return os << "bce";
-    case HDeoptimize::Kind::kInline:
-      return os << "inline";
-    default:
-      LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs);
-      UNREACHABLE();
-  }
-}
-
 bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
   return this == instruction->GetPreviousDisregardingMoves();
 }
@@ -2716,8 +2704,6 @@
   switch (rhs) {
     case HLoadClass::LoadKind::kReferrersClass:
       return os << "ReferrersClass";
-    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      return os << "BootImageLinkTimeAddress";
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
       return os << "BootImageLinkTimePcRelative";
     case HLoadClass::LoadKind::kBootImageAddress:
@@ -2770,8 +2756,6 @@
 
 std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
   switch (rhs) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      return os << "BootImageLinkTimeAddress";
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
       return os << "BootImageLinkTimePcRelative";
     case HLoadString::LoadKind::kBootImageAddress:
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 72521fd..72774da 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -30,6 +30,7 @@
 #include "base/transform_array_ref.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
+#include "deoptimization_kind.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "handle.h"
 #include "handle_scope.h"
@@ -1405,7 +1406,8 @@
   M(BitwiseNegatedRight, Instruction)                                   \
   M(DataProcWithShifterOp, Instruction)                                 \
   M(MultiplyAccumulate, Instruction)                                    \
-  M(IntermediateAddress, Instruction)
+  M(IntermediateAddress, Instruction)                                   \
+  M(IntermediateAddressIndex, Instruction)
 #endif
 
 #ifndef ART_ENABLE_CODEGEN_arm
@@ -2991,15 +2993,9 @@
 // Deoptimize to interpreter, upon checking a condition.
 class HDeoptimize FINAL : public HVariableInputSizeInstruction {
  public:
-  enum class Kind {
-    kBCE,
-    kInline,
-    kLast = kInline
-  };
-
   // Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move
   // across.
-  HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc)
+  HDeoptimize(ArenaAllocator* arena, HInstruction* cond, DeoptimizationKind kind, uint32_t dex_pc)
       : HVariableInputSizeInstruction(
             SideEffects::All(),
             dex_pc,
@@ -3019,7 +3015,7 @@
   HDeoptimize(ArenaAllocator* arena,
               HInstruction* cond,
               HInstruction* guard,
-              Kind kind,
+              DeoptimizationKind kind,
               uint32_t dex_pc)
       : HVariableInputSizeInstruction(
             SideEffects::CanTriggerGC(),
@@ -3043,7 +3039,7 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
-  Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); }
+  DeoptimizationKind GetDeoptimizationKind() const { return GetPackedField<DeoptimizeKindField>(); }
 
   Primitive::Type GetType() const OVERRIDE {
     return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid;
@@ -3068,18 +3064,17 @@
   static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits;
   static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1;
   static constexpr size_t kFieldDeoptimizeKindSize =
-      MinimumBitsToStore(static_cast<size_t>(Kind::kLast));
+      MinimumBitsToStore(static_cast<size_t>(DeoptimizationKind::kLast));
   static constexpr size_t kNumberOfDeoptimizePackedBits =
       kFieldDeoptimizeKind + kFieldDeoptimizeKindSize;
   static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits,
                 "Too many packed fields.");
-  using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;
+  using DeoptimizeKindField =
+      BitField<DeoptimizationKind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;
 
   DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
 };
 
-std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs);
-
 // Represents a should_deoptimize flag. Currently used for CHA-based devirtualization.
 // The compiled code checks this flag value in a guard before devirtualized call and
 // if it's true, starts to do deoptimization.
@@ -5669,12 +5664,8 @@
     // Use the Class* from the method's own ArtMethod*.
     kReferrersClass,
 
-    // Use boot image Class* address that will be known at link time.
-    // Used for boot image classes referenced by boot image code in non-PIC mode.
-    kBootImageLinkTimeAddress,
-
     // Use PC-relative boot image Class* address that will be known at link time.
-    // Used for boot image classes referenced by boot image code in PIC mode.
+    // Used for boot image classes referenced by boot image code.
     kBootImageLinkTimePcRelative,
 
     // Use a known boot image Class* address, embedded in the code by the codegen.
@@ -5826,7 +5817,6 @@
 
   static bool HasTypeReference(LoadKind load_kind) {
     return load_kind == LoadKind::kReferrersClass ||
-        load_kind == LoadKind::kBootImageLinkTimeAddress ||
         load_kind == LoadKind::kBootImageLinkTimePcRelative ||
         load_kind == LoadKind::kBssEntry ||
         load_kind == LoadKind::kDexCacheViaMethod;
@@ -5860,7 +5850,6 @@
   // The special input is used for PC-relative loads on some architectures,
   // including literal pool loads, which are PC-relative too.
   DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
-         GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
          GetLoadKind() == LoadKind::kBootImageAddress ||
          GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind();
   DCHECK(special_input_.GetInstruction() == nullptr);
@@ -5872,12 +5861,8 @@
  public:
   // Determines how to load the String.
   enum class LoadKind {
-    // Use boot image String* address that will be known at link time.
-    // Used for boot image strings referenced by boot image code in non-PIC mode.
-    kBootImageLinkTimeAddress,
-
     // Use PC-relative boot image String* address that will be known at link time.
-    // Used for boot image strings referenced by boot image code in PIC mode.
+    // Used for boot image strings referenced by boot image code.
     kBootImageLinkTimePcRelative,
 
     // Use a known boot image String* address, embedded in the code by the codegen.
@@ -5942,8 +5927,7 @@
   // the dex cache and the string is not guaranteed to be there yet.
   bool NeedsEnvironment() const OVERRIDE {
     LoadKind load_kind = GetLoadKind();
-    if (load_kind == LoadKind::kBootImageLinkTimeAddress ||
-        load_kind == LoadKind::kBootImageLinkTimePcRelative ||
+    if (load_kind == LoadKind::kBootImageLinkTimePcRelative ||
         load_kind == LoadKind::kBootImageAddress ||
         load_kind == LoadKind::kJitTableAddress) {
       return false;
@@ -6006,7 +5990,6 @@
   // including literal pool loads, which are PC-relative too.
   DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
          GetLoadKind() == LoadKind::kBssEntry ||
-         GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
          GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
   // HLoadString::GetInputRecords() returns an empty array at this point,
   // so use the GetInputRecords() from the base class to set the input record.
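
The HDeoptimize change above sizes its packed kind field from DeoptimizationKind::kLast via MinimumBitsToStore. A minimal sketch of that sizing idiom (stand-alone C++, not ART code; the helper and ExampleKind are illustrative stand-ins):

    #include <cstddef>

    // Stand-in for art::MinimumBitsToStore: bits needed to hold the largest value.
    constexpr size_t MinimumBitsToStore(size_t value) {
      size_t bits = 0;
      while (value != 0) { value >>= 1; ++bits; }
      return bits;
    }

    // Illustrative kind enum with a kLast sentinel, mirroring DeoptimizationKind's shape.
    enum class ExampleKind : size_t { kA, kB, kC, kLast = kC };

    // Three values (0..2) fit in two packed bits next to the generic packed bits.
    static_assert(MinimumBitsToStore(static_cast<size_t>(ExampleKind::kLast)) == 2,
                  "two bits suffice");
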
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index c6bfbcc..075a816 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -150,6 +150,49 @@
   DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
 };
 
+// This instruction computes part of the array access offset (data and index offset).
+//
+// For array accesses the element address has the following structure:
+// Address = CONST_OFFSET + base_addr + index << ELEM_SHIFT. Taking LDR/STR addressing modes into
+// account, the address part (CONST_OFFSET + index << ELEM_SHIFT) can be shared across array
+// accesses with the same data type and index. For example, in the following loop five accesses
+// can share the address computation:
+//
+// void foo(int[] a, int[] b, int[] c) {
+//   for (i...) {
+//     a[i] = a[i] + 5;
+//     b[i] = b[i] + c[i];
+//   }
+// }
+//
+// Note: since the instruction does not involve the base array address in its computation, it has
+// no side effects (in contrast to HIntermediateAddress).
+class HIntermediateAddressIndex FINAL : public HExpression<3> {
+ public:
+  HIntermediateAddressIndex(
+      HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+    SetRawInputAt(0, index);
+    SetRawInputAt(1, offset);
+    SetRawInputAt(2, shift);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
+  bool IsActualObject() const OVERRIDE { return false; }
+
+  HInstruction* GetIndex() const { return InputAt(0); }
+  HInstruction* GetOffset() const { return InputAt(1); }
+  HInstruction* GetShift() const { return InputAt(2); }
+
+  DECLARE_INSTRUCTION(IntermediateAddressIndex);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HIntermediateAddressIndex);
+};
+
 class HDataProcWithShifterOp FINAL : public HExpression<2> {
  public:
   enum OpKind {
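
To make the address-sharing comment for HIntermediateAddressIndex concrete, here is a small sketch (plain C++, not ART IR; the helper and parameter names are invented) of the part that is computed once and reused by every access:

    #include <cstdint>

    // The shareable part of an element address: CONST_OFFSET + (index << ELEM_SHIFT).
    inline uint32_t SharedAddressPart(uint32_t index, uint32_t data_offset, uint32_t elem_shift) {
      return data_offset + (index << elem_shift);
    }

    // In the foo(int[], int[], int[]) loop above, all five accesses of one iteration can
    // reuse SharedAddressPart(i, data_offset, /* elem_shift= */ 2); only base_addr differs
    // between a, b and c.
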
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 52c247b..5dbe29b 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -178,12 +178,17 @@
                       size_t vector_length,
                       uint32_t dex_pc)
       : HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc),
-        alignment_(Primitive::ComponentSize(packed_type), 0) { }
+        alignment_(Primitive::ComponentSize(packed_type), 0) {
+    DCHECK_GE(number_of_inputs, 2u);
+  }
 
   void SetAlignment(Alignment alignment) { alignment_ = alignment; }
 
   Alignment GetAlignment() const { return alignment_; }
 
+  HInstruction* GetArray() const { return InputAt(0); }
+  HInstruction* GetIndex() const { return InputAt(1); }
+
   DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation);
 
  private:
@@ -451,13 +456,24 @@
           HInstruction* right,
           Primitive::Type packed_type,
           size_t vector_length,
+          bool is_unsigned,
           uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(HasConsistentPackedTypes(left, packed_type));
     DCHECK(HasConsistentPackedTypes(right, packed_type));
+    SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned);
   }
+
+  bool IsUnsigned() const { return GetPackedFlag<kFieldMinOpIsUnsigned>(); }
+
   DECLARE_INSTRUCTION(VecMin);
+
  private:
+  // Additional packed bits.
+  static constexpr size_t kFieldMinOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
+  static constexpr size_t kNumberOfMinOpPackedBits = kFieldMinOpIsUnsigned + 1;
+  static_assert(kNumberOfMinOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+
   DISALLOW_COPY_AND_ASSIGN(HVecMin);
 };
 
@@ -470,13 +486,24 @@
           HInstruction* right,
           Primitive::Type packed_type,
           size_t vector_length,
+          bool is_unsigned,
           uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(HasConsistentPackedTypes(left, packed_type));
     DCHECK(HasConsistentPackedTypes(right, packed_type));
+    SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned);
   }
+
+  bool IsUnsigned() const { return GetPackedFlag<kFieldMaxOpIsUnsigned>(); }
+
   DECLARE_INSTRUCTION(VecMax);
+
  private:
+  // Additional packed bits.
+  static constexpr size_t kFieldMaxOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
+  static constexpr size_t kNumberOfMaxOpPackedBits = kFieldMaxOpIsUnsigned + 1;
+  static_assert(kNumberOfMaxOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+
   DISALLOW_COPY_AND_ASSIGN(HVecMax);
 };
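
Both vector nodes store is_unsigned in the instruction's packed bit field rather than as a separate member. A minimal sketch of that idiom (not ART code; the 3-bit parent budget and names are illustrative):

    #include <bitset>
    #include <cstddef>

    struct PackedFlagDemo {
      static constexpr size_t kParentBits = 3;                 // stand-in for kNumberOfVectorOpPackedBits
      static constexpr size_t kFieldIsUnsigned = kParentBits;  // claim the next free bit
      std::bitset<32> packed;                                  // shared packed storage

      void SetIsUnsigned(bool value) { packed[kFieldIsUnsigned] = value; }
      bool IsUnsigned() const { return packed[kFieldIsUnsigned]; }
    };
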
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 065c11e..f928f71 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -638,11 +638,14 @@
           new (arena) arm::InstructionSimplifierArm(graph, stats);
       SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
       GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+      HInstructionScheduling* scheduling =
+          new (arena) HInstructionScheduling(graph, instruction_set, codegen);
       HOptimization* arm_optimizations[] = {
         simplifier,
         side_effects,
         gvn,
-        fixups
+        fixups,
+        scheduling,
       };
       RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
       break;
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index a0fdde1..ef2c432 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -61,7 +61,6 @@
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
     HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
     switch (load_kind) {
-      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
       case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
       case HLoadClass::LoadKind::kBootImageAddress:
       case HLoadClass::LoadKind::kBssEntry:
@@ -77,7 +76,6 @@
   void VisitLoadString(HLoadString* load_string) OVERRIDE {
     HLoadString::LoadKind load_kind = load_string->GetLoadKind();
     switch (load_kind) {
-      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
       case HLoadString::LoadKind::kBootImageAddress:
       case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
       case HLoadString::LoadKind::kBssEntry:
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 87f709f..300f4c6 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1968,8 +1968,7 @@
   ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
       allocator_->Adapter(kArenaAllocRegisterAllocator));
 
-  for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) {
-    LiveInterval* parent_interval = *it;
+  for (LiveInterval* parent_interval : *intervals) {
     DCHECK(parent_interval->IsParent());
     DCHECK(!parent_interval->HasSpillSlot());
     size_t start = parent_interval->GetStart();
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index d65d20c..320f01a 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -23,6 +23,10 @@
 #include "scheduler_arm64.h"
 #endif
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "scheduler_arm.h"
+#endif
+
 namespace art {
 
 void SchedulingGraph::AddDependency(SchedulingNode* node,
@@ -264,10 +268,11 @@
   // Start the dot graph. Use an increasing index for easier differentiation.
   output << "digraph G {\n";
   for (const auto& entry : nodes_map_) {
-    DumpAsDotNode(output, entry.second);
+    SchedulingNode* node = entry.second;
+    DumpAsDotNode(output, node);
   }
   // Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
-  for (auto node : initial_candidates) {
+  for (SchedulingNode* node : initial_candidates) {
     const HInstruction* instruction = node->GetInstruction();
     output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
       << "[label=\"" << node->GetLatency() << "\",dir=back]\n";
@@ -580,28 +585,39 @@
 
 void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
                                  bool schedule_randomly) {
+#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
+  // Phase-local allocator that allocates scheduler internal data structures like
+  // scheduling nodes, internal nodes map, dependencies, etc.
+  ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
+  CriticalPathSchedulingNodeSelector critical_path_selector;
+  RandomSchedulingNodeSelector random_selector;
+  SchedulingNodeSelector* selector = schedule_randomly
+      ? static_cast<SchedulingNodeSelector*>(&random_selector)
+      : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
+#else
   // Avoid compilation error when compiling for unsupported instruction set.
   UNUSED(only_optimize_loop_blocks);
   UNUSED(schedule_randomly);
+#endif
   switch (instruction_set_) {
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
-      // Phase-local allocator that allocates scheduler internal data structures like
-      // scheduling nodes, internel nodes map, dependencies, etc.
-      ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
-
-      CriticalPathSchedulingNodeSelector critical_path_selector;
-      RandomSchedulingNodeSelector random_selector;
-      SchedulingNodeSelector* selector = schedule_randomly
-          ? static_cast<SchedulingNodeSelector*>(&random_selector)
-          : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
-
       arm64::HSchedulerARM64 scheduler(&arena_allocator, selector);
       scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
       scheduler.Schedule(graph_);
       break;
     }
 #endif
+#if defined(ART_ENABLE_CODEGEN_arm)
+    case kThumb2:
+    case kArm: {
+      arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
+      arm::HSchedulerARM scheduler(&arena_allocator, selector, &arm_latency_visitor);
+      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
+      scheduler.Schedule(graph_);
+      break;
+    }
+#endif
     default:
       break;
   }
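
With the codegen parameter in place, the pass is meant to be created as in the optimizing_compiler.cc hunk above. A short usage sketch (arena, graph, instruction_set and codegen come from the surrounding pass setup):

    HInstructionScheduling* scheduling =
        new (arena) HInstructionScheduling(graph, instruction_set, codegen);
    // Run() picks HSchedulerARM for kArm/kThumb2, HSchedulerARM64 for kArm64,
    // and is a no-op for other instruction sets.
    scheduling->Run();
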
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index 9236a0e..73e8087 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -23,6 +23,7 @@
 #include "driver/compiler_driver.h"
 #include "nodes.h"
 #include "optimization.h"
+#include "code_generator.h"
 
 namespace art {
 
@@ -469,8 +470,9 @@
 
 class HInstructionScheduling : public HOptimization {
  public:
-  HInstructionScheduling(HGraph* graph, InstructionSet instruction_set)
+  HInstructionScheduling(HGraph* graph, InstructionSet instruction_set, CodeGenerator* cg = nullptr)
       : HOptimization(graph, kInstructionScheduling),
+        codegen_(cg),
         instruction_set_(instruction_set) {}
 
   void Run() {
@@ -480,6 +482,7 @@
 
   static constexpr const char* kInstructionScheduling = "scheduler";
 
+  CodeGenerator* const codegen_;
   const InstructionSet instruction_set_;
 
  private:
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
new file mode 100644
index 0000000..1a89567
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -0,0 +1,822 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_utils.h"
+#include "common_arm.h"
+#include "mirror/array-inl.h"
+#include "scheduler_arm.h"
+
+namespace art {
+namespace arm {
+
+using helpers::Int32ConstantFrom;
+using helpers::Uint64ConstantFrom;
+
+void SchedulingLatencyVisitorARM::HandleBinaryOperationLatencies(HBinaryOperation* instr) {
+  switch (instr->GetResultType()) {
+    case Primitive::kPrimLong:
+      // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
+      // so a bubble (kArmNopLatency) is added to represent the internal carry flag
+      // dependency inside these pairs.
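+      // Illustrative expansion (not emitted by this visitor):
+      //   adds r0, r0, r2   @ low word, sets the carry flag
+      //   adc  r1, r1, r3   @ high word, stalls on the carry flag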
+      last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      last_visited_latency_ = kArmFloatingPointOpLatency;
+      break;
+    default:
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
+  HandleBinaryOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
+  HandleBinaryOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
+  switch (instr->GetResultType()) {
+    case Primitive::kPrimLong:
+      last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      last_visited_latency_ = kArmMulFloatingPointLatency;
+      break;
+    default:
+      last_visited_latency_ = kArmMulIntegerLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleBitwiseOperationLatencies(HBinaryOperation* instr) {
+  switch (instr->GetResultType()) {
+    case Primitive::kPrimLong:
+      last_visited_internal_latency_ = kArmIntegerOpLatency;
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      last_visited_latency_ = kArmFloatingPointOpLatency;
+      break;
+    default:
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
+  HandleBitwiseOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
+  HandleBitwiseOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
+  HandleBitwiseOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
+  switch (instr->GetResultType()) {
+    case Primitive::kPrimInt:
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimLong: {
+      // HandleLongRotate
+      HInstruction* rhs = instr->GetRight();
+      if (rhs->IsConstant()) {
+        uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
+        if (rot != 0u) {
+          last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
+          last_visited_latency_ = kArmIntegerOpLatency;
+        } else {
+          last_visited_internal_latency_ = kArmIntegerOpLatency;
+          last_visited_latency_ = kArmIntegerOpLatency;
+        }
+      } else {
+        last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
+        last_visited_latency_ = kArmBranchLatency;
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
+  Primitive::Type type = instr->GetResultType();
+  HInstruction* rhs = instr->GetRight();
+  switch (type) {
+    case Primitive::kPrimInt:
+      if (!rhs->IsConstant()) {
+        last_visited_internal_latency_ = kArmIntegerOpLatency;
+      }
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimLong:
+      if (!rhs->IsConstant()) {
+        last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
+      } else {
+        uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
+        if (shift_value == 1 || shift_value >= 32) {
+          last_visited_internal_latency_ = kArmIntegerOpLatency;
+        } else {
+          last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+        }
+      }
+      last_visited_latency_ = kArmIntegerOpLatency;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation type " << type;
+      UNREACHABLE();
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
+  HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
+  HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
+  HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
+  switch (instr->GetLeft()->GetType()) {
+    case Primitive::kPrimLong:
+      last_visited_internal_latency_ = 4 * kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      last_visited_internal_latency_ = 2 * kArmFloatingPointOpLatency;
+      break;
+    default:
+      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+      break;
+  }
+  last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
+  Primitive::Type type = instr->InputAt(0)->GetType();
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
+      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+      break;
+    case Primitive::kPrimLong:
+      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
+      break;
+    default:
+      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+      break;
+  }
+  last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+  if (instruction->GetResultType() == Primitive::kPrimInt) {
+    last_visited_latency_ = kArmIntegerOpLatency;
+  } else {
+    last_visited_internal_latency_ = kArmIntegerOpLatency;
+    last_visited_latency_ = kArmIntegerOpLatency;
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
+  if (internal_latency) {
+    last_visited_internal_latency_ += kArmIntegerOpLatency;
+  } else {
+    last_visited_latency_ = kArmDataProcWithShifterOpLatency;
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
+  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+  if (kind == HInstruction::kAdd) {
+    last_visited_internal_latency_ = kArmIntegerOpLatency;
+    last_visited_latency_ = kArmIntegerOpLatency;
+  } else if (kind == HInstruction::kSub) {
+    last_visited_internal_latency_ = kArmIntegerOpLatency;
+    last_visited_latency_ = kArmIntegerOpLatency;
+  } else {
+    HandleGenerateDataProcInstruction(/* internal_latency */ true);
+    HandleGenerateDataProcInstruction();
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
+  DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+  DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
+
+  const uint32_t shift_value = instruction->GetShiftAmount();
+  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+
+  if (shift_value >= 32) {
+    // Different shift types actually generate similar code here, so there is no need to
+    // differentiate shift types the way the codegen pass does; this also avoids handling
+    // shift types from different ARM backends.
+    HandleGenerateDataProc(instruction);
+  } else {
+    DCHECK_GT(shift_value, 1U);
+    DCHECK_LT(shift_value, 32U);
+
+    if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+      HandleGenerateDataProcInstruction(/* internal_latency */ true);
+      HandleGenerateDataProcInstruction(/* internal_latency */ true);
+      HandleGenerateDataProcInstruction();
+    } else {
+      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+      HandleGenerateDataProc(instruction);
+    }
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
+  const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+
+  if (instruction->GetType() == Primitive::kPrimInt) {
+    DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind));
+    HandleGenerateDataProcInstruction();
+  } else {
+    DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+    if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
+      HandleGenerateDataProc(instruction);
+    } else {
+      HandleGenerateLongDataProc(instruction);
+    }
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
+  // Although the code generated is a simple `add` instruction, we found empirically that
+  // spacing it from its use in memory accesses was beneficial.
+  last_visited_internal_latency_ = kArmNopLatency;
+  last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArmMulIntegerLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
+  Primitive::Type type = instruction->GetType();
+  const bool maybe_compressed_char_at =
+      mirror::kUseStringCompression && instruction->IsStringCharAt();
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+  HInstruction* index = instruction->InputAt(1);
+
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      if (maybe_compressed_char_at) {
+        last_visited_internal_latency_ += kArmMemoryLoadLatency;
+      }
+      if (index->IsConstant()) {
+        if (maybe_compressed_char_at) {
+          last_visited_internal_latency_ +=
+              kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
+          last_visited_latency_ = kArmBranchLatency;
+        } else {
+          last_visited_latency_ += kArmMemoryLoadLatency;
+        }
+      } else {
+        if (!has_intermediate_address) {
+          last_visited_internal_latency_ += kArmIntegerOpLatency;
+        }
+        if (maybe_compressed_char_at) {
+          last_visited_internal_latency_ +=
+              kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
+          last_visited_latency_ = kArmBranchLatency;
+        } else {
+          last_visited_latency_ += kArmMemoryLoadLatency;
+        }
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
+      } else {
+        if (index->IsConstant()) {
+          last_visited_latency_ = kArmMemoryLoadLatency;
+        } else {
+          if (!has_intermediate_address) {
+            last_visited_internal_latency_ += kArmIntegerOpLatency;
+          }
+          last_visited_internal_latency_ = kArmMemoryLoadLatency;
+        }
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_internal_latency_ += kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_internal_latency_ += kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_internal_latency_ += kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
+  last_visited_latency_ = kArmMemoryLoadLatency;
+  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+    last_visited_internal_latency_ = kArmMemoryLoadLatency;
+    last_visited_latency_ = kArmIntegerOpLatency;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
+  HInstruction* index = instruction->InputAt(1);
+  Primitive::Type value_type = instruction->GetComponentType();
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+
+  switch (value_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryStoreLatency;
+      } else {
+        if (!has_intermediate_address) {
+          last_visited_internal_latency_ = kArmIntegerOpLatency;
+        }
+        last_visited_latency_ = kArmMemoryStoreLatency;
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      if (instruction->InputAt(2)->IsNullConstant()) {
+        if (index->IsConstant()) {
+          last_visited_latency_ = kArmMemoryStoreLatency;
+        } else {
+          last_visited_internal_latency_ = kArmIntegerOpLatency;
+          last_visited_latency_ = kArmMemoryStoreLatency;
+        }
+      } else {
+        // Modeling the exact instruction sequence of runtime type checks is too complicated,
+        // so just give it a single, slow latency.
+        last_visited_latency_ = kArmRuntimeTypeCheckLatency;
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_internal_latency_ = kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_internal_latency_ = kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      if (index->IsConstant()) {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_internal_latency_ = kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unreachable type " << value_type;
+      UNREACHABLE();
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
+  last_visited_internal_latency_ = kArmIntegerOpLatency;
+  // Users do not use any data results.
+  last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
+  if (imm == 0) {
+    last_visited_internal_latency_ = 0;
+    last_visited_latency_ = 0;
+  } else if (imm == 1 || imm == -1) {
+    last_visited_latency_ = kArmIntegerOpLatency;
+  } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+    last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
+    last_visited_latency_ = kArmIntegerOpLatency;
+  } else {
+    last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
+    last_visited_latency_ = kArmIntegerOpLatency;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
+  Primitive::Type type = instruction->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      HInstruction* rhs = instruction->GetRight();
+      if (rhs->IsConstant()) {
+        int32_t imm = Int32ConstantFrom(rhs->AsConstant());
+        HandleDivRemConstantIntegralLatencies(imm);
+      } else {
+        last_visited_latency_ = kArmDivIntegerLatency;
+      }
+      break;
+    }
+    case Primitive::kPrimFloat:
+      last_visited_latency_ = kArmDivFloatLatency;
+      break;
+    case Primitive::kPrimDouble:
+      last_visited_latency_ = kArmDivDoubleLatency;
+      break;
+    default:
+      last_visited_internal_latency_ = kArmCallInternalLatency;
+      last_visited_latency_ = kArmCallLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
+  last_visited_internal_latency_ = kArmCallInternalLatency;
+  last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
+  last_visited_internal_latency_ = kArmCallInternalLatency;
+  last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
+  last_visited_internal_latency_ = kArmLoadStringInternalLatency;
+  last_visited_latency_ = kArmMemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
+  last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
+  last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
+  if (instruction->IsStringAlloc()) {
+    last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
+  } else {
+    last_visited_internal_latency_ = kArmCallInternalLatency;
+  }
+  last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
+  Primitive::Type type = instruction->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      HInstruction* rhs = instruction->GetRight();
+      if (rhs->IsConstant()) {
+        int32_t imm = Int32ConstantFrom(rhs->AsConstant());
+        HandleDivRemConstantIntegralLatencies(imm);
+      } else {
+        last_visited_internal_latency_ = kArmDivIntegerLatency;
+        last_visited_latency_ = kArmMulIntegerLatency;
+      }
+      break;
+    }
+    default:
+      last_visited_internal_latency_ = kArmCallInternalLatency;
+      last_visited_latency_ = kArmCallLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
+                                                          const FieldInfo& field_info) {
+  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+  DCHECK(codegen_ != nullptr);
+  bool is_volatile = field_info.IsVolatile();
+  Primitive::Type field_type = field_info.GetFieldType();
+  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
+      last_visited_latency_ = kArmMemoryLoadLatency;
+      break;
+
+    case Primitive::kPrimNot:
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+
+    case Primitive::kPrimLong:
+      if (is_volatile && !atomic_ldrd_strd) {
+        last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      } else {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+
+    case Primitive::kPrimFloat:
+      last_visited_latency_ = kArmMemoryLoadLatency;
+      break;
+
+    case Primitive::kPrimDouble:
+      if (is_volatile && !atomic_ldrd_strd) {
+        last_visited_internal_latency_ =
+            kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
+        last_visited_latency_ = kArmIntegerOpLatency;
+      } else {
+        last_visited_latency_ = kArmMemoryLoadLatency;
+      }
+      break;
+
+    default:
+      last_visited_latency_ = kArmMemoryLoadLatency;
+      break;
+  }
+
+  if (is_volatile) {
+    last_visited_internal_latency_ += kArmMemoryBarrierLatency;
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
+                                                          const FieldInfo& field_info) {
+  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+  DCHECK(codegen_ != nullptr);
+  bool is_volatile = field_info.IsVolatile();
+  Primitive::Type field_type = field_info.GetFieldType();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
+  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+      if (is_volatile) {
+        last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
+        last_visited_latency_ = kArmMemoryBarrierLatency;
+      } else {
+        last_visited_latency_ = kArmMemoryStoreLatency;
+      }
+      break;
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      if (kPoisonHeapReferences && needs_write_barrier) {
+        last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
+      }
+      last_visited_latency_ = kArmMemoryStoreLatency;
+      break;
+
+    case Primitive::kPrimLong:
+      if (is_volatile && !atomic_ldrd_strd) {
+        last_visited_internal_latency_ =
+            kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
+        last_visited_latency_ = kArmIntegerOpLatency;
+      } else {
+        last_visited_latency_ = kArmMemoryStoreLatency;
+      }
+      break;
+
+    case Primitive::kPrimFloat:
+      last_visited_latency_ = kArmMemoryStoreLatency;
+      break;
+
+    case Primitive::kPrimDouble:
+      if (is_volatile && !atomic_ldrd_strd) {
+        last_visited_internal_latency_ = kArmIntegerOpLatency +
+            kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
+        last_visited_latency_ = kArmIntegerOpLatency;
+      } else {
+        last_visited_latency_ = kArmMemoryStoreLatency;
+      }
+      break;
+
+    default:
+      last_visited_latency_ = kArmMemoryStoreLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+  HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+  HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
+  HBasicBlock* block = instruction->GetBlock();
+  DCHECK((block->GetLoopInformation() != nullptr) ||
+         (block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
+  // Users do not use any data results.
+  last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
+  Primitive::Type result_type = instr->GetResultType();
+  Primitive::Type input_type = instr->GetInputType();
+
+  switch (result_type) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      last_visited_latency_ = kArmIntegerOpLatency;  // SBFX or UBFX
+      break;
+
+    case Primitive::kPrimInt:
+      switch (input_type) {
+        case Primitive::kPrimLong:
+          last_visited_latency_ = kArmIntegerOpLatency;  // MOV
+          break;
+        case Primitive::kPrimFloat:
+        case Primitive::kPrimDouble:
+          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+        default:
+          last_visited_latency_ = kArmIntegerOpLatency;
+          break;
+      }
+      break;
+
+    case Primitive::kPrimLong:
+      switch (input_type) {
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+          // MOV and extension
+          last_visited_internal_latency_ = kArmIntegerOpLatency;
+          last_visited_latency_ = kArmIntegerOpLatency;
+          break;
+        case Primitive::kPrimFloat:
+        case Primitive::kPrimDouble:
+          // invokes runtime
+          last_visited_internal_latency_ = kArmCallInternalLatency;
+          break;
+        default:
+          last_visited_internal_latency_ = kArmIntegerOpLatency;
+          last_visited_latency_ = kArmIntegerOpLatency;
+          break;
+      }
+      break;
+
+    case Primitive::kPrimFloat:
+      switch (input_type) {
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+        case Primitive::kPrimLong:
+          // invokes runtime
+          last_visited_internal_latency_ = kArmCallInternalLatency;
+          break;
+        case Primitive::kPrimDouble:
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+        default:
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+      }
+      break;
+
+    case Primitive::kPrimDouble:
+      switch (input_type) {
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+        case Primitive::kPrimLong:
+          last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+        case Primitive::kPrimFloat:
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+        default:
+          last_visited_latency_ = kArmFloatingPointOpLatency;
+          break;
+      }
+      break;
+
+    default:
+      last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+      break;
+  }
+}
+
+void SchedulingLatencyVisitorARM::VisitArmDexCacheArraysBase(art::HArmDexCacheArraysBase*) {
+  last_visited_internal_latency_ = kArmIntegerOpLatency;
+  last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
new file mode 100644
index 0000000..8d5e4f3
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
+#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
+
+#include "code_generator_arm_vixl.h"
+#include "scheduler.h"
+
+namespace art {
+namespace arm {
+#ifdef ART_USE_OLD_ARM_BACKEND
+typedef CodeGeneratorARM CodeGeneratorARMType;
+#else
+typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
+#endif
+
+// AArch32 instruction latencies.
+// We currently assume that all ARM CPUs share the same instruction latency list.
+// The following latencies were tuned based on performance experiments and
+// automatic tuning using a differential evolution approach on various benchmarks.
+static constexpr uint32_t kArmIntegerOpLatency = 2;
+static constexpr uint32_t kArmFloatingPointOpLatency = 11;
+static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
+static constexpr uint32_t kArmMulIntegerLatency = 6;
+static constexpr uint32_t kArmMulFloatingPointLatency = 11;
+static constexpr uint32_t kArmDivIntegerLatency = 10;
+static constexpr uint32_t kArmDivFloatLatency = 20;
+static constexpr uint32_t kArmDivDoubleLatency = 25;
+static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
+static constexpr uint32_t kArmMemoryLoadLatency = 9;
+static constexpr uint32_t kArmMemoryStoreLatency = 9;
+static constexpr uint32_t kArmMemoryBarrierLatency = 6;
+static constexpr uint32_t kArmBranchLatency = 4;
+static constexpr uint32_t kArmCallLatency = 5;
+static constexpr uint32_t kArmCallInternalLatency = 29;
+static constexpr uint32_t kArmLoadStringInternalLatency = 10;
+static constexpr uint32_t kArmNopLatency = 2;
+static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
+static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
+
+class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
+ public:
+  explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
+      : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}
+
+  // Default visitor for instructions not handled specifically below.
+  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
+    last_visited_latency_ = kArmIntegerOpLatency;
+  }
+
+// We add a second unused parameter to be able to use this macro like the others
+// defined in `nodes.h`.
+#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M)    \
+  M(ArrayGet         , unused)                   \
+  M(ArrayLength      , unused)                   \
+  M(ArraySet         , unused)                   \
+  M(Add              , unused)                   \
+  M(Sub              , unused)                   \
+  M(And              , unused)                   \
+  M(Or               , unused)                   \
+  M(Ror              , unused)                   \
+  M(Xor              , unused)                   \
+  M(Shl              , unused)                   \
+  M(Shr              , unused)                   \
+  M(UShr             , unused)                   \
+  M(Mul              , unused)                   \
+  M(Div              , unused)                   \
+  M(Condition        , unused)                   \
+  M(Compare          , unused)                   \
+  M(BoundsCheck      , unused)                   \
+  M(InstanceFieldGet , unused)                   \
+  M(InstanceFieldSet , unused)                   \
+  M(InstanceOf       , unused)                   \
+  M(Invoke           , unused)                   \
+  M(LoadString       , unused)                   \
+  M(NewArray         , unused)                   \
+  M(NewInstance      , unused)                   \
+  M(Rem              , unused)                   \
+  M(StaticFieldGet   , unused)                   \
+  M(StaticFieldSet   , unused)                   \
+  M(SuspendCheck     , unused)                   \
+  M(TypeConversion   , unused)
+
+#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
+  M(BitwiseNegatedRight, unused)                 \
+  M(MultiplyAccumulate, unused)                  \
+  M(IntermediateAddress, unused)                 \
+  M(DataProcWithShifterOp, unused)
+
+#define DECLARE_VISIT_INSTRUCTION(type, unused)  \
+  void Visit##type(H##type* instruction) OVERRIDE;
+
+  FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+  void HandleBinaryOperationLatencies(HBinaryOperation* instr);
+  void HandleBitwiseOperationLatencies(HBinaryOperation* instr);
+  void HandleShiftLatencies(HBinaryOperation* instr);
+  void HandleDivRemConstantIntegralLatencies(int32_t imm);
+  void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleGenerateDataProcInstruction(bool internal_latency = false);
+  void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
+  void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
+
+  // The latency setting for each HInstruction depends on how the CodeGenerator may generate code;
+  // latency visitors may query the CodeGenerator for that information to make latencies accurate.
+  CodeGeneratorARMType* codegen_;
+};
+
+class HSchedulerARM : public HScheduler {
+ public:
+  HSchedulerARM(ArenaAllocator* arena,
+                SchedulingNodeSelector* selector,
+                SchedulingLatencyVisitorARM* arm_latency_visitor)
+      : HScheduler(arena, arm_latency_visitor, selector) {}
+  ~HSchedulerARM() OVERRIDE {}
+
+  bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
+#define CASE_INSTRUCTION_KIND(type, unused) case \
+  HInstruction::InstructionKind::k##type:
+    switch (instruction->GetKind()) {
+      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
+        return true;
+      FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
+        return true;
+      default:
+        return HScheduler::IsSchedulable(instruction);
+    }
+#undef CASE_INSTRUCTION_KIND
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
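
As a worked example of how these constants combine (reading last_visited_internal_latency_ as the cost of the extra instructions emitted before the final one, and last_visited_latency_ as what dependent nodes wait on; that split is an interpretation, not something the header states): a volatile long field get without atomic ldrd/strd support in HandleFieldGetLatencies adds up as follows.

    #include <cstdint>

    // Local copies of the header's constants, used only for this arithmetic.
    constexpr uint32_t kArmIntegerOpLatency = 2;
    constexpr uint32_t kArmMemoryLoadLatency = 9;
    constexpr uint32_t kArmMemoryBarrierLatency = 6;

    // kPrimLong, is_volatile, no atomic ldrd/strd:
    constexpr uint32_t internal = kArmMemoryLoadLatency + kArmIntegerOpLatency  // 64-bit load path
                                + kArmMemoryBarrierLatency;                     // volatile barrier
    constexpr uint32_t result = kArmMemoryLoadLatency;                          // seen by users
    static_assert(internal == 17 && result == 9, "matches the values above");
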
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
index 31d13e2..d87600a 100644
--- a/compiler/optimizing/scheduler_test.cc
+++ b/compiler/optimizing/scheduler_test.cc
@@ -28,6 +28,10 @@
 #include "scheduler_arm64.h"
 #endif
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "scheduler_arm.h"
+#endif
+
 namespace art {
 
 // Return all combinations of ISA and code generator that are executable on
@@ -56,7 +60,7 @@
 #endif
   };
 
-  for (auto test_config : test_config_candidates) {
+  for (const CodegenTargetConfig& test_config : test_config_candidates) {
     if (CanExecute(test_config.GetInstructionSet())) {
       v.push_back(test_config);
     }
@@ -65,133 +69,151 @@
   return v;
 }
 
-class SchedulerTest : public CommonCompilerTest {};
-
-#ifdef ART_ENABLE_CODEGEN_arm64
-TEST_F(SchedulerTest, DependencyGraph) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
-  HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(entry);
-  graph->AddBlock(block1);
-  graph->SetEntryBlock(entry);
-
-  // entry:
-  // array         ParameterValue
-  // c1            IntConstant
-  // c2            IntConstant
-  // block1:
-  // add1          Add [c1, c2]
-  // add2          Add [add1, c2]
-  // mul           Mul [add1, add2]
-  // div_check     DivZeroCheck [add2] (env: add2, mul)
-  // div           Div [add1, div_check]
-  // array_get1    ArrayGet [array, add1]
-  // array_set1    ArraySet [array, add1, add2]
-  // array_get2    ArrayGet [array, add1]
-  // array_set2    ArraySet [array, add1, add2]
-
-  HInstruction* array = new (&allocator) HParameterValue(graph->GetDexFile(),
-                                                         dex::TypeIndex(0),
-                                                         0,
-                                                         Primitive::kPrimNot);
-  HInstruction* c1 = graph->GetIntConstant(1);
-  HInstruction* c2 = graph->GetIntConstant(10);
-  HInstruction* add1 = new (&allocator) HAdd(Primitive::kPrimInt, c1, c2);
-  HInstruction* add2 = new (&allocator) HAdd(Primitive::kPrimInt, add1, c2);
-  HInstruction* mul = new (&allocator) HMul(Primitive::kPrimInt, add1, add2);
-  HInstruction* div_check = new (&allocator) HDivZeroCheck(add2, 0);
-  HInstruction* div = new (&allocator) HDiv(Primitive::kPrimInt, add1, div_check, 0);
-  HInstruction* array_get1 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
-  HInstruction* array_set1 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
-  HInstruction* array_get2 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
-  HInstruction* array_set2 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
-
-  DCHECK(div_check->CanThrow());
-
-  entry->AddInstruction(array);
-
-  HInstruction* block_instructions[] = {add1,
-                                        add2,
-                                        mul,
-                                        div_check,
-                                        div,
-                                        array_get1,
-                                        array_set1,
-                                        array_get2,
-                                        array_set2};
-  for (auto instr : block_instructions) {
-    block1->AddInstruction(instr);
+class SchedulerTest : public CommonCompilerTest {
+ public:
+  SchedulerTest() : pool_(), allocator_(&pool_) {
+    graph_ = CreateGraph(&allocator_);
   }
 
-  HEnvironment* environment = new (&allocator) HEnvironment(&allocator,
-                                                            2,
-                                                            graph->GetArtMethod(),
+  // Build the scheduling graph and run target-specific scheduling on it.
+  void TestBuildDependencyGraphAndSchedule(HScheduler* scheduler) {
+    HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+    HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(entry);
+    graph_->AddBlock(block1);
+    graph_->SetEntryBlock(entry);
+
+    // entry:
+    // array         ParameterValue
+    // c1            IntConstant
+    // c2            IntConstant
+    // block1:
+    // add1          Add [c1, c2]
+    // add2          Add [add1, c2]
+    // mul           Mul [add1, add2]
+    // div_check     DivZeroCheck [add2] (env: add2, mul)
+    // div           Div [add1, div_check]
+    // array_get1    ArrayGet [array, add1]
+    // array_set1    ArraySet [array, add1, add2]
+    // array_get2    ArrayGet [array, add1]
+    // array_set2    ArraySet [array, add1, add2]
+
+    HInstruction* array = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+                                                            dex::TypeIndex(0),
                                                             0,
-                                                            div_check);
-  div_check->SetRawEnvironment(environment);
-  environment->SetRawEnvAt(0, add2);
-  add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
-  environment->SetRawEnvAt(1, mul);
-  mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
+                                                            Primitive::kPrimNot);
+    HInstruction* c1 = graph_->GetIntConstant(1);
+    HInstruction* c2 = graph_->GetIntConstant(10);
+    HInstruction* add1 = new (&allocator_) HAdd(Primitive::kPrimInt, c1, c2);
+    HInstruction* add2 = new (&allocator_) HAdd(Primitive::kPrimInt, add1, c2);
+    HInstruction* mul = new (&allocator_) HMul(Primitive::kPrimInt, add1, add2);
+    HInstruction* div_check = new (&allocator_) HDivZeroCheck(add2, 0);
+    HInstruction* div = new (&allocator_) HDiv(Primitive::kPrimInt, add1, div_check, 0);
+    HInstruction* array_get1 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+    HInstruction* array_set1 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
+    HInstruction* array_get2 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+    HInstruction* array_set2 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
 
-  ArenaAllocator* arena = graph->GetArena();
-  CriticalPathSchedulingNodeSelector critical_path_selector;
-  arm64::HSchedulerARM64 scheduler(arena, &critical_path_selector);
-  SchedulingGraph scheduling_graph(&scheduler, arena);
-  // Instructions must be inserted in reverse order into the scheduling graph.
-  for (auto instr : ReverseRange(block_instructions)) {
-    scheduling_graph.AddNode(instr);
+    DCHECK(div_check->CanThrow());
+
+    entry->AddInstruction(array);
+
+    HInstruction* block_instructions[] = {add1,
+                                          add2,
+                                          mul,
+                                          div_check,
+                                          div,
+                                          array_get1,
+                                          array_set1,
+                                          array_get2,
+                                          array_set2};
+    for (HInstruction* instr : block_instructions) {
+      block1->AddInstruction(instr);
+    }
+
+    HEnvironment* environment = new (&allocator_) HEnvironment(&allocator_,
+                                                               2,
+                                                               graph_->GetArtMethod(),
+                                                               0,
+                                                               div_check);
+    div_check->SetRawEnvironment(environment);
+    environment->SetRawEnvAt(0, add2);
+    add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
+    environment->SetRawEnvAt(1, mul);
+    mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
+
+    SchedulingGraph scheduling_graph(scheduler, graph_->GetArena());
+    // Instructions must be inserted in reverse order into the scheduling graph.
+    for (HInstruction* instr : ReverseRange(block_instructions)) {
+      scheduling_graph.AddNode(instr);
+    }
+
+    // Should not have dependencies across basic blocks.
+    ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
+    ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
+
+    // Define-use dependency.
+    ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
+    ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
+    ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
+    ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
+    ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
+    ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
+    ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
+
+    // Read and write dependencies
+    ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
+    ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
+    ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
+    ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
+
+    // Env dependency.
+    ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
+    ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
+
+    // CanThrow.
+    ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+
+    // Exercise the code path of the target-specific scheduler and SchedulingLatencyVisitor.
+    scheduler->Schedule(graph_);
   }
 
-  // Should not have dependencies cross basic blocks.
-  ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
-  ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
+  void CompileWithRandomSchedulerAndRun(const uint16_t* data, bool has_result, int expected) {
+    for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+      HGraph* graph = CreateCFG(&allocator_, data);
 
-  // Define-use dependency.
-  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
-  ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
-  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
-  ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
-  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
-  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
-  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
+      // Schedule the graph randomly.
+      HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
+      scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
 
-  // Read and write dependencies
-  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
-  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
-  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
-  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
+      RunCode(target_config,
+              graph,
+              [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
+              has_result, expected);
+    }
+  }
 
-  // Env dependency.
-  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
-  ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  HGraph* graph_;
+};
 
-  // CanThrow.
-  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+#if defined(ART_ENABLE_CODEGEN_arm64)
+TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) {
+  CriticalPathSchedulingNodeSelector critical_path_selector;
+  arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector);
+  TestBuildDependencyGraphAndSchedule(&scheduler);
 }
 #endif
 
-static void CompileWithRandomSchedulerAndRun(const uint16_t* data,
-                                             bool has_result,
-                                             int expected) {
-  for (CodegenTargetConfig target_config : GetTargetConfigs()) {
-    ArenaPool pool;
-    ArenaAllocator arena(&pool);
-    HGraph* graph = CreateCFG(&arena, data);
-
-    // Schedule the graph randomly.
-    HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
-    scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
-
-    RunCode(target_config,
-            graph,
-            [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
-            has_result, expected);
-  }
+#if defined(ART_ENABLE_CODEGEN_arm)
+TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) {
+  CriticalPathSchedulingNodeSelector critical_path_selector;
+  arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr);
+  arm::HSchedulerARM scheduler(&allocator_, &critical_path_selector, &arm_latency_visitor);
+  TestBuildDependencyGraphAndSchedule(&scheduler);
 }
+#endif
 
 TEST_F(SchedulerTest, RandomScheduling) {
   //
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index eedaf6e..9a03163 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -56,7 +56,7 @@
   const std::vector<gc::space::ImageSpace*>& image_spaces =
       Runtime::Current()->GetHeap()->GetBootImageSpaces();
   for (gc::space::ImageSpace* image_space : image_spaces) {
-    const auto& method_section = image_space->GetImageHeader().GetMethodsSection();
+    const ImageSection& method_section = image_space->GetImageHeader().GetMethodsSection();
     if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) {
       return true;
     }
@@ -170,9 +170,7 @@
       } else if ((klass != nullptr) && compiler_driver->IsImageClass(
           dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
         is_in_boot_image = true;
-        desired_load_kind = codegen->GetCompilerOptions().GetCompilePic()
-            ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
-            : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
+        desired_load_kind = HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
       } else {
         // Not a boot image class.
         DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file));
@@ -182,8 +180,7 @@
       is_in_boot_image = (klass != nullptr) &&
           runtime->GetHeap()->ObjectIsInBootImageSpace(klass.Get());
       if (runtime->UseJitCompilation()) {
-        // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
-        // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+        DCHECK(!codegen->GetCompilerOptions().GetCompilePic());
         if (is_in_boot_image) {
           // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
           desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
@@ -249,16 +246,13 @@
       CHECK(string != nullptr);
       if (compiler_driver_->GetSupportBootImageFixup()) {
         DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
-        desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
-            ? HLoadString::LoadKind::kBootImageLinkTimePcRelative
-            : HLoadString::LoadKind::kBootImageLinkTimeAddress;
+        desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative;
       } else {
         // compiler_driver_test. Do not sharpen.
         desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
       }
     } else if (runtime->UseJitCompilation()) {
-      // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
-      // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+      DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
       string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
       if (string != nullptr) {
         if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
index a1016d1..029eb4b 100644
--- a/compiler/optimizing/ssa_liveness_analysis_test.cc
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -190,7 +190,7 @@
   HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length);
   block->AddInstruction(ae);
   HInstruction* deoptimize =
-      new(&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u);
+      new(&allocator_) HDeoptimize(&allocator_, ae, DeoptimizationKind::kBlockBCE, /* dex_pc */ 0u);
   block->AddInstruction(deoptimize);
   HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_,
                                                                 /* number_of_vregs */ 5,
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 0ed8a35..0f24e81 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -652,6 +652,9 @@
   virtual void blx(Register rm, Condition cond = AL) = 0;
   virtual void bx(Register rm, Condition cond = AL) = 0;
 
+  // ADR instruction loading a register for branching to the label.
+  virtual void AdrCode(Register rt, Label* label) = 0;
+
   // Memory barriers.
   virtual void dmb(DmbOptions flavor) = 0;
 
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 1e71d06..d7096b3 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -214,14 +214,14 @@
   DCHECK_GE(dest_end, src_end);
   for (auto i = fixups_.rbegin(), end = fixups_.rend(); i != end; ++i) {
     Fixup* fixup = &*i;
+    size_t old_fixup_location = fixup->GetLocation();
     if (fixup->GetOriginalSize() == fixup->GetSize()) {
       // The size of this Fixup didn't change. To avoid moving the data
       // in small chunks, emit the code to its original position.
-      fixup->Emit(&buffer_, adjusted_code_size);
       fixup->Finalize(dest_end - src_end);
+      fixup->Emit(old_fixup_location, &buffer_, adjusted_code_size);
     } else {
       // Move the data between the end of the fixup and src_end to its final location.
-      size_t old_fixup_location = fixup->GetLocation();
       size_t src_begin = old_fixup_location + fixup->GetOriginalSizeInBytes();
       size_t data_size = src_end - src_begin;
       size_t dest_begin  = dest_end - data_size;
@@ -230,7 +230,7 @@
       dest_end = dest_begin - fixup->GetSizeInBytes();
       // Finalize the Fixup and emit the data to the new location.
       fixup->Finalize(dest_end - src_end);
-      fixup->Emit(&buffer_, adjusted_code_size);
+      fixup->Emit(fixup->GetLocation(), &buffer_, adjusted_code_size);
     }
   }
   CHECK_EQ(src_end, dest_end);
@@ -1895,6 +1895,9 @@
     case kCbxz48Bit:
       return 6u;
 
+    case kCodeAddr4KiB:
+      return 4u;
+
     case kLiteral1KiB:
       return 2u;
     case kLiteral4KiB:
@@ -1973,6 +1976,15 @@
       diff -= 2;        // Extra CMP Rn, #0, 16-bit.
       break;
 
+    case kCodeAddr4KiB:
+      // The ADR instruction rounds down PC+4 to a multiple of 4, so if PC+4 isn't
+      // already a multiple of 4, we need to adjust the offset by 2.
+      DCHECK_ALIGNED(diff, 2);
+      diff += location_ & 2;
+      // Add the Thumb mode bit.
+      diff += 1;
+      break;
+
     case kLiteral1KiB:
     case kLiteral4KiB:
     case kLongOrFPLiteral1KiB:
@@ -1987,8 +1999,8 @@
       diff = diff + (diff & 2);
       DCHECK_GE(diff, 0);
       break;
-    case kLiteral1MiB:
     case kLiteral64KiB:
+    case kLiteral1MiB:
     case kLongOrFPLiteral64KiB:
     case kLiteralAddr64KiB:
       DCHECK_GE(diff, 4);  // The target must be at least 4 bytes after the ADD rX, PC.
@@ -2041,6 +2053,10 @@
       // We don't support conditional branches beyond +-1MiB.
       return true;
 
+    case kCodeAddr4KiB:
+      // ADR uses the aligned PC and as such the offset cannot be calculated early.
+      return false;
+
     case kLiteral1KiB:
     case kLiteral4KiB:
     case kLiteral64KiB:
@@ -2087,6 +2103,10 @@
       // We don't support conditional branches beyond +-1MiB.
       break;
 
+    case kCodeAddr4KiB:
+      // We don't support Code address ADR beyond +4KiB.
+      break;
+
     case kLiteral1KiB:
       DCHECK(!IsHighRegister(rn_));
       if (IsUint<10>(GetOffset(current_code_size))) {
@@ -2159,13 +2179,15 @@
   return current_code_size - old_code_size;
 }
 
-void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) const {
+void Thumb2Assembler::Fixup::Emit(uint32_t emit_location,
+                                  AssemblerBuffer* buffer,
+                                  uint32_t code_size) const {
   switch (GetSize()) {
     case kBranch16Bit: {
       DCHECK(type_ == kUnconditional || type_ == kConditional);
       DCHECK_EQ(type_ == kConditional, cond_ != AL);
       int16_t encoding = BEncoding16(GetOffset(code_size), cond_);
-      buffer->Store<int16_t>(location_, encoding);
+      buffer->Store<int16_t>(emit_location, encoding);
       break;
     }
     case kBranch32Bit: {
@@ -2180,15 +2202,15 @@
         DCHECK_NE(encoding & B12, 0);
         encoding ^= B14 | B12;
       }
-      buffer->Store<int16_t>(location_, encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
 
     case kCbxz16Bit: {
       DCHECK(type_ == kCompareAndBranchXZero);
       int16_t encoding = CbxzEncoding16(rn_, GetOffset(code_size), cond_);
-      buffer->Store<int16_t>(location_, encoding);
+      buffer->Store<int16_t>(emit_location, encoding);
       break;
     }
     case kCbxz32Bit: {
@@ -2196,8 +2218,8 @@
       DCHECK(cond_ == EQ || cond_ == NE);
       int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0);
       int16_t b_encoding = BEncoding16(GetOffset(code_size), cond_);
-      buffer->Store<int16_t>(location_, cmp_encoding);
-      buffer->Store<int16_t>(location_ + 2, b_encoding);
+      buffer->Store<int16_t>(emit_location, cmp_encoding);
+      buffer->Store<int16_t>(emit_location + 2, b_encoding);
       break;
     }
     case kCbxz48Bit: {
@@ -2205,24 +2227,32 @@
       DCHECK(cond_ == EQ || cond_ == NE);
       int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0);
       int32_t b_encoding = BEncoding32(GetOffset(code_size), cond_);
-      buffer->Store<int16_t>(location_, cmp_encoding);
-      buffer->Store<int16_t>(location_ + 2u, b_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 4u, static_cast<int16_t>(b_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, cmp_encoding);
+      buffer->Store<int16_t>(emit_location + 2u, b_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 4u, static_cast<int16_t>(b_encoding & 0xffff));
+      break;
+    }
+
+    case kCodeAddr4KiB: {
+      DCHECK(type_ == kLoadCodeAddr);
+      int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size));
+      buffer->Store<int16_t>(emit_location, encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
 
     case kLiteral1KiB: {
       DCHECK(type_ == kLoadLiteralNarrow);
       int16_t encoding = LdrLitEncoding16(rn_, GetOffset(code_size));
-      buffer->Store<int16_t>(location_, encoding);
+      buffer->Store<int16_t>(emit_location, encoding);
       break;
     }
     case kLiteral4KiB: {
       DCHECK(type_ == kLoadLiteralNarrow);
       // GetOffset() uses PC+4 but load literal uses AlignDown(PC+4, 4). Adjust offset accordingly.
       int32_t encoding = LdrLitEncoding32(rn_, GetOffset(code_size));
-      buffer->Store<int16_t>(location_, encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
     case kLiteral64KiB: {
@@ -2242,11 +2272,11 @@
       int32_t mov_encoding = MovModImmEncoding32(rn_, offset & ~0xfff);
       int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
       int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, offset & 0xfff);
-      buffer->Store<int16_t>(location_, mov_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
-      buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, mov_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding);
+      buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
       break;
     }
     case kLiteralFar: {
@@ -2256,36 +2286,36 @@
       int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff);
       int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
       int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, 0);
-      buffer->Store<int16_t>(location_, movw_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
-      buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, movw_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding);
+      buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
       break;
     }
 
     case kLiteralAddr1KiB: {
       DCHECK(type_ == kLoadLiteralAddr);
       int16_t encoding = AdrEncoding16(rn_, GetOffset(code_size));
-      buffer->Store<int16_t>(location_, encoding);
+      buffer->Store<int16_t>(emit_location, encoding);
       break;
     }
     case kLiteralAddr4KiB: {
       DCHECK(type_ == kLoadLiteralAddr);
       int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size));
-      buffer->Store<int16_t>(location_, encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
     case kLiteralAddr64KiB: {
       DCHECK(type_ == kLoadLiteralAddr);
       int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size));
       int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
-      buffer->Store<int16_t>(location_, mov_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
+      buffer->Store<int16_t>(emit_location, mov_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding);
       break;
     }
     case kLiteralAddrFar: {
@@ -2294,29 +2324,29 @@
       int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff);
       int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff);
       int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
-      buffer->Store<int16_t>(location_, movw_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
+      buffer->Store<int16_t>(emit_location, movw_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding);
       break;
     }
 
     case kLongOrFPLiteral1KiB: {
       int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size));  // DCHECKs type_.
-      buffer->Store<int16_t>(location_, encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
     case kLongOrFPLiteral64KiB: {
       int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size));
       int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC);
       int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u);    // DCHECKs type_.
-      buffer->Store<int16_t>(location_, mov_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
-      buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, mov_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding);
+      buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
       break;
     }
     case kLongOrFPLiteralFar: {
@@ -2325,13 +2355,13 @@
       int32_t movt_encoding = MovtEncoding32(IP, offset & ~0xffff);
       int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC);
       int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0);                 // DCHECKs type_.
-      buffer->Store<int16_t>(location_, movw_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
-      buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
-      buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16);
-      buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location, movw_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+      buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding);
+      buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16);
+      buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
       break;
     }
   }
@@ -3331,6 +3361,19 @@
 }
 
 
+void Thumb2Assembler::AdrCode(Register rt, Label* label) {
+  uint32_t pc = buffer_.Size();
+  FixupId branch_id = AddFixup(Fixup::LoadCodeAddress(pc, rt));
+  CHECK(!label->IsBound());
+  // ADR target must be an unbound label. Add it to a singly-linked list maintained within
+  // the code with the label serving as the head.
+  Emit16(static_cast<uint16_t>(label->position_));
+  label->LinkTo(branch_id);
+  Emit16(0);
+  DCHECK_EQ(buffer_.Size() - pc, GetFixup(branch_id)->GetSizeInBytes());
+}
+
+
 void Thumb2Assembler::Push(Register rd, Condition cond) {
   str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond);
 }
@@ -3405,7 +3448,7 @@
         break;
       }
     }
-    last_fixup.Emit(&buffer_, buffer_.Size());
+    last_fixup.Emit(last_fixup.GetLocation(), &buffer_, buffer_.Size());
     fixups_.pop_back();
   }
 }
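A note on the reordering above: the offset that Emit() encodes depends on the fixup's finalized location (this matters for the new kCodeAddr4KiB ADR fixups), so Finalize() now runs first, while the halfwords are still written at the pre-Finalize position passed in as emit_location. A stripped-down, self-contained sketch of that invariant (MiniFixup is invented for the example and is not ART code):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct MiniFixup {
      uint32_t location_ = 0;  // where the fixup currently believes it lives
      // Moving the fixup to its final place changes location_, which the real
      // Emit() uses when computing PC-relative offsets.
      void Finalize(int32_t displacement) { location_ += displacement; }
      // The encoding itself is written at an explicitly supplied position.
      void Emit(uint32_t emit_location, std::vector<uint16_t>* buffer) const {
        (*buffer)[emit_location / 2u] = 0xBF00;  // placeholder encoding
      }
    };

    int main() {
      std::vector<uint16_t> buffer(8, 0);
      MiniFixup fixup;
      fixup.location_ = 4;                      // original position in the code
      uint32_t old_location = fixup.location_;  // captured before Finalize()
      fixup.Finalize(/* displacement= */ 8);    // offsets now use location 12
      fixup.Emit(old_location, &buffer);        // but the encoding stays at 4
      assert(buffer[2] == 0xBF00);
      return 0;
    }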
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 1c495aa..2ff9018 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -268,6 +268,9 @@
   void blx(Register rm, Condition cond = AL) OVERRIDE;
   void bx(Register rm, Condition cond = AL) OVERRIDE;
 
+  // ADR instruction loading a register for branching to the label, including the Thumb mode bit.
+  void AdrCode(Register rt, Label* label) OVERRIDE;
+
   virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
   virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
@@ -377,6 +380,10 @@
     force_32bit_ = true;
   }
 
+  void Allow16Bit() {
+    force_32bit_ = false;
+  }
+
   // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This
   // will generate a fixup.
   JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
@@ -422,6 +429,7 @@
       kUnconditionalLink,         // BL.
       kUnconditionalLinkX,        // BLX.
       kCompareAndBranchXZero,     // cbz/cbnz.
+      kLoadCodeAddr,              // Get address of a code label, used for Baker read barriers.
       kLoadLiteralNarrow,         // Load narrow integer literal.
       kLoadLiteralWide,           // Load wide integer literal.
       kLoadLiteralAddr,           // Load address of literal (used for jump table).
@@ -442,6 +450,10 @@
       kCbxz32Bit,   // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset.
       kCbxz48Bit,   // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset.
 
+      // ADR variants.
+      kCodeAddr4KiB,  // ADR rX, <label>; label must be after the ADR but within 4KiB range.
+                      // Multi-instruction expansion is not supported.
+
       // Load integer literal variants.
       // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes.
       kLiteral1KiB,
@@ -492,6 +504,12 @@
                    cond, kCompareAndBranchXZero, kCbxz16Bit, location);
     }
 
+    // Code address.
+    static Fixup LoadCodeAddress(uint32_t location, Register rt) {
+      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
+                   AL, kLoadCodeAddr, kCodeAddr4KiB, location);
+    }
+
     // Load narrow literal.
     static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
       DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
@@ -550,6 +568,7 @@
       switch (GetOriginalSize()) {
         case kBranch32Bit:
         case kCbxz48Bit:
+        case kCodeAddr4KiB:
         case kLiteralFar:
         case kLiteralAddrFar:
         case kLongOrFPLiteralFar:
@@ -623,7 +642,7 @@
 
     // Emit the branch instruction into the assembler buffer.  This does the
     // encoding into the thumb instruction.
-    void Emit(AssemblerBuffer* buffer, uint32_t code_size) const;
+    void Emit(uint32_t emit_location, AssemblerBuffer* buffer, uint32_t code_size) const;
 
    private:
     Fixup(Register rn, Register rt2, SRegister sd, DRegister dd,
@@ -903,6 +922,26 @@
   FixupId last_fixup_id_;
 };
 
+class ScopedForce32Bit {
+ public:
+  explicit ScopedForce32Bit(Thumb2Assembler* assembler, bool force = true)
+      : assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) {
+    if (force) {
+      assembler->Force32Bit();
+    }
+  }
+
+  ~ScopedForce32Bit() {
+    if (!old_force_32bit_) {
+      assembler_->Allow16Bit();
+    }
+  }
+
+ private:
+  Thumb2Assembler* const assembler_;
+  const bool old_force_32bit_;
+};
+
 }  // namespace arm
 }  // namespace art
 
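For clarity, a self-contained sketch of how the new ScopedForce32Bit guard behaves (MiniAssembler is invented for the example and is not ART code; the real guard wraps Thumb2Assembler and is used from the ARM code generator):

    #include <cassert>

    // Minimal stand-in mimicking the Force32Bit/Allow16Bit toggling above.
    class MiniAssembler {
     public:
      void Force32Bit() { force_32bit_ = true; }
      void Allow16Bit() { force_32bit_ = false; }
      bool IsForced32Bit() const { return force_32bit_; }
     private:
      bool force_32bit_ = false;
    };

    class ScopedForce32Bit {
     public:
      explicit ScopedForce32Bit(MiniAssembler* assembler, bool force = true)
          : assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) {
        if (force) {
          assembler->Force32Bit();
        }
      }
      // 16-bit encodings are re-allowed only if they were allowed on entry.
      ~ScopedForce32Bit() {
        if (!old_force_32bit_) {
          assembler_->Allow16Bit();
        }
      }
     private:
      MiniAssembler* const assembler_;
      const bool old_force_32bit_;
    };

    int main() {
      MiniAssembler assembler;
      {
        ScopedForce32Bit force(&assembler);
        assert(assembler.IsForced32Bit());   // 32-bit encodings enforced here
      }
      assert(!assembler.IsForced32Bit());    // previous (16-bit allowed) state is back
    }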
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index f8c4008..eaaf815 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -5535,7 +5535,7 @@
   "  f0:	f1bc 0f00 	cmp.w	ip, #0\n",
   "  f4:	bf18      	it	ne\n",
   "  f6:	f20d 4c01 	addwne	ip, sp, #1025	; 0x401\n",
-  "  fa:	f8d9 c084 	ldr.w	ip, [r9, #132]	; 0x84\n",
+  "  fa:	f8d9 c08c 	ldr.w	ip, [r9, #140]	; 0x8c\n",
   "  fe:	f1bc 0f00 	cmp.w	ip, #0\n",
   " 102:	d171      	bne.n	1e8 <VixlJniHelpers+0x1e8>\n",
   " 104:	f8cd c7ff 	str.w	ip, [sp, #2047]	; 0x7ff\n",
@@ -5610,7 +5610,7 @@
   " 214:	ecbd 8a10 	vpop	{s16-s31}\n",
   " 218:	e8bd 8de0 	ldmia.w	sp!, {r5, r6, r7, r8, sl, fp, pc}\n",
   " 21c:	4660      	mov	r0, ip\n",
-  " 21e:	f8d9 c2b8 	ldr.w	ip, [r9, #696]	; 0x2b8\n",
+  " 21e:	f8d9 c2c0 	ldr.w	ip, [r9, #704]	; 0x2c0\n",
   " 222:	47e0      	blx	ip\n",
   nullptr
 };
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index f4afb33..c03b98c 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1775,6 +1775,26 @@
   EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3);
 }
 
+void Mips64Assembler::IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x14);
+}
+
 void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) {
   TemplateLoadConst32(this, rd, value);
 }
@@ -2802,6 +2822,94 @@
   CHECK_EQ(misalignment, offset & (kMips64DoublewordSize - 1));
 }
 
+void Mips64Assembler::AdjustBaseOffsetAndElementSizeShift(GpuRegister& base,
+                                                          int32_t& offset,
+                                                          int& element_size_shift) {
+  // This method is used to adjust the base register, offset and element_size_shift
+  // for a vector load/store when the offset doesn't fit into the allowed number of bits.
+  // MSA ld.df and st.df instructions take signed offsets as arguments, but the maximum
+  // offset depends on the size of the data format df (10-bit offsets for ld.b,
+  // 11-bit for ld.h, 12-bit for ld.w and 13-bit for ld.d).
+  // If element_size_shift is non-negative at entry, it won't be changed, but offset
+  // will be checked for appropriate alignment. If negative at entry, it will be
+  // adjusted based on offset for maximum fit.
+  // It's assumed that the address in `base` is a multiple of 8.
+
+  CHECK_NE(base, AT);  // Must not overwrite the register `base` while loading `offset`.
+
+  if (element_size_shift >= 0) {
+    CHECK_LE(element_size_shift, TIMES_8);
+    CHECK_GE(JAVASTYLE_CTZ(offset), element_size_shift);
+  } else if (IsAligned<kMips64DoublewordSize>(offset)) {
+    element_size_shift = TIMES_8;
+  } else if (IsAligned<kMips64WordSize>(offset)) {
+    element_size_shift = TIMES_4;
+  } else if (IsAligned<kMips64HalfwordSize>(offset)) {
+    element_size_shift = TIMES_2;
+  } else {
+    element_size_shift = TIMES_1;
+  }
+
+  const int low_len = 10 + element_size_shift;  // How many low bits of `offset` ld.df/st.df
+                                                // will take.
+  int16_t low = offset & ((1 << low_len) - 1);  // Isolate these bits.
+  low -= (low & (1 << (low_len - 1))) << 1;     // Sign-extend these bits.
+  if (low == offset) {
+    return;  // `offset` fits into ld.df/st.df.
+  }
+
+  // First, see if `offset` can be represented as a sum of two signed offsets.
+  // This can save an instruction.
+
+  // Max int16_t that's a multiple of element size.
+  const int32_t kMaxDeltaForSimpleAdjustment = 0x8000 - (1 << element_size_shift);
+  // Max ld.df/st.df offset that's a multiple of element size.
+  const int32_t kMaxLoadStoreOffset = 0x1ff << element_size_shift;
+  const int32_t kMaxOffsetForSimpleAdjustment = kMaxDeltaForSimpleAdjustment + kMaxLoadStoreOffset;
+
+  if (IsInt<16>(offset)) {
+    Daddiu(AT, base, offset);
+    offset = 0;
+  } else if (0 <= offset && offset <= kMaxOffsetForSimpleAdjustment) {
+    Daddiu(AT, base, kMaxDeltaForSimpleAdjustment);
+    offset -= kMaxDeltaForSimpleAdjustment;
+  } else if (-kMaxOffsetForSimpleAdjustment <= offset && offset < 0) {
+    Daddiu(AT, base, -kMaxDeltaForSimpleAdjustment);
+    offset += kMaxDeltaForSimpleAdjustment;
+  } else {
+    // Let's treat `offset` as 64-bit to simplify handling of sign
+    // extensions in the instructions that supply its smaller signed parts.
+    //
+    // 16-bit or smaller parts of `offset`:
+    // |63  top  48|47  hi  32|31  upper  16|15  mid  13-10|12-9  low  0|
+    //
+    // Instructions that supply each part as a signed integer addend:
+    // |dati       |dahi      |daui         |daddiu        |ld.df/st.df |
+    //
+    // `top` is always 0, so dati isn't used.
+    // `hi` is 1 when `offset` is close to +2GB and 0 otherwise.
+    uint64_t tmp = static_cast<uint64_t>(offset) - low;  // Exclude `low` from the rest of `offset`
+                                                         // (accounts for sign of `low`).
+    tmp += (tmp & (UINT64_C(1) << 15)) << 1;  // Account for sign extension in daddiu.
+    tmp += (tmp & (UINT64_C(1) << 31)) << 1;  // Account for sign extension in daui.
+    int16_t mid = Low16Bits(tmp);
+    int16_t upper = High16Bits(tmp);
+    int16_t hi = Low16Bits(High32Bits(tmp));
+    Daui(AT, base, upper);
+    if (hi != 0) {
+      CHECK_EQ(hi, 1);
+      Dahi(AT, hi);
+    }
+    if (mid != 0) {
+      Daddiu(AT, AT, mid);
+    }
+    offset = low;
+  }
+  base = AT;
+  CHECK_GE(JAVASTYLE_CTZ(offset), element_size_shift);
+  CHECK(IsInt<10>(offset >> element_size_shift));
+}
+
 void Mips64Assembler::LoadFromOffset(LoadOperandType type,
                                      GpuRegister reg,
                                      GpuRegister base,
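To make the daui/dahi/daddiu split above concrete, here is a self-contained sketch of the same arithmetic for offset 0xABCDEF00 (an 8-aligned offset, so ld.d/st.d and element_size_shift == TIMES_8 == 3). The Low16/High16/High32 helpers are re-implemented locally just for the example; the resulting sequence matches what the assembler test below expects:

    #include <cstdint>
    #include <cstdio>

    static int16_t Low16(uint64_t v) { return static_cast<int16_t>(v & 0xffff); }
    static int16_t High16(uint64_t v) { return static_cast<int16_t>((v >> 16) & 0xffff); }
    static uint32_t High32(uint64_t v) { return static_cast<uint32_t>(v >> 32); }

    int main() {
      int32_t offset = static_cast<int32_t>(0xABCDEF00);
      int element_size_shift = 3;                    // 8-aligned offset -> ld.d/st.d
      const int low_len = 10 + element_size_shift;   // ld.d/st.d take 13 signed bits
      int16_t low = offset & ((1 << low_len) - 1);   // 0x0F00
      low -= (low & (1 << (low_len - 1))) << 1;      // sign-extend (no-op for 0x0F00)

      uint64_t tmp = static_cast<uint64_t>(offset) - low;
      tmp += (tmp & (UINT64_C(1) << 15)) << 1;       // compensate daddiu sign extension
      tmp += (tmp & (UINT64_C(1) << 31)) << 1;       // compensate daui sign extension
      int16_t mid = Low16(tmp);                      // -8192 (0xE000)
      int16_t upper = High16(tmp);                   // 0xABCE
      int16_t hi = Low16(High32(tmp));               // 0 -> no dahi needed

      // Resulting sequence: daui $at, $a0, 0xABCE; daddiu $at, $at, -8192;
      //                     ld.d $w0, 0xF00($at)
      std::printf("upper=0x%X hi=%d mid=%d low=0x%X\n",
                  static_cast<unsigned>(static_cast<uint16_t>(upper)), hi, mid,
                  static_cast<unsigned>(static_cast<uint16_t>(low)));
    }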
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 6ac3361..c92cf4c 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -278,14 +278,16 @@
   kLoadUnsignedHalfword,
   kLoadWord,
   kLoadUnsignedWord,
-  kLoadDoubleword
+  kLoadDoubleword,
+  kLoadQuadword
 };
 
 enum StoreOperandType {
   kStoreByte,
   kStoreHalfword,
   kStoreWord,
-  kStoreDoubleword
+  kStoreDoubleword,
+  kStoreQuadword
 };
 
 // Used to test the values returned by ClassS/ClassD.
@@ -767,6 +769,11 @@
   void StW(VectorRegister wd, GpuRegister rs, int offset);
   void StD(VectorRegister wd, GpuRegister rs, int offset);
 
+  void IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
   // Higher level composite instructions.
   int InstrCountForLoadReplicatedConst32(int64_t);
   void LoadConst32(GpuRegister rd, int32_t value);
@@ -896,6 +903,10 @@
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
   void AdjustBaseAndOffset(GpuRegister& base, int32_t& offset, bool is_doubleword);
+  // If element_size_shift is negative at entry, its value will be calculated based on the offset.
+  void AdjustBaseOffsetAndElementSizeShift(GpuRegister& base,
+                                           int32_t& offset,
+                                           int& element_size_shift);
 
  private:
   // This will be used as an argument for loads/stores
@@ -1019,6 +1030,8 @@
           null_checker();
         }
         break;
+      default:
+        LOG(FATAL) << "UNREACHABLE";
     }
     if (type != kLoadDoubleword) {
       null_checker();
@@ -1031,7 +1044,12 @@
                          GpuRegister base,
                          int32_t offset,
                          ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
-    AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
+    int element_size_shift = -1;
+    if (type != kLoadQuadword) {
+      AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
+    } else {
+      AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift);
+    }
 
     switch (type) {
       case kLoadWord:
@@ -1051,6 +1069,17 @@
           null_checker();
         }
         break;
+      case kLoadQuadword:
+        switch (element_size_shift) {
+          case TIMES_1: LdB(static_cast<VectorRegister>(reg), base, offset); break;
+          case TIMES_2: LdH(static_cast<VectorRegister>(reg), base, offset); break;
+          case TIMES_4: LdW(static_cast<VectorRegister>(reg), base, offset); break;
+          case TIMES_8: LdD(static_cast<VectorRegister>(reg), base, offset); break;
+          default:
+            LOG(FATAL) << "UNREACHABLE";
+        }
+        null_checker();
+        break;
       default:
         LOG(FATAL) << "UNREACHABLE";
     }
@@ -1104,7 +1133,12 @@
                         GpuRegister base,
                         int32_t offset,
                         ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
-    AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
+    int element_size_shift = -1;
+    if (type != kStoreQuadword) {
+      AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
+    } else {
+      AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift);
+    }
 
     switch (type) {
       case kStoreWord:
@@ -1124,6 +1158,17 @@
           null_checker();
         }
         break;
+      case kStoreQuadword:
+        switch (element_size_shift) {
+          case TIMES_1: StB(static_cast<VectorRegister>(reg), base, offset); break;
+          case TIMES_2: StH(static_cast<VectorRegister>(reg), base, offset); break;
+          case TIMES_4: StW(static_cast<VectorRegister>(reg), base, offset); break;
+          case TIMES_8: StD(static_cast<VectorRegister>(reg), base, offset); break;
+          default:
+            LOG(FATAL) << "UNREACHABLE";
+        }
+        null_checker();
+        break;
       default:
         LOG(FATAL) << "UNREACHABLE";
     }
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 084ce6f..fbebe0c 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -1970,6 +1970,50 @@
   __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, -32768);
   __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
 
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 8);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 511);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 512);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 513);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 514);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 516);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1022);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1024);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1025);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1026);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1028);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2044);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2048);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2049);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2050);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2052);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4088);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4096);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4097);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4098);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4100);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4104);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x7FFC);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x8000);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x10000);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x12345678);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x12350078);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -256);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -511);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -513);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -1022);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -1026);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -2044);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -2052);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -4096);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -4104);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -32768);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0xABCDEF00);
+  __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x7FFFABCD);
+
   const char* expected =
       "lwc1 $f0, 0($a0)\n"
       "lwc1 $f0, 4($a0)\n"
@@ -2010,7 +2054,78 @@
       "ldc1 $f0, -256($a0)\n"
       "ldc1 $f0, -32768($a0)\n"
       "daui $at, $a0, 0xABCE\n"
-      "ldc1 $f0, -0x1100($at) # 0xEF00\n";
+      "ldc1 $f0, -0x1100($at) # 0xEF00\n"
+
+      "ld.d $w0, 0($a0)\n"
+      "ld.b $w0, 1($a0)\n"
+      "ld.h $w0, 2($a0)\n"
+      "ld.w $w0, 4($a0)\n"
+      "ld.d $w0, 8($a0)\n"
+      "ld.b $w0, 511($a0)\n"
+      "ld.d $w0, 512($a0)\n"
+      "daddiu $at, $a0, 513\n"
+      "ld.b $w0, 0($at)\n"
+      "ld.h $w0, 514($a0)\n"
+      "ld.w $w0, 516($a0)\n"
+      "ld.h $w0, 1022($a0)\n"
+      "ld.d $w0, 1024($a0)\n"
+      "daddiu $at, $a0, 1025\n"
+      "ld.b $w0, 0($at)\n"
+      "daddiu $at, $a0, 1026\n"
+      "ld.h $w0, 0($at)\n"
+      "ld.w $w0, 1028($a0)\n"
+      "ld.w $w0, 2044($a0)\n"
+      "ld.d $w0, 2048($a0)\n"
+      "daddiu $at, $a0, 2049\n"
+      "ld.b $w0, 0($at)\n"
+      "daddiu $at, $a0, 2050\n"
+      "ld.h $w0, 0($at)\n"
+      "daddiu $at, $a0, 2052\n"
+      "ld.w $w0, 0($at)\n"
+      "ld.d $w0, 4088($a0)\n"
+      "daddiu $at, $a0, 4096\n"
+      "ld.d $w0, 0($at)\n"
+      "daddiu $at, $a0, 4097\n"
+      "ld.b $w0, 0($at)\n"
+      "daddiu $at, $a0, 4098\n"
+      "ld.h $w0, 0($at)\n"
+      "daddiu $at, $a0, 4100\n"
+      "ld.w $w0, 0($at)\n"
+      "daddiu $at, $a0, 4104\n"
+      "ld.d $w0, 0($at)\n"
+      "daddiu $at, $a0, 0x7FFC\n"
+      "ld.w $w0, 0($at)\n"
+      "daddiu $at, $a0, 0x7FF8\n"
+      "ld.d $w0, 8($at)\n"
+      "daui $at, $a0, 0x1\n"
+      "ld.d $w0, 0($at)\n"
+      "daui $at, $a0, 0x1234\n"
+      "daddiu $at, $at, 0x6000\n"
+      "ld.d $w0, -2440($at) # 0xF678\n"
+      "daui $at, $a0, 0x1235\n"
+      "ld.d $w0, 0x78($at)\n"
+      "ld.d $w0, -256($a0)\n"
+      "ld.b $w0, -511($a0)\n"
+      "daddiu $at, $a0, -513\n"
+      "ld.b $w0, 0($at)\n"
+      "ld.h $w0, -1022($a0)\n"
+      "daddiu $at, $a0, -1026\n"
+      "ld.h $w0, 0($at)\n"
+      "ld.w $w0, -2044($a0)\n"
+      "daddiu $at, $a0, -2052\n"
+      "ld.w $w0, 0($at)\n"
+      "ld.d $w0, -4096($a0)\n"
+      "daddiu $at, $a0, -4104\n"
+      "ld.d $w0, 0($at)\n"
+      "daddiu $at, $a0, -32768\n"
+      "ld.d $w0, 0($at)\n"
+      "daui $at, $a0, 0xABCE\n"
+      "daddiu $at, $at, -8192 # 0xE000\n"
+      "ld.d $w0, 0xF00($at)\n"
+      "daui $at, $a0, 0x8000\n"
+      "dahi $at, $at, 1\n"
+      "daddiu $at, $at, -21504 # 0xAC00\n"
+      "ld.b $w0, -51($at) # 0xFFCD\n";
   DriverStr(expected, "LoadFpuFromOffset");
 }
 
@@ -2200,6 +2315,50 @@
   __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, -32768);
   __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
 
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 8);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 511);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 512);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 513);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 514);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 516);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1022);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1024);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1025);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1026);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1028);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2044);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2048);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2049);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2050);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2052);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4088);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4096);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4097);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4098);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4100);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4104);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x7FFC);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x8000);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x10000);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x12345678);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x12350078);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -256);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -511);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -513);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -1022);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -1026);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -2044);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -2052);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -4096);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -4104);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -32768);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0xABCDEF00);
+  __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x7FFFABCD);
+
   const char* expected =
       "swc1 $f0, 0($a0)\n"
       "swc1 $f0, 4($a0)\n"
@@ -2240,7 +2399,78 @@
       "sdc1 $f0, -256($a0)\n"
       "sdc1 $f0, -32768($a0)\n"
       "daui $at, $a0, 0xABCE\n"
-      "sdc1 $f0, -0x1100($at)\n";
+      "sdc1 $f0, -0x1100($at)\n"
+
+      "st.d $w0, 0($a0)\n"
+      "st.b $w0, 1($a0)\n"
+      "st.h $w0, 2($a0)\n"
+      "st.w $w0, 4($a0)\n"
+      "st.d $w0, 8($a0)\n"
+      "st.b $w0, 511($a0)\n"
+      "st.d $w0, 512($a0)\n"
+      "daddiu $at, $a0, 513\n"
+      "st.b $w0, 0($at)\n"
+      "st.h $w0, 514($a0)\n"
+      "st.w $w0, 516($a0)\n"
+      "st.h $w0, 1022($a0)\n"
+      "st.d $w0, 1024($a0)\n"
+      "daddiu $at, $a0, 1025\n"
+      "st.b $w0, 0($at)\n"
+      "daddiu $at, $a0, 1026\n"
+      "st.h $w0, 0($at)\n"
+      "st.w $w0, 1028($a0)\n"
+      "st.w $w0, 2044($a0)\n"
+      "st.d $w0, 2048($a0)\n"
+      "daddiu $at, $a0, 2049\n"
+      "st.b $w0, 0($at)\n"
+      "daddiu $at, $a0, 2050\n"
+      "st.h $w0, 0($at)\n"
+      "daddiu $at, $a0, 2052\n"
+      "st.w $w0, 0($at)\n"
+      "st.d $w0, 4088($a0)\n"
+      "daddiu $at, $a0, 4096\n"
+      "st.d $w0, 0($at)\n"
+      "daddiu $at, $a0, 4097\n"
+      "st.b $w0, 0($at)\n"
+      "daddiu $at, $a0, 4098\n"
+      "st.h $w0, 0($at)\n"
+      "daddiu $at, $a0, 4100\n"
+      "st.w $w0, 0($at)\n"
+      "daddiu $at, $a0, 4104\n"
+      "st.d $w0, 0($at)\n"
+      "daddiu $at, $a0, 0x7FFC\n"
+      "st.w $w0, 0($at)\n"
+      "daddiu $at, $a0, 0x7FF8\n"
+      "st.d $w0, 8($at)\n"
+      "daui $at, $a0, 0x1\n"
+      "st.d $w0, 0($at)\n"
+      "daui $at, $a0, 0x1234\n"
+      "daddiu $at, $at, 0x6000\n"
+      "st.d $w0, -2440($at) # 0xF678\n"
+      "daui $at, $a0, 0x1235\n"
+      "st.d $w0, 0x78($at)\n"
+      "st.d $w0, -256($a0)\n"
+      "st.b $w0, -511($a0)\n"
+      "daddiu $at, $a0, -513\n"
+      "st.b $w0, 0($at)\n"
+      "st.h $w0, -1022($a0)\n"
+      "daddiu $at, $a0, -1026\n"
+      "st.h $w0, 0($at)\n"
+      "st.w $w0, -2044($a0)\n"
+      "daddiu $at, $a0, -2052\n"
+      "st.w $w0, 0($at)\n"
+      "st.d $w0, -4096($a0)\n"
+      "daddiu $at, $a0, -4104\n"
+      "st.d $w0, 0($at)\n"
+      "daddiu $at, $a0, -32768\n"
+      "st.d $w0, 0($at)\n"
+      "daui $at, $a0, 0xABCE\n"
+      "daddiu $at, $at, -8192 # 0xE000\n"
+      "st.d $w0, 0xF00($at)\n"
+      "daui $at, $a0, 0x8000\n"
+      "dahi $at, $at, 1\n"
+      "daddiu $at, $at, -21504 # 0xAC00\n"
+      "st.b $w0, -51($at) # 0xFFCD\n";
   DriverStr(expected, "StoreFpuToOffset");
 }
 
@@ -2990,6 +3220,26 @@
             "st.d");
 }
 
+TEST_F(AssemblerMIPS64Test, IlvrB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrB, "ilvr.b ${reg1}, ${reg2}, ${reg3}"),
+            "ilvr.b");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvrH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrH, "ilvr.h ${reg1}, ${reg2}, ${reg3}"),
+            "ilvr.h");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvrW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrW, "ilvr.w ${reg1}, ${reg2}, ${reg3}"),
+            "ilvr.w");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvrD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrD, "ilvr.d ${reg1}, ${reg2}, ${reg3}"),
+            "ilvr.d");
+}
+
 #undef __
 
 }  // namespace art
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 1736618..bef32f8 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1238,6 +1238,139 @@
   EmitXmmRegisterOperand(dst, src);
 }
 
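+// Packed integer and floating-point min/max. pminsw/pmaxsw and pminub/pmaxub
+// use the two-byte SSE2 encodings; the remaining packed-integer variants use
+// the three-byte SSE4.1 escape (0x66 0x0F 0x38). minps/maxps and minpd/maxpd
+// are the packed floating-point forms.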
+void X86Assembler::pminsb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x38);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3C);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminsw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xEA);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xEE);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x39);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3D);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminub(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xDA);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xDE);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminuw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3A);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3E);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminud(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3B);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3F);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::minps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x5D);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::maxps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x5F);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::minpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x5D);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::maxpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x5F);
+  EmitXmmRegisterOperand(dst, src);
+}
 
 void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index a747cda..c4bb9ee 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -498,6 +498,25 @@
   void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void pavgw(XmmRegister dst, XmmRegister src);
 
+  void pminsb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void pmaxsb(XmmRegister dst, XmmRegister src);
+  void pminsw(XmmRegister dst, XmmRegister src);
+  void pmaxsw(XmmRegister dst, XmmRegister src);
+  void pminsd(XmmRegister dst, XmmRegister src);
+  void pmaxsd(XmmRegister dst, XmmRegister src);
+
+  void pminub(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void pmaxub(XmmRegister dst, XmmRegister src);
+  void pminuw(XmmRegister dst, XmmRegister src);
+  void pmaxuw(XmmRegister dst, XmmRegister src);
+  void pminud(XmmRegister dst, XmmRegister src);
+  void pmaxud(XmmRegister dst, XmmRegister src);
+
+  void minps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void maxps(XmmRegister dst, XmmRegister src);
+  void minpd(XmmRegister dst, XmmRegister src);
+  void maxpd(XmmRegister dst, XmmRegister src);
+
   void pcmpeqb(XmmRegister dst, XmmRegister src);
   void pcmpeqw(XmmRegister dst, XmmRegister src);
   void pcmpeqd(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index f75f972..34f2a47 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -613,6 +613,70 @@
   DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
 }
 
+TEST_F(AssemblerX86Test, PMinSB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb");
+}
+
+TEST_F(AssemblerX86Test, PMaxSB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb");
+}
+
+TEST_F(AssemblerX86Test, PMinSW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw");
+}
+
+TEST_F(AssemblerX86Test, PMaxSW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw");
+}
+
+TEST_F(AssemblerX86Test, PMinSD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd");
+}
+
+TEST_F(AssemblerX86Test, PMaxSD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd");
+}
+
+TEST_F(AssemblerX86Test, PMinUB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub");
+}
+
+TEST_F(AssemblerX86Test, PMaxUB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub");
+}
+
+TEST_F(AssemblerX86Test, PMinUW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw");
+}
+
+TEST_F(AssemblerX86Test, PMaxUW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw");
+}
+
+TEST_F(AssemblerX86Test, PMinUD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud");
+}
+
+TEST_F(AssemblerX86Test, PMaxUD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud");
+}
+
+TEST_F(AssemblerX86Test, MinPS) {
+  DriverStr(RepeatFF(&x86::X86Assembler::minps, "minps %{reg2}, %{reg1}"), "minps");
+}
+
+TEST_F(AssemblerX86Test, MaxPS) {
+  DriverStr(RepeatFF(&x86::X86Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps");
+}
+
+TEST_F(AssemblerX86Test, MinPD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd");
+}
+
+TEST_F(AssemblerX86Test, MaxPD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd");
+}
+
 TEST_F(AssemblerX86Test, PCmpeqB) {
   DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb");
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 1b7a485..82d1174 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1445,6 +1445,156 @@
   EmitXmmRegisterOperand(dst.LowBits(), src);
 }
 
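+// Packed min/max, mirroring the x86 encodings; EmitOptionalRex32 emits a REX
+// prefix when either operand is one of the extended registers (xmm8-xmm15).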
+void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x38);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3C);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xEA);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xEE);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x39);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3D);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xDA);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xDE);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3A);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3E);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3B);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3F);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x5D);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x5F);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x5D);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x5F);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 0ddc46c..6e584fe 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -526,6 +526,25 @@
   void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void pavgw(XmmRegister dst, XmmRegister src);
 
+  void pminsb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void pmaxsb(XmmRegister dst, XmmRegister src);
+  void pminsw(XmmRegister dst, XmmRegister src);
+  void pmaxsw(XmmRegister dst, XmmRegister src);
+  void pminsd(XmmRegister dst, XmmRegister src);
+  void pmaxsd(XmmRegister dst, XmmRegister src);
+
+  void pminub(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void pmaxub(XmmRegister dst, XmmRegister src);
+  void pminuw(XmmRegister dst, XmmRegister src);
+  void pmaxuw(XmmRegister dst, XmmRegister src);
+  void pminud(XmmRegister dst, XmmRegister src);
+  void pmaxud(XmmRegister dst, XmmRegister src);
+
+  void minps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void maxps(XmmRegister dst, XmmRegister src);
+  void minpd(XmmRegister dst, XmmRegister src);
+  void maxpd(XmmRegister dst, XmmRegister src);
+
   void pcmpeqb(XmmRegister dst, XmmRegister src);
   void pcmpeqw(XmmRegister dst, XmmRegister src);
   void pcmpeqd(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index e7d8401..b574003 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1301,6 +1301,70 @@
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
 }
 
+TEST_F(AssemblerX86_64Test, Pminsb) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxsb) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb");
+}
+
+TEST_F(AssemblerX86_64Test, Pminsw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxsw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw");
+}
+
+TEST_F(AssemblerX86_64Test, Pminsd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxsd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd");
+}
+
+TEST_F(AssemblerX86_64Test, Pminub) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxub) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub");
+}
+
+TEST_F(AssemblerX86_64Test, Pminuw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxuw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw");
+}
+
+TEST_F(AssemblerX86_64Test, Pminud) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxud) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud");
+}
+
+TEST_F(AssemblerX86_64Test, Minps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::minps, "minps %{reg2}, %{reg1}"), "minps");
+}
+
+TEST_F(AssemblerX86_64Test, Maxps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps");
+}
+
+TEST_F(AssemblerX86_64Test, Minpd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd");
+}
+
+TEST_F(AssemblerX86_64Test, Maxpd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd");
+}
+
 TEST_F(AssemblerX86_64Test, PCmpeqb) {
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb");
 }
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 58f3948..660409f 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1374,6 +1374,26 @@
       oat_filenames_.push_back(oat_location_.c_str());
     }
 
+    // If we're updating a vdex file in place, be defensive and write an invalid vdex magic in
+    // case dex2oat gets killed.
+    // Note: we're only invalidating the magic data in the file, as dex2oat needs the rest of
+    // the information to remain valid.
+    if (update_input_vdex_) {
+      std::unique_ptr<BufferedOutputStream> vdex_out(MakeUnique<BufferedOutputStream>(
+          MakeUnique<FileOutputStream>(vdex_files_.back().get())));
+      if (!vdex_out->WriteFully(&VdexFile::Header::kVdexInvalidMagic,
+                                arraysize(VdexFile::Header::kVdexInvalidMagic))) {
+        PLOG(ERROR) << "Failed to invalidate vdex header. File: " << vdex_out->GetLocation();
+        return false;
+      }
+
+      if (!vdex_out->Flush()) {
+        PLOG(ERROR) << "Failed to flush stream after invalidating header of vdex file."
+                    << " File: " << vdex_out->GetLocation();
+        return false;
+      }
+    }
+
     // Swap file handling
     //
     // If the swap fd is not -1, we assume this is the file descriptor of an open but unlinked file
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index d546072..6420aa8 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -430,6 +430,9 @@
 };
 
 TEST_F(Dex2oatSwapUseTest, CheckSwapUsage) {
+  // Native memory usage isn't correctly tracked under sanitization.
+  TEST_DISABLED_FOR_MEMORY_TOOL_ASAN();
+
   // The `native_alloc_2_ >= native_alloc_1_` assertion below may not
   // hold true on some x86 systems; disable this test while we
   // investigate (b/29259363).
diff --git a/dexlayout/Android.bp b/dexlayout/Android.bp
index a2116cd..588a3ae 100644
--- a/dexlayout/Android.bp
+++ b/dexlayout/Android.bp
@@ -20,7 +20,7 @@
         "dexlayout.cc",
         "dex_ir.cc",
         "dex_ir_builder.cc",
-	"dex_verify.cc",
+        "dex_verify.cc",
         "dex_visualize.cc",
         "dex_writer.cc",
     ],
@@ -43,6 +43,7 @@
 
 art_cc_binary {
     name: "dexlayout",
+    defaults: ["art_defaults"],
     host_supported: true,
     srcs: ["dexlayout_main.cc"],
     cflags: ["-Wall"],
@@ -61,13 +62,28 @@
 
 art_cc_binary {
     name: "dexdiag",
-    host_supported: false,
+    defaults: ["art_defaults"],
+    host_supported: true,
     srcs: ["dexdiag.cc"],
     cflags: ["-Wall"],
     shared_libs: [
         "libart",
         "libart-dexlayout",
-        "libpagemap",
     ],
+    target: {
+        android: {
+            shared_libs: [
+                "libpagemap",
+            ]
+        },
+    }
 }
 
+art_cc_test {
+    name: "art_dexdiag_tests",
+    host_supported: true,
+    defaults: [
+        "art_gtest_defaults",
+    ],
+    srcs: ["dexdiag_test.cc"],
+}
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index f1c6f67..cf453b9 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -281,6 +281,16 @@
       item->SetDouble(conv.d);
       break;
     }
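+    // MethodType and MethodHandle constants (new in dex version 038) encode a
+    // proto index and a method handle index, respectively.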
+    case DexFile::kDexAnnotationMethodType: {
+      const uint32_t proto_index = static_cast<uint32_t>(ReadVarWidth(data, length, false));
+      item->SetProtoId(GetProtoId(proto_index));
+      break;
+    }
+    case DexFile::kDexAnnotationMethodHandle: {
+      const uint32_t method_handle_index = static_cast<uint32_t>(ReadVarWidth(data, length, false));
+      item->SetMethodHandle(GetMethodHandle(method_handle_index));
+      break;
+    }
     case DexFile::kDexAnnotationString: {
       const uint32_t string_index = static_cast<uint32_t>(ReadVarWidth(data, length, false));
       item->SetStringId(GetStringId(string_index));
@@ -766,6 +776,64 @@
   return class_data;
 }
 
+void Collections::CreateCallSitesAndMethodHandles(const DexFile& dex_file) {
+  // Iterate through the map list and set the offset of the CallSiteIds and MethodHandleItems.
+  const DexFile::MapList* map =
+      reinterpret_cast<const DexFile::MapList*>(dex_file.Begin() + MapListOffset());
+  for (uint32_t i = 0; i < map->size_; ++i) {
+    const DexFile::MapItem* item = map->list_ + i;
+    switch (item->type_) {
+      case DexFile::kDexTypeCallSiteIdItem:
+        SetCallSiteIdsOffset(item->offset_);
+        break;
+      case DexFile::kDexTypeMethodHandleItem:
+        SetMethodHandleItemsOffset(item->offset_);
+        break;
+      default:
+        break;
+    }
+  }
+  // Populate MethodHandleItems first (CallSiteIds may depend on them).
+  for (uint32_t i = 0; i < dex_file.NumMethodHandles(); i++) {
+    CreateMethodHandleItem(dex_file, i);
+  }
+  // Populate CallSiteIds.
+  for (uint32_t i = 0; i < dex_file.NumCallSiteIds(); i++) {
+    CreateCallSiteId(dex_file, i);
+  }
+}
+
+void Collections::CreateCallSiteId(const DexFile& dex_file, uint32_t i) {
+  const DexFile::CallSiteIdItem& disk_call_site_id = dex_file.GetCallSiteId(i);
+  const uint8_t* disk_call_item_ptr = dex_file.Begin() + disk_call_site_id.data_off_;
+  EncodedArrayItem* call_site_item =
+      CreateEncodedArrayItem(disk_call_item_ptr, disk_call_site_id.data_off_);
+
+  CallSiteId* call_site_id = new CallSiteId(call_site_item);
+  call_site_ids_.AddIndexedItem(call_site_id, CallSiteIdsOffset() + i * CallSiteId::ItemSize(), i);
+}
+
+void Collections::CreateMethodHandleItem(const DexFile& dex_file, uint32_t i) {
+  const DexFile::MethodHandleItem& disk_method_handle = dex_file.GetMethodHandle(i);
+  uint16_t index = disk_method_handle.field_or_method_idx_;
+  DexFile::MethodHandleType type =
+      static_cast<DexFile::MethodHandleType>(disk_method_handle.method_handle_type_);
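+  // Invoke-style method handles reference a method_id; field accessors reference a field_id.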
+  bool is_invoke = type == DexFile::MethodHandleType::kInvokeStatic ||
+                   type == DexFile::MethodHandleType::kInvokeInstance ||
+                   type == DexFile::MethodHandleType::kInvokeConstructor;
+  static_assert(DexFile::MethodHandleType::kLast == DexFile::MethodHandleType::kInvokeConstructor,
+                "Unexpected method handle types.");
+  IndexedItem* field_or_method_id;
+  if (is_invoke) {
+    field_or_method_id = GetMethodId(index);
+  } else {
+    field_or_method_id = GetFieldId(index);
+  }
+  MethodHandleItem* method_handle = new MethodHandleItem(type, field_or_method_id);
+  method_handle_items_.AddIndexedItem(
+      method_handle, MethodHandleItemsOffset() + i * MethodHandleItem::ItemSize(), i);
+}
+
 static uint32_t HeaderOffset(const dex_ir::Collections& collections ATTRIBUTE_UNUSED) {
   return 0;
 }
@@ -823,6 +891,16 @@
     &dex_ir::Collections::ClassDefsSize,
     &dex_ir::Collections::ClassDefsOffset
   }, {
+    "CallSiteId",
+    DexFile::kDexTypeCallSiteIdItem,
+    &dex_ir::Collections::CallSiteIdsSize,
+    &dex_ir::Collections::CallSiteIdsOffset
+  }, {
+    "MethodHandle",
+    DexFile::kDexTypeMethodHandleItem,
+    &dex_ir::Collections::MethodHandleItemsSize,
+    &dex_ir::Collections::MethodHandleItemsOffset
+  }, {
     "StringData",
     DexFile::kDexTypeStringDataItem,
     &dex_ir::Collections::StringDatasSize,
diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h
index cad0395..5692eb2 100644
--- a/dexlayout/dex_ir.h
+++ b/dexlayout/dex_ir.h
@@ -35,6 +35,7 @@
 class AnnotationsDirectoryItem;
 class AnnotationSetItem;
 class AnnotationSetRefList;
+class CallSiteId;
 class ClassData;
 class ClassDef;
 class CodeItem;
@@ -47,6 +48,7 @@
 class Header;
 class MapList;
 class MapItem;
+class MethodHandleItem;
 class MethodId;
 class MethodItem;
 class ParameterAnnotation;
@@ -65,6 +67,8 @@
 static constexpr size_t kFieldIdItemSize = 8;
 static constexpr size_t kMethodIdItemSize = 8;
 static constexpr size_t kClassDefItemSize = 32;
+static constexpr size_t kCallSiteIdItemSize = 4;
+static constexpr size_t kMethodHandleItemSize = 8;
 
 // Visitor support
 class AbstractDispatcher {
@@ -79,6 +83,8 @@
   virtual void Dispatch(const ProtoId* proto_id) = 0;
   virtual void Dispatch(const FieldId* field_id) = 0;
   virtual void Dispatch(const MethodId* method_id) = 0;
+  virtual void Dispatch(const CallSiteId* call_site_id) = 0;
+  virtual void Dispatch(const MethodHandleItem* method_handle_item) = 0;
   virtual void Dispatch(ClassData* class_data) = 0;
   virtual void Dispatch(ClassDef* class_def) = 0;
   virtual void Dispatch(FieldItem* field_item) = 0;
@@ -165,6 +171,9 @@
   std::vector<std::unique_ptr<FieldId>>& FieldIds() { return field_ids_.Collection(); }
   std::vector<std::unique_ptr<MethodId>>& MethodIds() { return method_ids_.Collection(); }
   std::vector<std::unique_ptr<ClassDef>>& ClassDefs() { return class_defs_.Collection(); }
+  std::vector<std::unique_ptr<CallSiteId>>& CallSiteIds() { return call_site_ids_.Collection(); }
+  std::vector<std::unique_ptr<MethodHandleItem>>& MethodHandleItems()
+      { return method_handle_items_.Collection(); }
   std::map<uint32_t, std::unique_ptr<StringData>>& StringDatas()
       { return string_datas_.Collection(); }
   std::map<uint32_t, std::unique_ptr<TypeList>>& TypeLists() { return type_lists_.Collection(); }
@@ -189,6 +198,10 @@
   void CreateFieldId(const DexFile& dex_file, uint32_t i);
   void CreateMethodId(const DexFile& dex_file, uint32_t i);
   void CreateClassDef(const DexFile& dex_file, uint32_t i);
+  void CreateCallSiteId(const DexFile& dex_file, uint32_t i);
+  void CreateMethodHandleItem(const DexFile& dex_file, uint32_t i);
+
+  void CreateCallSitesAndMethodHandles(const DexFile& dex_file);
 
   TypeList* CreateTypeList(const DexFile::TypeList* type_list, uint32_t offset);
   EncodedArrayItem* CreateEncodedArrayItem(const uint8_t* static_data, uint32_t offset);
@@ -207,6 +220,8 @@
   FieldId* GetFieldId(uint32_t index) { return FieldIds()[index].get(); }
   MethodId* GetMethodId(uint32_t index) { return MethodIds()[index].get(); }
   ClassDef* GetClassDef(uint32_t index) { return ClassDefs()[index].get(); }
+  CallSiteId* GetCallSiteId(uint32_t index) { return CallSiteIds()[index].get(); }
+  MethodHandleItem* GetMethodHandle(uint32_t index) { return MethodHandleItems()[index].get(); }
 
   StringId* GetStringIdOrNullPtr(uint32_t index) {
     return index == DexFile::kDexNoIndex ? nullptr : GetStringId(index);
@@ -221,6 +236,8 @@
   uint32_t FieldIdsOffset() const { return field_ids_.GetOffset(); }
   uint32_t MethodIdsOffset() const { return method_ids_.GetOffset(); }
   uint32_t ClassDefsOffset() const { return class_defs_.GetOffset(); }
+  uint32_t CallSiteIdsOffset() const { return call_site_ids_.GetOffset(); }
+  uint32_t MethodHandleItemsOffset() const { return method_handle_items_.GetOffset(); }
   uint32_t StringDatasOffset() const { return string_datas_.GetOffset(); }
   uint32_t TypeListsOffset() const { return type_lists_.GetOffset(); }
   uint32_t EncodedArrayItemsOffset() const { return encoded_array_items_.GetOffset(); }
@@ -240,6 +257,9 @@
   void SetFieldIdsOffset(uint32_t new_offset) { field_ids_.SetOffset(new_offset); }
   void SetMethodIdsOffset(uint32_t new_offset) { method_ids_.SetOffset(new_offset); }
   void SetClassDefsOffset(uint32_t new_offset) { class_defs_.SetOffset(new_offset); }
+  void SetCallSiteIdsOffset(uint32_t new_offset) { call_site_ids_.SetOffset(new_offset); }
+  void SetMethodHandleItemsOffset(uint32_t new_offset)
+      { method_handle_items_.SetOffset(new_offset); }
   void SetStringDatasOffset(uint32_t new_offset) { string_datas_.SetOffset(new_offset); }
   void SetTypeListsOffset(uint32_t new_offset) { type_lists_.SetOffset(new_offset); }
   void SetEncodedArrayItemsOffset(uint32_t new_offset)
@@ -262,6 +282,8 @@
   uint32_t FieldIdsSize() const { return field_ids_.Size(); }
   uint32_t MethodIdsSize() const { return method_ids_.Size(); }
   uint32_t ClassDefsSize() const { return class_defs_.Size(); }
+  uint32_t CallSiteIdsSize() const { return call_site_ids_.Size(); }
+  uint32_t MethodHandleItemsSize() const { return method_handle_items_.Size(); }
   uint32_t StringDatasSize() const { return string_datas_.Size(); }
   uint32_t TypeListsSize() const { return type_lists_.Size(); }
   uint32_t EncodedArrayItemsSize() const { return encoded_array_items_.Size(); }
@@ -288,6 +310,8 @@
   CollectionVector<FieldId> field_ids_;
   CollectionVector<MethodId> method_ids_;
   CollectionVector<ClassDef> class_defs_;
+  CollectionVector<CallSiteId> call_site_ids_;
+  CollectionVector<MethodHandleItem> method_handle_items_;
 
   CollectionMap<StringData> string_datas_;
   CollectionMap<TypeList> type_lists_;
@@ -603,8 +627,10 @@
   void SetDouble(double d) { u_.double_val_ = d; }
   void SetStringId(StringId* string_id) { u_.string_val_ = string_id; }
   void SetTypeId(TypeId* type_id) { u_.type_val_ = type_id; }
+  void SetProtoId(ProtoId* proto_id) { u_.proto_val_ = proto_id; }
   void SetFieldId(FieldId* field_id) { u_.field_val_ = field_id; }
   void SetMethodId(MethodId* method_id) { u_.method_val_ = method_id; }
+  void SetMethodHandle(MethodHandleItem* method_handle) { u_.method_handle_val_ = method_handle; }
   void SetEncodedArray(EncodedArrayItem* encoded_array) { encoded_array_.reset(encoded_array); }
   void SetEncodedAnnotation(EncodedAnnotation* encoded_annotation)
       { encoded_annotation_.reset(encoded_annotation); }
@@ -619,8 +645,10 @@
   double GetDouble() const { return u_.double_val_; }
   StringId* GetStringId() const { return u_.string_val_; }
   TypeId* GetTypeId() const { return u_.type_val_; }
+  ProtoId* GetProtoId() const { return u_.proto_val_; }
   FieldId* GetFieldId() const { return u_.field_val_; }
   MethodId* GetMethodId() const { return u_.method_val_; }
+  MethodHandleItem* GetMethodHandle() const { return u_.method_handle_val_; }
   EncodedArrayItem* GetEncodedArray() const { return encoded_array_.get(); }
   EncodedAnnotation* GetEncodedAnnotation() const { return encoded_annotation_.get(); }
 
@@ -639,8 +667,10 @@
     double double_val_;
     StringId* string_val_;
     TypeId* type_val_;
+    ProtoId* proto_val_;
     FieldId* field_val_;
     MethodId* method_val_;
+    MethodHandleItem* method_handle_val_;
   } u_;
   std::unique_ptr<EncodedArrayItem> encoded_array_;
   std::unique_ptr<EncodedAnnotation> encoded_annotation_;
@@ -1087,6 +1117,48 @@
   DISALLOW_COPY_AND_ASSIGN(AnnotationsDirectoryItem);
 };
 
+class CallSiteId : public IndexedItem {
+ public:
+  explicit CallSiteId(EncodedArrayItem* call_site_item) : call_site_item_(call_site_item) {
+    size_ = kCallSiteIdItemSize;
+  }
+  ~CallSiteId() OVERRIDE { }
+
+  static size_t ItemSize() { return kCallSiteIdItemSize; }
+
+  EncodedArrayItem* CallSiteItem() const { return call_site_item_; }
+
+  void Accept(AbstractDispatcher* dispatch) const { dispatch->Dispatch(this); }
+
+ private:
+  EncodedArrayItem* call_site_item_;
+
+  DISALLOW_COPY_AND_ASSIGN(CallSiteId);
+};
+
+class MethodHandleItem : public IndexedItem {
+ public:
+  MethodHandleItem(DexFile::MethodHandleType method_handle_type, IndexedItem* field_or_method_id)
+      : method_handle_type_(method_handle_type),
+        field_or_method_id_(field_or_method_id) {
+    size_ = kMethodHandleItemSize;
+  }
+  ~MethodHandleItem() OVERRIDE { }
+
+  static size_t ItemSize() { return kMethodHandleItemSize; }
+
+  DexFile::MethodHandleType GetMethodHandleType() const { return method_handle_type_; }
+  IndexedItem* GetFieldOrMethodId() const { return field_or_method_id_; }
+
+  void Accept(AbstractDispatcher* dispatch) const { dispatch->Dispatch(this); }
+
+ private:
+  DexFile::MethodHandleType method_handle_type_;
+  IndexedItem* field_or_method_id_;
+
+  DISALLOW_COPY_AND_ASSIGN(MethodHandleItem);
+};
+
 // TODO(sehr): implement MapList.
 class MapList : public Item {
  public:
diff --git a/dexlayout/dex_ir_builder.cc b/dexlayout/dex_ir_builder.cc
index d0c5bf9..8eb726a 100644
--- a/dexlayout/dex_ir_builder.cc
+++ b/dexlayout/dex_ir_builder.cc
@@ -72,6 +72,8 @@
   }
   // MapItem.
   collections.SetMapListOffset(disk_header.map_off_);
+  // CallSiteIds and MethodHandleItems.
+  collections.CreateCallSitesAndMethodHandles(dex_file);
 
   CheckAndSetRemainingOffsets(dex_file, &collections);
 
@@ -115,6 +117,14 @@
         CHECK_EQ(item->size_, collections->ClassDefsSize());
         CHECK_EQ(item->offset_, collections->ClassDefsOffset());
         break;
+      case DexFile::kDexTypeCallSiteIdItem:
+        CHECK_EQ(item->size_, collections->CallSiteIdsSize());
+        CHECK_EQ(item->offset_, collections->CallSiteIdsOffset());
+        break;
+      case DexFile::kDexTypeMethodHandleItem:
+        CHECK_EQ(item->size_, collections->MethodHandleItemsSize());
+        CHECK_EQ(item->offset_, collections->MethodHandleItemsOffset());
+        break;
       case DexFile::kDexTypeMapList:
         CHECK_EQ(item->size_, 1u);
         CHECK_EQ(item->offset_, disk_header.map_off_);
diff --git a/dexlayout/dex_writer.cc b/dexlayout/dex_writer.cc
index 7ffa38b..e1b828c 100644
--- a/dexlayout/dex_writer.cc
+++ b/dexlayout/dex_writer.cc
@@ -151,6 +151,12 @@
       length = EncodeDoubleValue(encoded_value->GetDouble(), buffer);
       start = 8 - length;
       break;
+    case DexFile::kDexAnnotationMethodType:
+      length = EncodeUIntValue(encoded_value->GetProtoId()->GetIndex(), buffer);
+      break;
+    case DexFile::kDexAnnotationMethodHandle:
+      length = EncodeUIntValue(encoded_value->GetMethodHandle()->GetIndex(), buffer);
+      break;
     case DexFile::kDexAnnotationString:
       length = EncodeUIntValue(encoded_value->GetStringId()->GetIndex(), buffer);
       break;
@@ -485,6 +491,27 @@
   }
 }
 
+void DexWriter::WriteCallSites() {
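+  // Each call_site_id_item is a single uint: the offset of the call site's
+  // encoded array item in the data section.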
+  uint32_t call_site_off[1];
+  for (std::unique_ptr<dex_ir::CallSiteId>& call_site_id :
+      header_->GetCollections().CallSiteIds()) {
+    call_site_off[0] = call_site_id->CallSiteItem()->GetOffset();
+    Write(call_site_off, call_site_id->GetSize(), call_site_id->GetOffset());
+  }
+}
+
+void DexWriter::WriteMethodHandles() {
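+  // A method_handle_item is four 16-bit words:
+  // { method_handle_type, unused, field_or_method_id, unused }.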
+  uint16_t method_handle_buff[4];
+  for (std::unique_ptr<dex_ir::MethodHandleItem>& method_handle :
+      header_->GetCollections().MethodHandleItems()) {
+    method_handle_buff[0] = static_cast<uint16_t>(method_handle->GetMethodHandleType());
+    method_handle_buff[1] = 0;  // unused.
+    method_handle_buff[2] = method_handle->GetFieldOrMethodId()->GetIndex();
+    method_handle_buff[3] = 0;  // unused.
+    Write(method_handle_buff, method_handle->GetSize(), method_handle->GetOffset());
+  }
+}
+
 struct MapItemContainer {
   MapItemContainer(uint32_t type, uint32_t size, uint32_t offset)
       : type_(type), size_(size), offset_(offset) { }
@@ -528,6 +555,14 @@
     queue.push(MapItemContainer(DexFile::kDexTypeClassDefItem, collection.ClassDefsSize(),
         collection.ClassDefsOffset()));
   }
+  if (collection.CallSiteIdsSize() != 0) {
+    queue.push(MapItemContainer(DexFile::kDexTypeCallSiteIdItem, collection.CallSiteIdsSize(),
+        collection.CallSiteIdsOffset()));
+  }
+  if (collection.MethodHandleItemsSize() != 0) {
+    queue.push(MapItemContainer(DexFile::kDexTypeMethodHandleItem,
+        collection.MethodHandleItemsSize(), collection.MethodHandleItemsOffset()));
+  }
 
   // Data section.
   queue.push(MapItemContainer(DexFile::kDexTypeMapList, 1, collection.MapListOffset()));
@@ -618,10 +653,8 @@
   uint32_t class_defs_off = collections.ClassDefsOffset();
   buffer[16] = class_defs_size;
   buffer[17] = class_defs_off;
-  uint32_t data_off = class_defs_off + class_defs_size * dex_ir::ClassDef::ItemSize();
-  uint32_t data_size = file_size - data_off;
-  buffer[18] = data_size;
-  buffer[19] = data_off;
+  buffer[18] = header_->DataSize();
+  buffer[19] = header_->DataOffset();
   Write(buffer, 20 * sizeof(uint32_t), offset);
 }
 
@@ -640,6 +673,8 @@
   WriteDebugInfoItems();
   WriteCodeItems();
   WriteClasses();
+  WriteCallSites();
+  WriteMethodHandles();
   WriteMapItem();
   WriteHeader();
 }
diff --git a/dexlayout/dex_writer.h b/dexlayout/dex_writer.h
index fb76e5c..b396adf 100644
--- a/dexlayout/dex_writer.h
+++ b/dexlayout/dex_writer.h
@@ -59,6 +59,8 @@
   void WriteDebugInfoItems();
   void WriteCodeItems();
   void WriteClasses();
+  void WriteCallSites();
+  void WriteMethodHandles();
   void WriteMapItem();
   void WriteHeader();
 
diff --git a/dexlayout/dexdiag.cc b/dexlayout/dexdiag.cc
index ea2679a..78860e3 100644
--- a/dexlayout/dexdiag.cc
+++ b/dexlayout/dexdiag.cc
@@ -15,6 +15,7 @@
  */
 
 #include <errno.h>
+#include <inttypes.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@@ -30,7 +31,9 @@
 #include "dex_file.h"
 #include "dex_ir.h"
 #include "dex_ir_builder.h"
+#ifdef ART_TARGET_ANDROID
 #include "pagemap/pagemap.h"
+#endif
 #include "runtime.h"
 #include "vdex_file.h"
 
@@ -38,8 +41,6 @@
 
 using android::base::StringPrintf;
 
-static constexpr size_t kLineLength = 32;
-
 static bool g_verbose = false;
 
 // The width needed to print a file page offset (32-bit).
@@ -164,6 +165,7 @@
   std::cout << ". (Mapped page not resident)" << std::endl;
 }
 
+#ifdef ART_TARGET_ANDROID
 static char PageTypeChar(uint16_t type) {
   if (kDexSectionInfoMap.find(type) == kDexSectionInfoMap.end()) {
     return '-';
@@ -194,10 +196,12 @@
                            size_t end,
                            const std::vector<dex_ir::DexFileSection>& sections,
                            PageCount* page_counts) {
+  static constexpr size_t kLineLength = 32;
   for (size_t page = start; page < end; ++page) {
     char type_char = '.';
     if (PM_PAGEMAP_PRESENT(pagemap[page])) {
-      uint16_t type = FindSectionTypeForPage(page, sections);
+      const size_t dex_page_offset = page - start;
+      uint16_t type = FindSectionTypeForPage(dex_page_offset, sections);
       page_counts->Increment(type);
       type_char = PageTypeChar(type);
     }
@@ -228,7 +232,8 @@
     return;
   }
   for (size_t page = start; page < end; ++page) {
-    mapped_pages.Increment(FindSectionTypeForPage(page, sections));
+    const size_t dex_page_offset = page - start;
+    mapped_pages.Increment(FindSectionTypeForPage(dex_page_offset, sections));
   }
   size_t total_resident_pages = 0;
   printer->PrintHeader();
@@ -268,7 +273,7 @@
     std::cerr << "Dex file start offset for "
               << dex_file->GetLocation().c_str()
               << " is incorrect: map start "
-              << StringPrintf("%zx > dex start %zx\n", map_start, dex_file_start)
+              << StringPrintf("%" PRIx64 " > dex start %" PRIx64 "\n", map_start, dex_file_start)
               << std::endl;
     return;
   }
@@ -277,7 +282,7 @@
   uint64_t end_page = RoundUp(start_address + dex_file_size, kPageSize) / kPageSize;
   std::cout << "DEX "
             << dex_file->GetLocation().c_str()
-            << StringPrintf(": %zx-%zx",
+            << StringPrintf(": %" PRIx64 "-%" PRIx64,
                             map_start + start_page * kPageSize,
                             map_start + end_page * kPageSize)
             << std::endl;
@@ -293,21 +298,20 @@
   DisplayDexStatistics(start_page, end_page, section_resident_pages, sections, printer);
 }
 
-static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) {
+static bool IsVdexFileMapping(const std::string& mapped_name) {
   // Confirm that the map is from a vdex file.
   static const char* suffixes[] = { ".vdex" };
-  std::string vdex_name;
-  bool found = false;
-  for (size_t j = 0; j < sizeof(suffixes) / sizeof(suffixes[0]); ++j) {
-    if (strstr(pm_map_name(map), suffixes[j]) != nullptr) {
-      vdex_name = pm_map_name(map);
-      found = true;
-      break;
+  for (const char* suffix : suffixes) {
+    size_t match_loc = mapped_name.find(suffix);
+    if (match_loc != std::string::npos && mapped_name.length() == match_loc + strlen(suffix)) {
+      return true;
     }
   }
-  if (!found) {
-    return true;
-  }
+  return false;
+}
+
+static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) {
+  std::string vdex_name = pm_map_name(map);
   // Extract all the dex files from the vdex file.
   std::string error_msg;
   std::unique_ptr<VdexFile> vdex(VdexFile::Open(vdex_name,
@@ -331,6 +335,7 @@
               << ": error "
               << error_msg
               << std::endl;
+    return false;
   }
   // Open the page mapping (one uint64_t per page) for the entire vdex mapping.
   uint64_t* pagemap;
@@ -342,7 +347,7 @@
   // Process the dex files.
   std::cout << "MAPPING "
             << pm_map_name(map)
-            << StringPrintf(": %zx-%zx", pm_map_start(map), pm_map_end(map))
+            << StringPrintf(": %" PRIx64 "-%" PRIx64, pm_map_start(map), pm_map_end(map))
             << std::endl;
   for (const auto& dex_file : dex_files) {
     ProcessOneDexMapping(pagemap,
@@ -356,6 +361,7 @@
 }
 
 static void ProcessOneOatMapping(uint64_t* pagemap, size_t size, Printer* printer) {
+  static constexpr size_t kLineLength = 32;
   size_t resident_page_count = 0;
   for (size_t page = 0; page < size; ++page) {
     char type_char = '.';
@@ -381,21 +387,19 @@
   printer->PrintSkipLine();
 }
 
-static bool DisplayMappingIfFromOatFile(pm_map_t* map, Printer* printer) {
-  // Confirm that the map is from a vdex file.
+static bool IsOatFileMapping(const std::string& mapped_name) {
+  // Confirm that the map is from an oat file.
   static const char* suffixes[] = { ".odex", ".oat" };
-  std::string vdex_name;
-  bool found = false;
-  for (size_t j = 0; j < sizeof(suffixes) / sizeof(suffixes[0]); ++j) {
-    if (strstr(pm_map_name(map), suffixes[j]) != nullptr) {
-      vdex_name = pm_map_name(map);
-      found = true;
-      break;
+  for (const char* suffix : suffixes) {
+    size_t match_loc = mapped_name.find(suffix);
+    if (match_loc != std::string::npos && mapped_name.length() == match_loc + strlen(suffix)) {
+      return true;
     }
   }
-  if (!found) {
-    return true;
-  }
+  return false;
+}
+
+static bool DisplayMappingIfFromOatFile(pm_map_t* map, Printer* printer) {
   // Open the page mapping (one uint64_t per page) for the entire vdex mapping.
   uint64_t* pagemap;
   size_t len;
@@ -406,7 +410,7 @@
   // Process the dex files.
   std::cout << "MAPPING "
             << pm_map_name(map)
-            << StringPrintf(": %zx-%zx", pm_map_start(map), pm_map_end(map))
+            << StringPrintf(": %" PRIx64 "-%" PRIx64, pm_map_start(map), pm_map_end(map))
             << std::endl;
   ProcessOneOatMapping(pagemap, len, printer);
   free(pagemap);
@@ -426,9 +430,10 @@
   }
   return false;
 }
+#endif
 
 static void Usage(const char* cmd) {
-  std::cerr << "Usage: " << cmd << " [options] pid" << std::endl
+  std::cout << "Usage: " << cmd << " [options] pid" << std::endl
             << "    --contains=<string>:  Display sections containing string." << std::endl
             << "    --help:               Shows this message." << std::endl
             << "    --verbose:            Makes displays verbose." << std::endl;
@@ -463,6 +468,7 @@
   InitLogging(argv, Runtime::Aborter);
   MemMap::Init();
 
+#ifdef ART_TARGET_ANDROID
   pid_t pid;
   char* endptr;
   pid = (pid_t)strtol(argv[argc - 1], &endptr, 10);
@@ -496,7 +502,8 @@
     return EXIT_FAILURE;
   }
 
-  // Process the mappings that are due to DEX files.
+  bool match_found = false;
+  // Process the mappings that are due to vdex or oat files.
   Printer printer;
   for (size_t i = 0; i < num_maps; ++i) {
     std::string mapped_file_name = pm_map_name(maps[i]);
@@ -504,12 +511,23 @@
     if (!FilterByNameContains(mapped_file_name, name_filters)) {
       continue;
     }
-    if (!DisplayMappingIfFromVdexFile(maps[i], &printer)) {
-      return EXIT_FAILURE;
-    } else if (!DisplayMappingIfFromOatFile(maps[i], &printer)) {
-      return EXIT_FAILURE;
+    if (IsVdexFileMapping(mapped_file_name)) {
+      if (!DisplayMappingIfFromVdexFile(maps[i], &printer)) {
+        return EXIT_FAILURE;
+      }
+      match_found = true;
+    } else if (IsOatFileMapping(mapped_file_name)) {
+      if (!DisplayMappingIfFromOatFile(maps[i], &printer)) {
+        return EXIT_FAILURE;
+      }
+      match_found = true;
     }
   }
+  if (!match_found) {
+    std::cerr << "No relevant memory maps were found." << std::endl;
+    return EXIT_FAILURE;
+  }
+#endif
 
   return EXIT_SUCCESS;
 }
diff --git a/dexlayout/dexdiag_test.cc b/dexlayout/dexdiag_test.cc
new file mode 100644
index 0000000..a0b3f32
--- /dev/null
+++ b/dexlayout/dexdiag_test.cc
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string>
+#include <vector>
+
+#include "common_runtime_test.h"
+
+#include "runtime/exec_utils.h"
+#include "runtime/oat_file.h"
+#include "runtime/os.h"
+
+namespace art {
+
+static const char* kDexDiagContains = "--contains=core.vdex";
+static const char* kDexDiagContainsFails = "--contains=anything_other_than_core.vdex";
+static const char* kDexDiagHelp = "--help";
+static const char* kDexDiagVerbose = "--verbose";
+static const char* kDexDiagBinaryName = "dexdiag";
+
+class DexDiagTest : public CommonRuntimeTest {
+ protected:
+  virtual void SetUp() {
+    CommonRuntimeTest::SetUp();
+  }
+
+  // Path to the dexdiag(d?)[32|64] binary.
+  std::string GetDexDiagFilePath() {
+    std::string root = GetTestAndroidRoot();
+
+    root += "/bin/";
+    root += kDexDiagBinaryName;
+
+    std::string root32 = root + "32";
+    // If we have both a 32-bit and a 64-bit build, the 32-bit file will have a 32 suffix.
+    if (OS::FileExists(root32.c_str()) && !Is64BitInstructionSet(kRuntimeISA)) {
+      return root32;
+    } else {
+      // This is a 64-bit build or only a single build exists.
+      return root;
+    }
+  }
+
+  std::unique_ptr<OatFile> OpenOatAndVdexFiles() {
+    // Open the core.oat file.
+    // This is a little convoluted: we have to get the location of the default
+    // core image (.../framework/core.oat) and find it in the right architecture
+    // subdirectory (.../framework/arm/core.oat). Opening the oat file then has
+    // the side effect of opening the corresponding vdex file
+    // (.../framework/arm/core.vdex).
+    const std::string default_location = GetCoreOatLocation();
+    EXPECT_TRUE(!default_location.empty());
+    std::string oat_location = GetSystemImageFilename(default_location.c_str(), kRuntimeISA);
+    EXPECT_TRUE(!oat_location.empty());
+    std::cout << "==" << oat_location << std::endl;
+    std::string error_msg;
+    std::unique_ptr<OatFile> oat(OatFile::Open(oat_location.c_str(),
+                                               oat_location.c_str(),
+                                               nullptr,
+                                               nullptr,
+                                               false,
+                                               /*low_4gb*/false,
+                                               nullptr,
+                                               &error_msg));
+    EXPECT_TRUE(oat != nullptr) << error_msg;
+    return oat;
+  }
+
+  // Run dexdiag against the given process with the given arguments.
+  bool Exec(pid_t this_pid, const std::vector<std::string>& args, std::string* error_msg) {
+    // Invoke 'dexdiag' against the current process.
+    // This should succeed because we have a runtime and so it should
+    // be able to map in the boot image and report on it.
+    std::vector<std::string> exec_argv;
+
+    // Build the command line "dexdiag <args> this_pid".
+    std::string executable_path = GetDexDiagFilePath();
+    EXPECT_TRUE(OS::FileExists(executable_path.c_str())) << executable_path
+                                                         << " should be a valid file path";
+    exec_argv.push_back(executable_path);
+    for (const auto& arg : args) {
+      exec_argv.push_back(arg);
+    }
+    exec_argv.push_back(std::to_string(this_pid));
+
+    return ::art::Exec(exec_argv, error_msg);
+  }
+};
+
+// We can't run these tests on the host, as they will fail when trying to open
+// /proc/pid/pagemap.
+// On the target, we invoke 'dexdiag' against the current process.
+// This should succeed because we have a runtime and so dexdiag should
+// be able to find the map for, e.g., boot.vdex and friends.
+TEST_F(DexDiagTest, DexDiagHelpTest) {
+  // TODO: test the resulting output.
+  std::string error_msg;
+  ASSERT_TRUE(Exec(getpid(), { kDexDiagHelp }, &error_msg)) << "Failed to execute -- because: "
+                                                            << error_msg;
+}
+
+#if defined (ART_TARGET)
+TEST_F(DexDiagTest, DexDiagContainsTest) {
+#else
+TEST_F(DexDiagTest, DISABLED_DexDiagContainsTest) {
+#endif
+  std::unique_ptr<OatFile> oat = OpenOatAndVdexFiles();
+  // TODO: test the resulting output.
+  std::string error_msg;
+  ASSERT_TRUE(Exec(getpid(), { kDexDiagContains }, &error_msg)) << "Failed to execute -- because: "
+                                                                << error_msg;
+}
+
+#if defined (ART_TARGET)
+TEST_F(DexDiagTest, DexDiagContainsFailsTest) {
+#else
+TEST_F(DexDiagTest, DISABLED_DexDiagContainsFailsTest) {
+#endif
+  std::unique_ptr<OatFile> oat = OpenOatAndVdexFiles();
+  // TODO: test the resulting output.
+  std::string error_msg;
+  ASSERT_FALSE(Exec(getpid(), { kDexDiagContainsFails }, &error_msg))
+      << "Failed to execute -- because: "
+      << error_msg;
+}
+
+#if defined (ART_TARGET)
+TEST_F(DexDiagTest, DexDiagVerboseTest) {
+#else
+TEST_F(DexDiagTest, DISABLED_DexDiagVerboseTest) {
+#endif
+  // TODO: test the resulting output.
+  std::unique_ptr<OatFile> oat = OpenOatAndVdexFiles();
+  std::string error_msg;
+  ASSERT_TRUE(Exec(getpid(), { kDexDiagVerbose }, &error_msg)) << "Failed to execute -- because: "
+                                                               << error_msg;
+}
+
+}  // namespace art
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index 9f7861f..205c0d1 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -1909,7 +1909,7 @@
   }
   // Do IR-level comparison between input and output. This check ignores potential differences
   // due to layout, so offsets are not checked. Instead, it checks the data contents of each item.
-  if (options_.verify_output_) {
+  if (kIsDebugBuild || options_.verify_output_) {
     std::unique_ptr<dex_ir::Header> orig_header(dex_ir::DexIrBuilder(*dex_file));
     CHECK(VerifyOutputDexFile(orig_header.get(), header_, &error_msg)) << error_msg;
   }
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index 877ea92..5a6a20d 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -41,7 +41,7 @@
     "AAAAdQEAAAAQAAABAAAAjAEAAA==";
 
 static const char kDexFileLayoutInputProfile[] =
-    "cHJvADAwNQABCwABAAAAAAD1KW3+Y2xhc3Nlcy5kZXgBAA==";
+    "cHJvADAwNwAAAAAAAAgAAAB4AQMAAAAAAQ==";
 
 // Dex file with catch handler unreferenced by try blocks.
 // Constructed by building a dex file with try/catch blocks and hex editing.
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index 3c60bf4..8894cc9 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -456,6 +456,7 @@
   { kMsaMask | (0x7 << 23), kMsa | (0x6 << 23) | 0x7, "ldi", "kx" },
   { kMsaSpecialMask | (0xf << 2), kMsa | (0x8 << 2), "ld", "kw" },
   { kMsaSpecialMask | (0xf << 2), kMsa | (0x9 << 2), "st", "kw" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x5 << 23) | 0x14, "ilvr", "Vkmn" },
 };
 
 static uint32_t ReadU32(const uint8_t* ptr) {
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index e12bcec..4824f70 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -581,13 +581,69 @@
               load = true;
               src_reg_file = dst_reg_file = SSE;
               break;
-            case 0x39:
+            case 0x37:
               opcode1 = "pcmpgtq";
               prefix[2] = 0;
               has_modrm = true;
               load = true;
               src_reg_file = dst_reg_file = SSE;
               break;
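+            // Opcodes 0x38-0x3F in the 0x0F 0x38 map are the SSE4.1 packed
+            // integer min/max instructions.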
+            case 0x38:
+              opcode1 = "pminsb";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x39:
+              opcode1 = "pminsd";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x3A:
+              opcode1 = "pminuw";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x3B:
+              opcode1 = "pminud";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x3C:
+              opcode1 = "pmaxsb";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x3D:
+              opcode1 = "pmaxsd";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x3E:
+              opcode1 = "pmaxuw";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x3F:
+              opcode1 = "pmaxud";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
             case 0x40:
               opcode1 = "pmulld";
               prefix[2] = 0;
@@ -1133,8 +1189,12 @@
           opcode1 = opcode_tmp.c_str();
         }
         break;
+      case 0xDA:
+      case 0xDE:
       case 0xE0:
       case 0xE3:
+      case 0xEA:
+      case 0xEE:
         if (prefix[2] == 0x66) {
           src_reg_file = dst_reg_file = SSE;
           prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
@@ -1142,8 +1202,12 @@
           src_reg_file = dst_reg_file = MMX;
         }
         switch (*instr) {
+          case 0xDA: opcode1 = "pminub"; break;
+          case 0xDE: opcode1 = "pmaxub"; break;
           case 0xE0: opcode1 = "pavgb"; break;
           case 0xE3: opcode1 = "pavgw"; break;
+          case 0xEA: opcode1 = "pminsw"; break;
+          case 0xEE: opcode1 = "pmaxsw"; break;
         }
         prefix[2] = 0;
         has_modrm = true;
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 1869968..8ee5498 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -539,7 +539,6 @@
         "dex_file_test.cc",
         "dex_file_verifier_test.cc",
         "dex_instruction_test.cc",
-        "dex_instruction_visitor_test.cc",
         "dex_method_iterator_test.cc",
         "entrypoints/math_entrypoints_test.cc",
         "entrypoints/quick/quick_trampoline_entrypoints_test.cc",
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index a857976..d6056c0 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -71,6 +71,15 @@
 #undef FRAME_SIZE_SAVE_REFS_AND_ARGS
 static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
 #undef FRAME_SIZE_SAVE_EVERYTHING
+#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
+#undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET
+#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+#undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
 }  // namespace arm
 
 namespace arm64 {
@@ -83,6 +92,11 @@
 #undef FRAME_SIZE_SAVE_REFS_AND_ARGS
 static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
 #undef FRAME_SIZE_SAVE_EVERYTHING
+#undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET
+#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET
+#undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
 }  // namespace arm64
 
 namespace mips {
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index c03bcae..8f2fd6e 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -24,6 +24,36 @@
 #define FRAME_SIZE_SAVE_REFS_AND_ARGS 112
 #define FRAME_SIZE_SAVE_EVERYTHING 192
 
+// The offset from the art_quick_read_barrier_mark_introspection (used for field
+// loads with 32-bit LDR) to the entrypoint for field loads with 16-bit LDR,
+// i.e. art_quick_read_barrier_mark_introspection_narrow.
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET 0x20
+// The offsets from art_quick_read_barrier_mark_introspection to the GC root entrypoints,
+// i.e. art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}.
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET 0x80
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET 0xc0
+// The offset from art_quick_read_barrier_mark_introspection to the array switch cases,
+// i.e. art_quick_read_barrier_mark_introspection_arrays.
+#define BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET 0x100
+
+// The offset of the reference load LDR from the return address in LR for field loads.
+#ifdef USE_HEAP_POISONING
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET -8
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET -4
+#else
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET -4
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET -2
+#endif
+// The offset of the reference load LDR from the return address in LR for array loads.
+#ifdef USE_HEAP_POISONING
+#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -8
+#else
+#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -4
+#endif
+// The offset of the reference load LDR from the return address in LR for GC root loads.
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET -8
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET -6
+
 // Flag for enabling R4 optimization in arm runtime
 // #define ARM_R4_SUSPEND_FLAG
 
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index d21d0c0..919b0af 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -17,6 +17,7 @@
 #include <math.h>
 #include <string.h>
 
+#include "arch/arm/asm_support_arm.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -51,6 +52,13 @@
 extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
 
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_narrow(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_wide(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_narrow(
+    mirror::Object*);
+
 // Used by soft float.
 // Single-precision FP arithmetics.
 extern "C" float fmodf(float a, float b);              // REM_FLOAT[_2ADDR]
@@ -80,6 +88,31 @@
   qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr;
   qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr;
   qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr;
+
+  // For the alignment check, strip the Thumb mode bit.
+  DCHECK_ALIGNED(reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection) - 1u, 256u);
+  // Check the field narrow entrypoint offset from the introspection entrypoint.
+  intptr_t narrow_diff =
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_narrow) -
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
+  DCHECK_EQ(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET, narrow_diff);
+  // Check array switch cases offsets from the introspection entrypoint.
+  intptr_t array_diff =
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_arrays) -
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
+  DCHECK_EQ(BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET, array_diff);
+  // Check the GC root entrypoint offsets from the introspection entrypoint.
+  intptr_t gc_roots_wide_diff =
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots_wide) -
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
+  DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET, gc_roots_wide_diff);
+  intptr_t gc_roots_narrow_diff =
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots_narrow) -
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
+  DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET, gc_roots_narrow_diff);
+  // The register 12, i.e. IP, is reserved, so there is no art_quick_read_barrier_mark_reg12.
+  // We're using the entry to hold a pointer to the introspection entrypoint instead.
+  qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_introspection : nullptr;
 }
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index a277edf..31a7f6a 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1681,8 +1681,8 @@
     .extern artDeoptimize
 ENTRY art_quick_deoptimize
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0
-    mov    r0, r9         @ Set up args.
-    blx    artDeoptimize  @ artDeoptimize(Thread*)
+    mov    r0, r9         @ pass Thread::Current
+    blx    artDeoptimize  @ (Thread*)
 END art_quick_deoptimize
 
     /*
@@ -1691,9 +1691,9 @@
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
-    SETUP_SAVE_EVERYTHING_FRAME r0
-    mov    r0, r9                         @ Set up args.
-    blx    artDeoptimizeFromCompiledCode  @ artDeoptimizeFromCompiledCode(Thread*)
+    SETUP_SAVE_EVERYTHING_FRAME r1
+    mov    r1, r9                         @ pass Thread::Current
+    blx    artDeoptimizeFromCompiledCode  @ (DeoptimizationKind, Thread*)
 END art_quick_deoptimize_from_compiled_code
 
     /*
@@ -2146,6 +2146,289 @@
 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
 
+// Helper macros for Baker CC read barrier mark introspection (BRBMI).
+.macro BRBMI_FOR_12_REGISTERS macro_for_register, macro_for_reserved_register
+    \macro_for_register r0
+    \macro_for_register r1
+    \macro_for_register r2
+    \macro_for_register r3
+    \macro_for_reserved_register  // R4 is reserved for the entrypoint address.
+    \macro_for_register r5
+    \macro_for_register r6
+    \macro_for_register r7
+    \macro_for_register r8
+    \macro_for_register r9
+    \macro_for_register r10
+    \macro_for_register r11
+.endm
+
+.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
+    BRBMI_FOR_12_REGISTERS \macro_for_register, \macro_for_reserved_register
+    \macro_for_reserved_register  // IP is reserved.
+    \macro_for_reserved_register  // SP is reserved.
+    \macro_for_reserved_register  // LR is reserved.
+    \macro_for_reserved_register  // PC is reserved.
+.endm
+
+.macro BRBMI_RETURN_SWITCH_CASE reg
+.Lmark_introspection_return_switch_case_\reg:
+    mov     \reg, ip
+    bx      lr
+.endm
+
+.macro BRBMI_BAD_RETURN_SWITCH_CASE
+.Lmark_introspection_return_switch_case_bad:
+    BRBMI_BKPT_FILL_4B
+.endm
+
+.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg
+    .byte   (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2
+.endm
+
+.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
+    .byte   (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2
+.endm
+
+#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
+#error "Array and field introspection code sharing requires same LDR offset."
+#endif
+.macro BRBMI_ARRAY_LOAD index_reg
+    ldr     ip, [ip, \index_reg, lsl #2]                // 4 bytes.
+    b       art_quick_read_barrier_mark_introspection   // Should be 2 bytes, encoding T2.
+    .balign 8                                           // Add padding to 8 bytes.
+.endm
+
+.macro BRBMI_BKPT_FILL_4B
+    bkpt    0
+    bkpt    0
+.endm
+
+.macro BRBMI_BKPT_FILL_8B
+    BRBMI_BKPT_FILL_4B
+    BRBMI_BKPT_FILL_4B
+.endm
+
+.macro BRBMI_RUNTIME_CALL
+    // Note: This macro generates exactly 22 bytes of code. The core register
+    // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions.
+
+    push   {r0-r3, r7, lr}            // Save return address and caller-save registers.
+    .cfi_adjust_cfa_offset 24
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset r7, 16
+    .cfi_rel_offset lr, 20
+
+    mov     r0, ip                    // Pass the reference.
+    vpush {s0-s15}                    // save floating-point caller-save registers
+    .cfi_adjust_cfa_offset 64
+    bl      artReadBarrierMark        // r0 <- artReadBarrierMark(obj)
+    vpop    {s0-s15}                  // restore floating-point registers
+    .cfi_adjust_cfa_offset -64
+    mov     ip, r0                    // Move reference to ip in preparation for return switch.
+
+    pop     {r0-r3, r7, lr}           // Restore registers.
+    .cfi_adjust_cfa_offset -24
+    .cfi_restore r0
+    .cfi_restore r1
+    .cfi_restore r2
+    .cfi_restore r3
+    .cfi_restore r7
+    .cfi_restore lr
+.endm
+
+.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix
+    // If reference is null, just return it in the right register.
+    cmp     ip, #0
+    beq     .Lmark_introspection_return\label_suffix
+    // Use R4 as temp and check the mark bit of the reference.
+    ldr     r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tst     r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    beq     .Lmark_introspection_unmarked\label_suffix
+.Lmark_introspection_return\label_suffix:
+.endm
+
+.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix
+.Lmark_introspection_unmarked\label_suffix:
+    // Check if the top two bits are one, if this is the case it is a forwarding address.
+#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
+    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
+    // the highest bits and the "forwarding address" state to have all bits set.
+#error "Unexpected lock word state shift or forwarding address state value."
+#endif
+    cmp     r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
+    bhs     .Lmark_introspection_forwarding_address\label_suffix
+.endm
+
+.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix
+.Lmark_introspection_forwarding_address\label_suffix:
+    // Note: This macro generates exactly 6 bytes of code, the branch is near.
+
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    lsl     ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+    b       .Lmark_introspection_return\label_suffix
+.endm
+
+.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
+    // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
+    ldrh    r4, [lr, #(-1 + \ldr_offset + 2)]
+.endm
+
+.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
+    // Load the 16-bit instruction. Adjust for the thumb state in LR.
+    ldrh    r4, [lr, #(-1 + \ldr_offset)]
+.endm
+
+.macro BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH gc_root_ldr_offset, label_suffix
+    .balign 64
+    .thumb_func
+    .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
+    .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
+    .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
+art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
+    BRBMI_RUNTIME_CALL
+    // Load the LDR (or the half of it) that contains Rt.
+    BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \gc_root_ldr_offset
+    b       .Lmark_introspection_extract_register_and_return\label_suffix
+    // We've used 28 bytes since the "gc_roots" entrypoint (22 bytes for
+    // BRBMI_RUNTIME_CALL, 4 bytes for LDRH and 2 bytes for the branch). Squeeze
+    // the 6 byte forwarding address extraction here across the 32-byte boundary.
+    BRBMI_EXTRACT_FORWARDING_ADDRESS \label_suffix
+    // And the slow path taking exactly 30 bytes (6 bytes for the forwarding
+    // address check, 22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near
+    // branch) shall take the rest of the 32-byte section (within a cache line).
+    BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
+    BRBMI_RUNTIME_CALL
+    b       .Lmark_introspection_return\label_suffix
+.endm
+
+    /*
+     * Use introspection to load a reference from the same address as the LDR
+     * instruction in generated code would load (unless loaded by the thunk,
+     * see below), call ReadBarrier::Mark() with that reference if needed
+     * and return it in the same register as the LDR instruction would load.
+     *
+     * The entrypoint is called through a thunk that differs across load kinds.
+     * For field and array loads the LDR instruction in generated code follows
+     * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning)
+     * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where
+     * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk
+     * knows the holder and performs the gray bit check, returning to the LDR
+     * instruction if the object is not gray, so this entrypoint no longer
+     * needs to know anything about the holder. For GC root loads, the LDR
+     * instruction in generated code precedes the branch to the thunk, i.e. the
+     * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1)
+     * where the -1 is again the Thumb mode bit adjustment, and the thunk does
+     * not do the gray bit check.
+     *
+     * For field accesses and array loads with a constant index the thunk loads
+     * the reference into IP using introspection and calls the main entrypoint,
+     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
+     * the passed reference is poisoned.
+     *
+     * For array accesses with non-constant index, the thunk inserts the bits
+     * 0-5 of the LDR instruction to the entrypoint address, effectively
+     * calculating a switch case label based on the index register (bits 0-3)
+     * and adding an extra offset (bits 4-5 hold the shift which is always 2
+     * for reference loads) to differentiate from the main entrypoint, then
+     * moves the base register to IP and jumps to the switch case. Therefore
+     * we need to align the main entrypoint to 512 bytes, accounting for
+     * a 256-byte offset followed by 16 array entrypoints starting at
+     * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR
+     * (register) and a branch to the main entrypoint.
+     *
+     * For GC root accesses we cannot use the main entrypoint because of the
+     * different offset where the LDR instruction in generated code is located.
+     * (And even with heap poisoning enabled, GC roots are not poisoned.)
+     * To re-use the same entrypoint pointer in generated code, we make sure
+     * that the gc root entrypoint (a copy of the entrypoint with a different
+     * offset for introspection loads) is located at a known offset (128 bytes,
+     * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
+     * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
+     * the root register to IP and jumps to the customized entrypoint,
+     * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
+     * performs all the fast-path checks, so we need just the slow path.
+     *
+     * The code structure is
+     *   art_quick_read_barrier_mark_introspection:
+     *     Up to 32 bytes code for main entrypoint fast-path code for fields
+     *     (and array elements with constant offset) with LDR encoding T3;
+     *     jumps to the switch in the "narrow" entrypoint.
+     *     Padding to 32 bytes if needed.
+     *   art_quick_read_barrier_mark_introspection_narrow:
+     *     Up to 48 bytes code for fast path code for fields (and array
+     *     elements with constant offset) with LDR encoding T1, ending in the
+     *     return switch instruction TBB and the table with switch offsets.
+     *     Padding to 80 bytes if needed.
+     *   .Lmark_introspection_return_switch_case_r0:
+     *     Exactly 48 bytes of code for the return switch cases (12 cases,
+     *     including BKPT for the reserved registers).
+     *     Ends at 128 bytes total.
+     *   art_quick_read_barrier_mark_introspection_gc_roots_wide:
+     *     GC root entrypoint code for LDR encoding T3 (28 bytes).
+     *     Forwarding address extraction for LDR encoding T3 (6 bytes).
+     *     Slow path for main entrypoint for LDR encoding T3 (30 bytes).
+     *     Ends at 192 bytes total.
+     *   art_quick_read_barrier_mark_introspection_gc_roots_narrow:
+     *     GC root entrypoint code for LDR encoding T1 (28 bytes).
+     *     Forwarding address extraction for LDR encoding T1 (6 bytes).
+     *     Slow path for main entrypoint for LDR encoding T1 (30 bytes).
+     *     Ends at 256 bytes total.
+     *   art_quick_read_barrier_mark_introspection_arrays:
+     *     Exactly 128 bytes for array load switch cases (16x2 instructions).
+     */
+    .balign 512
+ENTRY art_quick_read_barrier_mark_introspection
+    // At this point, IP contains the reference, R4 can be freely used.
+    // (R4 is reserved for the entrypoint address.)
+    // For heap poisoning, the reference is poisoned, so unpoison it first.
+    UNPOISON_HEAP_REF ip
+    // Check for null or marked, lock word is loaded into R4.
+    BRBMI_CHECK_NULL_AND_MARKED _wide
+    // Load the half of the instruction that contains Rt.
+    BRBMI_LOAD_RETURN_REG_FROM_CODE_wide BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+.Lmark_introspection_extract_register_and_return_wide:
+    lsr     r4, r4, #12               // Extract `ref_reg`.
+    b       .Lmark_introspection_return_switch
+
+    .balign 32
+    .thumb_func
+    .type art_quick_read_barrier_mark_introspection_narrow, #function
+    .hidden art_quick_read_barrier_mark_introspection_narrow
+    .global art_quick_read_barrier_mark_introspection_narrow
+art_quick_read_barrier_mark_introspection_narrow:
+    // At this point, IP contains the reference, R4 can be freely used.
+    // (R4 is reserved for the entrypoint address.)
+    // For heap poisoning, the reference is poisoned, so unpoison it first.
+    UNPOISON_HEAP_REF ip
+    // Check for null or marked, lock word is loaded into R4.
+    BRBMI_CHECK_NULL_AND_MARKED _narrow
+    // Load the 16-bit instruction.
+    BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+.Lmark_introspection_extract_register_and_return_narrow:
+    and     r4, r4, #7                // Extract `ref_reg`.
+.Lmark_introspection_return_switch:
+    tbb     [pc, r4]                  // Jump to the switch case.
+.Lmark_introspection_return_table:
+    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
+    .balign 16
+    BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE
+
+    BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
+    BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow
+
+    .balign 256
+    .thumb_func
+    .type art_quick_read_barrier_mark_introspection_arrays, #function
+    .hidden art_quick_read_barrier_mark_introspection_arrays
+    .global art_quick_read_barrier_mark_introspection_arrays
+art_quick_read_barrier_mark_introspection_arrays:
+    BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B
+END art_quick_read_barrier_mark_introspection
+
 .extern artInvokePolymorphic
 ENTRY art_quick_invoke_polymorphic
     SETUP_SAVE_REFS_AND_ARGS_FRAME r2
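For readers following the new entrypoint, the fast path above amounts to three checks before falling back to the runtime. The following C++ sketch (not the runtime's code) mirrors the decision sequence of BRBMI_CHECK_NULL_AND_MARKED, BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK, BRBMI_EXTRACT_FORWARDING_ADDRESS and BRBMI_RUNTIME_CALL; the constants are illustrative stand-ins for the generated lock word values, and slow_path stands in for the artReadBarrierMark call:

    #include <cstdint>

    // Illustrative stand-ins: only the state shift (30) and the forwarding-address
    // state value (3) are pinned down by the #error checks in the assembly above.
    constexpr uint32_t kMarkBitMaskShifted = 1u << 29;  // Assumed mark bit position.
    constexpr uint32_t kStateShift = 30;
    constexpr uint32_t kStateForwardingAddress = 3;
    constexpr uint32_t kForwardingAddressShift = 3;      // Assumed; at least the 2 state bits.

    // ref is the (unpoisoned) reference, lock_word the word loaded from
    // MIRROR_OBJECT_LOCK_WORD_OFFSET, and slow_path models the runtime mark call.
    inline uintptr_t MarkIntrospectionSketch(uintptr_t ref,
                                             uint32_t lock_word,
                                             uintptr_t (*slow_path)(uintptr_t)) {
      if (ref == 0u) {
        return ref;                                       // Null: return as-is.
      }
      if ((lock_word & kMarkBitMaskShifted) != 0u) {
        return ref;                                       // Already marked: fast path.
      }
      if ((lock_word >> kStateShift) == kStateForwardingAddress) {
        // Forwarding address: the assembly recovers it with a single LSL that also
        // shifts the state bits out of the top of the lock word.
        return static_cast<uintptr_t>(lock_word) << kForwardingAddressShift;
      }
      return slow_path(ref);                              // Unmarked: call the runtime.
    }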
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index c555126..18015b5 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2219,7 +2219,7 @@
 ENTRY art_quick_deoptimize
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     mov    x0, xSELF          // Pass thread.
-    bl     artDeoptimize      // artDeoptimize(Thread*)
+    bl     artDeoptimize      // (Thread*)
     brk 0
 END art_quick_deoptimize
 
@@ -2230,8 +2230,8 @@
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
     SETUP_SAVE_EVERYTHING_FRAME
-    mov    x0, xSELF                      // Pass thread.
-    bl     artDeoptimizeFromCompiledCode  // artDeoptimizeFromCompiledCode(Thread*)
+    mov    x1, xSELF                      // Pass thread.
+    bl     artDeoptimizeFromCompiledCode  // (DeoptimizationKind, Thread*)
     brk 0
 END art_quick_deoptimize_from_compiled_code
 
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index a500648..d067f66 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -92,7 +92,6 @@
   // Switches execution of the executing context to this context
   NO_RETURN virtual void DoLongJump() = 0;
 
- protected:
   enum {
     kBadGprBase = 0xebad6070,
     kBadFprBase = 0xebad8070,
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 7bbcbf0..e628a9f 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1978,8 +1978,7 @@
 ENTRY art_quick_deoptimize
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la       $t9, artDeoptimize
-    jalr     $t9            # artDeoptimize(Thread*)
-                            # Returns caller method's frame size.
+    jalr     $t9            # (Thread*)
     move     $a0, rSELF     # pass Thread::current
 END art_quick_deoptimize
 
@@ -1991,9 +1990,8 @@
 ENTRY art_quick_deoptimize_from_compiled_code
     SETUP_SAVE_EVERYTHING_FRAME
     la       $t9, artDeoptimizeFromCompiledCode
-    jalr     $t9                            # artDeoptimizeFromCompiledCode(Thread*)
-                                            # Returns caller method's frame size.
-    move     $a0, rSELF                     # pass Thread::current
+    jalr     $t9                            # (DeoptimizationKind, Thread*)
+    move     $a1, rSELF                     # pass Thread::current
 END art_quick_deoptimize_from_compiled_code
 
     /*
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 8f713a1..40bad16 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1913,8 +1913,7 @@
     .extern artEnterInterpreterFromDeoptimize
 ENTRY art_quick_deoptimize
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
-    jal      artDeoptimize     # artDeoptimize(Thread*, SP)
-                               # Returns caller method's frame size.
+    jal      artDeoptimize     # artDeoptimize(Thread*)
     move     $a0, rSELF        # pass Thread::current
 END art_quick_deoptimize
 
@@ -1925,9 +1924,8 @@
     .extern artDeoptimizeFromCompiledCode
 ENTRY_NO_GP art_quick_deoptimize_from_compiled_code
     SETUP_SAVE_EVERYTHING_FRAME
-    jal      artDeoptimizeFromCompiledCode    # artDeoptimizeFromCompiledCode(Thread*, SP)
-                                              # Returns caller method's frame size.
-    move     $a0, rSELF                       # pass Thread::current
+    jal      artDeoptimizeFromCompiledCode    # (DeoptimizationKind, Thread*)
+    move     $a1, rSELF                       # pass Thread::current
 END art_quick_deoptimize_from_compiled_code
 
   .set push
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 3694c3e..2222f5c 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -2028,7 +2028,7 @@
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     CFI_ADJUST_CFA_OFFSET(4)
-    call SYMBOL(artDeoptimize)    // artDeoptimize(Thread*)
+    call SYMBOL(artDeoptimize)    // (Thread*)
     UNREACHABLE
 END_FUNCTION art_quick_deoptimize
 
@@ -2038,11 +2038,12 @@
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
     SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
-    subl LITERAL(12), %esp                      // Align stack.
-    CFI_ADJUST_CFA_OFFSET(12)
+    subl LITERAL(8), %esp                      // Align stack.
+    CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET                // Pass Thread::Current().
     CFI_ADJUST_CFA_OFFSET(4)
-    call SYMBOL(artDeoptimizeFromCompiledCode)  // artDeoptimizeFromCompiledCode(Thread*)
+    PUSH eax
+    call SYMBOL(artDeoptimizeFromCompiledCode)  // (DeoptimizationKind, Thread*)
     UNREACHABLE
 END_FUNCTION art_quick_deoptimize_from_compiled_code
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index ad7c2b3..41651d8 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1983,7 +1983,7 @@
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
                                        // Stack should be aligned now.
     movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
-    call SYMBOL(artDeoptimize)         // artDeoptimize(Thread*)
+    call SYMBOL(artDeoptimize)         // (Thread*)
     UNREACHABLE
 END_FUNCTION art_quick_deoptimize
 
@@ -1994,8 +1994,8 @@
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
     SETUP_SAVE_EVERYTHING_FRAME
                                                 // Stack should be aligned now.
-    movq %gs:THREAD_SELF_OFFSET, %rdi           // Pass Thread.
-    call SYMBOL(artDeoptimizeFromCompiledCode)  // artDeoptimizeFromCompiledCode(Thread*)
+    movq %gs:THREAD_SELF_OFFSET, %rsi           // Pass Thread.
+    call SYMBOL(artDeoptimizeFromCompiledCode)  // (DeoptimizationKind, Thread*)
     UNREACHABLE
 END_FUNCTION art_quick_deoptimize_from_compiled_code
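The pattern is the same on every architecture touched above: the deoptimization kind is already set up by the compiled code in the first argument position (it stays in r0/x0/$a0/RDI, and on x86 it is pushed from EAX as the first stack argument), so the stubs now place Thread::Current() in the second argument position instead of the first. A hedged sketch of the C++ side these stubs target, with signatures inferred from the "(DeoptimizationKind, Thread*)" comments (attributes such as NO_RETURN omitted):

    #include "deoptimization_kind.h"  // Added later in this change.

    namespace art {
    class Thread;
    // Sketch only; the real declarations live with the quick runtime entrypoints.
    extern "C" void artDeoptimize(Thread* self);
    extern "C" void artDeoptimizeFromCompiledCode(DeoptimizationKind kind, Thread* self);
    }  // namespace art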
 
diff --git a/runtime/base/arena_allocator-inl.h b/runtime/base/arena_allocator-inl.h
new file mode 100644
index 0000000..0e43837
--- /dev/null
+++ b/runtime/base/arena_allocator-inl.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_ARENA_ALLOCATOR_INL_H_
+#define ART_RUNTIME_BASE_ARENA_ALLOCATOR_INL_H_
+
+#include "arena_allocator.h"
+
+namespace art {
+namespace arena_allocator {
+
+static constexpr bool kArenaAllocatorPreciseTracking = kArenaAllocatorCountAllocations;
+
+static constexpr size_t kArenaDefaultSize = kArenaAllocatorPreciseTracking
+                                                ? 32
+                                                : 128 * KB;
+
+}  // namespace arena_allocator
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_ARENA_ALLOCATOR_INL_H_
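The net effect: when allocation counting is enabled, the default arena shrinks from 128 KiB to 32 bytes, so nearly every request is served from its own arena (AllocFromNewArena sizes new arenas as the maximum of the default and the request), and the pool change below frees arenas instead of recycling them, keeping per-allocation statistics precise. A rough usage-level sketch of that consequence (names as in the arena allocator tests; outcome hedged because memory tool red zones can change arena sizes):

    ArenaPool pool;
    ArenaAllocator allocator(&pool);
    // With kArenaAllocatorPreciseTracking, kArenaDefaultSize is 32, so each of
    // these 100-byte requests is likely served from its own freshly created arena.
    void* a = allocator.Alloc(100);
    void* b = allocator.Alloc(100);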
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 136ed12..fc5b5b1 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -19,7 +19,7 @@
 #include <iomanip>
 #include <numeric>
 
-#include "arena_allocator.h"
+#include "arena_allocator-inl.h"
 #include "logging.h"
 #include "mem_map.h"
 #include "mutex.h"
@@ -29,7 +29,6 @@
 namespace art {
 
 constexpr size_t kMemoryToolRedZoneBytes = 8;
-constexpr size_t Arena::kDefaultSize;
 
 template <bool kCount>
 const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = {
@@ -181,7 +180,7 @@
 
 class MallocArena FINAL : public Arena {
  public:
-  explicit MallocArena(size_t size = Arena::kDefaultSize);
+  explicit MallocArena(size_t size = arena_allocator::kArenaDefaultSize);
   virtual ~MallocArena();
  private:
   static constexpr size_t RequiredOverallocation() {
@@ -344,6 +343,17 @@
       MEMORY_TOOL_MAKE_UNDEFINED(arena->memory_, arena->bytes_allocated_);
     }
   }
+
+  if (arena_allocator::kArenaAllocatorPreciseTracking) {
+    // Do not reuse arenas when tracking.
+    while (first != nullptr) {
+      Arena* next = first->next_;
+      delete first;
+      first = next;
+    }
+    return;
+  }
+
   if (first != nullptr) {
     Arena* last = first;
     while (last->next_ != nullptr) {
@@ -437,7 +447,7 @@
 }
 
 uint8_t* ArenaAllocator::AllocFromNewArena(size_t bytes) {
-  Arena* new_arena = pool_->AllocArena(std::max(Arena::kDefaultSize, bytes));
+  Arena* new_arena = pool_->AllocArena(std::max(arena_allocator::kArenaDefaultSize, bytes));
   DCHECK(new_arena != nullptr);
   DCHECK_LE(bytes, new_arena->Size());
   if (static_cast<size_t>(end_ - ptr_) > new_arena->Size() - bytes) {
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 60b6ea8..5430458 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -195,7 +195,6 @@
 
 class Arena {
  public:
-  static constexpr size_t kDefaultSize = 128 * KB;
   Arena();
   virtual ~Arena() { }
   // Reset is for pre-use and uses memset for performance.
diff --git a/runtime/base/arena_allocator_test.cc b/runtime/base/arena_allocator_test.cc
index fd48a3f..e2c2e2f 100644
--- a/runtime/base/arena_allocator_test.cc
+++ b/runtime/base/arena_allocator_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "base/arena_allocator.h"
+#include "base/arena_allocator-inl.h"
 #include "base/arena_bit_vector.h"
 #include "base/memory_tool.h"
 #include "gtest/gtest.h"
@@ -65,23 +65,28 @@
 }
 
 TEST_F(ArenaAllocatorTest, LargeAllocations) {
+  if (arena_allocator::kArenaAllocatorPreciseTracking) {
+    printf("WARNING: TEST DISABLED FOR precise arena tracking\n");
+    return;
+  }
+
   {
     ArenaPool pool;
     ArenaAllocator arena(&pool);
     // Note: Leaving some space for memory tool red zones.
-    void* alloc1 = arena.Alloc(Arena::kDefaultSize * 5 / 8);
-    void* alloc2 = arena.Alloc(Arena::kDefaultSize * 2 / 8);
+    void* alloc1 = arena.Alloc(arena_allocator::kArenaDefaultSize * 5 / 8);
+    void* alloc2 = arena.Alloc(arena_allocator::kArenaDefaultSize * 2 / 8);
     ASSERT_NE(alloc1, alloc2);
     ASSERT_EQ(1u, NumberOfArenas(&arena));
   }
   {
     ArenaPool pool;
     ArenaAllocator arena(&pool);
-    void* alloc1 = arena.Alloc(Arena::kDefaultSize * 13 / 16);
-    void* alloc2 = arena.Alloc(Arena::kDefaultSize * 11 / 16);
+    void* alloc1 = arena.Alloc(arena_allocator::kArenaDefaultSize * 13 / 16);
+    void* alloc2 = arena.Alloc(arena_allocator::kArenaDefaultSize * 11 / 16);
     ASSERT_NE(alloc1, alloc2);
     ASSERT_EQ(2u, NumberOfArenas(&arena));
-    void* alloc3 = arena.Alloc(Arena::kDefaultSize * 7 / 16);
+    void* alloc3 = arena.Alloc(arena_allocator::kArenaDefaultSize * 7 / 16);
     ASSERT_NE(alloc1, alloc3);
     ASSERT_NE(alloc2, alloc3);
     ASSERT_EQ(3u, NumberOfArenas(&arena));
@@ -89,12 +94,12 @@
   {
     ArenaPool pool;
     ArenaAllocator arena(&pool);
-    void* alloc1 = arena.Alloc(Arena::kDefaultSize * 13 / 16);
-    void* alloc2 = arena.Alloc(Arena::kDefaultSize * 9 / 16);
+    void* alloc1 = arena.Alloc(arena_allocator::kArenaDefaultSize * 13 / 16);
+    void* alloc2 = arena.Alloc(arena_allocator::kArenaDefaultSize * 9 / 16);
     ASSERT_NE(alloc1, alloc2);
     ASSERT_EQ(2u, NumberOfArenas(&arena));
     // Note: Leaving some space for memory tool red zones.
-    void* alloc3 = arena.Alloc(Arena::kDefaultSize * 5 / 16);
+    void* alloc3 = arena.Alloc(arena_allocator::kArenaDefaultSize * 5 / 16);
     ASSERT_NE(alloc1, alloc3);
     ASSERT_NE(alloc2, alloc3);
     ASSERT_EQ(2u, NumberOfArenas(&arena));
@@ -102,12 +107,12 @@
   {
     ArenaPool pool;
     ArenaAllocator arena(&pool);
-    void* alloc1 = arena.Alloc(Arena::kDefaultSize * 9 / 16);
-    void* alloc2 = arena.Alloc(Arena::kDefaultSize * 13 / 16);
+    void* alloc1 = arena.Alloc(arena_allocator::kArenaDefaultSize * 9 / 16);
+    void* alloc2 = arena.Alloc(arena_allocator::kArenaDefaultSize * 13 / 16);
     ASSERT_NE(alloc1, alloc2);
     ASSERT_EQ(2u, NumberOfArenas(&arena));
     // Note: Leaving some space for memory tool red zones.
-    void* alloc3 = arena.Alloc(Arena::kDefaultSize * 5 / 16);
+    void* alloc3 = arena.Alloc(arena_allocator::kArenaDefaultSize * 5 / 16);
     ASSERT_NE(alloc1, alloc3);
     ASSERT_NE(alloc2, alloc3);
     ASSERT_EQ(2u, NumberOfArenas(&arena));
@@ -117,9 +122,9 @@
     ArenaAllocator arena(&pool);
     // Note: Leaving some space for memory tool red zones.
     for (size_t i = 0; i != 15; ++i) {
-      arena.Alloc(Arena::kDefaultSize * 1 / 16);    // Allocate 15 times from the same arena.
+      arena.Alloc(arena_allocator::kArenaDefaultSize * 1 / 16);    // Allocate 15 times from the same arena.
       ASSERT_EQ(i + 1u, NumberOfArenas(&arena));
-      arena.Alloc(Arena::kDefaultSize * 17 / 16);   // Allocate a separate arena.
+      arena.Alloc(arena_allocator::kArenaDefaultSize * 17 / 16);   // Allocate a separate arena.
       ASSERT_EQ(i + 2u, NumberOfArenas(&arena));
     }
   }
@@ -204,10 +209,11 @@
     ArenaPool pool;
     ArenaAllocator arena(&pool);
 
-    const size_t original_size = Arena::kDefaultSize - ArenaAllocator::kAlignment * 5;
+    const size_t original_size = arena_allocator::kArenaDefaultSize -
+        ArenaAllocator::kAlignment * 5;
     void* original_allocation = arena.Alloc(original_size);
 
-    const size_t new_size = Arena::kDefaultSize + ArenaAllocator::kAlignment * 2;
+    const size_t new_size = arena_allocator::kArenaDefaultSize + ArenaAllocator::kAlignment * 2;
     void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
     EXPECT_NE(original_allocation, realloc_allocation);
   }
@@ -217,12 +223,12 @@
     ArenaPool pool;
     ArenaAllocator arena(&pool);
 
-    const size_t original_size = Arena::kDefaultSize -
+    const size_t original_size = arena_allocator::kArenaDefaultSize -
         ArenaAllocator::kAlignment * 4 -
         ArenaAllocator::kAlignment / 2;
     void* original_allocation = arena.Alloc(original_size);
 
-    const size_t new_size = Arena::kDefaultSize +
+    const size_t new_size = arena_allocator::kArenaDefaultSize +
         ArenaAllocator::kAlignment * 2 +
         ArenaAllocator::kAlignment / 2;
     void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
@@ -307,11 +313,12 @@
     ArenaPool pool;
     ArenaAllocator arena(&pool);
 
-    const size_t original_size = Arena::kDefaultSize - ArenaAllocator::kAlignment * 5;
+    const size_t original_size = arena_allocator::kArenaDefaultSize -
+        ArenaAllocator::kAlignment * 5;
     void* original_allocation = arena.Alloc(original_size);
     ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
 
-    const size_t new_size = Arena::kDefaultSize + ArenaAllocator::kAlignment * 2;
+    const size_t new_size = arena_allocator::kArenaDefaultSize + ArenaAllocator::kAlignment * 2;
     void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
     EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
 
@@ -324,13 +331,13 @@
     ArenaPool pool;
     ArenaAllocator arena(&pool);
 
-    const size_t original_size = Arena::kDefaultSize -
+    const size_t original_size = arena_allocator::kArenaDefaultSize -
         ArenaAllocator::kAlignment * 4 -
         ArenaAllocator::kAlignment / 2;
     void* original_allocation = arena.Alloc(original_size);
     ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
 
-    const size_t new_size = Arena::kDefaultSize +
+    const size_t new_size = arena_allocator::kArenaDefaultSize +
         ArenaAllocator::kAlignment * 2 +
         ArenaAllocator::kAlignment / 2;
     void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
diff --git a/runtime/base/scoped_arena_allocator.cc b/runtime/base/scoped_arena_allocator.cc
index 7d04fa0..973f9b9 100644
--- a/runtime/base/scoped_arena_allocator.cc
+++ b/runtime/base/scoped_arena_allocator.cc
@@ -16,7 +16,7 @@
 
 #include "scoped_arena_allocator.h"
 
-#include "arena_allocator.h"
+#include "arena_allocator-inl.h"
 #include "base/memory_tool.h"
 
 namespace art {
@@ -54,7 +54,7 @@
 
 uint8_t* ArenaStack::AllocateFromNextArena(size_t rounded_bytes) {
   UpdateBytesAllocated();
-  size_t allocation_size = std::max(Arena::kDefaultSize, rounded_bytes);
+  size_t allocation_size = std::max(arena_allocator::kArenaDefaultSize, rounded_bytes);
   if (UNLIKELY(top_arena_ == nullptr)) {
     top_arena_ = bottom_arena_ = stats_and_pool_.pool->AllocArena(allocation_size);
     top_arena_->next_ = nullptr;
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
index 5394e53..862f0d0 100644
--- a/runtime/base/scoped_flock.cc
+++ b/runtime/base/scoped_flock.cc
@@ -33,11 +33,22 @@
 }
 
 bool ScopedFlock::Init(const char* filename, int flags, bool block, std::string* error_msg) {
+  return Init(filename, flags, block, true, error_msg);
+}
+
+bool ScopedFlock::Init(const char* filename,
+                       int flags,
+                       bool block,
+                       bool flush_on_close,
+                       std::string* error_msg) {
+  flush_on_close_ = flush_on_close;
   while (true) {
     if (file_.get() != nullptr) {
       UNUSED(file_->FlushCloseOrErase());  // Ignore result.
     }
-    file_.reset(OS::OpenFileWithFlags(filename, flags));
+
+    bool check_usage = flush_on_close;  // Check usage only if we need to flush on close.
+    file_.reset(OS::OpenFileWithFlags(filename, flags, check_usage));
     if (file_.get() == nullptr) {
       *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno));
       return false;
@@ -86,6 +97,7 @@
 }
 
 bool ScopedFlock::Init(File* file, std::string* error_msg) {
+  flush_on_close_ = true;
   file_.reset(new File(dup(file->Fd()), file->GetPath(), file->CheckUsage(), file->ReadOnlyMode()));
   if (file_->Fd() == -1) {
     file_.reset();
@@ -111,17 +123,21 @@
   return file_.get() != nullptr;
 }
 
-ScopedFlock::ScopedFlock() { }
+ScopedFlock::ScopedFlock() : flush_on_close_(true) { }
 
 ScopedFlock::~ScopedFlock() {
   if (file_.get() != nullptr) {
     int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN));
     if (flock_result != 0) {
-      PLOG(FATAL) << "Unable to unlock file " << file_->GetPath();
-      UNREACHABLE();
+      // Only printing a warning is okay since this is only used either:
+      // 1) with a non-blocking Init call, or
+      // 2) as part of a separate binary (e.g. dex2oat) which has its own timeout logic to prevent
+      //    deadlocks.
+      // This means we can be sure that the warning won't cause a deadlock.
+      PLOG(WARNING) << "Unable to unlock file " << file_->GetPath();
     }
     int close_result = -1;
-    if (file_->ReadOnlyMode()) {
+    if (file_->ReadOnlyMode() || !flush_on_close_) {
       close_result = file_->Close();
     } else {
       close_result = file_->FlushCloseOrErase();
diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h
index cc22056..a3a320f 100644
--- a/runtime/base/scoped_flock.h
+++ b/runtime/base/scoped_flock.h
@@ -25,6 +25,15 @@
 
 namespace art {
 
+// A scoped file-lock implemented using flock. The file is locked by calling the Init function and
+// is released during destruction. Note that failing to unlock the file only causes a warning to be
+// printed. Users should take care that this cannot lead to deadlock.
+//
+// Only printing a warning on unlock failure is okay since this is only used either:
+// 1) with a non-blocking Init call, or
+// 2) as part of a separate binary (e.g. dex2oat) which has its own timeout logic to prevent
+//    deadlocks.
+// This means we can be sure that the warning won't cause a deadlock.
 class ScopedFlock {
  public:
   ScopedFlock();
@@ -38,7 +47,16 @@
   // locking will be retried if the file changed. In non-blocking mode, false
   // is returned and no attempt is made to re-acquire the lock.
   //
+  // The argument `flush_on_close` controls whether or not the file
+  // will be explicitly flushed before close.
+  //
   // The file is opened with the provided flags.
+  bool Init(const char* filename,
+            int flags,
+            bool block,
+            bool flush_on_close,
+            std::string* error_msg);
+  // Calls Init(filename, flags, block, true, error_msg);
   bool Init(const char* filename, int flags, bool block, std::string* error_msg);
   // Calls Init(filename, O_CREAT | O_RDWR, true, error_msg)
   bool Init(const char* filename, std::string* error_msg);
@@ -57,6 +75,7 @@
 
  private:
   std::unique_ptr<File> file_;
+  bool flush_on_close_;
   DISALLOW_COPY_AND_ASSIGN(ScopedFlock);
 };
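A hypothetical caller of the new five-argument Init, skipping the implicit flush when the locked file's contents are never written under the lock (the path and logging are illustrative, not an actual call site):

    std::string error_msg;
    ScopedFlock flock;
    if (!flock.Init("/data/local/tmp/example.lock",
                    O_RDWR,
                    /* block */ false,
                    /* flush_on_close */ false,
                    &error_msg)) {
      LOG(WARNING) << "Could not take the lock: " << error_msg;
    }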
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 3ecf595..694c113 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -8705,6 +8705,8 @@
     DCHECK(h_long_array != nullptr);
     h_long_array->Set(kDexFileIndexStart, reinterpret_cast<intptr_t>(dex_file));
 
+    // Note that this creates a finalizable dalvik.system.DexFile object and a corresponding
+    // FinalizerReference which will never get cleaned up without a started runtime.
     Handle<mirror::Object> h_dex_file = hs2.NewHandle(
         cookie_field->GetDeclaringClass()->AllocObject(self));
     DCHECK(h_dex_file != nullptr);
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 56e8aa3..a29cc6c 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -249,6 +249,12 @@
     return; \
   }
 
+#define TEST_DISABLED_FOR_MEMORY_TOOL_ASAN() \
+  if (RUNNING_ON_MEMORY_TOOL > 0 && !kMemoryToolIsValgrind) { \
+    printf("WARNING: TEST DISABLED FOR MEMORY TOOL ASAN\n"); \
+    return; \
+  }
+
 }  // namespace art
 
 namespace std {
diff --git a/runtime/compiler_filter.cc b/runtime/compiler_filter.cc
index dbfcdfe..4847f38 100644
--- a/runtime/compiler_filter.cc
+++ b/runtime/compiler_filter.cc
@@ -140,6 +140,26 @@
   UNREACHABLE();
 }
 
+CompilerFilter::Filter CompilerFilter::GetSafeModeFilterFrom(Filter filter) {
+  // For safe mode, we should not return a filter that generates AOT compiled
+  // code.
+  switch (filter) {
+    case CompilerFilter::kAssumeVerified:
+    case CompilerFilter::kExtract:
+    case CompilerFilter::kVerify:
+    case CompilerFilter::kQuicken:
+      return filter;
+
+    case CompilerFilter::kSpace:
+    case CompilerFilter::kSpeed:
+    case CompilerFilter::kEverything:
+    case CompilerFilter::kSpaceProfile:
+    case CompilerFilter::kSpeedProfile:
+    case CompilerFilter::kEverythingProfile:
+      return CompilerFilter::kQuicken;
+  }
+  UNREACHABLE();
+}
 
 bool CompilerFilter::IsAsGoodAs(Filter current, Filter target) {
   return current >= target;
diff --git a/runtime/compiler_filter.h b/runtime/compiler_filter.h
index 9cb54b1..f802439 100644
--- a/runtime/compiler_filter.h
+++ b/runtime/compiler_filter.h
@@ -75,6 +75,9 @@
   // Returns a non-profile-guided version of the given filter.
   static Filter GetNonProfileDependentFilterFrom(Filter filter);
 
+  // Returns a filter suitable for safe mode.
+  static Filter GetSafeModeFilterFrom(Filter filter);
+
   // Returns true if the 'current' compiler filter is considered at least as
   // good as the 'target' compilation type.
   // For example: kSpeed is as good as kInterpretOnly, but kInterpretOnly is
diff --git a/runtime/compiler_filter_test.cc b/runtime/compiler_filter_test.cc
index a59165f..383f4e3 100644
--- a/runtime/compiler_filter_test.cc
+++ b/runtime/compiler_filter_test.cc
@@ -28,6 +28,13 @@
   EXPECT_EQ(name, CompilerFilter::NameOfFilter(filter));
 }
 
+static void TestSafeModeFilter(CompilerFilter::Filter expected, std::string name) {
+  CompilerFilter::Filter parsed;
+  EXPECT_TRUE(CompilerFilter::ParseCompilerFilter(name.c_str(), &parsed));
+  EXPECT_EQ(expected, CompilerFilter::GetSafeModeFilterFrom(parsed));
+}
+
+
 // Verify the dexopt status values from dalvik.system.DexFile
 // match the OatFileAssistant::DexOptStatus values.
 TEST(CompilerFilterTest, ParseCompilerFilter) {
@@ -47,4 +54,17 @@
   EXPECT_FALSE(CompilerFilter::ParseCompilerFilter("super-awesome-filter", &filter));
 }
 
+TEST(CompilerFilterTest, SafeModeFilter) {
+  TestSafeModeFilter(CompilerFilter::kAssumeVerified, "assume-verified");
+  TestSafeModeFilter(CompilerFilter::kExtract, "extract");
+  TestSafeModeFilter(CompilerFilter::kVerify, "verify");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "quicken");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "space-profile");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "space");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "speed-profile");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "speed");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "everything-profile");
+  TestSafeModeFilter(CompilerFilter::kQuicken, "everything");
+}
+
 }  // namespace art
diff --git a/runtime/deoptimization_kind.h b/runtime/deoptimization_kind.h
new file mode 100644
index 0000000..14e189c
--- /dev/null
+++ b/runtime/deoptimization_kind.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DEOPTIMIZATION_KIND_H_
+#define ART_RUNTIME_DEOPTIMIZATION_KIND_H_
+
+namespace art {
+
+enum class DeoptimizationKind {
+  kAotInlineCache = 0,
+  kJitInlineCache,
+  kJitSameTarget,
+  kLoopBoundsBCE,
+  kLoopNullBCE,
+  kBlockBCE,
+  kCHA,
+  kFullFrame,
+  kLast = kFullFrame
+};
+
+inline const char* GetDeoptimizationKindName(DeoptimizationKind kind) {
+  switch (kind) {
+    case DeoptimizationKind::kAotInlineCache: return "AOT inline cache";
+    case DeoptimizationKind::kJitInlineCache: return "JIT inline cache";
+    case DeoptimizationKind::kJitSameTarget: return "JIT same target";
+    case DeoptimizationKind::kLoopBoundsBCE: return "loop bounds check elimination";
+    case DeoptimizationKind::kLoopNullBCE: return "loop bounds check elimination on null";
+    case DeoptimizationKind::kBlockBCE: return "block bounds check elimination";
+    case DeoptimizationKind::kCHA: return "class hierarchy analysis";
+    case DeoptimizationKind::kFullFrame: return "full frame";
+  }
+  LOG(FATAL) << "Unexpected kind " << static_cast<size_t>(kind);
+  UNREACHABLE();
+}
+
+std::ostream& operator<<(std::ostream& os, const DeoptimizationKind& kind);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_DEOPTIMIZATION_KIND_H_
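A hedged sketch of how the enum and the name helper can be consumed; the counter array and the function are invented for this illustration, not the runtime's own bookkeeping:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    #include "deoptimization_kind.h"

    // Hypothetical per-kind counters, sized from kLast as defined above.
    static uint32_t gDeoptCounts[static_cast<size_t>(art::DeoptimizationKind::kLast) + 1] = {};

    void RecordDeoptimization(art::DeoptimizationKind kind) {
      ++gDeoptCounts[static_cast<size_t>(kind)];
      std::cout << "Deoptimizing because of " << art::GetDeoptimizationKindName(kind) << "\n";
    }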
diff --git a/runtime/dex2oat_environment_test.h b/runtime/dex2oat_environment_test.h
index e58c6f5..6765407 100644
--- a/runtime/dex2oat_environment_test.h
+++ b/runtime/dex2oat_environment_test.h
@@ -42,7 +42,16 @@
     CommonRuntimeTest::SetUp();
 
     // Create a scratch directory to work from.
-    scratch_dir_ = android_data_ + "/Dex2oatEnvironmentTest";
+
+    // Get the realpath of the android data. The oat dir should always point to the real location
+    // when generating oat files in dalvik-cache. This avoids complicating the unit tests
+    // when matching the expected paths.
+    UniqueCPtr<const char[]> android_data_real(realpath(android_data_.c_str(), nullptr));
+    ASSERT_TRUE(android_data_real != nullptr)
+      << "Could not get the realpath of the android data" << android_data_ << strerror(errno);
+
+    scratch_dir_.assign(android_data_real.get());
+    scratch_dir_ += "/Dex2oatEnvironmentTest";
     ASSERT_EQ(0, mkdir(scratch_dir_.c_str(), 0700));
 
     // Create a subdirectory in scratch for odex files.
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 1b18d21..36c7341 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -92,8 +92,8 @@
     uint32_t method_ids_off_;  // file offset of MethodIds array
     uint32_t class_defs_size_;  // number of ClassDefs
     uint32_t class_defs_off_;  // file offset of ClassDef array
-    uint32_t data_size_;  // unused
-    uint32_t data_off_;  // unused
+    uint32_t data_size_;  // size of data section
+    uint32_t data_off_;  // file offset of data section
 
     // Decode the dex magic version
     uint32_t GetVersion() const;
diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc
index 1397916..f21f1a2 100644
--- a/runtime/dex_file_annotations.cc
+++ b/runtime/dex_file_annotations.cc
@@ -1421,11 +1421,20 @@
 }
 
 const char* GetSourceDebugExtension(Handle<mirror::Class> klass) {
+  // Before instantiating ClassData, check that klass has a DexCache
+  // assigned.  The ClassData constructor indirectly dereferences it
+  // when calling klass->GetDexFile().
+  if (klass->GetDexCache() == nullptr) {
+    DCHECK(klass->IsPrimitive() || klass->IsArrayClass());
+    return nullptr;
+  }
+
   ClassData data(klass);
   const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
+
   const DexFile::AnnotationItem* annotation_item = SearchAnnotationSet(
       data.GetDexFile(),
       annotation_set,
@@ -1434,6 +1443,7 @@
   if (annotation_item == nullptr) {
     return nullptr;
   }
+
   const uint8_t* annotation =
       SearchEncodedAnnotation(data.GetDexFile(), annotation_item->annotation_, "value");
   if (annotation == nullptr) {
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 11b3cd0..c18ab47 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -922,12 +922,12 @@
   return true;
 }
 
-bool DexFileVerifier::FindClassFlags(uint32_t index,
-                                     bool is_field,
-                                     dex::TypeIndex* class_type_index,
-                                     uint32_t* class_access_flags) {
+bool DexFileVerifier::FindClassIndexAndDef(uint32_t index,
+                                           bool is_field,
+                                           dex::TypeIndex* class_type_index,
+                                           const DexFile::ClassDef** output_class_def) {
   DCHECK(class_type_index != nullptr);
-  DCHECK(class_access_flags != nullptr);
+  DCHECK(output_class_def != nullptr);
 
   // First check if the index is valid.
   if (index >= (is_field ? header_->field_ids_size_ : header_->method_ids_size_)) {
@@ -957,7 +957,7 @@
   for (size_t i = 0; i < header_->class_defs_size_; ++i) {
     const DexFile::ClassDef* class_def = class_def_begin + i;
     if (class_def->class_idx_ == *class_type_index) {
-      *class_access_flags = class_def->access_flags_;
+      *output_class_def = class_def;
       return true;
     }
   }
@@ -966,13 +966,13 @@
   return false;
 }
 
-bool DexFileVerifier::CheckOrderAndGetClassFlags(bool is_field,
-                                                 const char* type_descr,
-                                                 uint32_t curr_index,
-                                                 uint32_t prev_index,
-                                                 bool* have_class,
-                                                 dex::TypeIndex* class_type_index,
-                                                 uint32_t* class_access_flags) {
+bool DexFileVerifier::CheckOrderAndGetClassDef(bool is_field,
+                                               const char* type_descr,
+                                               uint32_t curr_index,
+                                               uint32_t prev_index,
+                                               bool* have_class,
+                                               dex::TypeIndex* class_type_index,
+                                               const DexFile::ClassDef** class_def) {
   if (curr_index < prev_index) {
     ErrorStringPrintf("out-of-order %s indexes %" PRIu32 " and %" PRIu32,
                       type_descr,
@@ -982,7 +982,7 @@
   }
 
   if (!*have_class) {
-    *have_class = FindClassFlags(curr_index, is_field, class_type_index, class_access_flags);
+    *have_class = FindClassIndexAndDef(curr_index, is_field, class_type_index, class_def);
     if (!*have_class) {
       // Should have really found one.
       ErrorStringPrintf("could not find declaring class for %s index %" PRIu32,
@@ -994,34 +994,130 @@
   return true;
 }
 
+bool DexFileVerifier::CheckStaticFieldTypes(const DexFile::ClassDef* class_def) {
+  if (class_def == nullptr) {
+    return true;
+  }
+
+  ClassDataItemIterator field_it(*dex_file_, ptr_);
+  EncodedStaticFieldValueIterator array_it(*dex_file_, *class_def);
+
+  for (; field_it.HasNextStaticField() && array_it.HasNext(); field_it.Next(), array_it.Next()) {
+    uint32_t index = field_it.GetMemberIndex();
+    const DexFile::TypeId& type_id = dex_file_->GetTypeId(dex_file_->GetFieldId(index).type_idx_);
+    const char* field_type_name =
+        dex_file_->GetStringData(dex_file_->GetStringId(type_id.descriptor_idx_));
+    Primitive::Type field_type = Primitive::GetType(field_type_name[0]);
+    EncodedArrayValueIterator::ValueType array_type = array_it.GetValueType();
+    // Ensure this matches RuntimeEncodedStaticFieldValueIterator.
+    switch (array_type) {
+      case EncodedArrayValueIterator::ValueType::kBoolean:
+        if (field_type != Primitive::kPrimBoolean) {
+          ErrorStringPrintf("unexpected static field initial value type: 'Z' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kByte:
+        if (field_type != Primitive::kPrimByte) {
+          ErrorStringPrintf("unexpected static field initial value type: 'B' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kShort:
+        if (field_type != Primitive::kPrimShort) {
+          ErrorStringPrintf("unexpected static field initial value type: 'S' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kChar:
+        if (field_type != Primitive::kPrimChar) {
+          ErrorStringPrintf("unexpected static field initial value type: 'C' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kInt:
+        if (field_type != Primitive::kPrimInt) {
+          ErrorStringPrintf("unexpected static field initial value type: 'I' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kLong:
+        if (field_type != Primitive::kPrimLong) {
+          ErrorStringPrintf("unexpected static field initial value type: 'J' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kFloat:
+        if (field_type != Primitive::kPrimFloat) {
+          ErrorStringPrintf("unexpected static field initial value type: 'F' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kDouble:
+        if (field_type != Primitive::kPrimDouble) {
+          ErrorStringPrintf("unexpected static field initial value type: 'D' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      case EncodedArrayValueIterator::ValueType::kNull:
+      case EncodedArrayValueIterator::ValueType::kString:
+      case EncodedArrayValueIterator::ValueType::kType:
+        if (field_type != Primitive::kPrimNot) {
+          ErrorStringPrintf("unexpected static field initial value type: 'L' vs '%c'",
+                            field_type_name[0]);
+          return false;
+        }
+        break;
+      default:
+        ErrorStringPrintf("unexpected static field initial value type: %x", array_type);
+        return false;
+    }
+  }
+
+  if (array_it.HasNext()) {
+    ErrorStringPrintf("too many static field initial values");
+    return false;
+  }
+  return true;
+}
+
 template <bool kStatic>
 bool DexFileVerifier::CheckIntraClassDataItemFields(ClassDataItemIterator* it,
                                                     bool* have_class,
                                                     dex::TypeIndex* class_type_index,
-                                                    uint32_t* class_access_flags) {
+                                                    const DexFile::ClassDef** class_def) {
   DCHECK(it != nullptr);
   // These calls use the raw access flags to check whether the whole dex field is valid.
   uint32_t prev_index = 0;
   for (; kStatic ? it->HasNextStaticField() : it->HasNextInstanceField(); it->Next()) {
     uint32_t curr_index = it->GetMemberIndex();
-    if (!CheckOrderAndGetClassFlags(true,
-                                    kStatic ? "static field" : "instance field",
-                                    curr_index,
-                                    prev_index,
-                                    have_class,
-                                    class_type_index,
-                                    class_access_flags)) {
+    if (!CheckOrderAndGetClassDef(true,
+                                  kStatic ? "static field" : "instance field",
+                                  curr_index,
+                                  prev_index,
+                                  have_class,
+                                  class_type_index,
+                                  class_def)) {
       return false;
     }
-    prev_index = curr_index;
-
+    DCHECK(class_def != nullptr);
     if (!CheckClassDataItemField(curr_index,
                                  it->GetRawMemberAccessFlags(),
-                                 *class_access_flags,
+                                 (*class_def)->access_flags_,
                                  *class_type_index,
                                  kStatic)) {
       return false;
     }
+
+    prev_index = curr_index;
   }
 
   return true;
@@ -1033,30 +1129,31 @@
     std::unordered_set<uint32_t>* direct_method_indexes,
     bool* have_class,
     dex::TypeIndex* class_type_index,
-    uint32_t* class_access_flags) {
+    const DexFile::ClassDef** class_def) {
   uint32_t prev_index = 0;
   for (; kDirect ? it->HasNextDirectMethod() : it->HasNextVirtualMethod(); it->Next()) {
     uint32_t curr_index = it->GetMemberIndex();
-    if (!CheckOrderAndGetClassFlags(false,
-                                    kDirect ? "direct method" : "virtual method",
-                                    curr_index,
-                                    prev_index,
-                                    have_class,
-                                    class_type_index,
-                                    class_access_flags)) {
+    if (!CheckOrderAndGetClassDef(false,
+                                  kDirect ? "direct method" : "virtual method",
+                                  curr_index,
+                                  prev_index,
+                                  have_class,
+                                  class_type_index,
+                                  class_def)) {
       return false;
     }
-    prev_index = curr_index;
-
+    DCHECK(class_def != nullptr);
     if (!CheckClassDataItemMethod(curr_index,
                                   it->GetRawMemberAccessFlags(),
-                                  *class_access_flags,
+                                  (*class_def)->access_flags_,
                                   *class_type_index,
                                   it->GetMethodCodeItemOffset(),
                                   direct_method_indexes,
                                   kDirect)) {
       return false;
     }
+
+    prev_index = curr_index;
   }
 
   return true;
@@ -1071,19 +1168,19 @@
   // as the lookup is expensive, cache the result.
   bool have_class = false;
   dex::TypeIndex class_type_index;
-  uint32_t class_access_flags;
+  const DexFile::ClassDef* class_def = nullptr;
 
   // Check fields.
   if (!CheckIntraClassDataItemFields<true>(&it,
                                            &have_class,
                                            &class_type_index,
-                                           &class_access_flags)) {
+                                           &class_def)) {
     return false;
   }
   if (!CheckIntraClassDataItemFields<false>(&it,
                                             &have_class,
                                             &class_type_index,
-                                            &class_access_flags)) {
+                                            &class_def)) {
     return false;
   }
 
@@ -1092,18 +1189,25 @@
                                             &direct_method_indexes,
                                             &have_class,
                                             &class_type_index,
-                                            &class_access_flags)) {
+                                            &class_def)) {
     return false;
   }
   if (!CheckIntraClassDataItemMethods<false>(&it,
                                              &direct_method_indexes,
                                              &have_class,
                                              &class_type_index,
-                                             &class_access_flags)) {
+                                             &class_def)) {
     return false;
   }
 
-  ptr_ = it.EndDataPointer();
+  const uint8_t* end_ptr = it.EndDataPointer();
+
+  // Check static field types against initial static values in encoded array.
+  if (!CheckStaticFieldTypes(class_def)) {
+    return false;
+  }
+
+  ptr_ = end_ptr;
   return true;
 }
 
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 71b316c..d1043c6 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -86,13 +86,14 @@
                                 uint32_t code_offset,
                                 std::unordered_set<uint32_t>* direct_method_indexes,
                                 bool expect_direct);
-  bool CheckOrderAndGetClassFlags(bool is_field,
-                                  const char* type_descr,
-                                  uint32_t curr_index,
-                                  uint32_t prev_index,
-                                  bool* have_class,
-                                  dex::TypeIndex* class_type_index,
-                                  uint32_t* class_access_flags);
+  bool CheckOrderAndGetClassDef(bool is_field,
+                                const char* type_descr,
+                                uint32_t curr_index,
+                                uint32_t prev_index,
+                                bool* have_class,
+                                dex::TypeIndex* class_type_index,
+                                const DexFile::ClassDef** class_def);
+  bool CheckStaticFieldTypes(const DexFile::ClassDef* class_def);
 
   bool CheckPadding(size_t offset, uint32_t aligned_offset);
   bool CheckEncodedValue();
@@ -106,7 +107,7 @@
   bool CheckIntraClassDataItemFields(ClassDataItemIterator* it,
                                      bool* have_class,
                                      dex::TypeIndex* class_type_index,
-                                     uint32_t* class_access_flags);
+                                     const DexFile::ClassDef** class_def);
   // Check all methods of the given type from the given iterator. Load the class data from the first
   // method, if necessary (and return it), or use the given values.
   template <bool kDirect>
@@ -114,7 +115,7 @@
                                       std::unordered_set<uint32_t>* direct_method_indexes,
                                       bool* have_class,
                                       dex::TypeIndex* class_type_index,
-                                      uint32_t* class_access_flags);
+                                      const DexFile::ClassDef** class_def);
 
   bool CheckIntraCodeItem();
   bool CheckIntraStringDataItem();
@@ -165,16 +166,15 @@
       __attribute__((__format__(__printf__, 2, 3))) COLD_ATTR;
   bool FailureReasonIsSet() const { return failure_reason_.size() != 0; }
 
-  // Retrieve class index and class access flag from the given member. index is the member index,
-  // which is taken as either a field or a method index (as designated by is_field). The result,
-  // if the member and declaring class could be found, is stored in class_type_index and
-  // class_access_flags.
-  // This is an expensive lookup, as we have to find the class-def by type index, which is a
+  // Retrieve class index and class def from the given member. index is the member index, which is
+  // taken as either a field or a method index (as designated by is_field). The result, if the
+  // member and declaring class could be found, is stored in class_type_index and class_def.
+  // This is an expensive lookup, as we have to find the class def by type index, which is a
   // linear search. The output values should thus be cached by the caller.
-  bool FindClassFlags(uint32_t index,
-                      bool is_field,
-                      dex::TypeIndex* class_type_index,
-                      uint32_t* class_access_flags);
+  bool FindClassIndexAndDef(uint32_t index,
+                            bool is_field,
+                            dex::TypeIndex* class_type_index,
+                            const DexFile::ClassDef** output_class_def);
 
   // Check validity of the given access flags, interpreted for a field in the context of a class
   // with the given second access flags.
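The CheckStaticFieldTypes method declared above (defined in the dex_file_verifier.cc hunk) walks the class_data static fields and the class_def's encoded static-values array in lockstep: each value's tag must match the declared field type, reference fields accept null, string, or type values, and surplus values are rejected. A small self-contained sketch of that pairing, with plain descriptor strings and a ValueType enum standing in for ClassDataItemIterator and EncodedStaticFieldValueIterator:

  #include <cstdio>
  #include <string>
  #include <vector>

  // Stand-in for EncodedArrayValueIterator::ValueType.
  enum class ValueType { kBoolean, kByte, kShort, kChar, kInt, kLong,
                         kFloat, kDouble, kNull, kString, kType };

  // Maps a field descriptor's first character to the value tags it accepts.
  static bool Matches(char descriptor, ValueType v) {
    switch (descriptor) {
      case 'Z': return v == ValueType::kBoolean;
      case 'B': return v == ValueType::kByte;
      case 'S': return v == ValueType::kShort;
      case 'C': return v == ValueType::kChar;
      case 'I': return v == ValueType::kInt;
      case 'J': return v == ValueType::kLong;
      case 'F': return v == ValueType::kFloat;
      case 'D': return v == ValueType::kDouble;
      case 'L': case '[':  // reference fields: null, string, or class values
        return v == ValueType::kNull || v == ValueType::kString || v == ValueType::kType;
      default: return false;
    }
  }

  // Same shape as CheckStaticFieldTypes: fields and values advance together.
  // The value list may be shorter than the field list (trailing fields simply
  // have no initial value), but leftover values are an error.
  static bool CheckStaticValues(const std::vector<std::string>& field_descriptors,
                                const std::vector<ValueType>& values) {
    size_t i = 0;
    for (; i < field_descriptors.size() && i < values.size(); ++i) {
      if (!Matches(field_descriptors[i][0], values[i])) {
        std::fprintf(stderr, "type mismatch at static field %zu\n", i);
        return false;
      }
    }
    return i == values.size();  // no surplus initial values
  }

  int main() {
    // Mirrors the test fixtures added below: fields {C, I, String} with a
    // string-typed value for "i" must be rejected.
    bool bad = CheckStaticValues({"C", "I", "Ljava/lang/String;"},
                                 {ValueType::kChar, ValueType::kString, ValueType::kString});
    bool good = CheckStaticValues({"C", "I", "Ljava/lang/String;"},
                                  {ValueType::kChar, ValueType::kInt, ValueType::kString});
    std::printf("bad verifies: %d, good verifies: %d\n", bad, good);
  }
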
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 7736f3d..068e122 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -123,7 +123,7 @@
 // To generate a base64 encoded Dex file (such as kGoodTestDex, below)
 // from Smali files, use:
 //
-//   smali -o classes.dex class1.smali [class2.smali ...]
+//   smali assemble -o classes.dex class1.smali [class2.smali ...]
 //   base64 classes.dex >classes.dex.base64
 
 // For reference.
@@ -1461,7 +1461,7 @@
 
 // To generate a base64 encoded Dex file version 037 from Smali files, use:
 //
-//   smali --api-level 24 -o classes.dex class1.smali [class2.smali ...]
+//   smali assemble --api 24 -o classes.dex class1.smali [class2.smali ...]
 //   base64 classes.dex >classes.dex.base64
 
 // Dex file version 037 generated from:
@@ -2090,4 +2090,105 @@
   }
 }
 
+TEST_F(DexFileVerifierTest, BadStaticFieldInitialValuesArray) {
+  // Generated DEX file version (037) from:
+  //
+  // .class public LBadStaticFieldInitialValuesArray;
+  // .super Ljava/lang/Object;
+  //
+  //  # static fields
+  //  .field static final c:C = 'c'
+  //  .field static final i:I = 0x1
+  //  .field static final s:Ljava/lang/String; = "s"
+  //
+  //  # direct methods
+  //  .method public constructor <init>()V
+  //      .registers 1
+  //      invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  //      return-void
+  //  .end method
+  //
+  // Output file was hex edited so that static field "i" has string typing in initial values array.
+  static const char kDexBase64[] =
+      "ZGV4CjAzNQBrMi4cCPcMvvXNRw0uI6RRubwMPwgEYXIsAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAL"
+      "AAAAcAAAAAYAAACcAAAAAQAAALQAAAADAAAAwAAAAAIAAADYAAAAAQAAAOgAAAAkAQAACAEAACAB"
+      "AAAoAQAAMAEAADMBAAA2AQAAOwEAAE8BAABjAQAAZgEAAGkBAABsAQAAAgAAAAMAAAAEAAAABQAA"
+      "AAYAAAAHAAAABwAAAAUAAAAAAAAAAgAAAAgAAAACAAEACQAAAAIABAAKAAAAAgAAAAAAAAADAAAA"
+      "AAAAAAIAAAABAAAAAwAAAAAAAAABAAAAAAAAAHsBAAB0AQAAAQABAAEAAABvAQAABAAAAHAQAQAA"
+      "AA4ABjxpbml0PgAGQS5qYXZhAAFDAAFJAANMQTsAEkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEv"
+      "bGFuZy9TdHJpbmc7AAFWAAFjAAFpAAFzAAEABw4AAwNjFwoXCgMAAQAAGAEYARgAgYAEiAIADQAA"
+      "AAAAAAABAAAAAAAAAAEAAAALAAAAcAAAAAIAAAAGAAAAnAAAAAMAAAABAAAAtAAAAAQAAAADAAAA"
+      "wAAAAAUAAAACAAAA2AAAAAYAAAABAAAA6AAAAAEgAAABAAAACAEAAAIgAAALAAAAIAEAAAMgAAAB"
+      "AAAAbwEAAAUgAAABAAAAdAEAAAAgAAABAAAAewEAAAAQAAABAAAAjAEAAA==";
+
+  size_t length;
+  std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(kDexBase64, &length));
+  CHECK(dex_bytes != nullptr);
+  // Note: `dex_file` will be destroyed before `dex_bytes`.
+  std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length));
+  std::string error_msg;
+  EXPECT_FALSE(DexFileVerifier::Verify(dex_file.get(),
+                                       dex_file->Begin(),
+                                       dex_file->Size(),
+                                       "bad static field initial values array",
+                                       /*verify_checksum*/ true,
+                                       &error_msg));
+}
+
+TEST_F(DexFileVerifierTest, GoodStaticFieldInitialValuesArray) {
+  // Generated DEX file version (037) from:
+  //
+  //  .class public LGoodStaticFieldInitialValuesArray;
+  //  .super Ljava/lang/Object;
+  //
+  //  # static fields
+  //  .field static final b:B = 0x1t
+  //  .field static final c:C = 'c'
+  //  .field static final d:D = 0.6
+  //  .field static final f:F = 0.5f
+  //  .field static final i:I = 0x3
+  //  .field static final j:J = 0x4L
+  //  .field static final l1:Ljava/lang/String;
+  //  .field static final l2:Ljava/lang/String; = "s"
+  //  .field static final l3:Ljava/lang/Class; = Ljava/lang/String;
+  //  .field static final s:S = 0x2s
+  //  .field static final z:Z = true
+  //
+  //  # direct methods
+  //  .method public constructor <init>()V
+  //      .registers 1
+  //      invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  //      return-void
+  //  .end method
+  static const char kDexBase64[] =
+      "ZGV4CjAzNQAwWxLbdhFa1NGiFWjsy5fhUCHxe5QHtPY8AwAAcAAAAHhWNBIAAAAAAAAAAJwCAAAZ"
+      "AAAAcAAAAA0AAADUAAAAAQAAAAgBAAALAAAAFAEAAAIAAABsAQAAAQAAAHwBAACgAQAAnAEAAJwB"
+      "AACkAQAApwEAAKoBAACtAQAAsAEAALMBAAC2AQAA2wEAAO4BAAACAgAAFgIAABkCAAAcAgAAHwIA"
+      "ACICAAAlAgAAKAIAACsCAAAuAgAAMQIAADUCAAA5AgAAPQIAAEACAAABAAAAAgAAAAMAAAAEAAAA"
+      "BQAAAAYAAAAHAAAACAAAAAkAAAAKAAAACwAAAAwAAAANAAAADAAAAAsAAAAAAAAABgAAAA4AAAAG"
+      "AAEADwAAAAYAAgAQAAAABgADABEAAAAGAAQAEgAAAAYABQATAAAABgAJABQAAAAGAAkAFQAAAAYA"
+      "BwAWAAAABgAKABcAAAAGAAwAGAAAAAYAAAAAAAAACAAAAAAAAAAGAAAAAQAAAAgAAAAAAAAA////"
+      "/wAAAAB8AgAARAIAAAY8aW5pdD4AAUIAAUMAAUQAAUYAAUkAAUoAI0xHb29kU3RhdGljRmllbGRJ"
+      "bml0aWFsVmFsdWVzQXJyYXk7ABFMamF2YS9sYW5nL0NsYXNzOwASTGphdmEvbGFuZy9PYmplY3Q7"
+      "ABJMamF2YS9sYW5nL1N0cmluZzsAAVMAAVYAAVoAAWIAAWMAAWQAAWYAAWkAAWoAAmwxAAJsMgAC"
+      "bDMAAXMAAXoAAAsAAQNj8TMzMzMzM+M/ED8EAwYEHhcXGAkCAj8AAAAAAQABAAEAAAAAAAAABAAA"
+      "AHAQAQAAAA4ACwABAAAYARgBGAEYARgBGAEYARgBGAEYARgAgYAE5AQNAAAAAAAAAAEAAAAAAAAA"
+      "AQAAABkAAABwAAAAAgAAAA0AAADUAAAAAwAAAAEAAAAIAQAABAAAAAsAAAAUAQAABQAAAAIAAABs"
+      "AQAABgAAAAEAAAB8AQAAAiAAABkAAACcAQAABSAAAAEAAABEAgAAAxAAAAEAAABgAgAAASAAAAEA"
+      "AABkAgAAACAAAAEAAAB8AgAAABAAAAEAAACcAgAA";
+
+  size_t length;
+  std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(kDexBase64, &length));
+  CHECK(dex_bytes != nullptr);
+  // Note: `dex_file` will be destroyed before `dex_bytes`.
+  std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length));
+  std::string error_msg;
+  EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(),
+                                      dex_file->Begin(),
+                                      dex_file->Size(),
+                                      "good static field initial values array",
+                                      /*verify_checksum*/ true,
+                                      &error_msg));
+}
+
 }  // namespace art
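For reference on how a fixture like the "bad" one above gets its deliberately wrong typing: the comment notes the file was hex edited so that static field "i" carries a string-typed initial value. In the dex encoded_value encoding the low 5 bits of an element's leading byte select the value type (VALUE_INT = 0x04, VALUE_STRING = 0x17 in the published dex format), so retyping one element is a one-byte edit. The helper below is purely illustrative, not part of this change; the offset of the static-values entry is hypothetical (it would come from dexdump or a hex viewer), and the header checksum and signature may need re-deriving afterwards so the verifier fails on the intended check rather than earlier:

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Rewrites the value-type bits of one encoded_value element, e.g. turning an
  // int-typed static initial value into a string-typed one for a negative test.
  void RetypeStaticValueToString(std::vector<uint8_t>& dex, size_t value_byte_offset) {
    constexpr uint8_t kValueTypeMask = 0x1f;  // low 5 bits: value type
    constexpr uint8_t kValueString   = 0x17;  // VALUE_STRING per the dex format
    const uint8_t original = dex[value_byte_offset];
    dex[value_byte_offset] =
        static_cast<uint8_t>((original & ~kValueTypeMask) | kValueString);
  }
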
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 9f34c12..b2267e5 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -31,57 +31,46 @@
 using android::base::StringPrintf;
 
 const char* const Instruction::kInstructionNames[] = {
-#define INSTRUCTION_NAME(o, c, pname, f, i, a, v) pname,
+#define INSTRUCTION_NAME(o, c, pname, f, i, a, e, v) pname,
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_NAME)
 #undef DEX_INSTRUCTION_LIST
 #undef INSTRUCTION_NAME
 };
 
-Instruction::Format const Instruction::kInstructionFormats[] = {
-#define INSTRUCTION_FORMAT(o, c, p, format, i, a, v) format,
-#include "dex_instruction_list.h"
-  DEX_INSTRUCTION_LIST(INSTRUCTION_FORMAT)
-#undef DEX_INSTRUCTION_LIST
-#undef INSTRUCTION_FORMAT
-};
+static_assert(sizeof(Instruction::InstructionDescriptor) == 8u, "Unexpected descriptor size");
 
-Instruction::IndexType const Instruction::kInstructionIndexTypes[] = {
-#define INSTRUCTION_INDEX_TYPE(o, c, p, f, index, a, v) index,
-#include "dex_instruction_list.h"
-  DEX_INSTRUCTION_LIST(INSTRUCTION_INDEX_TYPE)
-#undef DEX_INSTRUCTION_LIST
-#undef INSTRUCTION_FLAGS
-};
+static constexpr int8_t InstructionSizeInCodeUnitsByOpcode(Instruction::Code opcode,
+                                                           Instruction::Format format) {
+  if (opcode == Instruction::Code::NOP) {
+    return -1;
+  } else if ((format >= Instruction::Format::k10x) && (format <= Instruction::Format::k10t)) {
+    return 1;
+  } else if ((format >= Instruction::Format::k20t) && (format <= Instruction::Format::k22c)) {
+    return 2;
+  } else if ((format >= Instruction::Format::k32x) && (format <= Instruction::Format::k3rc)) {
+    return 3;
+  } else if ((format >= Instruction::Format::k45cc) && (format <= Instruction::Format::k4rcc)) {
+    return 4;
+  } else if (format == Instruction::Format::k51l) {
+    return 5;
+  } else {
+    return -1;
+  }
+}
 
-int const Instruction::kInstructionFlags[] = {
-#define INSTRUCTION_FLAGS(o, c, p, f, i, flags, v) flags,
+Instruction::InstructionDescriptor const Instruction::kInstructionDescriptors[] = {
+#define INSTRUCTION_DESCR(opcode, c, p, format, index, flags, eflags, vflags) \
+    { vflags, \
+      format, \
+      index, \
+      flags, \
+      InstructionSizeInCodeUnitsByOpcode((c), (format)), \
+    },
 #include "dex_instruction_list.h"
-  DEX_INSTRUCTION_LIST(INSTRUCTION_FLAGS)
+  DEX_INSTRUCTION_LIST(INSTRUCTION_DESCR)
 #undef DEX_INSTRUCTION_LIST
-#undef INSTRUCTION_FLAGS
-};
-
-int const Instruction::kInstructionVerifyFlags[] = {
-#define INSTRUCTION_VERIFY_FLAGS(o, c, p, f, i, a, vflags) vflags,
-#include "dex_instruction_list.h"
-  DEX_INSTRUCTION_LIST(INSTRUCTION_VERIFY_FLAGS)
-#undef DEX_INSTRUCTION_LIST
-#undef INSTRUCTION_VERIFY_FLAGS
-};
-
-int const Instruction::kInstructionSizeInCodeUnits[] = {
-#define INSTRUCTION_SIZE(opcode, c, p, format, i, a, v) \
-    (((opcode) == NOP) ? -1 : \
-     (((format) >= k10x) && ((format) <= k10t)) ?  1 : \
-     (((format) >= k20t) && ((format) <= k22c)) ?  2 : \
-     (((format) >= k32x) && ((format) <= k3rc)) ?  3 : \
-     (((format) >= k45cc) && ((format) <= k4rcc)) ? 4 : \
-      ((format) == k51l) ?  5 : -1),
-#include "dex_instruction_list.h"
-  DEX_INSTRUCTION_LIST(INSTRUCTION_SIZE)
-#undef DEX_INSTRUCTION_LIST
-#undef INSTRUCTION_SIZE
+#undef INSTRUCTION_DESCR
 };
 
 int32_t Instruction::GetTargetOffset() const {
@@ -520,7 +509,7 @@
 struct InstructionStaticAsserts : private Instruction {
   #define IMPLIES(a, b) (!(a) || (b))
 
-  #define VAR_ARGS_CHECK(o, c, pname, f, i, a, v) \
+  #define VAR_ARGS_CHECK(o, c, pname, f, i, a, e, v) \
     static_assert(IMPLIES((f) == k35c || (f) == k45cc, \
                           ((v) & (kVerifyVarArg | kVerifyVarArgNonZero)) != 0), \
                   "Missing var-arg verification");
@@ -529,7 +518,7 @@
   #undef DEX_INSTRUCTION_LIST
   #undef VAR_ARGS_CHECK
 
-  #define VAR_ARGS_RANGE_CHECK(o, c, pname, f, i, a, v) \
+  #define VAR_ARGS_RANGE_CHECK(o, c, pname, f, i, a, e, v) \
     static_assert(IMPLIES((f) == k3rc || (f) == k4rcc, \
                           ((v) & (kVerifyVarArgRange | kVerifyVarArgRangeNonZero)) != 0), \
                   "Missing var-arg verification");
@@ -537,6 +526,14 @@
     DEX_INSTRUCTION_LIST(VAR_ARGS_RANGE_CHECK)
   #undef DEX_INSTRUCTION_LIST
   #undef VAR_ARGS_RANGE_CHECK
+
+  #define EXPERIMENTAL_CHECK(o, c, pname, f, i, a, e, v) \
+    static_assert(kHaveExperimentalInstructions || (((a) & kExperimental) == 0), \
+                  "Unexpected experimental instruction.");
+    #include "dex_instruction_list.h"
+  DEX_INSTRUCTION_LIST(EXPERIMENTAL_CHECK)
+  #undef DEX_INSTRUCTION_LIST
+  #undef EXPERIMENTAL_CHECK
 };
 
 std::ostream& operator<<(std::ostream& os, const Instruction::Code& code) {
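The dex_instruction.cc hunk above folds five parallel, opcode-indexed arrays into one table of 8-byte InstructionDescriptor records, still generated from the DEX_INSTRUCTION_LIST X-macro, and replaces the nested-ternary size macro with a constexpr helper. A minimal sketch of that X-macro-driven descriptor-table technique, using a made-up three-opcode list and field widths chosen purely for illustration:

  #include <cstdint>
  #include <cstdio>

  // A toy X-macro list standing in for DEX_INSTRUCTION_LIST:
  // V(opcode, name, format, flags)
  #define TOY_INSTRUCTION_LIST(V)       \
    V(0x00, NOP,  kFmt10x, kContinue)   \
    V(0x01, MOVE, kFmt12x, kContinue)   \
    V(0x02, GOTO, kFmt10t, kBranch)

  enum Format : uint8_t { kFmt10x, kFmt12x, kFmt10t };
  enum Flags : uint8_t { kContinue = 0x01, kBranch = 0x02 };

  enum Code : uint8_t {
  #define ENUM_ENTRY(opcode, cname, f, a) cname = (opcode),
    TOY_INSTRUCTION_LIST(ENUM_ENTRY)
  #undef ENUM_ENTRY
  };

  // One compact record per opcode instead of several parallel arrays.
  struct Descriptor {
    Format format;
    uint8_t flags;                // set of Flags
    int8_t size_in_code_units;    // -1 would mean "complex, compute at runtime"
  };
  static_assert(sizeof(Descriptor) == 3, "keep the table dense");

  constexpr int8_t SizeForFormat(Format f) {
    return (f == kFmt10x || f == kFmt12x || f == kFmt10t) ? 1 : -1;
  }

  static constexpr Descriptor kDescriptors[] = {
  #define DESCR_ENTRY(opcode, cname, f, a) { (f), (a), SizeForFormat(f) },
    TOY_INSTRUCTION_LIST(DESCR_ENTRY)
  #undef DESCR_ENTRY
  };

  int main() {
    std::printf("GOTO: format=%d flags=%#x size=%d\n",
                static_cast<int>(kDescriptors[GOTO].format),
                static_cast<unsigned>(kDescriptors[GOTO].flags),
                static_cast<int>(kDescriptors[GOTO].size_in_code_units));
  }
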
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index d269110..9a17576 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -80,7 +80,7 @@
   };
 
   enum Code {  // private marker to avoid generate-operator-out.py from processing.
-#define INSTRUCTION_ENUM(opcode, cname, p, f, i, a, v) cname = (opcode),
+#define INSTRUCTION_ENUM(opcode, cname, p, f, i, a, e, v) cname = (opcode),
 #include "dex_instruction_list.h"
     DEX_INSTRUCTION_LIST(INSTRUCTION_ENUM)
 #undef DEX_INSTRUCTION_LIST
@@ -88,7 +88,7 @@
     RSUB_INT_LIT16 = RSUB_INT,
   };
 
-  enum Format {
+  enum Format : uint8_t {
     k10x,  // op
     k12x,  // op vA, vB
     k11n,  // op vA, #+B
@@ -124,7 +124,7 @@
     k51l,  // op vAA, #+BBBBBBBBBBBBBBBB
   };
 
-  enum IndexType {
+  enum IndexType : uint8_t {
     kIndexUnknown = 0,
     kIndexNone,               // has no index
     kIndexTypeRef,            // type reference index
@@ -137,14 +137,19 @@
     kIndexCallSiteRef,        // call site reference index
   };
 
-  enum Flags {
-    kBranch              = 0x0000001,  // conditional or unconditional branch
-    kContinue            = 0x0000002,  // flow can continue to next statement
-    kSwitch              = 0x0000004,  // switch statement
-    kThrow               = 0x0000008,  // could cause an exception to be thrown
-    kReturn              = 0x0000010,  // returns, no additional statements
-    kInvoke              = 0x0000020,  // a flavor of invoke
-    kUnconditional       = 0x0000040,  // unconditional branch
+  enum Flags : uint8_t {
+    kBranch              = 0x01,  // conditional or unconditional branch
+    kContinue            = 0x02,  // flow can continue to next statement
+    kSwitch              = 0x04,  // switch statement
+    kThrow               = 0x08,  // could cause an exception to be thrown
+    kReturn              = 0x10,  // returns, no additional statements
+    kInvoke              = 0x20,  // a flavor of invoke
+    kUnconditional       = 0x40,  // unconditional branch
+    kExperimental        = 0x80,  // is an experimental opcode
+  };
+
+  // Old flags. Keeping them around in case we might need them again some day.
+  enum ExtendedFlags : uint32_t {
     kAdd                 = 0x0000080,  // addition
     kSubtract            = 0x0000100,  // subtract
     kMultiply            = 0x0000200,  // multiply
@@ -162,10 +167,9 @@
     kClobber             = 0x0200000,  // clobbers memory in a big way (not just a write)
     kRegCFieldOrConstant = 0x0400000,  // is the third virtual register a field or literal constant (vC)
     kRegBFieldOrConstant = 0x0800000,  // is the second virtual register a field or literal constant (vB)
-    kExperimental        = 0x1000000,  // is an experimental opcode
   };
 
-  enum VerifyFlag {
+  enum VerifyFlag : uint32_t {
     kVerifyNone               = 0x0000000,
     kVerifyRegA               = 0x0000001,
     kVerifyRegAWide           = 0x0000002,
@@ -194,11 +198,22 @@
     kVerifyRegBCallSite       = 0x1000000
   };
 
+  // Collect the enums in a struct for better locality.
+  struct InstructionDescriptor {
+    uint32_t verify_flags;         // Set of VerifyFlag.
+    Format format;
+    IndexType index_type;
+    uint8_t flags;                 // Set of Flags.
+    int8_t size_in_code_units;
+  };
+
   static constexpr uint32_t kMaxVarArgRegs = 5;
 
+  static constexpr bool kHaveExperimentalInstructions = false;
+
   // Returns the size (in 2 byte code units) of this instruction.
   size_t SizeInCodeUnits() const {
-    int result = kInstructionSizeInCodeUnits[Opcode()];
+    int8_t result = kInstructionDescriptors[Opcode()].size_in_code_units;
     if (UNLIKELY(result < 0)) {
       return SizeInCodeUnitsComplexOpcode();
     } else {
@@ -497,32 +512,32 @@
 
   // Returns the format of the given opcode.
   static Format FormatOf(Code opcode) {
-    return kInstructionFormats[opcode];
+    return kInstructionDescriptors[opcode].format;
   }
 
   // Returns the index type of the given opcode.
   static IndexType IndexTypeOf(Code opcode) {
-    return kInstructionIndexTypes[opcode];
+    return kInstructionDescriptors[opcode].index_type;
   }
 
   // Returns the flags for the given opcode.
-  static int FlagsOf(Code opcode) {
-    return kInstructionFlags[opcode];
+  static uint8_t FlagsOf(Code opcode) {
+    return kInstructionDescriptors[opcode].flags;
   }
 
   // Return the verify flags for the given opcode.
-  static int VerifyFlagsOf(Code opcode) {
-    return kInstructionVerifyFlags[opcode];
+  static uint32_t VerifyFlagsOf(Code opcode) {
+    return kInstructionDescriptors[opcode].verify_flags;
   }
 
   // Returns true if this instruction is a branch.
   bool IsBranch() const {
-    return (kInstructionFlags[Opcode()] & kBranch) != 0;
+    return (kInstructionDescriptors[Opcode()].flags & kBranch) != 0;
   }
 
   // Returns true if this instruction is a unconditional branch.
   bool IsUnconditional() const {
-    return (kInstructionFlags[Opcode()] & kUnconditional) != 0;
+    return (kInstructionDescriptors[Opcode()].flags & kUnconditional) != 0;
   }
 
   // Returns the branch offset if this instruction is a branch.
@@ -533,23 +548,23 @@
 
   // Returns true if the instruction is a quickened instruction.
   bool IsQuickened() const {
-    return (kInstructionIndexTypes[Opcode()] == kIndexFieldOffset) ||
-        (kInstructionIndexTypes[Opcode()] == kIndexVtableOffset);
+    return (kInstructionDescriptors[Opcode()].index_type == kIndexFieldOffset) ||
+        (kInstructionDescriptors[Opcode()].index_type == kIndexVtableOffset);
   }
 
   // Returns true if this instruction is a switch.
   bool IsSwitch() const {
-    return (kInstructionFlags[Opcode()] & kSwitch) != 0;
+    return (kInstructionDescriptors[Opcode()].flags & kSwitch) != 0;
   }
 
   // Returns true if this instruction can throw.
   bool IsThrow() const {
-    return (kInstructionFlags[Opcode()] & kThrow) != 0;
+    return (kInstructionDescriptors[Opcode()].flags & kThrow) != 0;
   }
 
   // Determine if the instruction is any of 'return' instructions.
   bool IsReturn() const {
-    return (kInstructionFlags[Opcode()] & kReturn) != 0;
+    return (kInstructionDescriptors[Opcode()].flags & kReturn) != 0;
   }
 
   // Determine if this instruction ends execution of its basic block.
@@ -559,41 +574,41 @@
 
   // Determine if this instruction is an invoke.
   bool IsInvoke() const {
-    return (kInstructionFlags[Opcode()] & kInvoke) != 0;
+    return (kInstructionDescriptors[Opcode()].flags & kInvoke) != 0;
   }
 
   // Determine if this instruction is experimental.
   bool IsExperimental() const {
-    return (kInstructionFlags[Opcode()] & kExperimental) != 0;
+    return (kInstructionDescriptors[Opcode()].flags & kExperimental) != 0;
   }
 
   int GetVerifyTypeArgumentA() const {
-    return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegA | kVerifyRegAWide));
+    return (kInstructionDescriptors[Opcode()].verify_flags & (kVerifyRegA | kVerifyRegAWide));
   }
 
   int GetVerifyTypeArgumentB() const {
-    return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegB | kVerifyRegBField |
+    return (kInstructionDescriptors[Opcode()].verify_flags & (kVerifyRegB | kVerifyRegBField |
         kVerifyRegBMethod | kVerifyRegBNewInstance | kVerifyRegBString | kVerifyRegBType |
         kVerifyRegBWide));
   }
 
   int GetVerifyTypeArgumentC() const {
-    return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegC | kVerifyRegCField |
+    return (kInstructionDescriptors[Opcode()].verify_flags & (kVerifyRegC | kVerifyRegCField |
         kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide));
   }
 
   int GetVerifyTypeArgumentH() const {
-    return (kInstructionVerifyFlags[Opcode()] & kVerifyRegHPrototype);
+    return (kInstructionDescriptors[Opcode()].verify_flags & kVerifyRegHPrototype);
   }
 
   int GetVerifyExtraFlags() const {
-    return (kInstructionVerifyFlags[Opcode()] & (kVerifyArrayData | kVerifyBranchTarget |
-        kVerifySwitchTargets | kVerifyVarArg | kVerifyVarArgNonZero | kVerifyVarArgRange |
-        kVerifyVarArgRangeNonZero | kVerifyError));
+    return (kInstructionDescriptors[Opcode()].verify_flags & (kVerifyArrayData |
+        kVerifyBranchTarget | kVerifySwitchTargets | kVerifyVarArg | kVerifyVarArgNonZero |
+        kVerifyVarArgRange | kVerifyVarArgRangeNonZero | kVerifyError));
   }
 
   bool GetVerifyIsRuntimeOnly() const {
-    return (kInstructionVerifyFlags[Opcode()] & kVerifyRuntimeOnly) != 0;
+    return (kInstructionDescriptors[Opcode()].verify_flags & kVerifyRuntimeOnly) != 0;
   }
 
   // Get the dex PC of this instruction as a offset in code units from the beginning of insns.
@@ -651,11 +666,9 @@
   }
 
   static const char* const kInstructionNames[];
-  static Format const kInstructionFormats[];
-  static IndexType const kInstructionIndexTypes[];
-  static int const kInstructionFlags[];
-  static int const kInstructionVerifyFlags[];
-  static int const kInstructionSizeInCodeUnits[];
+
+  static const InstructionDescriptor kInstructionDescriptors[];
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(Instruction);
 };
 std::ostream& operator<<(std::ostream& os, const Instruction::Code& code);
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index 11dc7e2..d0a4ae5 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -17,264 +17,264 @@
 #ifndef ART_RUNTIME_DEX_INSTRUCTION_LIST_H_
 #define ART_RUNTIME_DEX_INSTRUCTION_LIST_H_
 
-// V(opcode, instruction_code, name, format, index, flags, verifier_flags);
+// V(opcode, instruction_code, name, format, index, flags, extended_flags, verifier_flags);
 #define DEX_INSTRUCTION_LIST(V) \
-  V(0x00, NOP, "nop", k10x, kIndexNone, kContinue, kVerifyNone) \
-  V(0x01, MOVE, "move", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x02, MOVE_FROM16, "move/from16", k22x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x03, MOVE_16, "move/16", k32x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x04, MOVE_WIDE, "move-wide", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x05, MOVE_WIDE_FROM16, "move-wide/from16", k22x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x06, MOVE_WIDE_16, "move-wide/16", k32x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x07, MOVE_OBJECT, "move-object", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x08, MOVE_OBJECT_FROM16, "move-object/from16", k22x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x09, MOVE_OBJECT_16, "move-object/16", k32x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x0A, MOVE_RESULT, "move-result", k11x, kIndexNone, kContinue, kVerifyRegA) \
-  V(0x0B, MOVE_RESULT_WIDE, "move-result-wide", k11x, kIndexNone, kContinue, kVerifyRegAWide) \
-  V(0x0C, MOVE_RESULT_OBJECT, "move-result-object", k11x, kIndexNone, kContinue, kVerifyRegA) \
-  V(0x0D, MOVE_EXCEPTION, "move-exception", k11x, kIndexNone, kContinue, kVerifyRegA) \
-  V(0x0E, RETURN_VOID, "return-void", k10x, kIndexNone, kReturn, kVerifyNone) \
-  V(0x0F, RETURN, "return", k11x, kIndexNone, kReturn, kVerifyRegA) \
-  V(0x10, RETURN_WIDE, "return-wide", k11x, kIndexNone, kReturn, kVerifyRegAWide) \
-  V(0x11, RETURN_OBJECT, "return-object", k11x, kIndexNone, kReturn, kVerifyRegA) \
-  V(0x12, CONST_4, "const/4", k11n, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x13, CONST_16, "const/16", k21s, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x14, CONST, "const", k31i, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x15, CONST_HIGH16, "const/high16", k21h, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegA) \
-  V(0x16, CONST_WIDE_16, "const-wide/16", k21s, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x17, CONST_WIDE_32, "const-wide/32", k31i, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x18, CONST_WIDE, "const-wide", k51l, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x19, CONST_WIDE_HIGH16, "const-wide/high16", k21h, kIndexNone, kContinue | kRegBFieldOrConstant, kVerifyRegAWide) \
-  V(0x1A, CONST_STRING, "const-string", k21c, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
-  V(0x1B, CONST_STRING_JUMBO, "const-string/jumbo", k31c, kIndexStringRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBString) \
-  V(0x1C, CONST_CLASS, "const-class", k21c, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
-  V(0x1D, MONITOR_ENTER, "monitor-enter", k11x, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
-  V(0x1E, MONITOR_EXIT, "monitor-exit", k11x, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA) \
-  V(0x1F, CHECK_CAST, "check-cast", k21c, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegBType) \
-  V(0x20, INSTANCE_OF, "instance-of", k22c, kIndexTypeRef, kContinue | kThrow, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
-  V(0x21, ARRAY_LENGTH, "array-length", k12x, kIndexNone, kContinue | kThrow, kVerifyRegA | kVerifyRegB) \
-  V(0x22, NEW_INSTANCE, "new-instance", k21c, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegBNewInstance) \
-  V(0x23, NEW_ARRAY, "new-array", k22c, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyRegB | kVerifyRegCNewArray) \
-  V(0x24, FILLED_NEW_ARRAY, "filled-new-array", k35c, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArg) \
-  V(0x25, FILLED_NEW_ARRAY_RANGE, "filled-new-array/range", k3rc, kIndexTypeRef, kContinue | kThrow | kClobber, kVerifyRegBType | kVerifyVarArgRange) \
-  V(0x26, FILL_ARRAY_DATA, "fill-array-data", k31t, kIndexNone, kContinue | kThrow | kClobber, kVerifyRegA | kVerifyArrayData) \
-  V(0x27, THROW, "throw", k11x, kIndexNone, kThrow, kVerifyRegA) \
-  V(0x28, GOTO, "goto", k10t, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
-  V(0x29, GOTO_16, "goto/16", k20t, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
-  V(0x2A, GOTO_32, "goto/32", k30t, kIndexNone, kBranch | kUnconditional, kVerifyBranchTarget) \
-  V(0x2B, PACKED_SWITCH, "packed-switch", k31t, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
-  V(0x2C, SPARSE_SWITCH, "sparse-switch", k31t, kIndexNone, kContinue | kSwitch, kVerifyRegA | kVerifySwitchTargets) \
-  V(0x2D, CMPL_FLOAT, "cmpl-float", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x2E, CMPG_FLOAT, "cmpg-float", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x2F, CMPL_DOUBLE, "cmpl-double", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x30, CMPG_DOUBLE, "cmpg-double", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x31, CMP_LONG, "cmp-long", k23x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x32, IF_EQ, "if-eq", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x33, IF_NE, "if-ne", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x34, IF_LT, "if-lt", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x35, IF_GE, "if-ge", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x36, IF_GT, "if-gt", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x37, IF_LE, "if-le", k22t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
-  V(0x38, IF_EQZ, "if-eqz", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x39, IF_NEZ, "if-nez", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3A, IF_LTZ, "if-ltz", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3B, IF_GEZ, "if-gez", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3C, IF_GTZ, "if-gtz", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3D, IF_LEZ, "if-lez", k21t, kIndexNone, kContinue | kBranch, kVerifyRegA | kVerifyBranchTarget) \
-  V(0x3E, UNUSED_3E, "unused-3e", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0x3F, UNUSED_3F, "unused-3f", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0x40, UNUSED_40, "unused-40", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0x41, UNUSED_41, "unused-41", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0x42, UNUSED_42, "unused-42", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0x43, UNUSED_43, "unused-43", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0x44, AGET, "aget", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x45, AGET_WIDE, "aget-wide", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
-  V(0x46, AGET_OBJECT, "aget-object", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x47, AGET_BOOLEAN, "aget-boolean", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x48, AGET_BYTE, "aget-byte", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x49, AGET_CHAR, "aget-char", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4A, AGET_SHORT, "aget-short", k23x, kIndexNone, kContinue | kThrow | kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4B, APUT, "aput", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4C, APUT_WIDE, "aput-wide", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
-  V(0x4D, APUT_OBJECT, "aput-object", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4E, APUT_BOOLEAN, "aput-boolean", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x4F, APUT_BYTE, "aput-byte", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x50, APUT_CHAR, "aput-char", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x51, APUT_SHORT, "aput-short", k23x, kIndexNone, kContinue | kThrow | kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x52, IGET, "iget", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x53, IGET_WIDE, "iget-wide", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
-  V(0x54, IGET_OBJECT, "iget-object", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x55, IGET_BOOLEAN, "iget-boolean", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x56, IGET_BYTE, "iget-byte", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x57, IGET_CHAR, "iget-char", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x58, IGET_SHORT, "iget-short", k22c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x59, IPUT, "iput", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5A, IPUT_WIDE, "iput-wide", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
-  V(0x5B, IPUT_OBJECT, "iput-object", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5C, IPUT_BOOLEAN, "iput-boolean", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5D, IPUT_BYTE, "iput-byte", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5E, IPUT_CHAR, "iput-char", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x5F, IPUT_SHORT, "iput-short", k22c, kIndexFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
-  V(0x60, SGET, "sget", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x61, SGET_WIDE, "sget-wide", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
-  V(0x62, SGET_OBJECT, "sget-object", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x63, SGET_BOOLEAN, "sget-boolean", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x64, SGET_BYTE, "sget-byte", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x65, SGET_CHAR, "sget-char", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x66, SGET_SHORT, "sget-short", k21c, kIndexFieldRef, kContinue | kThrow | kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x67, SPUT, "sput", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x68, SPUT_WIDE, "sput-wide", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
-  V(0x69, SPUT_OBJECT, "sput-object", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6A, SPUT_BOOLEAN, "sput-boolean", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6B, SPUT_BYTE, "sput-byte", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6C, SPUT_CHAR, "sput-char", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6D, SPUT_SHORT, "sput-short", k21c, kIndexFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
-  V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x6F, INVOKE_SUPER, "invoke-super", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x71, INVOKE_STATIC, "invoke-static", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \
-  V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \
-  V(0x73, RETURN_VOID_NO_BARRIER, "return-void-no-barrier", k10x, kIndexNone, kReturn, kVerifyNone) \
-  V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x77, INVOKE_STATIC_RANGE, "invoke-static/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \
-  V(0x78, INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
-  V(0x79, UNUSED_79, "unused-79", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0x7A, UNUSED_7A, "unused-7a", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0x7B, NEG_INT, "neg-int", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x7C, NOT_INT, "not-int", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x7D, NEG_LONG, "neg-long", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x7E, NOT_LONG, "not-long", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x7F, NEG_FLOAT, "neg-float", k12x, kIndexNone, kContinue, kVerifyRegA | kVerifyRegB) \
-  V(0x80, NEG_DOUBLE, "neg-double", k12x, kIndexNone, kContinue, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x81, INT_TO_LONG, "int-to-long", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x82, INT_TO_FLOAT, "int-to-float", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x83, INT_TO_DOUBLE, "int-to-double", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x84, LONG_TO_INT, "long-to-int", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x85, LONG_TO_FLOAT, "long-to-float", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x86, LONG_TO_DOUBLE, "long-to-double", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x87, FLOAT_TO_INT, "float-to-int", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x88, FLOAT_TO_LONG, "float-to-long", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x89, FLOAT_TO_DOUBLE, "float-to-double", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegB) \
-  V(0x8A, DOUBLE_TO_INT, "double-to-int", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x8B, DOUBLE_TO_LONG, "double-to-long", k12x, kIndexNone, kContinue | kCast, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0x8C, DOUBLE_TO_FLOAT, "double-to-float", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegBWide) \
-  V(0x8D, INT_TO_BYTE, "int-to-byte", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x8E, INT_TO_CHAR, "int-to-char", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x8F, INT_TO_SHORT, "int-to-short", k12x, kIndexNone, kContinue | kCast, kVerifyRegA | kVerifyRegB) \
-  V(0x90, ADD_INT, "add-int", k23x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x91, SUB_INT, "sub-int", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x92, MUL_INT, "mul-int", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x93, DIV_INT, "div-int", k23x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x94, REM_INT, "rem-int", k23x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x95, AND_INT, "and-int", k23x, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x96, OR_INT, "or-int", k23x, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x97, XOR_INT, "xor-int", k23x, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x98, SHL_INT, "shl-int", k23x, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x99, SHR_INT, "shr-int", k23x, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x9A, USHR_INT, "ushr-int", k23x, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0x9B, ADD_LONG, "add-long", k23x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9C, SUB_LONG, "sub-long", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9D, MUL_LONG, "mul-long", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9E, DIV_LONG, "div-long", k23x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0x9F, REM_LONG, "rem-long", k23x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA0, AND_LONG, "and-long", k23x, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA1, OR_LONG, "or-long", k23x, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA2, XOR_LONG, "xor-long", k23x, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xA3, SHL_LONG, "shl-long", k23x, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA4, SHR_LONG, "shr-long", k23x, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA5, USHR_LONG, "ushr-long", k23x, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
-  V(0xA6, ADD_FLOAT, "add-float", k23x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA7, SUB_FLOAT, "sub-float", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA8, MUL_FLOAT, "mul-float", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xA9, DIV_FLOAT, "div-float", k23x, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xAA, REM_FLOAT, "rem-float", k23x, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
-  V(0xAB, ADD_DOUBLE, "add-double", k23x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAC, SUB_DOUBLE, "sub-double", k23x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAD, MUL_DOUBLE, "mul-double", k23x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAE, DIV_DOUBLE, "div-double", k23x, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xAF, REM_DOUBLE, "rem-double", k23x, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
-  V(0xB0, ADD_INT_2ADDR, "add-int/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
-  V(0xB1, SUB_INT_2ADDR, "sub-int/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
-  V(0xB2, MUL_INT_2ADDR, "mul-int/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
-  V(0xB3, DIV_INT_2ADDR, "div-int/2addr", k12x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegA | kVerifyRegB) \
-  V(0xB4, REM_INT_2ADDR, "rem-int/2addr", k12x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegA | kVerifyRegB) \
-  V(0xB5, AND_INT_2ADDR, "and-int/2addr", k12x, kIndexNone, kContinue | kAnd, kVerifyRegA | kVerifyRegB) \
-  V(0xB6, OR_INT_2ADDR, "or-int/2addr", k12x, kIndexNone, kContinue | kOr, kVerifyRegA | kVerifyRegB) \
-  V(0xB7, XOR_INT_2ADDR, "xor-int/2addr", k12x, kIndexNone, kContinue | kXor, kVerifyRegA | kVerifyRegB) \
-  V(0xB8, SHL_INT_2ADDR, "shl-int/2addr", k12x, kIndexNone, kContinue | kShl, kVerifyRegA | kVerifyRegB) \
-  V(0xB9, SHR_INT_2ADDR, "shr-int/2addr", k12x, kIndexNone, kContinue | kShr, kVerifyRegA | kVerifyRegB) \
-  V(0xBA, USHR_INT_2ADDR, "ushr-int/2addr", k12x, kIndexNone, kContinue | kUshr, kVerifyRegA | kVerifyRegB) \
-  V(0xBB, ADD_LONG_2ADDR, "add-long/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBC, SUB_LONG_2ADDR, "sub-long/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBD, MUL_LONG_2ADDR, "mul-long/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBE, DIV_LONG_2ADDR, "div-long/2addr", k12x, kIndexNone, kContinue | kThrow | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xBF, REM_LONG_2ADDR, "rem-long/2addr", k12x, kIndexNone, kContinue | kThrow | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC0, AND_LONG_2ADDR, "and-long/2addr", k12x, kIndexNone, kContinue | kAnd, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC1, OR_LONG_2ADDR, "or-long/2addr", k12x, kIndexNone, kContinue | kOr, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC2, XOR_LONG_2ADDR, "xor-long/2addr", k12x, kIndexNone, kContinue | kXor, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xC3, SHL_LONG_2ADDR, "shl-long/2addr", k12x, kIndexNone, kContinue | kShl, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC4, SHR_LONG_2ADDR, "shr-long/2addr", k12x, kIndexNone, kContinue | kShr, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC5, USHR_LONG_2ADDR, "ushr-long/2addr", k12x, kIndexNone, kContinue | kUshr, kVerifyRegAWide | kVerifyRegB) \
-  V(0xC6, ADD_FLOAT_2ADDR, "add-float/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegA | kVerifyRegB) \
-  V(0xC7, SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegA | kVerifyRegB) \
-  V(0xC8, MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegA | kVerifyRegB) \
-  V(0xC9, DIV_FLOAT_2ADDR, "div-float/2addr", k12x, kIndexNone, kContinue | kDivide, kVerifyRegA | kVerifyRegB) \
-  V(0xCA, REM_FLOAT_2ADDR, "rem-float/2addr", k12x, kIndexNone, kContinue | kRemainder, kVerifyRegA | kVerifyRegB) \
-  V(0xCB, ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, kIndexNone, kContinue | kAdd, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCC, SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, kIndexNone, kContinue | kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCD, MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, kIndexNone, kContinue | kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCE, DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, kIndexNone, kContinue | kDivide, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xCF, REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, kIndexNone, kContinue | kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
-  V(0xD0, ADD_INT_LIT16, "add-int/lit16", k22s, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD1, RSUB_INT, "rsub-int", k22s, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD2, MUL_INT_LIT16, "mul-int/lit16", k22s, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD3, DIV_INT_LIT16, "div-int/lit16", k22s, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD4, REM_INT_LIT16, "rem-int/lit16", k22s, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD5, AND_INT_LIT16, "and-int/lit16", k22s, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD6, OR_INT_LIT16, "or-int/lit16", k22s, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD7, XOR_INT_LIT16, "xor-int/lit16", k22s, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD8, ADD_INT_LIT8, "add-int/lit8", k22b, kIndexNone, kContinue | kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xD9, RSUB_INT_LIT8, "rsub-int/lit8", k22b, kIndexNone, kContinue | kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDA, MUL_INT_LIT8, "mul-int/lit8", k22b, kIndexNone, kContinue | kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDB, DIV_INT_LIT8, "div-int/lit8", k22b, kIndexNone, kContinue | kThrow | kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDC, REM_INT_LIT8, "rem-int/lit8", k22b, kIndexNone, kContinue | kThrow | kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDD, AND_INT_LIT8, "and-int/lit8", k22b, kIndexNone, kContinue | kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDE, OR_INT_LIT8, "or-int/lit8", k22b, kIndexNone, kContinue | kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xDF, XOR_INT_LIT8, "xor-int/lit8", k22b, kIndexNone, kContinue | kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE0, SHL_INT_LIT8, "shl-int/lit8", k22b, kIndexNone, kContinue | kShl | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE1, SHR_INT_LIT8, "shr-int/lit8", k22b, kIndexNone, kContinue | kShr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE2, USHR_INT_LIT8, "ushr-int/lit8", k22b, kIndexNone, kContinue | kUshr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE3, IGET_QUICK, "iget-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE6, IPUT_QUICK, "iput-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgNonZero | kVerifyRuntimeOnly) \
-  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, kIndexVtableOffset, kContinue | kThrow | kInvoke, kVerifyVarArgRangeNonZero | kVerifyRuntimeOnly) \
-  V(0xEB, IPUT_BOOLEAN_QUICK, "iput-boolean-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xEC, IPUT_BYTE_QUICK, "iput-byte-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xED, IPUT_CHAR_QUICK, "iput-char-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xEE, IPUT_SHORT_QUICK, "iput-short-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xEF, IGET_BOOLEAN_QUICK, "iget-boolean-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF0, IGET_BYTE_QUICK, "iget-byte-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF1, IGET_CHAR_QUICK, "iget-char-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF3, UNUSED_F3, "unused-f3", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0xF4, UNUSED_F4, "unused-f4", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0xF5, UNUSED_F5, "unused-f5", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0xF6, UNUSED_F6, "unused-f6", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0xF7, UNUSED_F7, "unused-f7", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0xF8, UNUSED_F8, "unused-f8", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0xF9, UNUSED_F9, "unused-f9", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero | kVerifyRegHPrototype) \
-  V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero | kVerifyRegHPrototype) \
-  V(0xFC, INVOKE_CUSTOM, "invoke-custom", k35c, kIndexCallSiteRef, kContinue | kThrow, kVerifyRegBCallSite | kVerifyVarArg) \
-  V(0xFD, INVOKE_CUSTOM_RANGE, "invoke-custom/range", k3rc, kIndexCallSiteRef, kContinue | kThrow, kVerifyRegBCallSite | kVerifyVarArgRange) \
-  V(0xFE, UNUSED_FE, "unused-fe", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0xFF, UNUSED_FF, "unused-ff", k10x, kIndexUnknown, 0, kVerifyError)
+  V(0x00, NOP, "nop", k10x, kIndexNone, kContinue, 0, kVerifyNone) \
+  V(0x01, MOVE, "move", k12x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegB) \
+  V(0x02, MOVE_FROM16, "move/from16", k22x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegB) \
+  V(0x03, MOVE_16, "move/16", k32x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegB) \
+  V(0x04, MOVE_WIDE, "move-wide", k12x, kIndexNone, kContinue, 0, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x05, MOVE_WIDE_FROM16, "move-wide/from16", k22x, kIndexNone, kContinue, 0, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x06, MOVE_WIDE_16, "move-wide/16", k32x, kIndexNone, kContinue, 0, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x07, MOVE_OBJECT, "move-object", k12x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegB) \
+  V(0x08, MOVE_OBJECT_FROM16, "move-object/from16", k22x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegB) \
+  V(0x09, MOVE_OBJECT_16, "move-object/16", k32x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegB) \
+  V(0x0A, MOVE_RESULT, "move-result", k11x, kIndexNone, kContinue, 0, kVerifyRegA) \
+  V(0x0B, MOVE_RESULT_WIDE, "move-result-wide", k11x, kIndexNone, kContinue, 0, kVerifyRegAWide) \
+  V(0x0C, MOVE_RESULT_OBJECT, "move-result-object", k11x, kIndexNone, kContinue, 0, kVerifyRegA) \
+  V(0x0D, MOVE_EXCEPTION, "move-exception", k11x, kIndexNone, kContinue, 0, kVerifyRegA) \
+  V(0x0E, RETURN_VOID, "return-void", k10x, kIndexNone, kReturn, 0, kVerifyNone) \
+  V(0x0F, RETURN, "return", k11x, kIndexNone, kReturn, 0, kVerifyRegA) \
+  V(0x10, RETURN_WIDE, "return-wide", k11x, kIndexNone, kReturn, 0, kVerifyRegAWide) \
+  V(0x11, RETURN_OBJECT, "return-object", k11x, kIndexNone, kReturn, 0, kVerifyRegA) \
+  V(0x12, CONST_4, "const/4", k11n, kIndexNone, kContinue, kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x13, CONST_16, "const/16", k21s, kIndexNone, kContinue, kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x14, CONST, "const", k31i, kIndexNone, kContinue, kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x15, CONST_HIGH16, "const/high16", k21h, kIndexNone, kContinue, kRegBFieldOrConstant, kVerifyRegA) \
+  V(0x16, CONST_WIDE_16, "const-wide/16", k21s, kIndexNone, kContinue, kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x17, CONST_WIDE_32, "const-wide/32", k31i, kIndexNone, kContinue, kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x18, CONST_WIDE, "const-wide", k51l, kIndexNone, kContinue, kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x19, CONST_WIDE_HIGH16, "const-wide/high16", k21h, kIndexNone, kContinue, kRegBFieldOrConstant, kVerifyRegAWide) \
+  V(0x1A, CONST_STRING, "const-string", k21c, kIndexStringRef, kContinue | kThrow, 0, kVerifyRegA | kVerifyRegBString) \
+  V(0x1B, CONST_STRING_JUMBO, "const-string/jumbo", k31c, kIndexStringRef, kContinue | kThrow, 0, kVerifyRegA | kVerifyRegBString) \
+  V(0x1C, CONST_CLASS, "const-class", k21c, kIndexTypeRef, kContinue | kThrow, 0, kVerifyRegA | kVerifyRegBType) \
+  V(0x1D, MONITOR_ENTER, "monitor-enter", k11x, kIndexNone, kContinue | kThrow, kClobber, kVerifyRegA) \
+  V(0x1E, MONITOR_EXIT, "monitor-exit", k11x, kIndexNone, kContinue | kThrow, kClobber, kVerifyRegA) \
+  V(0x1F, CHECK_CAST, "check-cast", k21c, kIndexTypeRef, kContinue | kThrow, 0, kVerifyRegA | kVerifyRegBType) \
+  V(0x20, INSTANCE_OF, "instance-of", k22c, kIndexTypeRef, kContinue | kThrow, 0, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
+  V(0x21, ARRAY_LENGTH, "array-length", k12x, kIndexNone, kContinue | kThrow, 0, kVerifyRegA | kVerifyRegB) \
+  V(0x22, NEW_INSTANCE, "new-instance", k21c, kIndexTypeRef, kContinue | kThrow, kClobber, kVerifyRegA | kVerifyRegBNewInstance) \
+  V(0x23, NEW_ARRAY, "new-array", k22c, kIndexTypeRef, kContinue | kThrow, kClobber, kVerifyRegA | kVerifyRegB | kVerifyRegCNewArray) \
+  V(0x24, FILLED_NEW_ARRAY, "filled-new-array", k35c, kIndexTypeRef, kContinue | kThrow, kClobber, kVerifyRegBType | kVerifyVarArg) \
+  V(0x25, FILLED_NEW_ARRAY_RANGE, "filled-new-array/range", k3rc, kIndexTypeRef, kContinue | kThrow, kClobber, kVerifyRegBType | kVerifyVarArgRange) \
+  V(0x26, FILL_ARRAY_DATA, "fill-array-data", k31t, kIndexNone, kContinue | kThrow, kClobber, kVerifyRegA | kVerifyArrayData) \
+  V(0x27, THROW, "throw", k11x, kIndexNone, kThrow, 0, kVerifyRegA) \
+  V(0x28, GOTO, "goto", k10t, kIndexNone, kBranch | kUnconditional, 0, kVerifyBranchTarget) \
+  V(0x29, GOTO_16, "goto/16", k20t, kIndexNone, kBranch | kUnconditional, 0, kVerifyBranchTarget) \
+  V(0x2A, GOTO_32, "goto/32", k30t, kIndexNone, kBranch | kUnconditional, 0, kVerifyBranchTarget) \
+  V(0x2B, PACKED_SWITCH, "packed-switch", k31t, kIndexNone, kContinue | kSwitch, 0, kVerifyRegA | kVerifySwitchTargets) \
+  V(0x2C, SPARSE_SWITCH, "sparse-switch", k31t, kIndexNone, kContinue | kSwitch, 0, kVerifyRegA | kVerifySwitchTargets) \
+  V(0x2D, CMPL_FLOAT, "cmpl-float", k23x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x2E, CMPG_FLOAT, "cmpg-float", k23x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x2F, CMPL_DOUBLE, "cmpl-double", k23x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x30, CMPG_DOUBLE, "cmpg-double", k23x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x31, CMP_LONG, "cmp-long", k23x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x32, IF_EQ, "if-eq", k22t, kIndexNone, kContinue | kBranch, 0, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x33, IF_NE, "if-ne", k22t, kIndexNone, kContinue | kBranch, 0, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x34, IF_LT, "if-lt", k22t, kIndexNone, kContinue | kBranch, 0, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x35, IF_GE, "if-ge", k22t, kIndexNone, kContinue | kBranch, 0, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x36, IF_GT, "if-gt", k22t, kIndexNone, kContinue | kBranch, 0, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x37, IF_LE, "if-le", k22t, kIndexNone, kContinue | kBranch, 0, kVerifyRegA | kVerifyRegB | kVerifyBranchTarget) \
+  V(0x38, IF_EQZ, "if-eqz", k21t, kIndexNone, kContinue | kBranch, 0, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x39, IF_NEZ, "if-nez", k21t, kIndexNone, kContinue | kBranch, 0, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3A, IF_LTZ, "if-ltz", k21t, kIndexNone, kContinue | kBranch, 0, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3B, IF_GEZ, "if-gez", k21t, kIndexNone, kContinue | kBranch, 0, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3C, IF_GTZ, "if-gtz", k21t, kIndexNone, kContinue | kBranch, 0, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3D, IF_LEZ, "if-lez", k21t, kIndexNone, kContinue | kBranch, 0, kVerifyRegA | kVerifyBranchTarget) \
+  V(0x3E, UNUSED_3E, "unused-3e", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0x3F, UNUSED_3F, "unused-3f", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0x40, UNUSED_40, "unused-40", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0x41, UNUSED_41, "unused-41", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0x42, UNUSED_42, "unused-42", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0x43, UNUSED_43, "unused-43", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0x44, AGET, "aget", k23x, kIndexNone, kContinue | kThrow, kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x45, AGET_WIDE, "aget-wide", k23x, kIndexNone, kContinue | kThrow, kLoad, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
+  V(0x46, AGET_OBJECT, "aget-object", k23x, kIndexNone, kContinue | kThrow, kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x47, AGET_BOOLEAN, "aget-boolean", k23x, kIndexNone, kContinue | kThrow, kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x48, AGET_BYTE, "aget-byte", k23x, kIndexNone, kContinue | kThrow, kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x49, AGET_CHAR, "aget-char", k23x, kIndexNone, kContinue | kThrow, kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4A, AGET_SHORT, "aget-short", k23x, kIndexNone, kContinue | kThrow, kLoad, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4B, APUT, "aput", k23x, kIndexNone, kContinue | kThrow, kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4C, APUT_WIDE, "aput-wide", k23x, kIndexNone, kContinue | kThrow, kStore, kVerifyRegAWide | kVerifyRegB | kVerifyRegC) \
+  V(0x4D, APUT_OBJECT, "aput-object", k23x, kIndexNone, kContinue | kThrow, kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4E, APUT_BOOLEAN, "aput-boolean", k23x, kIndexNone, kContinue | kThrow, kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x4F, APUT_BYTE, "aput-byte", k23x, kIndexNone, kContinue | kThrow, kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x50, APUT_CHAR, "aput-char", k23x, kIndexNone, kContinue | kThrow, kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x51, APUT_SHORT, "aput-short", k23x, kIndexNone, kContinue | kThrow, kStore, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x52, IGET, "iget", k22c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x53, IGET_WIDE, "iget-wide", k22c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
+  V(0x54, IGET_OBJECT, "iget-object", k22c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x55, IGET_BOOLEAN, "iget-boolean", k22c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x56, IGET_BYTE, "iget-byte", k22c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x57, IGET_CHAR, "iget-char", k22c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x58, IGET_SHORT, "iget-short", k22c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x59, IPUT, "iput", k22c, kIndexFieldRef, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5A, IPUT_WIDE, "iput-wide", k22c, kIndexFieldRef, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRegCField) \
+  V(0x5B, IPUT_OBJECT, "iput-object", k22c, kIndexFieldRef, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5C, IPUT_BOOLEAN, "iput-boolean", k22c, kIndexFieldRef, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5D, IPUT_BYTE, "iput-byte", k22c, kIndexFieldRef, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5E, IPUT_CHAR, "iput-char", k22c, kIndexFieldRef, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x5F, IPUT_SHORT, "iput-short", k22c, kIndexFieldRef, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRegCField) \
+  V(0x60, SGET, "sget", k21c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x61, SGET_WIDE, "sget-wide", k21c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
+  V(0x62, SGET_OBJECT, "sget-object", k21c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x63, SGET_BOOLEAN, "sget-boolean", k21c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x64, SGET_BYTE, "sget-byte", k21c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x65, SGET_CHAR, "sget-char", k21c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x66, SGET_SHORT, "sget-short", k21c, kIndexFieldRef, kContinue | kThrow, kLoad | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x67, SPUT, "sput", k21c, kIndexFieldRef, kContinue | kThrow, kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x68, SPUT_WIDE, "sput-wide", k21c, kIndexFieldRef, kContinue | kThrow, kStore | kRegBFieldOrConstant, kVerifyRegAWide | kVerifyRegBField) \
+  V(0x69, SPUT_OBJECT, "sput-object", k21c, kIndexFieldRef, kContinue | kThrow, kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6A, SPUT_BOOLEAN, "sput-boolean", k21c, kIndexFieldRef, kContinue | kThrow, kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6B, SPUT_BYTE, "sput-byte", k21c, kIndexFieldRef, kContinue | kThrow, kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6C, SPUT_CHAR, "sput-char", k21c, kIndexFieldRef, kContinue | kThrow, kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6D, SPUT_SHORT, "sput-short", k21c, kIndexFieldRef, kContinue | kThrow, kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \
+  V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x6F, INVOKE_SUPER, "invoke-super", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x71, INVOKE_STATIC, "invoke-static", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArg) \
+  V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, kIndexMethodRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArgNonZero) \
+  V(0x73, RETURN_VOID_NO_BARRIER, "return-void-no-barrier", k10x, kIndexNone, kReturn, 0, kVerifyNone) \
+  V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x77, INVOKE_STATIC_RANGE, "invoke-static/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArgRange) \
+  V(0x78, INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, kIndexMethodRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \
+  V(0x79, UNUSED_79, "unused-79", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0x7A, UNUSED_7A, "unused-7a", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0x7B, NEG_INT, "neg-int", k12x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegB) \
+  V(0x7C, NOT_INT, "not-int", k12x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegB) \
+  V(0x7D, NEG_LONG, "neg-long", k12x, kIndexNone, kContinue, 0, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x7E, NOT_LONG, "not-long", k12x, kIndexNone, kContinue, 0, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x7F, NEG_FLOAT, "neg-float", k12x, kIndexNone, kContinue, 0, kVerifyRegA | kVerifyRegB) \
+  V(0x80, NEG_DOUBLE, "neg-double", k12x, kIndexNone, kContinue, 0, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x81, INT_TO_LONG, "int-to-long", k12x, kIndexNone, kContinue, kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x82, INT_TO_FLOAT, "int-to-float", k12x, kIndexNone, kContinue, kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x83, INT_TO_DOUBLE, "int-to-double", k12x, kIndexNone, kContinue, kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x84, LONG_TO_INT, "long-to-int", k12x, kIndexNone, kContinue, kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x85, LONG_TO_FLOAT, "long-to-float", k12x, kIndexNone, kContinue, kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x86, LONG_TO_DOUBLE, "long-to-double", k12x, kIndexNone, kContinue, kCast, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x87, FLOAT_TO_INT, "float-to-int", k12x, kIndexNone, kContinue, kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x88, FLOAT_TO_LONG, "float-to-long", k12x, kIndexNone, kContinue, kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x89, FLOAT_TO_DOUBLE, "float-to-double", k12x, kIndexNone, kContinue, kCast, kVerifyRegAWide | kVerifyRegB) \
+  V(0x8A, DOUBLE_TO_INT, "double-to-int", k12x, kIndexNone, kContinue, kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x8B, DOUBLE_TO_LONG, "double-to-long", k12x, kIndexNone, kContinue, kCast, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0x8C, DOUBLE_TO_FLOAT, "double-to-float", k12x, kIndexNone, kContinue, kCast, kVerifyRegA | kVerifyRegBWide) \
+  V(0x8D, INT_TO_BYTE, "int-to-byte", k12x, kIndexNone, kContinue, kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x8E, INT_TO_CHAR, "int-to-char", k12x, kIndexNone, kContinue, kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x8F, INT_TO_SHORT, "int-to-short", k12x, kIndexNone, kContinue, kCast, kVerifyRegA | kVerifyRegB) \
+  V(0x90, ADD_INT, "add-int", k23x, kIndexNone, kContinue, kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x91, SUB_INT, "sub-int", k23x, kIndexNone, kContinue, kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x92, MUL_INT, "mul-int", k23x, kIndexNone, kContinue, kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x93, DIV_INT, "div-int", k23x, kIndexNone, kContinue | kThrow, kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x94, REM_INT, "rem-int", k23x, kIndexNone, kContinue | kThrow, kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x95, AND_INT, "and-int", k23x, kIndexNone, kContinue, kAnd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x96, OR_INT, "or-int", k23x, kIndexNone, kContinue, kOr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x97, XOR_INT, "xor-int", k23x, kIndexNone, kContinue, kXor, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x98, SHL_INT, "shl-int", k23x, kIndexNone, kContinue, kShl, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x99, SHR_INT, "shr-int", k23x, kIndexNone, kContinue, kShr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x9A, USHR_INT, "ushr-int", k23x, kIndexNone, kContinue, kUshr, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0x9B, ADD_LONG, "add-long", k23x, kIndexNone, kContinue, kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9C, SUB_LONG, "sub-long", k23x, kIndexNone, kContinue, kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9D, MUL_LONG, "mul-long", k23x, kIndexNone, kContinue, kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9E, DIV_LONG, "div-long", k23x, kIndexNone, kContinue | kThrow, kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0x9F, REM_LONG, "rem-long", k23x, kIndexNone, kContinue | kThrow, kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA0, AND_LONG, "and-long", k23x, kIndexNone, kContinue, kAnd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA1, OR_LONG, "or-long", k23x, kIndexNone, kContinue, kOr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA2, XOR_LONG, "xor-long", k23x, kIndexNone, kContinue, kXor, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xA3, SHL_LONG, "shl-long", k23x, kIndexNone, kContinue, kShl, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA4, SHR_LONG, "shr-long", k23x, kIndexNone, kContinue, kShr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA5, USHR_LONG, "ushr-long", k23x, kIndexNone, kContinue, kUshr, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegC) \
+  V(0xA6, ADD_FLOAT, "add-float", k23x, kIndexNone, kContinue, kAdd, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA7, SUB_FLOAT, "sub-float", k23x, kIndexNone, kContinue, kSubtract, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA8, MUL_FLOAT, "mul-float", k23x, kIndexNone, kContinue, kMultiply, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xA9, DIV_FLOAT, "div-float", k23x, kIndexNone, kContinue, kDivide, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xAA, REM_FLOAT, "rem-float", k23x, kIndexNone, kContinue, kRemainder, kVerifyRegA | kVerifyRegB | kVerifyRegC) \
+  V(0xAB, ADD_DOUBLE, "add-double", k23x, kIndexNone, kContinue, kAdd, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAC, SUB_DOUBLE, "sub-double", k23x, kIndexNone, kContinue, kSubtract, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAD, MUL_DOUBLE, "mul-double", k23x, kIndexNone, kContinue, kMultiply, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAE, DIV_DOUBLE, "div-double", k23x, kIndexNone, kContinue, kDivide, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xAF, REM_DOUBLE, "rem-double", k23x, kIndexNone, kContinue, kRemainder, kVerifyRegAWide | kVerifyRegBWide | kVerifyRegCWide) \
+  V(0xB0, ADD_INT_2ADDR, "add-int/2addr", k12x, kIndexNone, kContinue, kAdd, kVerifyRegA | kVerifyRegB) \
+  V(0xB1, SUB_INT_2ADDR, "sub-int/2addr", k12x, kIndexNone, kContinue, kSubtract, kVerifyRegA | kVerifyRegB) \
+  V(0xB2, MUL_INT_2ADDR, "mul-int/2addr", k12x, kIndexNone, kContinue, kMultiply, kVerifyRegA | kVerifyRegB) \
+  V(0xB3, DIV_INT_2ADDR, "div-int/2addr", k12x, kIndexNone, kContinue | kThrow, kDivide, kVerifyRegA | kVerifyRegB) \
+  V(0xB4, REM_INT_2ADDR, "rem-int/2addr", k12x, kIndexNone, kContinue | kThrow, kRemainder, kVerifyRegA | kVerifyRegB) \
+  V(0xB5, AND_INT_2ADDR, "and-int/2addr", k12x, kIndexNone, kContinue, kAnd, kVerifyRegA | kVerifyRegB) \
+  V(0xB6, OR_INT_2ADDR, "or-int/2addr", k12x, kIndexNone, kContinue, kOr, kVerifyRegA | kVerifyRegB) \
+  V(0xB7, XOR_INT_2ADDR, "xor-int/2addr", k12x, kIndexNone, kContinue, kXor, kVerifyRegA | kVerifyRegB) \
+  V(0xB8, SHL_INT_2ADDR, "shl-int/2addr", k12x, kIndexNone, kContinue, kShl, kVerifyRegA | kVerifyRegB) \
+  V(0xB9, SHR_INT_2ADDR, "shr-int/2addr", k12x, kIndexNone, kContinue, kShr, kVerifyRegA | kVerifyRegB) \
+  V(0xBA, USHR_INT_2ADDR, "ushr-int/2addr", k12x, kIndexNone, kContinue, kUshr, kVerifyRegA | kVerifyRegB) \
+  V(0xBB, ADD_LONG_2ADDR, "add-long/2addr", k12x, kIndexNone, kContinue, kAdd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBC, SUB_LONG_2ADDR, "sub-long/2addr", k12x, kIndexNone, kContinue, kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBD, MUL_LONG_2ADDR, "mul-long/2addr", k12x, kIndexNone, kContinue, kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBE, DIV_LONG_2ADDR, "div-long/2addr", k12x, kIndexNone, kContinue | kThrow, kDivide, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xBF, REM_LONG_2ADDR, "rem-long/2addr", k12x, kIndexNone, kContinue | kThrow, kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC0, AND_LONG_2ADDR, "and-long/2addr", k12x, kIndexNone, kContinue, kAnd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC1, OR_LONG_2ADDR, "or-long/2addr", k12x, kIndexNone, kContinue, kOr, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC2, XOR_LONG_2ADDR, "xor-long/2addr", k12x, kIndexNone, kContinue, kXor, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xC3, SHL_LONG_2ADDR, "shl-long/2addr", k12x, kIndexNone, kContinue, kShl, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC4, SHR_LONG_2ADDR, "shr-long/2addr", k12x, kIndexNone, kContinue, kShr, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC5, USHR_LONG_2ADDR, "ushr-long/2addr", k12x, kIndexNone, kContinue, kUshr, kVerifyRegAWide | kVerifyRegB) \
+  V(0xC6, ADD_FLOAT_2ADDR, "add-float/2addr", k12x, kIndexNone, kContinue, kAdd, kVerifyRegA | kVerifyRegB) \
+  V(0xC7, SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, kIndexNone, kContinue, kSubtract, kVerifyRegA | kVerifyRegB) \
+  V(0xC8, MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, kIndexNone, kContinue, kMultiply, kVerifyRegA | kVerifyRegB) \
+  V(0xC9, DIV_FLOAT_2ADDR, "div-float/2addr", k12x, kIndexNone, kContinue, kDivide, kVerifyRegA | kVerifyRegB) \
+  V(0xCA, REM_FLOAT_2ADDR, "rem-float/2addr", k12x, kIndexNone, kContinue, kRemainder, kVerifyRegA | kVerifyRegB) \
+  V(0xCB, ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, kIndexNone, kContinue, kAdd, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCC, SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, kIndexNone, kContinue, kSubtract, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCD, MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, kIndexNone, kContinue, kMultiply, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCE, DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, kIndexNone, kContinue, kDivide, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xCF, REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, kIndexNone, kContinue, kRemainder, kVerifyRegAWide | kVerifyRegBWide) \
+  V(0xD0, ADD_INT_LIT16, "add-int/lit16", k22s, kIndexNone, kContinue, kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD1, RSUB_INT, "rsub-int", k22s, kIndexNone, kContinue, kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD2, MUL_INT_LIT16, "mul-int/lit16", k22s, kIndexNone, kContinue, kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD3, DIV_INT_LIT16, "div-int/lit16", k22s, kIndexNone, kContinue | kThrow, kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD4, REM_INT_LIT16, "rem-int/lit16", k22s, kIndexNone, kContinue | kThrow, kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD5, AND_INT_LIT16, "and-int/lit16", k22s, kIndexNone, kContinue, kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD6, OR_INT_LIT16, "or-int/lit16", k22s, kIndexNone, kContinue, kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD7, XOR_INT_LIT16, "xor-int/lit16", k22s, kIndexNone, kContinue, kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD8, ADD_INT_LIT8, "add-int/lit8", k22b, kIndexNone, kContinue, kAdd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xD9, RSUB_INT_LIT8, "rsub-int/lit8", k22b, kIndexNone, kContinue, kSubtract | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDA, MUL_INT_LIT8, "mul-int/lit8", k22b, kIndexNone, kContinue, kMultiply | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDB, DIV_INT_LIT8, "div-int/lit8", k22b, kIndexNone, kContinue | kThrow, kDivide | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDC, REM_INT_LIT8, "rem-int/lit8", k22b, kIndexNone, kContinue | kThrow, kRemainder | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDD, AND_INT_LIT8, "and-int/lit8", k22b, kIndexNone, kContinue, kAnd | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDE, OR_INT_LIT8, "or-int/lit8", k22b, kIndexNone, kContinue, kOr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xDF, XOR_INT_LIT8, "xor-int/lit8", k22b, kIndexNone, kContinue, kXor | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE0, SHL_INT_LIT8, "shl-int/lit8", k22b, kIndexNone, kContinue, kShl | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE1, SHR_INT_LIT8, "shr-int/lit8", k22b, kIndexNone, kContinue, kShr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE2, USHR_INT_LIT8, "ushr-int/lit8", k22b, kIndexNone, kContinue, kUshr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
+  V(0xE3, IGET_QUICK, "iget-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE6, IPUT_QUICK, "iput-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, kIndexVtableOffset, kContinue | kThrow | kInvoke, 0, kVerifyVarArgNonZero | kVerifyRuntimeOnly) \
+  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, kIndexVtableOffset, kContinue | kThrow | kInvoke, 0, kVerifyVarArgRangeNonZero | kVerifyRuntimeOnly) \
+  V(0xEB, IPUT_BOOLEAN_QUICK, "iput-boolean-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xEC, IPUT_BYTE_QUICK, "iput-byte-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xED, IPUT_CHAR_QUICK, "iput-char-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xEE, IPUT_SHORT_QUICK, "iput-short-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xEF, IGET_BOOLEAN_QUICK, "iget-boolean-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF0, IGET_BYTE_QUICK, "iget-byte-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF1, IGET_CHAR_QUICK, "iget-char-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, kIndexFieldOffset, kContinue | kThrow, kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xF3, UNUSED_F3, "unused-f3", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0xF4, UNUSED_F4, "unused-f4", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0xF5, UNUSED_F5, "unused-f5", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0xF6, UNUSED_F6, "unused-f6", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0xF7, UNUSED_F7, "unused-f7", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0xF8, UNUSED_F8, "unused-f8", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0xF9, UNUSED_F9, "unused-f9", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArgNonZero | kVerifyRegHPrototype) \
+  V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArgRangeNonZero | kVerifyRegHPrototype) \
+  V(0xFC, INVOKE_CUSTOM, "invoke-custom", k35c, kIndexCallSiteRef, kContinue | kThrow, 0, kVerifyRegBCallSite | kVerifyVarArg) \
+  V(0xFD, INVOKE_CUSTOM_RANGE, "invoke-custom/range", k3rc, kIndexCallSiteRef, kContinue | kThrow, 0, kVerifyRegBCallSite | kVerifyVarArgRange) \
+  V(0xFE, UNUSED_FE, "unused-fe", k10x, kIndexUnknown, 0, 0, kVerifyError) \
+  V(0xFF, UNUSED_FF, "unused-ff", k10x, kIndexUnknown, 0, 0, kVerifyError)
 
 #define DEX_INSTRUCTION_FORMAT_LIST(V) \
   V(k10x) \
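
The rewritten DEX_INSTRUCTION_LIST above widens every V(...) entry from seven to eight arguments: the old combined flags column is split into control-flow flags (kContinue, kThrow, kReturn, ...) and a separate column of operation/format flags (kAdd, kLoad, kRegCFieldOrConstant, ...). Every X-macro consumer of the list therefore has to accept one extra parameter. A minimal sketch of such a consumer, with hypothetical macro and table names that are not part of this patch:

#include <cstdint>

// Assumes DEX_INSTRUCTION_LIST(V) from above is in scope, where each entry is
//   V(opcode, NAME, "printable-name", format, index_type, flags, extended_flags, verify_flags)
// so each consumer macro now takes eight parameters instead of seven.
enum class Code : uint8_t {
#define MAKE_ENUM(opcode, cname, p, f, i, flags, eflags, vflags) cname = (opcode),
  DEX_INSTRUCTION_LIST(MAKE_ENUM)
#undef MAKE_ENUM
};

// A parallel table of printable names generated from the same list, so the enum
// and the strings cannot drift apart.
static const char* const kInstructionNames[] = {
#define MAKE_NAME(opcode, cname, pname, f, i, flags, eflags, vflags) pname,
  DEX_INSTRUCTION_LIST(MAKE_NAME)
#undef MAKE_NAME
};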
diff --git a/runtime/dex_instruction_visitor.h b/runtime/dex_instruction_visitor.h
deleted file mode 100644
index 42af6a9..0000000
--- a/runtime/dex_instruction_visitor.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_DEX_INSTRUCTION_VISITOR_H_
-#define ART_RUNTIME_DEX_INSTRUCTION_VISITOR_H_
-
-#include "base/macros.h"
-#include "dex_instruction.h"
-
-namespace art {
-
-template<typename T>
-class DexInstructionVisitor {
- public:
-  void Visit(const uint16_t* code, size_t size_in_bytes) {
-    T* derived = static_cast<T*>(this);
-    size_t size_in_code_units = size_in_bytes / sizeof(uint16_t);
-    size_t i = 0;
-    while (i < size_in_code_units) {
-      const Instruction* inst = Instruction::At(&code[i]);
-      switch (inst->Opcode()) {
-#define INSTRUCTION_CASE(o, cname, p, f, i, a, v)  \
-        case Instruction::cname: {                    \
-          derived->Do_ ## cname(inst);                \
-          break;                                      \
-        }
-#include "dex_instruction_list.h"
-        DEX_INSTRUCTION_LIST(INSTRUCTION_CASE)
-#undef DEX_INSTRUCTION_LIST
-#undef INSTRUCTION_CASE
-        default:
-          CHECK(false);
-      }
-      i += inst->SizeInCodeUnits();
-    }
-  }
-
- private:
-  // Specific handlers for each instruction.
-#define INSTRUCTION_VISITOR(o, cname, p, f, i, a, v)    \
-  void Do_ ## cname(const Instruction* inst) {             \
-    T* derived = static_cast<T*>(this);                    \
-    derived->Do_Default(inst);                             \
-  }
-#include "dex_instruction_list.h"
-  DEX_INSTRUCTION_LIST(INSTRUCTION_VISITOR)
-#undef DEX_INSTRUCTION_LIST
-#undef INSTRUCTION_VISITOR
-
-  // The default instruction handler.
-  void Do_Default(const Instruction*) {
-    return;
-  }
-};
-
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_DEX_INSTRUCTION_VISITOR_H_
diff --git a/runtime/dex_instruction_visitor_test.cc b/runtime/dex_instruction_visitor_test.cc
deleted file mode 100644
index 5273084..0000000
--- a/runtime/dex_instruction_visitor_test.cc
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dex_instruction_visitor.h"
-
-#include <memory>
-
-#include "gtest/gtest.h"
-
-namespace art {
-
-class TestVisitor : public DexInstructionVisitor<TestVisitor> {};
-
-TEST(InstructionTest, Init) {
-  std::unique_ptr<TestVisitor> visitor(new TestVisitor);
-}
-
-class CountVisitor : public DexInstructionVisitor<CountVisitor> {
- public:
-  int count_;
-
-  CountVisitor() : count_(0) {}
-
-  void Do_Default(const Instruction*) {
-    ++count_;
-  }
-};
-
-TEST(InstructionTest, Count) {
-  CountVisitor v0;
-  const uint16_t c0[] = {};
-  v0.Visit(c0, sizeof(c0));
-  EXPECT_EQ(0, v0.count_);
-
-  CountVisitor v1;
-  const uint16_t c1[] = { 0 };
-  v1.Visit(c1, sizeof(c1));
-  EXPECT_EQ(1, v1.count_);
-
-  CountVisitor v2;
-  const uint16_t c2[] = { 0, 0 };
-  v2.Visit(c2, sizeof(c2));
-  EXPECT_EQ(2, v2.count_);
-
-  CountVisitor v3;
-  const uint16_t c3[] = { 0, 0, 0, };
-  v3.Visit(c3, sizeof(c3));
-  EXPECT_EQ(3, v3.count_);
-
-  CountVisitor v4;
-  const uint16_t c4[] = { 0, 0, 0, 0  };
-  v4.Visit(c4, sizeof(c4));
-  EXPECT_EQ(4, v4.count_);
-}
-
-}  // namespace art
diff --git a/runtime/dexopt_test.cc b/runtime/dexopt_test.cc
index 24b1abb..3c8243a 100644
--- a/runtime/dexopt_test.cc
+++ b/runtime/dexopt_test.cc
@@ -45,18 +45,23 @@
 }
 
 void DexoptTest::GenerateOatForTest(const std::string& dex_location,
-                        const std::string& oat_location,
-                        CompilerFilter::Filter filter,
-                        bool relocate,
-                        bool pic,
-                        bool with_alternate_image) {
+                                    const std::string& oat_location_in,
+                                    CompilerFilter::Filter filter,
+                                    bool relocate,
+                                    bool pic,
+                                    bool with_alternate_image) {
   std::string dalvik_cache = GetDalvikCache(GetInstructionSetString(kRuntimeISA));
   std::string dalvik_cache_tmp = dalvik_cache + ".redirected";
-
+  std::string oat_location = oat_location_in;
   if (!relocate) {
     // Temporarily redirect the dalvik cache so dex2oat doesn't find the
     // relocated image file.
     ASSERT_EQ(0, rename(dalvik_cache.c_str(), dalvik_cache_tmp.c_str())) << strerror(errno);
+    // If the oat location is in dalvik cache, replace the cache path with the temporary one.
+    size_t pos = oat_location.find(dalvik_cache);
+    if (pos != std::string::npos) {
+        oat_location = oat_location.replace(pos, dalvik_cache.length(), dalvik_cache_tmp);
+    }
   }
 
   std::vector<std::string> args;
@@ -90,6 +95,7 @@
   if (!relocate) {
     // Restore the dalvik cache if needed.
     ASSERT_EQ(0, rename(dalvik_cache_tmp.c_str(), dalvik_cache.c_str())) << strerror(errno);
+    oat_location = oat_location_in;
   }
 
   // Verify the odex file was generated as expected.
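
The change above keeps dex2oat consistent with the temporarily renamed dalvik cache: while the cache directory is moved aside, any oat location that points into it is rewritten to the temporary path, and the original string is restored once the compile finishes. A self-contained sketch of the same path rewrite, using a hypothetical helper name that does not appear in the patch:

#include <string>

// Hypothetical helper mirroring the logic added above: if the requested oat
// location lives under the (temporarily renamed) dalvik cache, point it at the
// renamed directory for the duration of the dex2oat run.
std::string RedirectIfInDalvikCache(const std::string& oat_location,
                                    const std::string& dalvik_cache,
                                    const std::string& dalvik_cache_tmp) {
  std::string result = oat_location;
  size_t pos = result.find(dalvik_cache);
  if (pos != std::string::npos) {
    result.replace(pos, dalvik_cache.length(), dalvik_cache_tmp);
  }
  return result;
}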
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index 3820d85..5762e4f 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -16,6 +16,7 @@
 
 #include "base/logging.h"
 #include "base/mutex.h"
+#include "base/systrace.h"
 #include "callee_save_frame.h"
 #include "interpreter/interpreter.h"
 #include "obj_ptr-inl.h"  // TODO: Find the other include that isn't complete, and clean this up.
@@ -24,8 +25,9 @@
 
 namespace art {
 
-NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame)
+NO_RETURN static void artDeoptimizeImpl(Thread* self, DeoptimizationKind kind, bool single_frame)
     REQUIRES_SHARED(Locks::mutator_lock_) {
+  Runtime::Current()->IncrementDeoptimizationCount(kind);
   if (VLOG_IS_ON(deopt)) {
     if (single_frame) {
       // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
@@ -38,10 +40,13 @@
 
   self->AssertHasDeoptimizationContext();
   QuickExceptionHandler exception_handler(self, true);
-  if (single_frame) {
-    exception_handler.DeoptimizeSingleFrame();
-  } else {
-    exception_handler.DeoptimizeStack();
+  {
+    ScopedTrace trace(std::string("Deoptimization ") + GetDeoptimizationKindName(kind));
+    if (single_frame) {
+      exception_handler.DeoptimizeSingleFrame(kind);
+    } else {
+      exception_handler.DeoptimizeStack();
+    }
   }
   uintptr_t return_pc = exception_handler.UpdateInstrumentationStack();
   if (exception_handler.IsFullFragmentDone()) {
@@ -57,18 +62,18 @@
 
 extern "C" NO_RETURN void artDeoptimize(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  artDeoptimizeImpl(self, false);
+  artDeoptimizeImpl(self, DeoptimizationKind::kFullFrame, false);
 }
 
-// This is called directly from compiled code by an HDepptimize.
-extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self)
+// This is called directly from compiled code by an HDeoptimize.
+extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(DeoptimizationKind kind, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
   // Before deoptimizing to interpreter, we must push the deoptimization context.
   JValue return_value;
   return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
   self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException());
-  artDeoptimizeImpl(self, true);
+  artDeoptimizeImpl(self, kind, true);
 }
 
 }  // namespace art
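
With this change the deoptimization entrypoints carry a DeoptimizationKind: the count for that kind is bumped via Runtime::Current()->IncrementDeoptimizationCount(kind) before any work is done, and the frame rewrite itself runs under a ScopedTrace labelled with GetDeoptimizationKindName(kind). A rough sketch of what such per-kind bookkeeping can look like; only kFullFrame is taken from the hunk above, and the counter class below is an illustrative stand-in rather than the definitions in deoptimization_kind.h:

#include <atomic>
#include <cstddef>
#include <cstdint>

// Illustrative stand-in for per-kind deoptimization counters; the remaining
// enumerators of the real DeoptimizationKind are omitted here.
enum class DeoptimizationKind : uint8_t {
  kFullFrame,
  kLast = kFullFrame,
};

class DeoptimizationCounters {
 public:
  DeoptimizationCounters() {
    for (auto& count : counts_) {
      count.store(0, std::memory_order_relaxed);
    }
  }
  void Increment(DeoptimizationKind kind) {
    counts_[static_cast<size_t>(kind)].fetch_add(1, std::memory_order_relaxed);
  }
  uint64_t Get(DeoptimizationKind kind) const {
    return counts_[static_cast<size_t>(kind)].load(std::memory_order_relaxed);
  }

 private:
  static constexpr size_t kNumKinds = static_cast<size_t>(DeoptimizationKind::kLast) + 1;
  std::atomic<uint64_t> counts_[kNumKinds];
};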
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 915f18e..6cd9dc1 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -21,6 +21,7 @@
 
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "deoptimization_kind.h"
 #include "offsets.h"
 
 #define QUICK_ENTRYPOINT_OFFSET(ptr_size, x) \
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index e0a2e3c..e2d45ac 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -140,7 +140,7 @@
   V(ThrowNullPointer, void, void) \
   V(ThrowStackOverflow, void, void*) \
   V(ThrowStringBounds, void, int32_t, int32_t) \
-  V(Deoptimize, void, void) \
+  V(Deoptimize, void, DeoptimizationKind) \
 \
   V(A64Load, int64_t, volatile const int64_t *) \
   V(A64Store, void, volatile int64_t *, int64_t) \
diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h
index 4ca52de..fa287cb 100644
--- a/runtime/entrypoints/runtime_asm_entrypoints.h
+++ b/runtime/entrypoints/runtime_asm_entrypoints.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_ENTRYPOINTS_RUNTIME_ASM_ENTRYPOINTS_H_
 #define ART_RUNTIME_ENTRYPOINTS_RUNTIME_ASM_ENTRYPOINTS_H_
 
+#include "deoptimization_kind.h"
+
 namespace art {
 
 #ifndef BUILDING_LIBART
@@ -77,7 +79,7 @@
 }
 
 // Stub to deoptimize from compiled code.
-extern "C" void art_quick_deoptimize_from_compiled_code();
+extern "C" void art_quick_deoptimize_from_compiled_code(DeoptimizationKind);
 
 // The return_pc of instrumentation exit stub.
 extern "C" void art_quick_instrumentation_exit();
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 7f738bf..5594f4d 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -129,7 +129,21 @@
 
 void FaultManager::Init() {
   CHECK(!initialized_);
-  AddSpecialSignalHandlerFn(SIGSEGV, art_fault_handler);
+  sigset_t mask;
+  sigfillset(&mask);
+  sigdelset(&mask, SIGABRT);
+  sigdelset(&mask, SIGBUS);
+  sigdelset(&mask, SIGFPE);
+  sigdelset(&mask, SIGILL);
+  sigdelset(&mask, SIGSEGV);
+
+  SigchainAction sa = {
+    .sc_sigaction = art_fault_handler,
+    .sc_mask = mask,
+    .sc_flags = 0UL,
+  };
+
+  AddSpecialSignalHandlerFn(SIGSEGV, &sa);
   initialized_ = true;
 }
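
The new registration path builds a signal mask that blocks everything except the fatal signals that must stay deliverable while the SIGSEGV handler runs, so a crash inside the handler still terminates the process instead of wedging it. The mask construction is plain POSIX; a self-contained sketch of the same pattern using sigaction() directly rather than the sigchain API:

#include <signal.h>

// Same mask-building pattern as above, shown with plain sigaction(): block all
// signals while the handler runs except the fatal ones we still want delivered.
static void InstallSegvHandler(void (*handler)(int, siginfo_t*, void*)) {
  sigset_t mask;
  sigfillset(&mask);          // start from "block every signal"
  sigdelset(&mask, SIGABRT);  // ...then keep the fatal ones unblocked
  sigdelset(&mask, SIGBUS);
  sigdelset(&mask, SIGFPE);
  sigdelset(&mask, SIGILL);
  sigdelset(&mask, SIGSEGV);

  struct sigaction sa = {};
  sa.sa_sigaction = handler;
  sa.sa_mask = mask;
  sa.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &sa, nullptr);
}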
 
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index a450a75..b0218b5 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -299,7 +299,7 @@
   objects_moved_.StoreRelaxed(0);
   GcCause gc_cause = GetCurrentIteration()->GetGcCause();
   if (gc_cause == kGcCauseExplicit ||
-      gc_cause == kGcCauseForNativeAlloc ||
+      gc_cause == kGcCauseForNativeAllocBlocking ||
       gc_cause == kGcCauseCollectorTransition ||
       GetCurrentIteration()->GetClearSoftReferences()) {
     force_evacuate_all_ = true;
@@ -616,25 +616,8 @@
   ThreadFlipVisitor thread_flip_visitor(this, heap_->use_tlab_);
   FlipCallback flip_callback(this);
 
-  // This is the point where Concurrent-Copying will pause all threads. We report a pause here, if
-  // necessary. This is slightly over-reporting, as this includes the time to actually suspend
-  // threads.
-  {
-    GcPauseListener* pause_listener = GetHeap()->GetGcPauseListener();
-    if (pause_listener != nullptr) {
-      pause_listener->StartPause();
-    }
-  }
-
-  size_t barrier_count = Runtime::Current()->FlipThreadRoots(
-      &thread_flip_visitor, &flip_callback, this);
-
-  {
-    GcPauseListener* pause_listener = GetHeap()->GetGcPauseListener();
-    if (pause_listener != nullptr) {
-      pause_listener->EndPause();
-    }
-  }
+  size_t barrier_count = Runtime::Current()->GetThreadList()->FlipThreadRoots(
+      &thread_flip_visitor, &flip_callback, this, GetHeap()->GetGcPauseListener());
 
   {
     ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index c09e0eb..377f4d3 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -135,6 +135,9 @@
   void RevokeThreadLocalMarkStack(Thread* thread) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
 
+  virtual mirror::Object* IsMarked(mirror::Object* from_ref) OVERRIDE
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   void PushOntoMarkStack(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
@@ -200,8 +203,6 @@
                                  bool do_atomic_update) OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
-  virtual mirror::Object* IsMarked(mirror::Object* from_ref) OVERRIDE
-      REQUIRES_SHARED(Locks::mutator_lock_);
   bool IsMarkedInUnevacFromSpace(mirror::Object* from_ref)
       REQUIRES_SHARED(Locks::mutator_lock_);
   virtual bool IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* field,
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 41e6051..d379892 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -193,6 +193,7 @@
   if (generational_) {
     if (GetCurrentIteration()->GetGcCause() == kGcCauseExplicit ||
         GetCurrentIteration()->GetGcCause() == kGcCauseForNativeAlloc ||
+        GetCurrentIteration()->GetGcCause() == kGcCauseForNativeAllocBlocking ||
         GetCurrentIteration()->GetClearSoftReferences()) {
       // If an explicit, native allocation-triggered, or last attempt
       // collection, collect the whole heap.
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index c35ec7c..2bbc86e 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -29,7 +29,7 @@
     case kGcCauseBackground: return "Background";
     case kGcCauseExplicit: return "Explicit";
     case kGcCauseForNativeAlloc: return "NativeAlloc";
-    case kGcCauseForNativeAllocBackground: return "NativeAllocBackground";
+    case kGcCauseForNativeAllocBlocking: return "NativeAllocBlocking";
     case kGcCauseCollectorTransition: return "CollectorTransition";
     case kGcCauseDisableMovingGc: return "DisableMovingGc";
     case kGcCauseHomogeneousSpaceCompact: return "HomogeneousSpaceCompact";
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index 41c8943..b8cf3c4 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -31,10 +31,12 @@
   kGcCauseBackground,
   // An explicit System.gc() call.
   kGcCauseExplicit,
-  // GC triggered for a native allocation.
+  // GC triggered for a native allocation when NativeAllocationGcWatermark is exceeded.
+  // (This may be a blocking GC depending on whether we run a non-concurrent collector).
   kGcCauseForNativeAlloc,
-  // Background GC triggered for a native allocation.
-  kGcCauseForNativeAllocBackground,
+  // GC triggered for a native allocation when NativeAllocationBlockingGcWatermark is exceeded.
+  // (This is always a blocking GC).
+  kGcCauseForNativeAllocBlocking,
   // GC triggered for a collector transition.
   kGcCauseCollectorTransition,
   // Not a real GC cause, used when we disable moving GC (currently for GetPrimitiveArrayCritical).
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index bd4f99b..668fb4b 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -150,8 +150,13 @@
 static uint8_t* const kPreferredAllocSpaceBegin =
     reinterpret_cast<uint8_t*>(300 * MB - Heap::kDefaultNonMovingSpaceCapacity);
 #else
-// For 32-bit, use 0x20000000 because asan reserves 0x04000000 - 0x20000000.
+#ifdef __ANDROID__
+// For 32-bit Android, use 0x20000000 because asan reserves 0x04000000 - 0x20000000.
 static uint8_t* const kPreferredAllocSpaceBegin = reinterpret_cast<uint8_t*>(0x20000000);
+#else
+// For 32-bit host, use 0x40000000 because asan uses most of the space below this.
+static uint8_t* const kPreferredAllocSpaceBegin = reinterpret_cast<uint8_t*>(0x40000000);
+#endif
 #endif
 
 static inline bool CareAboutPauseTimes() {
@@ -558,6 +563,7 @@
   native_blocking_gc_lock_ = new Mutex("Native blocking GC lock");
   native_blocking_gc_cond_.reset(new ConditionVariable("Native blocking GC condition variable",
                                                        *native_blocking_gc_lock_));
+  native_blocking_gc_is_assigned_ = false;
   native_blocking_gc_in_progress_ = false;
   native_blocking_gcs_finished_ = 0;
 
@@ -2690,6 +2696,10 @@
     // old_native_bytes_allocated_ now that GC has been triggered, resetting
     // new_native_bytes_allocated_ to zero in the process.
     old_native_bytes_allocated_.FetchAndAddRelaxed(new_native_bytes_allocated_.ExchangeRelaxed(0));
+    if (gc_cause == kGcCauseForNativeAllocBlocking) {
+      MutexLock mu(self, *native_blocking_gc_lock_);
+      native_blocking_gc_in_progress_ = true;
+    }
   }
 
   DCHECK_LT(gc_type, collector::kGcTypeMax);
@@ -3521,6 +3531,7 @@
     // it results in log spam. kGcCauseExplicit is already logged in LogGC, so avoid it here too.
     if (cause == kGcCauseForAlloc ||
         cause == kGcCauseForNativeAlloc ||
+        cause == kGcCauseForNativeAllocBlocking ||
         cause == kGcCauseDisableMovingGc) {
       VLOG(gc) << "Starting a blocking GC " << cause;
     }
@@ -3922,33 +3933,36 @@
         // finish before addressing the fact that we exceeded the blocking
         // watermark again.
         do {
+          ScopedTrace trace("RegisterNativeAllocation: Wait For Prior Blocking GC Completion");
           native_blocking_gc_cond_->Wait(self);
         } while (native_blocking_gcs_finished_ == initial_gcs_finished);
         initial_gcs_finished++;
       }
 
       // It's possible multiple threads have seen that we exceeded the
-      // blocking watermark. Ensure that only one of those threads runs the
-      // blocking GC. The rest of the threads should instead wait for the
-      // blocking GC to complete.
+      // blocking watermark. Ensure that only one of those threads is assigned
+      // to run the blocking GC. The rest of the threads should instead wait
+      // for the blocking GC to complete.
       if (native_blocking_gcs_finished_ == initial_gcs_finished) {
-        if (native_blocking_gc_in_progress_) {
+        if (native_blocking_gc_is_assigned_) {
           do {
+            ScopedTrace trace("RegisterNativeAllocation: Wait For Blocking GC Completion");
             native_blocking_gc_cond_->Wait(self);
           } while (native_blocking_gcs_finished_ == initial_gcs_finished);
         } else {
-          native_blocking_gc_in_progress_ = true;
+          native_blocking_gc_is_assigned_ = true;
           run_gc = true;
         }
       }
     }
 
     if (run_gc) {
-      CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAlloc, false);
+      CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAllocBlocking, false);
       RunFinalization(env, kNativeAllocationFinalizeTimeout);
       CHECK(!env->ExceptionCheck());
 
       MutexLock mu(self, *native_blocking_gc_lock_);
+      native_blocking_gc_is_assigned_ = false;
       native_blocking_gc_in_progress_ = false;
       native_blocking_gcs_finished_++;
       native_blocking_gc_cond_->Broadcast(self);
@@ -3957,7 +3971,7 @@
     // Trigger another GC because there have been enough native bytes
     // allocated since the last GC.
     if (IsGcConcurrent()) {
-      RequestConcurrentGC(ThreadForEnv(env), kGcCauseForNativeAllocBackground, /*force_full*/true);
+      RequestConcurrentGC(ThreadForEnv(env), kGcCauseForNativeAlloc, /*force_full*/true);
     } else {
       CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAlloc, false);
     }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index aa123d8..7287178 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -1237,10 +1237,20 @@
   // old_native_bytes_allocated_ and new_native_bytes_allocated_.
   Atomic<size_t> old_native_bytes_allocated_;
 
-  // Used for synchronization of blocking GCs triggered by
-  // RegisterNativeAllocation.
+  // Used for synchronization when multiple threads call into
+  // RegisterNativeAllocation and require blocking GC.
+  // * If a previous blocking GC is in progress, all threads will wait for
+  // that GC to complete, then wait for one of the threads to complete another
+  // blocking GC.
+  // * If a blocking GC is assigned but not in progress, a thread has been
+  // assigned to run a blocking GC but has not started yet. Threads will wait
+  // for the assigned blocking GC to complete.
+  // * If a blocking GC is not assigned nor in progress, the first thread will
+  // run a blocking GC and signal to other threads that blocking GC has been
+  // assigned.
   Mutex* native_blocking_gc_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::unique_ptr<ConditionVariable> native_blocking_gc_cond_ GUARDED_BY(native_blocking_gc_lock_);
+  bool native_blocking_gc_is_assigned_ GUARDED_BY(native_blocking_gc_lock_);
   bool native_blocking_gc_in_progress_ GUARDED_BY(native_blocking_gc_lock_);
   uint32_t native_blocking_gcs_finished_ GUARDED_BY(native_blocking_gc_lock_);
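The comment block above describes a three-state handshake between threads that exceed the blocking watermark: no GC assigned, a GC assigned but not yet started, and a GC in progress. A minimal sketch of that coordination, using std::mutex and std::condition_variable in place of the runtime's Mutex and ConditionVariable (all names here are hypothetical, not part of the Heap API):

#include <condition_variable>
#include <cstdint>
#include <mutex>

class NativeBlockingGcCoordinator {
 public:
  // Called by a thread whose native allocations exceeded the blocking watermark.
  void RegisterNativeAllocation() {
    bool run_gc = false;
    {
      std::unique_lock<std::mutex> lock(lock_);
      uint32_t initial_gcs_finished = gcs_finished_;
      // A previous blocking GC is still running: wait for it to finish first.
      while (gc_in_progress_) {
        cond_.wait(lock, [&] { return gcs_finished_ != initial_gcs_finished; });
        initial_gcs_finished = gcs_finished_;
      }
      if (gc_is_assigned_) {
        // Another thread already claimed the next blocking GC: wait for it.
        cond_.wait(lock, [&] { return gcs_finished_ != initial_gcs_finished; });
      } else {
        // This thread claims the next blocking GC.
        gc_is_assigned_ = true;
        run_gc = true;
      }
    }
    if (run_gc) {
      RunBlockingCollection();  // Placeholder; sets gc_in_progress_ while collecting.
      std::lock_guard<std::mutex> lock(lock_);
      gc_is_assigned_ = false;
      gc_in_progress_ = false;
      ++gcs_finished_;
      cond_.notify_all();  // Wake every thread waiting on this round.
    }
  }

 private:
  void RunBlockingCollection() {}

  std::mutex lock_;
  std::condition_variable cond_;
  bool gc_is_assigned_ = false;   // A thread has claimed the next blocking GC.
  bool gc_in_progress_ = false;   // The claimed GC has actually started.
  uint32_t gcs_finished_ = 0;     // Completed rounds; guards against missed wakeups.
};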
 
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 06638e7..2de4f19 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -40,7 +40,7 @@
 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_ID_OFFSET), (static_cast<int32_t>(art::Thread:: ThinLockIdOffset<art::kRuntimePointerSize>().Int32Value())))
 #define THREAD_IS_GC_MARKING_OFFSET 52
 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_IS_GC_MARKING_OFFSET), (static_cast<int32_t>(art::Thread:: IsGcMarkingOffset<art::kRuntimePointerSize>().Int32Value())))
-#define THREAD_CARD_TABLE_OFFSET 128
+#define THREAD_CARD_TABLE_OFFSET 136
 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CARD_TABLE_OFFSET), (static_cast<int32_t>(art::Thread:: CardTableOffset<art::kRuntimePointerSize>().Int32Value())))
 #define CODEITEM_INSNS_OFFSET 16
 DEFINE_CHECK_EQ(static_cast<int32_t>(CODEITEM_INSNS_OFFSET), (static_cast<int32_t>(__builtin_offsetof(art::DexFile::CodeItem, insns_))))
diff --git a/runtime/image.cc b/runtime/image.cc
index b2486a1..489a53b 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -26,7 +26,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '4', '3', '\0' };  // hash-based DexCache fields
+const uint8_t ImageHeader::kImageVersion[] = { '0', '4', '4', '\0' };  // Thread.interrupted
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
index 869d430..74e6cd2 100644
--- a/runtime/interpreter/interpreter_intrinsics.cc
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -469,6 +469,7 @@
     UNIMPLEMENTED_CASE(UnsafeFullFence /* ()V */)
     UNIMPLEMENTED_CASE(ReferenceGetReferent /* ()Ljava/lang/Object; */)
     UNIMPLEMENTED_CASE(IntegerValueOf /* (I)Ljava/lang/Integer; */)
+    UNIMPLEMENTED_CASE(ThreadInterrupted /* ()Z */)
     case Intrinsics::kNone:
       res = false;
       break;
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index b32b272..ae474da 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -20,6 +20,7 @@
 
 #include "art_method-inl.h"
 #include "base/enums.h"
+#include "base/memory_tool.h"
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "interpreter/interpreter.h"
@@ -308,20 +309,23 @@
   Thread* self = Thread::Current();
   DCHECK(Runtime::Current()->IsShuttingDown(self));
   if (thread_pool_ != nullptr) {
-    ThreadPool* cache = nullptr;
+    std::unique_ptr<ThreadPool> pool;
     {
       ScopedSuspendAll ssa(__FUNCTION__);
       // Clear thread_pool_ field while the threads are suspended.
       // A mutator in the 'AddSamples' method will check against it.
-      cache = thread_pool_.release();
+      pool = std::move(thread_pool_);
     }
-    cache->StopWorkers(self);
-    cache->RemoveAllTasks(self);
+
+    // When running sanitized, let all tasks finish to not leak. Otherwise just clear the queue.
+    if (!RUNNING_ON_MEMORY_TOOL) {
+      pool->StopWorkers(self);
+      pool->RemoveAllTasks(self);
+    }
     // We could just suspend all threads, but we know those threads
     // will finish in a short period, so it's not worth adding a suspend logic
     // here. Besides, this is only done for shutdown.
-    cache->Wait(self, false, false);
-    delete cache;
+    pool->Wait(self, false, false);
   }
 }
 
@@ -353,6 +357,7 @@
   DCHECK(!profile_saver_options_.IsEnabled() || !ProfileSaver::IsStarted());
   if (dump_info_on_shutdown_) {
     DumpInfo(LOG_STREAM(INFO));
+    Runtime::Current()->DumpDeoptimizations(LOG_STREAM(INFO));
   }
   DeleteThreadPool();
   if (jit_compiler_handle_ != nullptr) {
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index e9a5ae5..5232252 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -18,6 +18,7 @@
 
 #include <sstream>
 
+#include "arch/context.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "base/stl_util.h"
@@ -149,7 +150,6 @@
       used_memory_for_code_(0),
       number_of_compilations_(0),
       number_of_osr_compilations_(0),
-      number_of_deoptimizations_(0),
       number_of_collections_(0),
       histogram_stack_map_memory_use_("Memory used for stack maps", 16),
       histogram_code_memory_use_("Memory used for compiled code", 16),
@@ -323,12 +323,20 @@
   return data - ComputeRootTableSize(roots);
 }
 
+// Use a sentinel for marking entries in the JIT table that have been cleared.
+// This helps diagnose cases where the compiled code wrongly accesses such
+// entries.
+static mirror::Class* const weak_sentinel =
+    reinterpret_cast<mirror::Class*>(Context::kBadGprBase + 0xff);
+
 // Helper for the GC to process a weak class in a JIT root table.
-static inline void ProcessWeakClass(GcRoot<mirror::Class>* root_ptr, IsMarkedVisitor* visitor)
+static inline void ProcessWeakClass(GcRoot<mirror::Class>* root_ptr,
+                                    IsMarkedVisitor* visitor,
+                                    mirror::Class* update)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // This does not need a read barrier because this is called by GC.
   mirror::Class* cls = root_ptr->Read<kWithoutReadBarrier>();
-  if (cls != nullptr) {
+  if (cls != nullptr && cls != weak_sentinel) {
     DCHECK((cls->IsClass<kDefaultVerifyFlags, kWithoutReadBarrier>()));
     // Look at the classloader of the class to know if it has been unloaded.
     // This does not need a read barrier because this is called by GC.
@@ -343,7 +351,7 @@
       }
     } else {
       // The class loader is not live, clear the entry.
-      *root_ptr = GcRoot<mirror::Class>(nullptr);
+      *root_ptr = GcRoot<mirror::Class>(update);
     }
   }
 }
@@ -357,7 +365,7 @@
     for (uint32_t i = 0; i < number_of_roots; ++i) {
       // This does not need a read barrier because this is called by GC.
       mirror::Object* object = roots[i].Read<kWithoutReadBarrier>();
-      if (object == nullptr) {
+      if (object == nullptr || object == weak_sentinel) {
         // entry got deleted in a previous sweep.
       } else if (object->IsString<kDefaultVerifyFlags, kWithoutReadBarrier>()) {
         mirror::Object* new_object = visitor->IsMarked(object);
@@ -372,7 +380,8 @@
           roots[i] = GcRoot<mirror::Object>(new_object);
         }
       } else {
-        ProcessWeakClass(reinterpret_cast<GcRoot<mirror::Class>*>(&roots[i]), visitor);
+        ProcessWeakClass(
+            reinterpret_cast<GcRoot<mirror::Class>*>(&roots[i]), visitor, weak_sentinel);
       }
     }
   }
@@ -381,7 +390,7 @@
     for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
       InlineCache* cache = &info->cache_[i];
       for (size_t j = 0; j < InlineCache::kIndividualCacheSize; ++j) {
-        ProcessWeakClass(&cache->classes_[j], visitor);
+        ProcessWeakClass(&cache->classes_[j], visitor, nullptr);
       }
     }
   }
@@ -670,13 +679,13 @@
   }
   method->SetProfilingInfo(nullptr);
   ScopedCodeCacheWrite ccw(code_map_.get());
-  for (auto code_iter = method_code_map_.begin();
-       code_iter != method_code_map_.end();
-       ++code_iter) {
+  for (auto code_iter = method_code_map_.begin(); code_iter != method_code_map_.end();) {
     if (code_iter->second == method) {
       FreeCode(code_iter->first);
-      method_code_map_.erase(code_iter);
+      code_iter = method_code_map_.erase(code_iter);
+      continue;
     }
+    ++code_iter;
   }
   auto code_map = osr_code_map_.find(method);
   if (code_map != osr_code_map_.end()) {
@@ -1416,8 +1425,6 @@
       osr_code_map_.erase(it);
     }
   }
-  MutexLock mu(Thread::Current(), lock_);
-  number_of_deoptimizations_++;
 }
 
 uint8_t* JitCodeCache::AllocateCode(size_t code_size) {
@@ -1456,7 +1463,6 @@
      << "Total number of JIT compilations: " << number_of_compilations_ << "\n"
      << "Total number of JIT compilations for on stack replacement: "
         << number_of_osr_compilations_ << "\n"
-     << "Total number of deoptimizations: " << number_of_deoptimizations_ << "\n"
      << "Total number of JIT code cache collections: " << number_of_collections_ << std::endl;
   histogram_stack_map_memory_use_.PrintMemoryUse(os);
   histogram_code_memory_use_.PrintMemoryUse(os);
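The weak_sentinel change earlier in this file replaces cleared JIT root-table entries with a recognizable bad pointer instead of null, so compiled code that wrongly reads a stale entry faults at a distinctive address, while inline caches (read only by the runtime) are still cleared to null. A standalone sketch of that idea (hypothetical types; the constant merely stands in for Context::kBadGprBase + 0xff):

#include <cstdint>

struct Klass;  // Stand-in for mirror::Class.

// Any recognizable, unmapped address works as a sentinel; the value below is
// purely illustrative.
static Klass* const kWeakSentinel = reinterpret_cast<Klass*>(uintptr_t{0xebadbeef});

// Stand-in for IsMarkedVisitor::IsMarked: returns the (possibly moved) object
// if it is still live, or nullptr otherwise. Trivial stub for the sketch.
Klass* IsAlive(Klass* cls) { return cls; }

// Process one weak slot. 'update' is what a dead entry becomes: kWeakSentinel
// for JIT root tables, nullptr for inline caches.
void ProcessWeakSlot(Klass** slot, Klass* update) {
  Klass* cls = *slot;
  if (cls == nullptr || cls == kWeakSentinel) {
    return;  // Entry was already cleared in a previous sweep.
  }
  Klass* alive = IsAlive(cls);
  *slot = (alive != nullptr) ? alive : update;
}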
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index db214e7..612d06b 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -384,9 +384,6 @@
   // Number of compilations for on-stack-replacement done throughout the lifetime of the JIT.
   size_t number_of_osr_compilations_ GUARDED_BY(lock_);
 
-  // Number of deoptimizations done throughout the lifetime of the JIT.
-  size_t number_of_deoptimizations_ GUARDED_BY(lock_);
-
   // Number of code cache collections done throughout the lifetime of the JIT.
   size_t number_of_collections_ GUARDED_BY(lock_);
 
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index 52649c7..9ea5ece 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -18,11 +18,18 @@
 
 #include "errno.h"
 #include <limits.h>
+#include <string>
 #include <vector>
 #include <stdlib.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <zlib.h>
+#include <base/time_utils.h>
 
 #include "base/mutex.h"
 #include "base/scoped_flock.h"
@@ -33,13 +40,14 @@
 #include "os.h"
 #include "safe_map.h"
 #include "utils.h"
+#include "android-base/file.h"
 
 namespace art {
 
 const uint8_t ProfileCompilationInfo::kProfileMagic[] = { 'p', 'r', 'o', '\0' };
-// Last profile version: fix profman merges. Update profile version to force
-// regeneration of possibly faulty profiles.
-const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '5', '\0' };
+// Last profile version: Instead of method index, put the difference with the last
+// method's index.
+const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '7', '\0' };
 
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
@@ -115,7 +123,11 @@
   ScopedTrace trace(__PRETTY_FUNCTION__);
   ScopedFlock flock;
   std::string error;
-  if (!flock.Init(filename.c_str(), O_RDWR | O_NOFOLLOW | O_CLOEXEC, /* block */ false, &error)) {
+  int flags = O_RDWR | O_NOFOLLOW | O_CLOEXEC;
+  // There's no need to fsync profile data right away. We get many chances
+  // to write it again in case something goes wrong. We can rely on a simple
+  // close(), no sync, and let the kernel decide when to write to disk.
+  if (!flock.Init(filename.c_str(), flags, /*block*/false, /*flush_on_close*/false, &error)) {
     LOG(WARNING) << "Couldn't lock the profile file " << filename << ": " << error;
     return false;
   }
@@ -205,12 +217,12 @@
 
 /**
  * Serialization format:
- *    magic,version,number_of_dex_files
- *    dex_location1,number_of_classes1,methods_region_size,dex_location_checksum1, \
+ *    magic,version,number_of_dex_files,uncompressed_size_of_zipped_data,compressed_data_size,
+ *    zipped[dex_location1,number_of_classes1,methods_region_size,dex_location_checksum1, \
  *        method_encoding_11,method_encoding_12...,class_id1,class_id2...
  *    dex_location2,number_of_classes2,methods_region_size,dex_location_checksum2, \
  *        method_encoding_21,method_encoding_22...,,class_id1,class_id2...
- *    .....
+ *    .....]
  * The method_encoding is:
  *    method_id,number_of_inline_caches,inline_cache1,inline_cache2...
  * The inline_cache is:
@@ -224,28 +236,53 @@
  *    When present, there will be no class ids following.
  **/
 bool ProfileCompilationInfo::Save(int fd) {
+  uint64_t start = NanoTime();
   ScopedTrace trace(__PRETTY_FUNCTION__);
   DCHECK_GE(fd, 0);
 
-  // Cache at most 50KB before writing.
-  static constexpr size_t kMaxSizeToKeepBeforeWriting = 50 * KB;
   // Use a vector wrapper to avoid keeping track of offsets when we add elements.
   std::vector<uint8_t> buffer;
-  WriteBuffer(fd, kProfileMagic, sizeof(kProfileMagic));
-  WriteBuffer(fd, kProfileVersion, sizeof(kProfileVersion));
+  if (!WriteBuffer(fd, kProfileMagic, sizeof(kProfileMagic))) {
+    return false;
+  }
+  if (!WriteBuffer(fd, kProfileVersion, sizeof(kProfileVersion))) {
+    return false;
+  }
   DCHECK_LE(info_.size(), std::numeric_limits<uint8_t>::max());
   AddUintToBuffer(&buffer, static_cast<uint8_t>(info_.size()));
 
+  uint32_t required_capacity = 0;
+  for (const DexFileData* dex_data_ptr : info_) {
+    const DexFileData& dex_data = *dex_data_ptr;
+    uint32_t methods_region_size = GetMethodsRegionSize(dex_data);
+    required_capacity += kLineHeaderSize +
+        dex_data.profile_key.size() +
+        sizeof(uint16_t) * dex_data.class_set.size() +
+        methods_region_size;
+  }
+  if (required_capacity > kProfileSizeErrorThresholdInBytes) {
+    LOG(ERROR) << "Profile data size exceeds "
+               << std::to_string(kProfileSizeErrorThresholdInBytes)
+               << " bytes. Profile will not be written to disk.";
+    return false;
+  }
+  if (required_capacity > kProfileSizeWarningThresholdInBytes) {
+    LOG(WARNING) << "Profile data size exceeds "
+                 << std::to_string(kProfileSizeWarningThresholdInBytes);
+  }
+  AddUintToBuffer(&buffer, required_capacity);
+  if (!WriteBuffer(fd, buffer.data(), buffer.size())) {
+    return false;
+  }
+  // Make sure that the buffer has enough capacity to avoid repeated resizings
+  // while we add data.
+  buffer.reserve(required_capacity);
+  buffer.clear();
+
   // Dex files must be written in the order of their profile index. This
   // avoids writing the index in the output file and simplifies the parsing logic.
   for (const DexFileData* dex_data_ptr : info_) {
     const DexFileData& dex_data = *dex_data_ptr;
-    if (buffer.size() > kMaxSizeToKeepBeforeWriting) {
-      if (!WriteBuffer(fd, buffer.data(), buffer.size())) {
-        return false;
-      }
-      buffer.clear();
-    }
 
     // Note that we allow dex files without any methods or classes, so that
     // inline caches can refer valid dex files.
@@ -255,16 +292,8 @@
       return false;
     }
 
-    // Make sure that the buffer has enough capacity to avoid repeated resizings
-    // while we add data.
     uint32_t methods_region_size = GetMethodsRegionSize(dex_data);
-    size_t required_capacity = buffer.size() +
-        kLineHeaderSize +
-        dex_data.profile_key.size() +
-        sizeof(uint16_t) * dex_data.class_set.size() +
-        methods_region_size;
 
-    buffer.reserve(required_capacity);
     DCHECK_LE(dex_data.profile_key.size(), std::numeric_limits<uint16_t>::max());
     DCHECK_LE(dex_data.class_set.size(), std::numeric_limits<uint16_t>::max());
     AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.profile_key.size()));
@@ -274,19 +303,49 @@
 
     AddStringToBuffer(&buffer, dex_data.profile_key);
 
+    uint16_t last_method_index = 0;
     for (const auto& method_it : dex_data.method_map) {
-      AddUintToBuffer(&buffer, method_it.first);
+      // Store the difference between the method indices. The SafeMap is ordered by
+      // method_id, so the difference will always be non-negative.
+      DCHECK_GE(method_it.first, last_method_index);
+      uint16_t diff_with_last_method_index = method_it.first - last_method_index;
+      last_method_index = method_it.first;
+      AddUintToBuffer(&buffer, diff_with_last_method_index);
       AddInlineCacheToBuffer(&buffer, method_it.second);
     }
-    for (const auto& class_id : dex_data.class_set) {
-      AddUintToBuffer(&buffer, class_id.index_);
-    }
 
-    DCHECK_LE(required_capacity, buffer.size())
-        << "Failed to add the expected number of bytes in the buffer";
+    uint16_t last_class_index = 0;
+    for (const auto& class_id : dex_data.class_set) {
+      // Store the difference between the class indices. The set is ordered by
+      // class_id, so the difference will always be non-negative.
+      DCHECK_GE(class_id.index_, last_class_index);
+      uint16_t diff_with_last_class_index = class_id.index_ - last_class_index;
+      last_class_index = class_id.index_;
+      AddUintToBuffer(&buffer, diff_with_last_class_index);
+    }
   }
 
-  return WriteBuffer(fd, buffer.data(), buffer.size());
+  uint32_t output_size = 0;
+  std::unique_ptr<uint8_t[]> compressed_buffer = DeflateBuffer(buffer.data(),
+                                                               required_capacity,
+                                                               &output_size);
+
+  buffer.clear();
+  AddUintToBuffer(&buffer, output_size);
+
+  if (!WriteBuffer(fd, buffer.data(), buffer.size())) {
+    return false;
+  }
+  if (!WriteBuffer(fd, compressed_buffer.get(), output_size)) {
+    return false;
+  }
+  uint64_t total_time = NanoTime() - start;
+  VLOG(profiler) << "Compressed from "
+                 << std::to_string(required_capacity)
+                 << " to "
+                 << std::to_string(output_size);
+  VLOG(profiler) << "Time to save profile: " << std::to_string(total_time);
+  return true;
 }
 
 void ProfileCompilationInfo::AddInlineCacheToBuffer(std::vector<uint8_t>* buffer,
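The Save() hunk above no longer writes absolute method and class indices; because both containers iterate in sorted order, it writes each index as the difference from the previous one (the load path below reverses this), and the resulting small values tend to compress well once the buffer is deflated. A self-contained sketch of the encoding and its inverse, with hypothetical helper names:

#include <cstdint>
#include <vector>

// Encode a sorted list of 16-bit indices as differences from the previous index.
std::vector<uint16_t> DeltaEncode(const std::vector<uint16_t>& sorted_indices) {
  std::vector<uint16_t> deltas;
  deltas.reserve(sorted_indices.size());
  uint16_t last = 0;
  for (uint16_t index : sorted_indices) {
    deltas.push_back(index - last);  // Non-negative because the input is sorted.
    last = index;
  }
  return deltas;
}

// Recover the original indices by accumulating the differences.
std::vector<uint16_t> DeltaDecode(const std::vector<uint16_t>& deltas) {
  std::vector<uint16_t> indices;
  indices.reserve(deltas.size());
  uint16_t last = 0;
  for (uint16_t diff : deltas) {
    last += diff;
    indices.push_back(last);
  }
  return indices;
}

// Example: {3, 10, 11, 400} encodes to {3, 7, 1, 389} and decodes back.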
@@ -580,33 +639,60 @@
                                          uint8_t number_of_dex_files,
                                          const ProfileLineHeader& line_header,
                                          /*out*/std::string* error) {
-  while (buffer.HasMoreData()) {
+  uint32_t unread_bytes_before_operation = buffer.CountUnreadBytes();
+  if (unread_bytes_before_operation < line_header.method_region_size_bytes) {
+    *error += "Profile EOF reached prematurely for ReadMethods";
+    return false;
+  }
+  size_t expected_unread_bytes_after_operation = buffer.CountUnreadBytes()
+      - line_header.method_region_size_bytes;
+  uint16_t last_method_index = 0;
+  while (buffer.CountUnreadBytes() > expected_unread_bytes_after_operation) {
     DexFileData* const data = GetOrAddDexFileData(line_header.dex_location, line_header.checksum);
-    uint16_t method_index;
-    READ_UINT(uint16_t, buffer, method_index, error);
-
+    uint16_t diff_with_last_method_index;
+    READ_UINT(uint16_t, buffer, diff_with_last_method_index, error);
+    uint16_t method_index = last_method_index + diff_with_last_method_index;
+    last_method_index = method_index;
     auto it = data->method_map.FindOrAdd(method_index);
     if (!ReadInlineCache(buffer, number_of_dex_files, &(it->second), error)) {
       return false;
     }
   }
-
+  uint32_t total_bytes_read = unread_bytes_before_operation - buffer.CountUnreadBytes();
+  if (total_bytes_read != line_header.method_region_size_bytes) {
+    *error += "Profile data inconsistent for ReadMethods";
+    return false;
+  }
   return true;
 }
 
 bool ProfileCompilationInfo::ReadClasses(SafeBuffer& buffer,
-                                         uint16_t classes_to_read,
                                          const ProfileLineHeader& line_header,
                                          /*out*/std::string* error) {
-  for (uint16_t i = 0; i < classes_to_read; i++) {
-    uint16_t type_index;
-    READ_UINT(uint16_t, buffer, type_index, error);
+  size_t unread_bytes_before_op = buffer.CountUnreadBytes();
+  if (unread_bytes_before_op < line_header.class_set_size) {
+    *error += "Profile EOF reached prematurely for ReadClasses";
+    return false;
+  }
+
+  uint16_t last_class_index = 0;
+  for (uint16_t i = 0; i < line_header.class_set_size; i++) {
+    uint16_t diff_with_last_class_index;
+    READ_UINT(uint16_t, buffer, diff_with_last_class_index, error);
+    uint16_t type_index = last_class_index + diff_with_last_class_index;
+    last_class_index = type_index;
     if (!AddClassIndex(line_header.dex_location,
                        line_header.checksum,
                        dex::TypeIndex(type_index))) {
       return false;
     }
   }
+  size_t total_bytes_read = unread_bytes_before_op - buffer.CountUnreadBytes();
+  uint32_t expected_bytes_read = line_header.class_set_size * sizeof(uint16_t);
+  if (total_bytes_read != expected_bytes_read) {
+    *error += "Profile data inconsistent for ReadClasses";
+    return false;
+  }
   return true;
 }
 
@@ -646,15 +732,11 @@
   return false;
 }
 
-bool ProfileCompilationInfo::SafeBuffer::HasMoreData() {
-  return ptr_current_ < ptr_end_;
-}
-
 ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::SafeBuffer::FillFromFd(
       int fd,
       const std::string& source,
       /*out*/std::string* error) {
-  size_t byte_count = ptr_end_ - ptr_current_;
+  size_t byte_count = (ptr_end_ - ptr_current_) * sizeof(*ptr_current_);
   uint8_t* buffer = ptr_current_;
   while (byte_count > 0) {
     int bytes_read = TEMP_FAILURE_RETRY(read(fd, buffer, byte_count));
@@ -671,15 +753,31 @@
   return kProfileLoadSuccess;
 }
 
+size_t ProfileCompilationInfo::SafeBuffer::CountUnreadBytes() {
+  return (ptr_end_ - ptr_current_) * sizeof(*ptr_current_);
+}
+
+const uint8_t* ProfileCompilationInfo::SafeBuffer::GetCurrentPtr() {
+  return ptr_current_;
+}
+
+void ProfileCompilationInfo::SafeBuffer::Advance(size_t data_size) {
+  ptr_current_ += data_size;
+}
+
 ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileHeader(
       int fd,
       /*out*/uint8_t* number_of_dex_files,
+      /*out*/uint32_t* uncompressed_data_size,
+      /*out*/uint32_t* compressed_data_size,
       /*out*/std::string* error) {
   // Read magic and version
   const size_t kMagicVersionSize =
     sizeof(kProfileMagic) +
     sizeof(kProfileVersion) +
-    sizeof(uint8_t);  // number of dex files
+    sizeof(uint8_t) +  // number of dex files
+    sizeof(uint32_t) +  // size of uncompressed profile data
+    sizeof(uint32_t);  // size of compressed profile data
 
   SafeBuffer safe_buffer(kMagicVersionSize);
 
@@ -700,6 +798,14 @@
     *error = "Cannot read the number of dex files";
     return kProfileLoadBadData;
   }
+  if (!safe_buffer.ReadUintAndAdvance<uint32_t>(uncompressed_data_size)) {
+    *error = "Cannot read the size of uncompressed data";
+    return kProfileLoadBadData;
+  }
+  if (!safe_buffer.ReadUintAndAdvance<uint32_t>(compressed_data_size)) {
+    *error = "Cannot read the size of compressed data";
+    return kProfileLoadBadData;
+  }
   return kProfileLoadSuccess;
 }
 
@@ -715,17 +821,16 @@
 }
 
 ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileLineHeader(
-      int fd,
-      /*out*/ProfileLineHeader* line_header,
-      /*out*/std::string* error) {
-  SafeBuffer header_buffer(kLineHeaderSize);
-  ProfileLoadSatus status = header_buffer.FillFromFd(fd, "ReadProfileLineHeader", error);
-  if (status != kProfileLoadSuccess) {
-    return status;
+    SafeBuffer& buffer,
+    /*out*/ProfileLineHeader* line_header,
+    /*out*/std::string* error) {
+  if (buffer.CountUnreadBytes() < kLineHeaderSize) {
+    *error += "Profile EOF reached prematurely for ReadProfileLineHeader";
+    return kProfileLoadBadData;
   }
 
   uint16_t dex_location_size;
-  if (!ReadProfileLineHeaderElements(header_buffer, &dex_location_size, line_header, error)) {
+  if (!ReadProfileLineHeaderElements(buffer, &dex_location_size, line_header, error)) {
     return kProfileLoadBadData;
   }
 
@@ -735,18 +840,19 @@
     return kProfileLoadBadData;
   }
 
-  SafeBuffer location_buffer(dex_location_size);
-  status = location_buffer.FillFromFd(fd, "ReadProfileHeaderDexLocation", error);
-  if (status != kProfileLoadSuccess) {
-    return status;
+  if (buffer.CountUnreadBytes() < dex_location_size) {
+    *error += "Profile EOF reached prematurely for ReadProfileHeaderDexLocation";
+    return kProfileLoadBadData;
   }
+  const uint8_t* base_ptr = buffer.GetCurrentPtr();
   line_header->dex_location.assign(
-      reinterpret_cast<char*>(location_buffer.Get()), dex_location_size);
+      reinterpret_cast<const char*>(base_ptr), dex_location_size);
+  buffer.Advance(dex_location_size);
   return kProfileLoadSuccess;
 }
 
 ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileLine(
-      int fd,
+      SafeBuffer& buffer,
       uint8_t number_of_dex_files,
       const ProfileLineHeader& line_header,
       /*out*/std::string* error) {
@@ -756,29 +862,13 @@
     return kProfileLoadBadData;
   }
 
-  {
-    SafeBuffer buffer(line_header.method_region_size_bytes);
-    ProfileLoadSatus status = buffer.FillFromFd(fd, "ReadProfileLineMethods", error);
-    if (status != kProfileLoadSuccess) {
-      return status;
-    }
-
-    if (!ReadMethods(buffer, number_of_dex_files, line_header, error)) {
-      return kProfileLoadBadData;
-    }
+  if (!ReadMethods(buffer, number_of_dex_files, line_header, error)) {
+    return kProfileLoadBadData;
   }
 
-  {
-    SafeBuffer buffer(sizeof(uint16_t) * line_header.class_set_size);
-    ProfileLoadSatus status = buffer.FillFromFd(fd, "ReadProfileLineClasses", error);
-    if (status != kProfileLoadSuccess) {
-      return status;
-    }
-    if (!ReadClasses(buffer, line_header.class_set_size, line_header, error)) {
-      return kProfileLoadBadData;
-    }
+  if (!ReadClasses(buffer, line_header, error)) {
+    return kProfileLoadBadData;
   }
-
   return kProfileLoadSuccess;
 }
 
@@ -817,39 +907,135 @@
   }
   // Read profile header: magic + version + number_of_dex_files.
   uint8_t number_of_dex_files;
-  ProfileLoadSatus status = ReadProfileHeader(fd, &number_of_dex_files, error);
+  uint32_t uncompressed_data_size;
+  uint32_t compressed_data_size;
+  ProfileLoadSatus status = ReadProfileHeader(fd,
+                                              &number_of_dex_files,
+                                              &uncompressed_data_size,
+                                              &compressed_data_size,
+                                              error);
+
   if (status != kProfileLoadSuccess) {
     return status;
   }
 
+  if (uncompressed_data_size > kProfileSizeErrorThresholdInBytes) {
+    LOG(ERROR) << "Profile data size exceeds "
+               << std::to_string(kProfileSizeErrorThresholdInBytes)
+               << " bytes";
+    return kProfileLoadBadData;
+  }
+  if (uncompressed_data_size > kProfileSizeWarningThresholdInBytes) {
+    LOG(WARNING) << "Profile data size exceeds "
+                 << std::to_string(kProfileSizeWarningThresholdInBytes)
+                 << " bytes";
+  }
+
+  std::unique_ptr<uint8_t[]> compressed_data(new uint8_t[compressed_data_size]);
+  bool bytes_read_success =
+      android::base::ReadFully(fd, compressed_data.get(), compressed_data_size);
+
+  if (testEOF(fd) != 0) {
+    *error += "Unexpected data in the profile file.";
+    return kProfileLoadBadData;
+  }
+
+  if (!bytes_read_success) {
+    *error += "Unable to read compressed profile data";
+    return kProfileLoadBadData;
+  }
+
+  SafeBuffer uncompressed_data(uncompressed_data_size);
+
+  int ret = InflateBuffer(compressed_data.get(),
+                          compressed_data_size,
+                          uncompressed_data_size,
+                          uncompressed_data.Get());
+
+  if (ret != Z_STREAM_END) {
+    *error += "Error reading uncompressed profile data";
+    return kProfileLoadBadData;
+  }
+
   for (uint8_t k = 0; k < number_of_dex_files; k++) {
     ProfileLineHeader line_header;
 
     // First, read the line header to get the amount of data we need to read.
-    status = ReadProfileLineHeader(fd, &line_header, error);
+    status = ReadProfileLineHeader(uncompressed_data, &line_header, error);
     if (status != kProfileLoadSuccess) {
       return status;
     }
 
     // Now read the actual profile line.
-    status = ReadProfileLine(fd, number_of_dex_files, line_header, error);
+    status = ReadProfileLine(uncompressed_data, number_of_dex_files, line_header, error);
     if (status != kProfileLoadSuccess) {
       return status;
     }
   }
 
   // Check that we read everything and that profiles don't contain junk data.
-  int result = testEOF(fd);
-  if (result == 0) {
-    return kProfileLoadSuccess;
-  } else if (result < 0) {
-    return kProfileLoadIOError;
-  } else {
+  if (uncompressed_data.CountUnreadBytes() > 0) {
     *error = "Unexpected content in the profile file";
     return kProfileLoadBadData;
+  } else {
+    return kProfileLoadSuccess;
   }
 }
 
+std::unique_ptr<uint8_t[]> ProfileCompilationInfo::DeflateBuffer(const uint8_t* in_buffer,
+                                                                 uint32_t in_size,
+                                                                 uint32_t* compressed_data_size) {
+  z_stream strm;
+  strm.zalloc = Z_NULL;
+  strm.zfree = Z_NULL;
+  strm.opaque = Z_NULL;
+  int ret = deflateInit(&strm, 1);
+  if (ret != Z_OK) {
+    return nullptr;
+  }
+
+  uint32_t out_size = deflateBound(&strm, in_size);
+
+  std::unique_ptr<uint8_t[]> compressed_buffer(new uint8_t[out_size]);
+  strm.avail_in = in_size;
+  strm.next_in = const_cast<uint8_t*>(in_buffer);
+  strm.avail_out = out_size;
+  strm.next_out = &compressed_buffer[0];
+  ret = deflate(&strm, Z_FINISH);
+  if (ret == Z_STREAM_ERROR) {
+    return nullptr;
+  }
+  *compressed_data_size = out_size - strm.avail_out;
+  deflateEnd(&strm);
+  return compressed_buffer;
+}
+
+int ProfileCompilationInfo::InflateBuffer(const uint8_t* in_buffer,
+                                          uint32_t in_size,
+                                          uint32_t expected_uncompressed_data_size,
+                                          uint8_t* out_buffer) {
+  z_stream strm;
+
+  /* allocate inflate state */
+  strm.zalloc = Z_NULL;
+  strm.zfree = Z_NULL;
+  strm.opaque = Z_NULL;
+  strm.avail_in = in_size;
+  strm.next_in = const_cast<uint8_t*>(in_buffer);
+  strm.avail_out = expected_uncompressed_data_size;
+  strm.next_out = out_buffer;
+
+  int ret;
+  inflateInit(&strm);
+  ret = inflate(&strm, Z_NO_FLUSH);
+
+  if (strm.avail_in != 0 || strm.avail_out != 0) {
+    return Z_DATA_ERROR;
+  }
+  inflateEnd(&strm);
+  return ret;
+}
+
 bool ProfileCompilationInfo::MergeWith(const ProfileCompilationInfo& other) {
   // First verify that all checksums match. This will avoid adding garbage to
   // the current profile info.
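DeflateBuffer and InflateBuffer above use zlib's one-shot pattern: size the output with deflateBound, run a single deflate(Z_FINISH), and later inflate into a buffer of the known uncompressed size. A minimal, self-contained round trip using the same calls (the input data is arbitrary and unrelated to the profile format):

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <memory>
#include <zlib.h>

int main() {
  const uint8_t input[] = "delta-encoded profile bytes, delta-encoded profile bytes";
  const uint32_t in_size = sizeof(input);

  // Compress with a single deflate call; deflateBound gives a safe output size.
  z_stream out_stream = {};
  if (deflateInit(&out_stream, /*level=*/1) != Z_OK) return 1;
  const uint32_t bound = deflateBound(&out_stream, in_size);
  std::unique_ptr<uint8_t[]> compressed(new uint8_t[bound]);
  out_stream.next_in = const_cast<uint8_t*>(input);
  out_stream.avail_in = in_size;
  out_stream.next_out = compressed.get();
  out_stream.avail_out = bound;
  if (deflate(&out_stream, Z_FINISH) != Z_STREAM_END) return 1;
  const uint32_t compressed_size = bound - out_stream.avail_out;
  deflateEnd(&out_stream);

  // Decompress into a buffer of the already-known uncompressed size.
  z_stream in_stream = {};
  if (inflateInit(&in_stream) != Z_OK) return 1;
  std::unique_ptr<uint8_t[]> decompressed(new uint8_t[in_size]);
  in_stream.next_in = compressed.get();
  in_stream.avail_in = compressed_size;
  in_stream.next_out = decompressed.get();
  in_stream.avail_out = in_size;
  if (inflate(&in_stream, Z_FINISH) != Z_STREAM_END) return 1;
  inflateEnd(&in_stream);

  printf("round trip %s\n",
         memcmp(input, decompressed.get(), in_size) == 0 ? "ok" : "failed");
  return 0;
}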
diff --git a/runtime/jit/profile_compilation_info.h b/runtime/jit/profile_compilation_info.h
index f68ed5d..9e47cc1 100644
--- a/runtime/jit/profile_compilation_info.h
+++ b/runtime/jit/profile_compilation_info.h
@@ -284,6 +284,9 @@
     kProfileLoadSuccess
   };
 
+  const uint32_t kProfileSizeWarningThresholdInBytes = 500000U;
+  const uint32_t kProfileSizeErrorThresholdInBytes = 1000000U;
+
   // Internal representation of the profile information belonging to a dex file.
   // Note that we could do without profile_key (the key used to encode the dex
   // file in the profile) and profile_index (the index of the dex file in the
@@ -353,6 +356,21 @@
   // Checks if the profile is empty.
   bool IsEmpty() const;
 
+  // Deflate (compress) the input buffer (in_buffer) of size in_size. Returns a buffer
+  // of compressed data and stores its size in "compressed_data_size".
+  std::unique_ptr<uint8_t[]> DeflateBuffer(const uint8_t* in_buffer,
+                                           uint32_t in_size,
+                                           /*out*/uint32_t* compressed_data_size);
+
+  // Inflate the input buffer (in_buffer) of size in_size. out_size is the expected output
+  // size of the buffer. It puts the output in out_buffer. It returns Z_STREAM_END on
+  // success. On error, it returns Z_STREAM_ERROR if the compressed data is inconsistent
+  // and Z_DATA_ERROR if the stream ended prematurely or the stream has extra data.
+  int InflateBuffer(const uint8_t* in_buffer,
+                    uint32_t in_size,
+                    uint32_t out_size,
+                    /*out*/uint8_t* out_buffer);
+
   // Parsing functionality.
 
   // The information present in the header of each profile line.
@@ -376,6 +394,10 @@
                                 const std::string& source,
                                 /*out*/std::string* error);
 
+    ProfileLoadSatus FillFromBuffer(uint8_t* buffer_ptr,
+                                    const std::string& source,
+                                    /*out*/std::string* error);
+
     // Reads an uint value (high bits to low bits) and advances the current pointer
     // with the number of bits read.
     template <typename T> bool ReadUintAndAdvance(/*out*/ T* value);
@@ -384,16 +406,22 @@
     // equal it advances the current pointer by data_size.
     bool CompareAndAdvance(const uint8_t* data, size_t data_size);
 
-    // Returns true if the buffer has more data to read.
-    bool HasMoreData();
+    // Advances current pointer by data_size.
+    void Advance(size_t data_size);
+
+    // Returns the count of unread bytes.
+    size_t CountUnreadBytes();
+
+    // Returns the current pointer.
+    const uint8_t* GetCurrentPtr();
 
     // Get the underlying raw buffer.
     uint8_t* Get() { return storage_.get(); }
 
    private:
     std::unique_ptr<uint8_t[]> storage_;
-    uint8_t* ptr_current_;
     uint8_t* ptr_end_;
+    uint8_t* ptr_current_;
   };
 
   // Entry point for profile loding functionality.
@@ -403,10 +431,12 @@
   // lines into number_of_dex_files.
   ProfileLoadSatus ReadProfileHeader(int fd,
                                      /*out*/uint8_t* number_of_dex_files,
+                                     /*out*/uint32_t* size_uncompressed_data,
+                                     /*out*/uint32_t* size_compressed_data,
                                      /*out*/std::string* error);
 
   // Read the header of a profile line from the given fd.
-  ProfileLoadSatus ReadProfileLineHeader(int fd,
+  ProfileLoadSatus ReadProfileLineHeader(SafeBuffer& buffer,
                                          /*out*/ProfileLineHeader* line_header,
                                          /*out*/std::string* error);
 
@@ -417,14 +447,13 @@
                                      /*out*/std::string* error);
 
   // Read a single profile line from the given fd.
-  ProfileLoadSatus ReadProfileLine(int fd,
+  ProfileLoadSatus ReadProfileLine(SafeBuffer& buffer,
                                    uint8_t number_of_dex_files,
                                    const ProfileLineHeader& line_header,
                                    /*out*/std::string* error);
 
   // Read all the classes from the buffer into the profile `info_` structure.
   bool ReadClasses(SafeBuffer& buffer,
-                   uint16_t classes_to_read,
                    const ProfileLineHeader& line_header,
                    /*out*/std::string* error);
 
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index d190bdf..1441987 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -212,6 +212,10 @@
 
 void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() {
   ScopedTrace trace(__PRETTY_FUNCTION__);
+
+  // Resolve any new registered locations.
+  ResolveTrackedLocations();
+
   ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
   std::set<DexCacheResolvedClasses> resolved_classes =
       class_linker->GetResolvedClasses(/*ignore boot classes*/ true);
@@ -260,6 +264,10 @@
 
 bool ProfileSaver::ProcessProfilingInfo(bool force_save, /*out*/uint16_t* number_of_new_methods) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
+
+  // Resolve any new registered locations.
+  ResolveTrackedLocations();
+
   SafeMap<std::string, std::set<std::string>> tracked_locations;
   {
     // Make a copy so that we don't hold the lock while doing I/O.
@@ -497,17 +505,34 @@
   return instance_ != nullptr;
 }
 
-void ProfileSaver::AddTrackedLocations(const std::string& output_filename,
-                                       const std::vector<std::string>& code_paths) {
-  auto it = tracked_dex_base_locations_.find(output_filename);
-  if (it == tracked_dex_base_locations_.end()) {
-    tracked_dex_base_locations_.Put(output_filename,
-                                    std::set<std::string>(code_paths.begin(), code_paths.end()));
+static void AddTrackedLocationsToMap(const std::string& output_filename,
+                                     const std::vector<std::string>& code_paths,
+                                     SafeMap<std::string, std::set<std::string>>* map) {
+  auto it = map->find(output_filename);
+  if (it == map->end()) {
+    map->Put(output_filename, std::set<std::string>(code_paths.begin(), code_paths.end()));
   } else {
     it->second.insert(code_paths.begin(), code_paths.end());
   }
 }
 
+void ProfileSaver::AddTrackedLocations(const std::string& output_filename,
+                                       const std::vector<std::string>& code_paths) {
+  // Add the code paths to the list of tracked locations.
+  AddTrackedLocationsToMap(output_filename, code_paths, &tracked_dex_base_locations_);
+  // The code paths may contain symlinks which could fool the profiler.
+  // If the dex file is compiled with an absolute location but loaded through a symlink,
+  // the profiler could skip the dex due to the location mismatch.
+  // To avoid this, we add the code paths to the temporary cache of 'to_be_resolved'
+  // locations. When the profiler thread executes we will resolve the paths to their
+  // real paths.
+  // Note that we delay taking the realpath to avoid spending more time than needed
+  // when registering a location (as it is done during app launch).
+  AddTrackedLocationsToMap(output_filename,
+                           code_paths,
+                           &tracked_dex_base_locations_to_be_resolved_);
+}
+
 void ProfileSaver::DumpInstanceInfo(std::ostream& os) {
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   if (instance_ != nullptr) {
@@ -556,4 +581,38 @@
   return false;
 }
 
+void ProfileSaver::ResolveTrackedLocations() {
+  SafeMap<std::string, std::set<std::string>> locations_to_be_resolved;
+  {
+    // Make a copy so that we don't hold the lock while doing I/O.
+    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+    locations_to_be_resolved = tracked_dex_base_locations_to_be_resolved_;
+    tracked_dex_base_locations_to_be_resolved_.clear();
+  }
+
+  // Resolve the locations.
+  SafeMap<std::string, std::vector<std::string>> resolved_locations_map;
+  for (const auto& it : locations_to_be_resolved) {
+    const std::string& filename = it.first;
+    const std::set<std::string>& locations = it.second;
+    auto resolved_locations_it = resolved_locations_map.Put(
+        filename,
+        std::vector<std::string>(locations.size()));
+
+    for (const auto& location : locations) {
+      UniqueCPtr<const char[]> location_real(realpath(location.c_str(), nullptr));
+      // Note that it's ok if we cannot get the real path.
+      if (location_real != nullptr) {
+        resolved_locations_it->second.emplace_back(location_real.get());
+      }
+    }
+  }
+
+  // Add the resolved locations to the tracked collection.
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  for (const auto& it : resolved_locations_map) {
+    AddTrackedLocationsToMap(it.first, it.second, &tracked_dex_base_locations_);
+  }
+}
+
 }   // namespace art
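The deferred resolution above boils down to calling realpath() on each registered location from the saver thread, so app launch does not pay for the file-system walk. A small sketch of that step (hypothetical helpers; unlike the code above, which can simply skip failures because the unresolved paths are already tracked, this sketch falls back to the original string):

#include <stdlib.h>

#include <memory>
#include <string>
#include <vector>

// Resolve a dex location to its real path, falling back to the original
// string when realpath() fails (e.g. the file does not exist yet).
std::string ResolveLocation(const std::string& location) {
  std::unique_ptr<char, decltype(&free)> real(realpath(location.c_str(), nullptr), &free);
  return real != nullptr ? std::string(real.get()) : location;
}

std::vector<std::string> ResolveLocations(const std::vector<std::string>& locations) {
  std::vector<std::string> resolved;
  resolved.reserve(locations.size());
  for (const std::string& location : locations) {
    resolved.push_back(ResolveLocation(location));
  }
  return resolved;
}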
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index be2bffc..bd539a4 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -112,6 +112,10 @@
 
   void DumpInfo(std::ostream& os);
 
+  // Resolve the realpath of the locations stored in tracked_dex_base_locations_to_be_resolved_
+  // and put the result in tracked_dex_base_locations_.
+  void ResolveTrackedLocations() REQUIRES(!Locks::profiler_lock_);
+
   // The only instance of the saver.
   static ProfileSaver* instance_ GUARDED_BY(Locks::profiler_lock_);
   // Profile saver thread.
@@ -119,11 +123,17 @@
 
   jit::JitCodeCache* jit_code_cache_;
 
-  // Collection of code paths that the profiles tracks.
+  // Collection of code paths that the profiler tracks.
   // It maps profile locations to code paths (dex base locations).
   SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_
       GUARDED_BY(Locks::profiler_lock_);
 
+  // Collection of code paths that the profiler tracks but may not have been resolved
+  // to their realpath. The resolution is done asynchronously to minimize the time it takes for
+  // someone to register a path.
+  SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_to_be_resolved_
+      GUARDED_BY(Locks::profiler_lock_);
+
   bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
   uint64_t last_time_ns_saver_woke_up_ GUARDED_BY(wait_lock_);
   uint32_t jit_activity_notifications_;
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 7d80d2c..1bd095a 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -90,13 +90,17 @@
 void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
   InlineCache* cache = GetInlineCache(dex_pc);
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
-    mirror::Class* existing = cache->classes_[i].Read();
-    if (existing == cls) {
+    mirror::Class* existing = cache->classes_[i].Read<kWithoutReadBarrier>();
+    mirror::Class* marked = ReadBarrier::IsMarked(existing);
+    if (marked == cls) {
       // Receiver type is already in the cache, nothing else to do.
       return;
-    } else if (existing == nullptr) {
+    } else if (marked == nullptr) {
       // Cache entry is empty, try to put `cls` in it.
-      GcRoot<mirror::Class> expected_root(nullptr);
+      // Note: it's ok to spin on 'existing' here: if 'existing' is not null, that means
+      // it is a stale heap address, which will only be cleared during SweepSystemWeaks,
+      // *after* this thread hits a suspend point.
+      GcRoot<mirror::Class> expected_root(existing);
       GcRoot<mirror::Class> desired_root(cls);
       if (!reinterpret_cast<Atomic<GcRoot<mirror::Class>>*>(&cache->classes_[i])->
               CompareExchangeStrongSequentiallyConsistent(expected_root, desired_root)) {
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index e365b42..bb33047 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -662,7 +662,7 @@
     monitor_lock_.Unlock(self);
 
     // Handle the case where the thread was interrupted before we called wait().
-    if (self->IsInterruptedLocked()) {
+    if (self->IsInterrupted()) {
       was_interrupted = true;
     } else {
       // Wait for a notification or a timeout to occur.
@@ -672,7 +672,7 @@
         DCHECK(why == kTimedWaiting || why == kSleeping) << why;
         self->GetWaitConditionVariable()->TimedWait(self, ms, ns);
       }
-      was_interrupted = self->IsInterruptedLocked();
+      was_interrupted = self->IsInterrupted();
     }
   }
 
@@ -697,10 +697,7 @@
      * The doc sayeth: "The interrupted status of the current thread is
      * cleared when this exception is thrown."
      */
-    {
-      MutexLock mu(self, *self->GetWaitMutex());
-      self->SetInterruptedLocked(false);
-    }
+    self->SetInterrupted(false);
     self->ThrowNewException("Ljava/lang/InterruptedException;", nullptr);
   }
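These monitor changes rely on Thread's interrupted flag no longer needing wait_mutex_: it can be read, set, and cleared with atomic operations (the image version bump to '044' above is tagged Thread.interrupted). A tiny sketch of that pattern, assuming sequentially consistent accesses rather than whatever ordering the actual Thread implementation chooses:

#include <atomic>

// Hypothetical stand-in for a lock-free interrupted flag; not art::Thread.
class InterruptFlag {
 public:
  bool IsInterrupted() const { return interrupted_.load(); }

  void SetInterrupted(bool value) { interrupted_.store(value); }

  // Java's Thread.interrupted(): return the previous value and clear the flag
  // in a single atomic step.
  bool TestAndClear() { return interrupted_.exchange(false); }

 private:
  std::atomic<bool> interrupted_{false};
};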
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 0617dae..77554e8 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -622,6 +622,31 @@
   return env->NewStringUTF(new_filter_str.c_str());
 }
 
+static jstring DexFile_getSafeModeCompilerFilter(JNIEnv* env,
+                                                 jclass javeDexFileClass ATTRIBUTE_UNUSED,
+                                                 jstring javaCompilerFilter) {
+  ScopedUtfChars compiler_filter(env, javaCompilerFilter);
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  CompilerFilter::Filter filter;
+  if (!CompilerFilter::ParseCompilerFilter(compiler_filter.c_str(), &filter)) {
+    return javaCompilerFilter;
+  }
+
+  CompilerFilter::Filter new_filter = CompilerFilter::GetSafeModeFilterFrom(filter);
+
+  // Filter stayed the same, return input.
+  if (filter == new_filter) {
+    return javaCompilerFilter;
+  }
+
+  // Create a new string object and return.
+  std::string new_filter_str = CompilerFilter::NameOfFilter(new_filter);
+  return env->NewStringUTF(new_filter_str.c_str());
+}
+
 static jboolean DexFile_isBackedByOatFile(JNIEnv* env, jclass, jobject cookie) {
   const OatFile* oat_file = nullptr;
   std::vector<const DexFile*> dex_files;
@@ -695,6 +720,9 @@
   NATIVE_METHOD(DexFile,
                 getNonProfileGuidedCompilerFilter,
                 "(Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(DexFile,
+                getSafeModeCompilerFilter,
+                "(Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isBackedByOatFile, "(Ljava/lang/Object;)Z"),
   NATIVE_METHOD(DexFile, getDexFileStatus,
                 "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index d77cfa1..cd8315c 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -118,7 +118,15 @@
       for (int signal = 0; signal < _NSIG; ++signal) {
         android::NativeBridgeSignalHandlerFn fn = android::NativeBridgeGetSignalHandler(signal);
         if (fn != nullptr) {
-          AddSpecialSignalHandlerFn(signal, fn);
+          sigset_t mask;
+          sigfillset(&mask);
+          SigchainAction sa = {
+            .sc_sigaction = fn,
+            .sc_mask = mask,
+            // The native bridge signal handler might not return to sigchain's handler.
+            .sc_flags = SIGCHAIN_ALLOW_NORETURN,
+          };
+          AddSpecialSignalHandlerFn(signal, &sa);
         }
       }
 #endif
diff --git a/runtime/oat.h b/runtime/oat.h
index 9b2227b..b7c715c 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  // Revert concurrent graying for immune spaces.
-  static constexpr uint8_t kOatVersion[] = { '1', '2', '2', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '1', '2', '5', '\0' };  // ARM Baker narrow thunks.
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
@@ -176,6 +175,7 @@
 
   ~OatMethodOffsets();
 
+  OatMethodOffsets(const OatMethodOffsets&) = default;
   OatMethodOffsets& operator=(const OatMethodOffsets&) = default;
 
   uint32_t code_offset_;
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index eafa77f..603bbbf 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -68,19 +68,34 @@
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const InstructionSet isa,
                                    bool load_executable)
-    : OatFileAssistant(dex_location, nullptr, isa, load_executable)
-{ }
-
-OatFileAssistant::OatFileAssistant(const char* dex_location,
-                                   const char* oat_location,
-                                   const InstructionSet isa,
-                                   bool load_executable)
     : isa_(isa),
       load_executable_(load_executable),
       odex_(this, /*is_oat_location*/ false),
       oat_(this, /*is_oat_location*/ true) {
   CHECK(dex_location != nullptr) << "OatFileAssistant: null dex location";
-  dex_location_.assign(dex_location);
+
+  // Try to get the realpath for the dex location.
+  //
+  // This is OK with respect to dalvik cache naming scheme because we never
+  // generate oat files starting from symlinks which go into dalvik cache.
+  // (recall that the oat files in dalvik cache are encoded by replacing '/'
+  // with '@' in the path).
+  // The boot image oat files (which are symlinked in dalvik-cache) are not
+  // loaded via the oat file assistant.
+  //
+  // The only case when the dex location may resolve to a different path
+  // is for secondary dex files (e.g. /data/user/0 symlinks to /data/data and
+  // the app is free to create its own internal layout). Related to this it is
+  // worthwhile to mention that installd resolves the secondary dex location
+  // before calling dex2oat.
+  UniqueCPtr<const char[]> dex_location_real(realpath(dex_location, nullptr));
+  if (dex_location_real != nullptr) {
+    dex_location_.assign(dex_location_real.get());
+  } else {
+    // If we can't get the realpath of the location there's not much point in trying to move on.
+    PLOG(ERROR) << "Could not get the realpath of dex_location " << dex_location;
+    return;
+  }
 
   if (load_executable_ && isa != kRuntimeISA) {
     LOG(WARNING) << "OatFileAssistant: Load executable specified, "
@@ -98,15 +113,27 @@
   }
 
   // Get the oat filename.
-  if (oat_location != nullptr) {
-    oat_.Reset(oat_location);
+  std::string oat_file_name;
+  if (DexLocationToOatFilename(dex_location_, isa_, &oat_file_name, &error_msg)) {
+    oat_.Reset(oat_file_name);
   } else {
-    std::string oat_file_name;
-    if (DexLocationToOatFilename(dex_location_, isa_, &oat_file_name, &error_msg)) {
-      oat_.Reset(oat_file_name);
-    } else {
-      LOG(WARNING) << "Failed to determine oat file name for dex location "
+    LOG(WARNING) << "Failed to determine oat file name for dex location "
         << dex_location_ << ": " << error_msg;
+  }
+
+  // Check if the dex directory is writable.
+  // This will be needed in most uses of OatFileAssistant and so it's OK to
+  // compute it eagerly. (The only use that will not make use of it is
+  // OatFileAssistant::GetStatusDump().)
+  size_t pos = dex_location_.rfind('/');
+  if (pos == std::string::npos) {
+    LOG(WARNING) << "Failed to determine dex file parent directory: " << dex_location_;
+  } else {
+    std::string parent = dex_location_.substr(0, pos);
+    if (access(parent.c_str(), W_OK) == 0) {
+      dex_parent_writable_ = true;
+    } else {
+      VLOG(oat) << "Dex parent of " << dex_location_ << " is not writable: " << strerror(errno);
     }
   }
 }
@@ -139,12 +166,17 @@
   CHECK(error_msg != nullptr);
   CHECK(!flock_.HasFile()) << "OatFileAssistant::Lock already acquired";
 
-  const std::string* oat_file_name = oat_.Filename();
-  if (oat_file_name == nullptr) {
-    *error_msg = "Failed to determine lock file";
-    return false;
-  }
-  std::string lock_file_name = *oat_file_name + ".flock";
+  // Note the lock will only succeed for secondary dex files and in test
+  // environments.
+  //
+  // The lock *will fail* for all primary apks in a production environment.
+  // The app does not have permissions to create locks next to its dex location
+  // (be it the system, data or vendor partition). We also cannot use the odex or
+  // oat location for the same reason.
+  //
+  // This is best effort and if it fails it's unlikely that we will be able
+  // to generate oat files anyway.
+  std::string lock_file_name = dex_location_ + "." + GetInstructionSetString(isa_) + ".flock";
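+  // For example, /data/local/tmp/foo.jar on arm64 would use the lock file
+  // /data/local/tmp/foo.jar.arm64.flock.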
 
   if (!flock_.Init(lock_file_name.c_str(), error_msg)) {
     unlink(lock_file_name.c_str());
@@ -170,7 +202,7 @@
   CHECK(filter != nullptr);
   CHECK(error_msg != nullptr);
 
-  *filter = CompilerFilter::kDefaultCompilerFilter;
+  *filter = OatFileAssistant::kDefaultCompilerFilterForDexLoading;
   for (StringPiece option : Runtime::Current()->GetCompilerOptions()) {
     if (option.starts_with("--compiler-filter=")) {
       const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
@@ -207,7 +239,7 @@
     case kDex2OatForBootImage:
     case kDex2OatForRelocation:
     case kDex2OatForFilter:
-      return GenerateOatFile(error_msg);
+      return GenerateOatFileNoChecks(info, error_msg);
   }
   UNREACHABLE();
 }
@@ -479,8 +511,110 @@
   return kOatUpToDate;
 }
 
-OatFileAssistant::ResultOfAttemptToUpdate
-OatFileAssistant::GenerateOatFile(std::string* error_msg) {
+static bool DexLocationToOdexNames(const std::string& location,
+                                   InstructionSet isa,
+                                   std::string* odex_filename,
+                                   std::string* oat_dir,
+                                   std::string* isa_dir,
+                                   std::string* error_msg) {
+  CHECK(odex_filename != nullptr);
+  CHECK(error_msg != nullptr);
+
+  // The odex file name is formed by replacing the dex_location extension with
+  // .odex and inserting an oat/<isa> directory. For example:
+  //   location = /foo/bar/baz.jar
+  //   odex_location = /foo/bar/oat/<isa>/baz.odex
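+  //   oat_dir       = /foo/bar/oat
+  //   isa_dir       = /foo/bar/oat/<isa>
+  // (oat_dir and isa_dir are filled in only when non-null pointers are passed.)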
+
+  // Find the directory portion of the dex location and add the oat/<isa>
+  // directory.
+  size_t pos = location.rfind('/');
+  if (pos == std::string::npos) {
+    *error_msg = "Dex location " + location + " has no directory.";
+    return false;
+  }
+  std::string dir = location.substr(0, pos+1);
+  // Add the oat directory.
+  dir += "oat";
+  if (oat_dir != nullptr) {
+    *oat_dir = dir;
+  }
+  // Add the isa directory
+  dir += "/" + std::string(GetInstructionSetString(isa));
+  if (isa_dir != nullptr) {
+    *isa_dir = dir;
+  }
+
+  // Get the base part of the file without the extension.
+  std::string file = location.substr(pos+1);
+  pos = file.rfind('.');
+  if (pos == std::string::npos) {
+    *error_msg = "Dex location " + location + " has no extension.";
+    return false;
+  }
+  std::string base = file.substr(0, pos);
+
+  *odex_filename = dir + "/" + base + ".odex";
+  return true;
+}
+
+// Prepare a subcomponent of the odex directory.
+// (i.e. create and set the expected permissions on the path `dir`).
+static bool PrepareDirectory(const std::string& dir, std::string* error_msg) {
+  struct stat dir_stat;
+  if (TEMP_FAILURE_RETRY(stat(dir.c_str(), &dir_stat)) == 0) {
+    // The directory exists. Check if it is indeed a directory.
+    if (!S_ISDIR(dir_stat.st_mode)) {
+      *error_msg = dir + " is not a dir";
+      return false;
+    } else {
+      // The dir is already on disk.
+      return true;
+    }
+  }
+
+  // Failed to stat. We need to create the directory.
+  if (errno != ENOENT) {
+    *error_msg = "Could not stat dir " + dir + ":" + strerror(errno);
+    return false;
+  }
+
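+  // Create the directory as 0711: the owner gets rwx, while group and others
+  // may only traverse (search) the directory.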
+  mode_t mode = S_IRWXU | S_IXGRP | S_IXOTH;
+  if (mkdir(dir.c_str(), mode) != 0) {
+    *error_msg = "Could not create dir " + dir + ":" + strerror(errno);
+    return false;
+  }
+  if (chmod(dir.c_str(), mode) != 0) {
+    *error_msg = "Could not chmod dir " + dir + ":" + strerror(errno);
+    return false;
+  }
+  return true;
+}
+
+// Prepares the odex directory for the given dex location.
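+// For example, for dex_location /data/app/foo/base.jar on arm this creates
+//   /data/app/foo/oat/  and  /data/app/foo/oat/arm/
+// (each with mode 0711); base.odex itself is written there later by dex2oat.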
+static bool PrepareOdexDirectories(const std::string& dex_location,
+                                   const std::string& expected_odex_location,
+                                   InstructionSet isa,
+                                   std::string* error_msg) {
+  std::string actual_odex_location;
+  std::string oat_dir;
+  std::string isa_dir;
+  if (!DexLocationToOdexNames(
+        dex_location, isa, &actual_odex_location, &oat_dir, &isa_dir, error_msg)) {
+    return false;
+  }
+  DCHECK_EQ(expected_odex_location, actual_odex_location);
+
+  if (!PrepareDirectory(oat_dir, error_msg)) {
+    return false;
+  }
+  if (!PrepareDirectory(isa_dir, error_msg)) {
+    return false;
+  }
+  return true;
+}
+
+OatFileAssistant::ResultOfAttemptToUpdate OatFileAssistant::GenerateOatFileNoChecks(
+      OatFileAssistant::OatFileInfo& info, std::string* error_msg) {
   CHECK(error_msg != nullptr);
 
   Runtime* runtime = Runtime::Current();
@@ -490,22 +624,37 @@
     return kUpdateNotAttempted;
   }
 
-  if (oat_.Filename() == nullptr) {
+  if (info.Filename() == nullptr) {
     *error_msg = "Generation of oat file for dex location " + dex_location_
       + " not attempted because the oat file name could not be determined.";
     return kUpdateNotAttempted;
   }
-  const std::string& oat_file_name = *oat_.Filename();
+  const std::string& oat_file_name = *info.Filename();
   const std::string& vdex_file_name = ReplaceFileExtension(oat_file_name, "vdex");
 
   // dex2oat ignores missing dex files and doesn't report an error.
   // Check explicitly here so we can detect the error properly.
   // TODO: Why does dex2oat behave that way?
-  if (!OS::FileExists(dex_location_.c_str())) {
-    *error_msg = "Dex location " + dex_location_ + " does not exists.";
+  struct stat dex_path_stat;
+  if (TEMP_FAILURE_RETRY(stat(dex_location_.c_str(), &dex_path_stat)) != 0) {
+    *error_msg = "Could not access dex location " + dex_location_ + ":" + strerror(errno);
     return kUpdateNotAttempted;
   }
 
+  // If this is the odex location, we need to create the odex file layout (../oat/isa/..)
+  if (!info.IsOatLocation()) {
+    if (!PrepareOdexDirectories(dex_location_, oat_file_name, isa_, error_msg)) {
+      return kUpdateNotAttempted;
+    }
+  }
+
+  // Set the permissions for the oat and the vdex files.
+  // The user always gets read and write, while the group and others inherit
+  // the read access of the original dex file.
+  mode_t file_mode = S_IRUSR | S_IWUSR |
+      (dex_path_stat.st_mode & S_IRGRP) |
+      (dex_path_stat.st_mode & S_IROTH);
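+  // For example, a world-readable dex file (0644) yields 0644 oat/vdex files,
+  // while a private dex file (0600) yields 0600 files.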
+
   std::unique_ptr<File> vdex_file(OS::CreateEmptyFile(vdex_file_name.c_str()));
   if (vdex_file.get() == nullptr) {
     *error_msg = "Generation of oat file " + oat_file_name
@@ -514,7 +663,7 @@
     return kUpdateNotAttempted;
   }
 
-  if (fchmod(vdex_file->Fd(), 0644) != 0) {
+  if (fchmod(vdex_file->Fd(), file_mode) != 0) {
     *error_msg = "Generation of oat file " + oat_file_name
       + " not attempted because the vdex file " + vdex_file_name
       + " could not be made world readable.";
@@ -528,7 +677,7 @@
     return kUpdateNotAttempted;
   }
 
-  if (fchmod(oat_file->Fd(), 0644) != 0) {
+  if (fchmod(oat_file->Fd(), file_mode) != 0) {
     *error_msg = "Generation of oat file " + oat_file_name
       + " not attempted because the oat file could not be made world readable.";
     oat_file->Erase();
@@ -563,8 +712,8 @@
     return kUpdateFailed;
   }
 
-  // Mark that the oat file has changed and we should try to reload.
-  oat_.Reset();
+  // Mark that the odex file has changed and we should try to reload.
+  info.Reset();
   return kUpdateSucceeded;
 }
 
@@ -623,35 +772,7 @@
                                                  InstructionSet isa,
                                                  std::string* odex_filename,
                                                  std::string* error_msg) {
-  CHECK(odex_filename != nullptr);
-  CHECK(error_msg != nullptr);
-
-  // The odex file name is formed by replacing the dex_location extension with
-  // .odex and inserting an oat/<isa> directory. For example:
-  //   location = /foo/bar/baz.jar
-  //   odex_location = /foo/bar/oat/<isa>/baz.odex
-
-  // Find the directory portion of the dex location and add the oat/<isa>
-  // directory.
-  size_t pos = location.rfind('/');
-  if (pos == std::string::npos) {
-    *error_msg = "Dex location " + location + " has no directory.";
-    return false;
-  }
-  std::string dir = location.substr(0, pos+1);
-  dir += "oat/" + std::string(GetInstructionSetString(isa));
-
-  // Get the base part of the file without the extension.
-  std::string file = location.substr(pos+1);
-  pos = file.rfind('.');
-  if (pos == std::string::npos) {
-    *error_msg = "Dex location " + location + " has no extension.";
-    return false;
-  }
-  std::string base = file.substr(0, pos);
-
-  *odex_filename = dir + "/" + base + ".odex";
-  return true;
+  return DexLocationToOdexNames(location, isa, odex_filename, nullptr, nullptr, error_msg);
 }
 
 bool OatFileAssistant::DexLocationToOatFilename(const std::string& location,
@@ -752,8 +873,45 @@
 }
 
 OatFileAssistant::OatFileInfo& OatFileAssistant::GetBestInfo() {
-  bool use_oat = oat_.IsUseable() || odex_.Status() == kOatCannotOpen;
-  return use_oat ? oat_ : odex_;
+  // TODO(calin): Document the side effects of class loading when
+  // running dalvikvm command line.
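+  // In short, the selection below is:
+  //   dex parent writable           -> odex_
+  //   else, oat file usable         -> oat_
+  //   else, odex file up to date    -> odex_
+  //   else, original dex available  -> oat_
+  //   else                          -> odex_ if it can be opened, oat_ otherwise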
+  if (dex_parent_writable_) {
+    // If the parent of the dex file is writable it means that we can
+    // create the odex file. In this case we unconditionally pick the odex
+    // as the best oat file. This corresponds to the regular use case when
+    // apps get installed or when they load private, secondary dex files.
+    // For apps on the system partition the odex location will not be
+    // writable and thus the oat location might be more up to date.
+    return odex_;
+  }
+
+  // We cannot write to the odex location. This must be a system app.
+
+  // If the oat location is usable take it.
+  if (oat_.IsUseable()) {
+    return oat_;
+  }
+
+  // The oat file is not usable but the odex file might be up to date.
+  // This is an indication that we are dealing with an up to date prebuilt
+  // (that doesn't need relocation).
+  if (odex_.Status() == kOatUpToDate) {
+    return odex_;
+  }
+
+  // The oat file is not usable and the odex file is not up to date.
+  // However we have access to the original dex file which means we can make
+  // the oat location up to date.
+  if (HasOriginalDexFiles()) {
+    return oat_;
+  }
+
+  // We got into the worst situation here:
+  // - the oat location is not usable
+  // - the prebuilt odex location is not up to date
+  // - and we don't have the original dex file anymore (stripped).
+  // Pick the odex if it exists, or the oat if not.
+  return (odex_.Status() == kOatCannotOpen) ? oat_ : odex_;
 }
 
 std::unique_ptr<gc::space::ImageSpace> OatFileAssistant::OpenImageSpace(const OatFile* oat_file) {
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index b84e711..7e2385e 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -47,6 +47,11 @@
 // dex location is in the boot class path.
 class OatFileAssistant {
  public:
+  // The default compiler filter to use when optimizing dex files at load time
+  // if they are out of date.
+  static const CompilerFilter::Filter kDefaultCompilerFilterForDexLoading =
+      CompilerFilter::kQuicken;
+
   enum DexOptNeeded {
     // No dexopt should (or can) be done to update the apk/jar.
     // Matches Java: dalvik.system.DexFile.NO_DEXOPT_NEEDED = 0
@@ -117,13 +122,6 @@
                    const InstructionSet isa,
                    bool load_executable);
 
-  // Constructs an OatFileAssistant, providing an explicit target oat_location
-  // to use instead of the standard oat location.
-  OatFileAssistant(const char* dex_location,
-                   const char* oat_location,
-                   const InstructionSet isa,
-                   bool load_executable);
-
   ~OatFileAssistant();
 
   // Returns true if the dex location refers to an element of the boot class
@@ -232,16 +230,6 @@
   // Returns the status of the oat file for the dex location.
   OatStatus OatFileStatus();
 
-  // Generate the oat file from the dex file using the current runtime
-  // compiler options.
-  // This does not check the current status before attempting to generate the
-  // oat file.
-  //
-  // If the result is not kUpdateSucceeded, the value of error_msg will be set
-  // to a string describing why there was a failure or the update was not
-  // attempted. error_msg must not be null.
-  ResultOfAttemptToUpdate GenerateOatFile(std::string* error_msg);
-
   // Executes dex2oat using the current runtime configuration overridden with
   // the given arguments. This does not check to see if dex2oat is enabled in
   // the runtime configuration.
@@ -377,6 +365,16 @@
     bool file_released_ = false;
   };
 
+  // Generate the oat file for the given info from the dex file using the
+  // current runtime compiler options.
+  // This does not check the current status before attempting to generate the
+  // oat file.
+  //
+  // If the result is not kUpdateSucceeded, the value of error_msg will be set
+  // to a string describing why there was a failure or the update was not
+  // attempted. error_msg must not be null.
+  ResultOfAttemptToUpdate GenerateOatFileNoChecks(OatFileInfo& info, std::string* error_msg);
+
   // Return info for the best oat file.
   OatFileInfo& GetBestInfo();
 
@@ -422,6 +420,9 @@
 
   std::string dex_location_;
 
+  // Whether or not the parent directory of the dex file is writable.
+  bool dex_parent_writable_ = false;
+
   // In a properly constructed OatFileAssistant object, isa_ should be either
   // the 32 or 64 bit variant for the current device.
   const InstructionSet isa_ = kNone;
@@ -446,6 +447,8 @@
   bool image_info_load_attempted_ = false;
   std::unique_ptr<ImageInfo> cached_image_info_;
 
+  friend class OatFileAssistantTest;
+
   DISALLOW_COPY_AND_ASSIGN(OatFileAssistant);
 };
 
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 18924e9..b2b86ee 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -43,6 +43,38 @@
   }
 };
 
+class ScopedNonWritable {
+ public:
+  explicit ScopedNonWritable(const std::string& dex_location) {
+    is_valid_ = false;
+    size_t pos = dex_location.rfind('/');
+    if (pos != std::string::npos) {
+      is_valid_ = true;
+      dex_parent_ = dex_location.substr(0, pos);
+      if (chmod(dex_parent_.c_str(), 0555) != 0)  {
+        PLOG(ERROR) << "Could not change permissions on " << dex_parent_;
+      }
+    }
+  }
+
+  bool IsSuccessful() { return is_valid_ && (access(dex_parent_.c_str(), W_OK) != 0); }
+
+  ~ScopedNonWritable() {
+    if (is_valid_) {
+      if (chmod(dex_parent_.c_str(), 0777) != 0) {
+        PLOG(ERROR) << "Could not restore permissions on " << dex_parent_;
+      }
+    }
+  }
+
+ private:
+  std::string dex_parent_;
+  bool is_valid_;
+};
+
+static bool IsExecutedAsRoot() {
+  return geteuid() == 0;
+}
 
 // Case: We have a DEX file, but no OAT file for it.
 // Expect: The status is kDex2OatNeeded.
@@ -87,13 +119,126 @@
   EXPECT_EQ(nullptr, oat_file.get());
 }
 
+// Case: We have a DEX file and a PIC ODEX file, but no OAT file.
+// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation.
+TEST_F(OatFileAssistantTest, OdexUpToDate) {
+  std::string dex_location = GetScratchDir() + "/OdexUpToDate.jar";
+  std::string odex_location = GetOdexDir() + "/OdexUpToDate.odex";
+  Copy(GetDexSrc1(), dex_location);
+  GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
+  EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
+  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
+}
+
+// Case: We have a DEX file and a PIC ODEX file, but no OAT file. We load the dex
+// file via a symlink.
+// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation.
+TEST_F(OatFileAssistantTest, OdexUpToDateSymLink) {
+  std::string scratch_dir = GetScratchDir();
+  std::string dex_location = GetScratchDir() + "/OdexUpToDate.jar";
+  std::string odex_location = GetOdexDir() + "/OdexUpToDate.odex";
+
+  Copy(GetDexSrc1(), dex_location);
+  GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
+
+  // Now replace the dex location with a symlink.
+  std::string link = scratch_dir + "/link";
+  ASSERT_EQ(0, symlink(scratch_dir.c_str(), link.c_str()));
+  dex_location = link + "/OdexUpToDate.jar";
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
+  EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
+  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
+}
+
 // Case: We have a DEX file and up-to-date OAT file for it.
 // Expect: The status is kNoDexOptNeeded.
 TEST_F(OatFileAssistantTest, OatUpToDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/OatUpToDate.jar";
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
+  // Force the use of the oat location by making the dex parent not writable.
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
+  EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus());
+  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
+}
+
+// Case: We have a DEX file and up-to-date OAT file for it. We load the dex file
+// via a symlink.
+// Expect: The status is kNoDexOptNeeded.
+TEST_F(OatFileAssistantTest, OatUpToDateSymLink) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
+  std::string real = GetScratchDir() + "/real";
+  ASSERT_EQ(0, mkdir(real.c_str(), 0700));
+  std::string link = GetScratchDir() + "/link";
+  ASSERT_EQ(0, symlink(real.c_str(), link.c_str()));
+
+  std::string dex_location = real + "/OatUpToDate.jar";
+
+  Copy(GetDexSrc1(), dex_location);
+  GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
+
+  // Update the dex location to point to the symlink.
+  dex_location = link + "/OatUpToDate.jar";
+
+  // Force the use of the oat location by making the dex parent not writable.
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
@@ -120,19 +265,16 @@
   }
 
   std::string dex_location = GetScratchDir() + "/VdexUpToDateNoOdex.jar";
-  std::string oat_location = GetOdexDir() + "/VdexUpToDateNoOdex.oat";
+  std::string odex_location = GetOdexDir() + "/VdexUpToDateNoOdex.oat";
 
   Copy(GetDexSrc1(), dex_location);
 
   // Generating and deleting the oat file should have the side effect of
   // creating an up-to-date vdex file.
-  GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
-  ASSERT_EQ(0, unlink(oat_location.c_str()));
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
+  ASSERT_EQ(0, unlink(odex_location.c_str()));
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(),
-                                      oat_location.c_str(),
-                                      kRuntimeISA,
-                                      false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   // Even though the vdex file is up to date, because we don't have the oat
   // file, we can't know that the vdex depends on the boot image and is up to
@@ -169,6 +311,11 @@
   if (!kIsVdexEnabled) {
     return;
   }
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
 
   std::string dex_location = GetScratchDir() + "/VdexUpToDateNoOat.jar";
   std::string oat_location;
@@ -180,6 +327,8 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
   ASSERT_EQ(0, unlink(oat_location.c_str()));
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   // Even though the vdex file is up to date, because we don't have the oat
@@ -195,10 +344,19 @@
 // Expect: The status is kNoDexOptNeeded if the profile hasn't changed, but
 // kDex2Oat if the profile has changed.
 TEST_F(OatFileAssistantTest, ProfileOatUpToDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/ProfileOatUpToDate.jar";
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
@@ -219,10 +377,19 @@
 // Case: We have a MultiDEX file and up-to-date OAT file for it.
 // Expect: The status is kNoDexOptNeeded and we load all dex files.
 TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/MultiDexOatUpToDate.jar";
   Copy(GetMultiDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
@@ -240,6 +407,12 @@
 // Case: We have a MultiDEX file where the non-main multidex entry is out of date.
 // Expect: The status is kDex2OatNeeded.
 TEST_F(OatFileAssistantTest, MultiDexNonMainOutOfDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/MultiDexNonMainOutOfDate.jar";
 
   // Compile code for GetMultiDexSrc1.
@@ -250,6 +423,9 @@
   // is out of date.
   Copy(GetMultiDexSrc2(), dex_location);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
@@ -287,12 +463,12 @@
   EXPECT_EQ(OatFileAssistant::kOatDexOutOfDate, oat_file_assistant.OatFileStatus());
 }
 
-// Case: We have a MultiDEX file and up-to-date OAT file for it with relative
+// Case: We have a MultiDEX file and up-to-date ODEX file for it with relative
 // encoded dex locations.
 // Expect: The oat file status is kNoDexOptNeeded.
 TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) {
   std::string dex_location = GetScratchDir() + "/RelativeEncodedDexLocation.jar";
-  std::string oat_location = GetOdexDir() + "/RelativeEncodedDexLocation.oat";
+  std::string odex_location = GetOdexDir() + "/RelativeEncodedDexLocation.odex";
 
   // Create the dex file
   Copy(GetMultiDexSrc1(), dex_location);
@@ -301,16 +477,15 @@
   std::vector<std::string> args;
   args.push_back("--dex-file=" + dex_location);
   args.push_back("--dex-location=" + std::string("RelativeEncodedDexLocation.jar"));
-  args.push_back("--oat-file=" + oat_location);
+  args.push_back("--oat-file=" + odex_location);
   args.push_back("--compiler-filter=speed");
 
   std::string error_msg;
   ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
 
   // Verify we can load both dex files.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(),
-                                      oat_location.c_str(),
-                                      kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
   EXPECT_TRUE(oat_file->IsExecutable());
@@ -322,6 +497,12 @@
 // Case: We have a DEX file and an OAT file out of date with respect to the
 // dex checksum.
 TEST_F(OatFileAssistantTest, OatDexOutOfDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/OatDexOutOfDate.jar";
 
   // We create a dex, generate an oat for it, then overwrite the dex with a
@@ -330,6 +511,9 @@
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
   Copy(GetDexSrc2(), dex_location);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
@@ -351,17 +535,14 @@
   }
 
   std::string dex_location = GetScratchDir() + "/VdexDexOutOfDate.jar";
-  std::string oat_location = GetOdexDir() + "/VdexDexOutOfDate.oat";
+  std::string odex_location = GetOdexDir() + "/VdexDexOutOfDate.oat";
 
   Copy(GetDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
-  ASSERT_EQ(0, unlink(oat_location.c_str()));
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
+  ASSERT_EQ(0, unlink(odex_location.c_str()));
   Copy(GetDexSrc2(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(),
-                                      oat_location.c_str(),
-                                      kRuntimeISA,
-                                      false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -376,17 +557,14 @@
   }
 
   std::string dex_location = GetScratchDir() + "/VdexMultiDexNonMainOutOfDate.jar";
-  std::string oat_location = GetOdexDir() + "/VdexMultiDexNonMainOutOfDate.oat";
+  std::string odex_location = GetOdexDir() + "/VdexMultiDexNonMainOutOfDate.odex";
 
   Copy(GetMultiDexSrc1(), dex_location);
-  GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed);
-  ASSERT_EQ(0, unlink(oat_location.c_str()));
+  GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
+  ASSERT_EQ(0, unlink(odex_location.c_str()));
   Copy(GetMultiDexSrc2(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(),
-                                      oat_location.c_str(),
-                                      kRuntimeISA,
-                                      false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
@@ -395,6 +573,12 @@
 // Case: We have a DEX file and an OAT file out of date with respect to the
 // boot image.
 TEST_F(OatFileAssistantTest, OatImageOutOfDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/OatImageOutOfDate.jar";
 
   Copy(GetDexSrc1(), dex_location);
@@ -404,6 +588,9 @@
                      /*pic*/false,
                      /*with_alternate_image*/true);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
   EXPECT_EQ(OatFileAssistant::kDex2OatForBootImage,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
@@ -423,6 +610,12 @@
 // It shouldn't matter that the OAT file is out of date, because it is
 // verify-at-runtime.
 TEST_F(OatFileAssistantTest, OatVerifyAtRuntimeImageOutOfDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/OatVerifyAtRuntimeImageOutOfDate.jar";
 
   Copy(GetDexSrc1(), dex_location);
@@ -432,6 +625,9 @@
                      /*pic*/false,
                      /*with_alternate_image*/true);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
@@ -586,24 +782,23 @@
 TEST_F(OatFileAssistantTest, OdexOatOverlap) {
   std::string dex_location = GetScratchDir() + "/OdexOatOverlap.jar";
   std::string odex_location = GetOdexDir() + "/OdexOatOverlap.odex";
-  std::string oat_location = GetOdexDir() + "/OdexOatOverlap.oat";
 
-  // Create the dex and odex files
+  // Create the dex, the odex and the oat files.
   Copy(GetDexSrc1(), dex_location);
   GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
-
-  // Create the oat file by copying the odex so they are located in the same
-  // place in memory.
-  Copy(odex_location, oat_location);
+  GenerateOatForTest(dex_location.c_str(),
+                     CompilerFilter::kSpeed,
+                     /*relocate*/false,
+                     /*pic*/false,
+                     /*with_alternate_image*/false);
 
   // Verify things don't go bad.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
-  // kDex2OatForRelocation is expected rather than -kDex2OatForRelocation
-  // based on the assumption that the oat location is more up-to-date than the odex
+  // -kDex2OatForRelocation is expected rather than kDex2OatForRelocation
+  // based on the assumption that the odex location is more up-to-date than the oat
   // location, even if they both need relocation.
-  EXPECT_EQ(OatFileAssistant::kDex2OatForRelocation,
+  EXPECT_EQ(-OatFileAssistant::kDex2OatForRelocation,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
@@ -621,30 +816,6 @@
   EXPECT_EQ(1u, dex_files.size());
 }
 
-// Case: We have a DEX file and a PIC ODEX file, but no OAT file.
-// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation.
-TEST_F(OatFileAssistantTest, DexPicOdexNoOat) {
-  std::string dex_location = GetScratchDir() + "/DexPicOdexNoOat.jar";
-  std::string odex_location = GetOdexDir() + "/DexPicOdexNoOat.odex";
-
-  // Create the dex and odex files
-  Copy(GetDexSrc1(), dex_location);
-  GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed);
-
-  // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
-
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
-  EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,
-      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
-
-  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus());
-  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
-  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
-}
-
 // Case: We have a DEX file and a VerifyAtRuntime ODEX file, but no OAT file.
 // Expect: The status is kNoDexOptNeeded, because VerifyAtRuntime contains no code.
 TEST_F(OatFileAssistantTest, DexVerifyAtRuntimeOdexNoOat) {
@@ -672,11 +843,20 @@
 // Case: We have a DEX file and up-to-date OAT file for it.
 // Expect: We should load an executable dex file.
 TEST_F(OatFileAssistantTest, LoadOatUpToDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/LoadOatUpToDate.jar";
 
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   // Load the oat using an oat file assistant.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
@@ -691,11 +871,20 @@
 // Case: We have a DEX file and up-to-date quicken OAT file for it.
 // Expect: We should still load the oat file as executable.
 TEST_F(OatFileAssistantTest, LoadExecInterpretOnlyOatUpToDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/LoadExecInterpretOnlyOatUpToDate.jar";
 
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kQuicken);
 
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   // Load the oat using an oat file assistant.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
@@ -710,9 +899,19 @@
 // Case: We have a DEX file and up-to-date OAT file for it.
 // Expect: Loading non-executable should load the oat non-executable.
 TEST_F(OatFileAssistantTest, LoadNoExecOatUpToDate) {
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
+
   std::string dex_location = GetScratchDir() + "/LoadNoExecOatUpToDate.jar";
 
   Copy(GetDexSrc1(), dex_location);
+
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
   GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed);
 
   // Load the oat using an oat file assistant.
@@ -726,70 +925,33 @@
   EXPECT_EQ(1u, dex_files.size());
 }
 
-// Case: We have a DEX file.
-// Expect: We should load an executable dex file from an alternative oat
-// location.
-TEST_F(OatFileAssistantTest, LoadDexNoAlternateOat) {
-  std::string dex_location = GetScratchDir() + "/LoadDexNoAlternateOat.jar";
-  std::string oat_location = GetScratchDir() + "/LoadDexNoAlternateOat.oat";
-
-  Copy(GetDexSrc1(), dex_location);
-
-  OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
-  std::string error_msg;
-  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
-  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
-      oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
-
-  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
-  ASSERT_TRUE(oat_file.get() != nullptr);
-  EXPECT_TRUE(oat_file->IsExecutable());
-  std::vector<std::unique_ptr<const DexFile>> dex_files;
-  dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str());
-  EXPECT_EQ(1u, dex_files.size());
-
-  EXPECT_TRUE(OS::FileExists(oat_location.c_str()));
-
-  // Verify it didn't create an oat in the default location.
-  OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false);
-  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OatFileStatus());
-}
-
-// Case: We have a DEX file but can't write the oat file.
-// Expect: We should fail to make the oat file up to date.
-TEST_F(OatFileAssistantTest, LoadDexUnwriteableAlternateOat) {
-  std::string dex_location = GetScratchDir() + "/LoadDexUnwriteableAlternateOat.jar";
-
-  // Make the oat location unwritable by inserting some non-existent
-  // intermediate directories.
-  std::string oat_location = GetScratchDir() + "/foo/bar/LoadDexUnwriteableAlternateOat.oat";
-
-  Copy(GetDexSrc1(), dex_location);
-
-  OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
-  std::string error_msg;
-  Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
-  ASSERT_EQ(OatFileAssistant::kUpdateNotAttempted,
-      oat_file_assistant.MakeUpToDate(false, &error_msg));
-
-  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
-  ASSERT_TRUE(oat_file.get() == nullptr);
-}
-
 // Case: We don't have a DEX file and can't write the oat file.
 // Expect: We should fail to generate the oat file without crashing.
 TEST_F(OatFileAssistantTest, GenNoDex) {
-  std::string dex_location = GetScratchDir() + "/GenNoDex.jar";
-  std::string oat_location = GetScratchDir() + "/GenNoDex.oat";
+  if (IsExecutedAsRoot()) {
+    // We cannot simulate non writable locations when executed as root: b/38000545.
+    LOG(ERROR) << "Test skipped because it's running as root";
+    return;
+  }
 
-  OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
+  std::string dex_location = GetScratchDir() + "/GenNoDex.jar";
+
+  ScopedNonWritable scoped_non_writable(dex_location);
+  ASSERT_TRUE(scoped_non_writable.IsSuccessful());
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
-  EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted,
-      oat_file_assistant.GenerateOatFile(&error_msg));
+  // We should get kUpdateSucceeded from MakeUpToDate since there's nothing
+  // that can be done in this situation.
+  ASSERT_EQ(OatFileAssistant::kUpdateSucceeded,
+      oat_file_assistant.MakeUpToDate(false, &error_msg));
+
+  // Verify it didn't create an oat in the default location (dalvik-cache).
+  OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false);
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OatFileStatus());
+  // Verify it didn't create the odex file in the default location (../oat/isa/...odex)
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OdexFileStatus());
 }
 
 // Turn an absolute path into a path relative to the current working
@@ -1006,9 +1168,9 @@
   Runtime::Current()->AddCompilerOption("--compiler-filter=quicken");
   EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
       oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken));
-  EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
+  EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 932d5ed..c1cf800 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -615,9 +615,7 @@
   Locks::mutator_lock_->AssertNotHeld(self);
   Runtime* const runtime = Runtime::Current();
 
-  // TODO(calin): remove the explicit oat_location for OatFileAssistant
   OatFileAssistant oat_file_assistant(dex_location,
-                                      /*oat_location*/ nullptr,
                                       kRuntimeISA,
                                       !runtime->IsAotCompiler());
 
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index f2a2af2..152b0ba 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -54,6 +54,7 @@
     return FromCodePointer(EntryPointToCodePointer(entry_point));
   }
 
+  OatQuickMethodHeader(const OatQuickMethodHeader&) = default;
   OatQuickMethodHeader& operator=(const OatQuickMethodHeader&) = default;
 
   uintptr_t NativeQuickPcOffset(const uintptr_t pc) const {
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index c3a94b9..9be486e 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -1078,9 +1078,180 @@
                                           jint* extension_count_ptr,
                                           jvmtiExtensionFunctionInfo** extensions) {
     ENSURE_VALID_ENV(env);
-    // We do not have any extension functions.
-    *extension_count_ptr = 0;
-    *extensions = nullptr;
+    ENSURE_NON_NULL(extension_count_ptr);
+    ENSURE_NON_NULL(extensions);
+
+    std::vector<jvmtiExtensionFunctionInfo> ext_vector;
+
+    // Holders for allocated values.
+    std::vector<JvmtiUniquePtr<char[]>> char_buffers;
+    std::vector<JvmtiUniquePtr<jvmtiParamInfo[]>> param_buffers;
+    std::vector<JvmtiUniquePtr<jvmtiError[]>> error_buffers;
+
+    // A helper struct that describes a parameter using a plain const char* name.
+    // add_extension below will allocate JVMTI-managed copies as appropriate.
+    struct CParamInfo {
+      const char* name;
+      jvmtiParamKind kind;
+      jvmtiParamTypes base_type;
+      jboolean null_ok;
+    };
+
+    auto add_extension = [&](jvmtiExtensionFunction func,
+                             const char* id,
+                             const char* short_description,
+                             jint param_count,
+                             const std::vector<CParamInfo>& params,
+                             jint error_count,
+                             const std::vector<jvmtiError>& errors) {
+      jvmtiExtensionFunctionInfo func_info;
+      jvmtiError error;
+
+      func_info.func = func;
+
+      JvmtiUniquePtr<char[]> id_ptr = CopyString(env, id, &error);
+      if (id_ptr == nullptr) {
+        return error;
+      }
+      func_info.id = id_ptr.get();
+      char_buffers.push_back(std::move(id_ptr));
+
+      JvmtiUniquePtr<char[]> descr = CopyString(env, short_description, &error);
+      if (descr == nullptr) {
+        return error;
+      }
+      func_info.short_description = descr.get();
+      char_buffers.push_back(std::move(descr));
+
+      func_info.param_count = param_count;
+      if (param_count > 0) {
+        JvmtiUniquePtr<jvmtiParamInfo[]> params_ptr =
+            AllocJvmtiUniquePtr<jvmtiParamInfo[]>(env, param_count, &error);
+        if (params_ptr == nullptr) {
+          return error;
+        }
+        func_info.params = params_ptr.get();
+        param_buffers.push_back(std::move(params_ptr));
+
+        for (jint i = 0; i != param_count; ++i) {
+          JvmtiUniquePtr<char[]> param_name = CopyString(env, params[i].name, &error);
+          if (param_name == nullptr) {
+            return error;
+          }
+          func_info.params[i].name = param_name.get();
+          char_buffers.push_back(std::move(param_name));
+
+          func_info.params[i].kind = params[i].kind;
+          func_info.params[i].base_type = params[i].base_type;
+          func_info.params[i].null_ok = params[i].null_ok;
+        }
+      } else {
+        func_info.params = nullptr;
+      }
+
+      func_info.error_count = error_count;
+      if (error_count > 0) {
+        JvmtiUniquePtr<jvmtiError[]> errors_ptr =
+            AllocJvmtiUniquePtr<jvmtiError[]>(env, error_count, &error);
+        if (errors_ptr == nullptr) {
+          return error;
+        }
+        func_info.errors = errors_ptr.get();
+        error_buffers.push_back(std::move(errors_ptr));
+
+        for (jint i = 0; i != error_count; ++i) {
+          func_info.errors[i] = errors[i];
+        }
+      } else {
+        func_info.errors = nullptr;
+      }
+
+      ext_vector.push_back(func_info);
+
+      return ERR(NONE);
+    };
+
+    jvmtiError error;
+
+    // Heap extensions.
+    error = add_extension(
+        reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::GetObjectHeapId),
+        "com.android.art.heap.get_object_heap_id",
+        "Retrieve the heap id of the object tagged with the given argument. An "
+            "arbitrary object is chosen if multiple objects exist with the same tag.",
+        2,
+        {                                                          // NOLINT [whitespace/braces] [4]
+            { "tag", JVMTI_KIND_IN, JVMTI_TYPE_JLONG, false},
+            { "heap_id", JVMTI_KIND_OUT, JVMTI_TYPE_JINT, false}
+        },
+        1,
+        { JVMTI_ERROR_NOT_FOUND });
+    if (error != ERR(NONE)) {
+      return error;
+    }
+
+    error = add_extension(
+        reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::GetHeapName),
+        "com.android.art.heap.get_heap_name",
+        "Retrieve the name of the heap with the given id.",
+        2,
+        {                                                          // NOLINT [whitespace/braces] [4]
+            { "heap_id", JVMTI_KIND_IN, JVMTI_TYPE_JINT, false},
+            { "heap_name", JVMTI_KIND_ALLOC_BUF, JVMTI_TYPE_CCHAR, false}
+        },
+        1,
+        { JVMTI_ERROR_ILLEGAL_ARGUMENT });
+    if (error != ERR(NONE)) {
+      return error;
+    }
+
+    error = add_extension(
+        reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::IterateThroughHeapExt),
+        "com.android.art.heap.iterate_through_heap_ext",
+        "Iterate through a heap. This is equivalent to the standard IterateThroughHeap function,"
+        " except for additionally passing the heap id of the current object. The jvmtiHeapCallbacks"
+        " structure is reused, with the callbacks field overloaded to a signature of "
+        "jint (*)(jlong, jlong, jlong*, jint length, void*, jint).",
+        4,
+        {                                                          // NOLINT [whitespace/braces] [4]
+            { "heap_filter", JVMTI_KIND_IN, JVMTI_TYPE_JINT, false},
+            { "klass", JVMTI_KIND_IN, JVMTI_TYPE_JCLASS, true},
+            { "callbacks", JVMTI_KIND_IN_PTR, JVMTI_TYPE_CVOID, false},
+            { "user_data", JVMTI_KIND_IN_PTR, JVMTI_TYPE_CVOID, true}
+        },
+        3,
+        {                                                          // NOLINT [whitespace/braces] [4]
+            JVMTI_ERROR_MUST_POSSESS_CAPABILITY,
+            JVMTI_ERROR_INVALID_CLASS,
+            JVMTI_ERROR_NULL_POINTER
+        });
+    if (error != ERR(NONE)) {
+      return error;
+    }
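+
+    // Sketch of an agent callback matching the overloaded signature described
+    // above (parameter meanings inferred from the standard heap iteration
+    // callback, with the heap id appended):
+    //   jint HeapIterationExtCb(jlong class_tag, jlong size, jlong* tag_ptr,
+    //                           jint length, void* user_data, jint heap_id);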
+
+    // Copy into output buffer.
+
+    *extension_count_ptr = ext_vector.size();
+    JvmtiUniquePtr<jvmtiExtensionFunctionInfo[]> out_data =
+        AllocJvmtiUniquePtr<jvmtiExtensionFunctionInfo[]>(env, ext_vector.size(), &error);
+    if (out_data == nullptr) {
+      return error;
+    }
+    memcpy(out_data.get(),
+           ext_vector.data(),
+           ext_vector.size() * sizeof(jvmtiExtensionFunctionInfo));
+    *extensions = out_data.release();
+
+    // Release all the buffer holders; the caller now owns the allocated memory.
+    for (auto& holder : char_buffers) {
+      holder.release();
+    }
+    for (auto& holder : param_buffers) {
+      holder.release();
+    }
+    for (auto& holder : error_buffers) {
+      holder.release();
+    }
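+
+    // Agent-side sketch (assumed usage, not part of this function): after
+    // GetExtensionFunctions succeeds, an agent would find the entry whose id is
+    // "com.android.art.heap.get_object_heap_id" and call it roughly as:
+    //   jint heap_id;
+    //   jvmtiError err = info.func(jvmti_env, tag, &heap_id);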
 
     return ERR(NONE);
   }
@@ -1358,23 +1529,26 @@
 
   static jvmtiError GetErrorName(jvmtiEnv* env, jvmtiError error,  char** name_ptr) {
     ENSURE_NON_NULL(name_ptr);
+    auto copy_fn = [&](const char* name_cstr) {
+      jvmtiError res;
+      JvmtiUniquePtr<char[]> copy = CopyString(env, name_cstr, &res);
+      if (copy == nullptr) {
+        *name_ptr = nullptr;
+        return res;
+      } else {
+        *name_ptr = copy.release();
+        return OK;
+      }
+    };
     switch (error) {
-#define ERROR_CASE(e) case (JVMTI_ERROR_ ## e) : do { \
-          jvmtiError res; \
-          JvmtiUniquePtr<char[]> copy = CopyString(env, "JVMTI_ERROR_"#e, &res); \
-          if (copy == nullptr) { \
-            *name_ptr = nullptr; \
-            return res; \
-          } else { \
-            *name_ptr = copy.release(); \
-            return OK; \
-          } \
-        } while (false)
+#define ERROR_CASE(e) case (JVMTI_ERROR_ ## e) : \
+        return copy_fn("JVMTI_ERROR_"#e);
       ERROR_CASE(NONE);
       ERROR_CASE(INVALID_THREAD);
       ERROR_CASE(INVALID_THREAD_GROUP);
       ERROR_CASE(INVALID_PRIORITY);
       ERROR_CASE(THREAD_NOT_SUSPENDED);
+      ERROR_CASE(THREAD_SUSPENDED);
       ERROR_CASE(THREAD_NOT_ALIVE);
       ERROR_CASE(INVALID_OBJECT);
       ERROR_CASE(INVALID_CLASS);
@@ -1419,18 +1593,9 @@
       ERROR_CASE(UNATTACHED_THREAD);
       ERROR_CASE(INVALID_ENVIRONMENT);
 #undef ERROR_CASE
-      default: {
-        jvmtiError res;
-        JvmtiUniquePtr<char[]> copy = CopyString(env, "JVMTI_ERROR_UNKNOWN", &res);
-        if (copy == nullptr) {
-          *name_ptr = nullptr;
-          return res;
-        } else {
-          *name_ptr = copy.release();
-          return ERR(ILLEGAL_ARGUMENT);
-        }
-      }
     }
+
+    return ERR(ILLEGAL_ARGUMENT);
   }
 
   static jvmtiError SetVerboseFlag(jvmtiEnv* env,
diff --git a/runtime/openjdkjvmti/jvmti_weak_table-inl.h b/runtime/openjdkjvmti/jvmti_weak_table-inl.h
index f67fffc..64ab3e7 100644
--- a/runtime/openjdkjvmti/jvmti_weak_table-inl.h
+++ b/runtime/openjdkjvmti/jvmti_weak_table-inl.h
@@ -384,6 +384,23 @@
   return ERR(NONE);
 }
 
+template <typename T>
+art::mirror::Object* JvmtiWeakTable<T>::Find(T tag) {
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+  Wait(self);
+
+  for (auto& pair : tagged_objects_) {
+    if (tag == pair.second) {
+      art::mirror::Object* obj = pair.first.template Read<art::kWithReadBarrier>();
+      if (obj != nullptr) {
+        return obj;
+      }
+    }
+  }
+  return nullptr;
+}
+
 }  // namespace openjdkjvmti
 
 #endif  // ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
diff --git a/runtime/openjdkjvmti/jvmti_weak_table.h b/runtime/openjdkjvmti/jvmti_weak_table.h
index eeea75a..a6fd247 100644
--- a/runtime/openjdkjvmti/jvmti_weak_table.h
+++ b/runtime/openjdkjvmti/jvmti_weak_table.h
@@ -116,6 +116,10 @@
   void Unlock() RELEASE(allow_disallow_lock_);
   void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_);
 
+  art::mirror::Object* Find(T tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+
  protected:
   // Should HandleNullSweep be called when Sweep detects the release of an object?
   virtual bool DoesHandleNullOnSweep() {
diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc
index 7fc5104..99774c6 100644
--- a/runtime/openjdkjvmti/ti_heap.cc
+++ b/runtime/openjdkjvmti/ti_heap.cc
@@ -651,14 +651,17 @@
   art::Runtime::Current()->RemoveSystemWeakHolder(&gIndexCachingTable);
 }
 
+template <typename Callback>
 struct IterateThroughHeapData {
-  IterateThroughHeapData(HeapUtil* _heap_util,
+  IterateThroughHeapData(Callback _cb,
+                         ObjectTagTable* _tag_table,
                          jvmtiEnv* _env,
                          art::ObjPtr<art::mirror::Class> klass,
                          jint _heap_filter,
                          const jvmtiHeapCallbacks* _callbacks,
                          const void* _user_data)
-      : heap_util(_heap_util),
+      : cb(_cb),
+        tag_table(_tag_table),
         heap_filter(_heap_filter),
         filter_klass(klass),
         env(_env),
@@ -667,7 +670,72 @@
         stop_reports(false) {
   }
 
-  HeapUtil* heap_util;
+  static void ObjectCallback(art::mirror::Object* obj, void* arg)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    IterateThroughHeapData* ithd = reinterpret_cast<IterateThroughHeapData*>(arg);
+    ithd->ObjectCallback(obj);
+  }
+
+  void ObjectCallback(art::mirror::Object* obj)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    // Early return, as we can't really stop visiting.
+    if (stop_reports) {
+      return;
+    }
+
+    art::ScopedAssertNoThreadSuspension no_suspension("IterateThroughHeapCallback");
+
+    jlong tag = 0;
+    tag_table->GetTag(obj, &tag);
+
+    jlong class_tag = 0;
+    art::ObjPtr<art::mirror::Class> klass = obj->GetClass();
+    tag_table->GetTag(klass.Ptr(), &class_tag);
+    // For simplicity, even if we find a tag = 0, assume 0 = not tagged.
+
+    if (!heap_filter.ShouldReportByHeapFilter(tag, class_tag)) {
+      return;
+    }
+
+    if (filter_klass != nullptr) {
+      if (filter_klass != klass) {
+        return;
+      }
+    }
+
+    jlong size = obj->SizeOf();
+
+    jint length = -1;
+    if (obj->IsArrayInstance()) {
+      length = obj->AsArray()->GetLength();
+    }
+
+    jlong saved_tag = tag;
+    jint ret = cb(obj, callbacks, class_tag, size, &tag, length, const_cast<void*>(user_data));
+
+    if (tag != saved_tag) {
+      tag_table->Set(obj, tag);
+    }
+
+    stop_reports = (ret & JVMTI_VISIT_ABORT) != 0;
+
+    if (!stop_reports) {
+      jint string_ret = ReportString(obj, env, tag_table, callbacks, user_data);
+      stop_reports = (string_ret & JVMTI_VISIT_ABORT) != 0;
+    }
+
+    if (!stop_reports) {
+      jint array_ret = ReportPrimitiveArray(obj, env, tag_table, callbacks, user_data);
+      stop_reports = (array_ret & JVMTI_VISIT_ABORT) != 0;
+    }
+
+    if (!stop_reports) {
+      stop_reports = ReportPrimitiveField::Report(obj, tag_table, callbacks, user_data);
+    }
+  }
+
+  Callback cb;
+  ObjectTagTable* tag_table;
   const HeapFilter heap_filter;
   art::ObjPtr<art::mirror::Class> filter_klass;
   jvmtiEnv* env;
@@ -677,85 +745,14 @@
   bool stop_reports;
 };
 
-static void IterateThroughHeapObjectCallback(art::mirror::Object* obj, void* arg)
-    REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  IterateThroughHeapData* ithd = reinterpret_cast<IterateThroughHeapData*>(arg);
-  // Early return, as we can't really stop visiting.
-  if (ithd->stop_reports) {
-    return;
-  }
-
-  art::ScopedAssertNoThreadSuspension no_suspension("IterateThroughHeapCallback");
-
-  jlong tag = 0;
-  ithd->heap_util->GetTags()->GetTag(obj, &tag);
-
-  jlong class_tag = 0;
-  art::ObjPtr<art::mirror::Class> klass = obj->GetClass();
-  ithd->heap_util->GetTags()->GetTag(klass.Ptr(), &class_tag);
-  // For simplicity, even if we find a tag = 0, assume 0 = not tagged.
-
-  if (!ithd->heap_filter.ShouldReportByHeapFilter(tag, class_tag)) {
-    return;
-  }
-
-  if (ithd->filter_klass != nullptr) {
-    if (ithd->filter_klass != klass) {
-      return;
-    }
-  }
-
-  jlong size = obj->SizeOf();
-
-  jint length = -1;
-  if (obj->IsArrayInstance()) {
-    length = obj->AsArray()->GetLength();
-  }
-
-  jlong saved_tag = tag;
-  jint ret = ithd->callbacks->heap_iteration_callback(class_tag,
-                                                      size,
-                                                      &tag,
-                                                      length,
-                                                      const_cast<void*>(ithd->user_data));
-
-  if (tag != saved_tag) {
-    ithd->heap_util->GetTags()->Set(obj, tag);
-  }
-
-  ithd->stop_reports = (ret & JVMTI_VISIT_ABORT) != 0;
-
-  if (!ithd->stop_reports) {
-    jint string_ret = ReportString(obj,
-                                   ithd->env,
-                                   ithd->heap_util->GetTags(),
-                                   ithd->callbacks,
-                                   ithd->user_data);
-    ithd->stop_reports = (string_ret & JVMTI_VISIT_ABORT) != 0;
-  }
-
-  if (!ithd->stop_reports) {
-    jint array_ret = ReportPrimitiveArray(obj,
-                                          ithd->env,
-                                          ithd->heap_util->GetTags(),
-                                          ithd->callbacks,
-                                          ithd->user_data);
-    ithd->stop_reports = (array_ret & JVMTI_VISIT_ABORT) != 0;
-  }
-
-  if (!ithd->stop_reports) {
-    ithd->stop_reports = ReportPrimitiveField::Report(obj,
-                                                      ithd->heap_util->GetTags(),
-                                                      ithd->callbacks,
-                                                      ithd->user_data);
-  }
-}
-
-jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env,
-                                        jint heap_filter,
-                                        jclass klass,
-                                        const jvmtiHeapCallbacks* callbacks,
-                                        const void* user_data) {
+template <typename T>
+static jvmtiError DoIterateThroughHeap(T fn,
+                                       jvmtiEnv* env,
+                                       ObjectTagTable* tag_table,
+                                       jint heap_filter,
+                                       jclass klass,
+                                       const jvmtiHeapCallbacks* callbacks,
+                                       const void* user_data) {
   if (callbacks == nullptr) {
     return ERR(NULL_POINTER);
   }
@@ -763,18 +760,48 @@
   art::Thread* self = art::Thread::Current();
   art::ScopedObjectAccess soa(self);      // Now we know we have the shared lock.
 
-  IterateThroughHeapData ithd(this,
-                              env,
-                              soa.Decode<art::mirror::Class>(klass),
-                              heap_filter,
-                              callbacks,
-                              user_data);
+  using Iterator = IterateThroughHeapData<T>;
+  Iterator ithd(fn,
+                tag_table,
+                env,
+                soa.Decode<art::mirror::Class>(klass),
+                heap_filter,
+                callbacks,
+                user_data);
 
-  art::Runtime::Current()->GetHeap()->VisitObjects(IterateThroughHeapObjectCallback, &ithd);
+  art::Runtime::Current()->GetHeap()->VisitObjects(Iterator::ObjectCallback, &ithd);
 
   return ERR(NONE);
 }
 
+jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env,
+                                        jint heap_filter,
+                                        jclass klass,
+                                        const jvmtiHeapCallbacks* callbacks,
+                                        const void* user_data) {
+  auto JvmtiIterateHeap = [](art::mirror::Object* obj ATTRIBUTE_UNUSED,
+                             const jvmtiHeapCallbacks* cb_callbacks,
+                             jlong class_tag,
+                             jlong size,
+                             jlong* tag,
+                             jint length,
+                             void* cb_user_data)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return cb_callbacks->heap_iteration_callback(class_tag,
+                                                 size,
+                                                 tag,
+                                                 length,
+                                                 cb_user_data);
+  };
+  return DoIterateThroughHeap(JvmtiIterateHeap,
+                              env,
+                              ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get(),
+                              heap_filter,
+                              klass,
+                              callbacks,
+                              user_data);
+}
+
 class FollowReferencesHelper FINAL {
  public:
   FollowReferencesHelper(HeapUtil* h,
@@ -1400,4 +1427,136 @@
 
   return ERR(NONE);
 }
+
+static constexpr jint kHeapIdDefault = 0;
+static constexpr jint kHeapIdImage = 1;
+static constexpr jint kHeapIdZygote = 2;
+static constexpr jint kHeapIdApp = 3;
+
+static jint GetHeapId(art::ObjPtr<art::mirror::Object> obj)
+    REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  if (obj == nullptr) {
+    return -1;
+  }
+
+  art::gc::Heap* const heap = art::Runtime::Current()->GetHeap();
+  const art::gc::space::ContinuousSpace* const space =
+      heap->FindContinuousSpaceFromObject(obj, true);
+  jint heap_type = kHeapIdApp;
+  if (space != nullptr) {
+    if (space->IsZygoteSpace()) {
+      heap_type = kHeapIdZygote;
+    } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) {
+      // Only count objects in the boot image as HPROF_HEAP_IMAGE, this leaves app image objects
+      // as HPROF_HEAP_APP. b/35762934
+      heap_type = kHeapIdImage;
+    }
+  } else {
+    const auto* los = heap->GetLargeObjectsSpace();
+    if (los->Contains(obj.Ptr()) && los->IsZygoteLargeObject(art::Thread::Current(), obj.Ptr())) {
+      heap_type = kHeapIdZygote;
+    }
+  }
+  return heap_type;
+}
+
+jvmtiError HeapExtensions::GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...) {
+  if (heap_id == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  art::Thread* self = art::Thread::Current();
+
+  auto work = [&]() REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    ObjectTagTable* tag_table = ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get();
+    art::ObjPtr<art::mirror::Object> obj = tag_table->Find(tag);
+    jint heap_type = GetHeapId(obj);
+    if (heap_type == -1) {
+      return ERR(NOT_FOUND);
+    }
+    *heap_id = heap_type;
+    return ERR(NONE);
+  };
+
+  if (!art::Locks::mutator_lock_->IsSharedHeld(self)) {
+    if (!self->IsThreadSuspensionAllowable()) {
+      return ERR(INTERNAL);
+    }
+    art::ScopedObjectAccess soa(self);
+    return work();
+  } else {
+    // We cannot use SOA in this case. We might be holding the lock, but may not be in the
+    // runnable state (e.g., during GC).
+    art::Locks::mutator_lock_->AssertSharedHeld(self);
+    // TODO: Investigate why ASSERT_SHARED_CAPABILITY doesn't work.
+    auto annotalysis_workaround = [&]() NO_THREAD_SAFETY_ANALYSIS {
+      return work();
+    };
+    return annotalysis_workaround();
+  }
+}
+
+static jvmtiError CopyStringAndReturn(jvmtiEnv* env, const char* in, char** out) {
+  jvmtiError error;
+  JvmtiUniquePtr<char[]> param_name = CopyString(env, in, &error);
+  if (param_name == nullptr) {
+    return error;
+  }
+  *out = param_name.release();
+  return ERR(NONE);
+}
+
+static constexpr const char* kHeapIdDefaultName = "default";
+static constexpr const char* kHeapIdImageName = "image";
+static constexpr const char* kHeapIdZygoteName = "zygote";
+static constexpr const char* kHeapIdAppName = "app";
+
+jvmtiError HeapExtensions::GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_name, ...) {
+  switch (heap_id) {
+    case kHeapIdDefault:
+      return CopyStringAndReturn(env, kHeapIdDefaultName, heap_name);
+    case kHeapIdImage:
+      return CopyStringAndReturn(env, kHeapIdImageName, heap_name);
+    case kHeapIdZygote:
+      return CopyStringAndReturn(env, kHeapIdZygoteName, heap_name);
+    case kHeapIdApp:
+      return CopyStringAndReturn(env, kHeapIdAppName, heap_name);
+
+    default:
+      return ERR(ILLEGAL_ARGUMENT);
+  }
+}
+
+jvmtiError HeapExtensions::IterateThroughHeapExt(jvmtiEnv* env,
+                                                 jint heap_filter,
+                                                 jclass klass,
+                                                 const jvmtiHeapCallbacks* callbacks,
+                                                 const void* user_data) {
+  if (ArtJvmTiEnv::AsArtJvmTiEnv(env)->capabilities.can_tag_objects != 1) {
+    return ERR(MUST_POSSESS_CAPABILITY);
+  }
+
+  // ART extension API: Also pass the heap id.
+  auto ArtIterateHeap = [](art::mirror::Object* obj,
+                           const jvmtiHeapCallbacks* cb_callbacks,
+                           jlong class_tag,
+                           jlong size,
+                           jlong* tag,
+                           jint length,
+                           void* cb_user_data)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    jint heap_id = GetHeapId(obj);
+    using ArtExtensionAPI = jint (*)(jlong, jlong, jlong*, jint length, void*, jint);
+    return reinterpret_cast<ArtExtensionAPI>(cb_callbacks->heap_iteration_callback)(
+        class_tag, size, tag, length, cb_user_data, heap_id);
+  };
+  return DoIterateThroughHeap(ArtIterateHeap,
+                              env,
+                              ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get(),
+                              heap_filter,
+                              klass,
+                              callbacks,
+                              user_data);
+}
+
 }  // namespace openjdkjvmti
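For context, a minimal usage sketch (not part of the patch): an agent-side callback for IterateThroughHeapExt keeps the standard heap_iteration_callback parameters and receives the heap id as one extra trailing argument, matching the reinterpret_cast in ArtIterateHeap above. The names CountBytesPerHeap and bytes_per_heap are illustrative.

extern "C" jint CountBytesPerHeap(jlong class_tag,
                                  jlong size,
                                  jlong* tag_ptr,
                                  jint length,
                                  void* user_data,
                                  jint heap_id) {
  // heap_id follows the constants above: 0 default, 1 image, 2 zygote, 3 app.
  static_cast<jlong*>(user_data)[heap_id] += size;
  return 0;  // Returning a value with JVMTI_VISIT_ABORT set stops the walk.
}

// The callback is installed through the ordinary jvmtiHeapCallbacks struct;
// the extension casts heap_iteration_callback to the extended signature.
jlong bytes_per_heap[4] = {};
jvmtiHeapCallbacks callbacks = {};
callbacks.heap_iteration_callback =
    reinterpret_cast<jvmtiHeapIterationCallback>(CountBytesPerHeap);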
diff --git a/runtime/openjdkjvmti/ti_heap.h b/runtime/openjdkjvmti/ti_heap.h
index dccecb4..0c973db 100644
--- a/runtime/openjdkjvmti/ti_heap.h
+++ b/runtime/openjdkjvmti/ti_heap.h
@@ -56,6 +56,18 @@
   ObjectTagTable* tags_;
 };
 
+class HeapExtensions {
+ public:
+  static jvmtiError JNICALL GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...);
+  static jvmtiError JNICALL GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_name, ...);
+
+  static jvmtiError JNICALL IterateThroughHeapExt(jvmtiEnv* env,
+                                                  jint heap_filter,
+                                                  jclass klass,
+                                                  const jvmtiHeapCallbacks* callbacks,
+                                                  const void* user_data);
+};
+
 }  // namespace openjdkjvmti
 
 #endif  // ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_
diff --git a/runtime/os.h b/runtime/os.h
index 46d89fb..7130fc3 100644
--- a/runtime/os.h
+++ b/runtime/os.h
@@ -44,7 +44,7 @@
   static File* CreateEmptyFileWriteOnly(const char* name);
 
   // Open a file with the specified open(2) flags.
-  static File* OpenFileWithFlags(const char* name, int flags);
+  static File* OpenFileWithFlags(const char* name, int flags, bool auto_flush = true);
 
   // Check if a file exists.
   static bool FileExists(const char* name);
diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc
index 1db09b4..0add496 100644
--- a/runtime/os_linux.cc
+++ b/runtime/os_linux.cc
@@ -51,10 +51,11 @@
   return art::CreateEmptyFile(name, O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC);
 }
 
-File* OS::OpenFileWithFlags(const char* name, int flags) {
+File* OS::OpenFileWithFlags(const char* name, int flags, bool auto_flush) {
   CHECK(name != nullptr);
   bool read_only = ((flags & O_ACCMODE) == O_RDONLY);
-  std::unique_ptr<File> file(new File(name, flags, 0666, !read_only));
+  bool check_usage = !read_only && auto_flush;
+  std::unique_ptr<File> file(new File(name, flags, 0666, check_usage));
   if (!file->IsOpened()) {
     return nullptr;
   }
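A small illustration (not from the patch) of the new default parameter: with auto_flush set to false a writable file skips the usage checking that otherwise warns when it is destroyed without an explicit flush/close. The path below is made up.

// check_usage ends up false: the file is writable, but auto_flush is off.
File* file = OS::OpenFileWithFlags("/data/local/tmp/example.bin",
                                   O_RDWR | O_CREAT,
                                   /* auto_flush */ false);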
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index b866941..db10103 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -530,7 +530,7 @@
   PrepareForLongJumpToInvokeStubOrInterpreterBridge();
 }
 
-void QuickExceptionHandler::DeoptimizeSingleFrame() {
+void QuickExceptionHandler::DeoptimizeSingleFrame(DeoptimizationKind kind) {
   DCHECK(is_deoptimization_);
 
   if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) {
@@ -544,6 +544,10 @@
   // Compiled code made an explicit deoptimization.
   ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
   DCHECK(deopt_method != nullptr);
+  LOG(INFO) << "Deoptimizing "
+            << deopt_method->PrettyMethod()
+            << " due to "
+            << GetDeoptimizationKindName(kind);
   if (Runtime::Current()->UseJitCompilation()) {
     Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
         deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index 3ead7db..8090f9b 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -20,6 +20,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "deoptimization_kind.h"
 #include "stack_reference.h"
 
 namespace art {
@@ -62,7 +63,7 @@
   //   the result of IsDeoptimizeable().
   // - It can be either full-fragment, or partial-fragment deoptimization, depending
   //   on whether that single frame covers full or partial fragment.
-  void DeoptimizeSingleFrame() REQUIRES_SHARED(Locks::mutator_lock_);
+  void DeoptimizeSingleFrame(DeoptimizationKind kind) REQUIRES_SHARED(Locks::mutator_lock_);
 
   void DeoptimizePartialFragmentFixup(uintptr_t return_pc)
       REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index d3859b0..dbe7f5c 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -182,6 +182,26 @@
   }
 }
 
+template <typename MirrorType>
+inline MirrorType* ReadBarrier::IsMarked(MirrorType* ref) {
+  // Only read-barrier configurations can have mutators run while
+  // the GC is marking.
+  if (!kUseReadBarrier) {
+    return ref;
+  }
+  // IsMarked does not handle null, so handle it here.
+  if (ref == nullptr) {
+    return nullptr;
+  }
+  // IsMarked should only be called when the GC is marking.
+  if (!Thread::Current()->GetIsGcMarking()) {
+    return ref;
+  }
+
+  return reinterpret_cast<MirrorType*>(
+      Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarked(ref));
+}
+
 inline bool ReadBarrier::IsDuringStartup() {
   gc::Heap* heap = Runtime::Current()->GetHeap();
   if (heap == nullptr) {
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index cbc2697..2964090 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -64,6 +64,11 @@
                                                   GcRootSource* gc_root_source = nullptr)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Return the mirror Object if it is marked, or null if not.
+  template <typename MirrorType>
+  ALWAYS_INLINE static MirrorType* IsMarked(MirrorType* ref)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   static bool IsDuringStartup();
 
   // Without the holder object.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index b1acec6..3697f21 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -262,6 +262,9 @@
   std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u);
   interpreter::CheckInterpreterAsmConstants();
   callbacks_.reset(new RuntimeCallbacks());
+  for (size_t i = 0; i <= static_cast<size_t>(DeoptimizationKind::kLast); ++i) {
+    deoptimization_counts_[i] = 0u;
+  }
 }
 
 Runtime::~Runtime() {
@@ -387,6 +390,7 @@
   low_4gb_arena_pool_.reset();
   arena_pool_.reset();
   jit_arena_pool_.reset();
+  protected_fault_page_.reset();
   MemMap::Shutdown();
 
   // TODO: acquire a static mutex on Runtime to avoid racing.
@@ -1182,12 +1186,6 @@
 
   if (!no_sig_chain_) {
     // Dex2Oat's Runtime does not need the signal chain or the fault handler.
-
-    // Initialize the signal chain so that any calls to sigaction get
-    // correctly routed to the next in the chain regardless of whether we
-    // have claimed the signal or not.
-    InitializeSignalChain();
-
     if (implicit_null_checks_ || implicit_so_checks_ || implicit_suspend_checks_) {
       fault_manager.Init();
 
@@ -1403,6 +1401,27 @@
     callbacks_->NextRuntimePhase(RuntimePhaseCallback::RuntimePhase::kInitialAgents);
   }
 
+  // Try to reserve a dedicated fault page. This is allocated for clobbered registers and sentinels.
+  // If we cannot reserve it, log a warning.
+  // Note: This is allocated last so that the heap and other things have priority, if necessary.
+  {
+    constexpr uintptr_t kSentinelAddr =
+        RoundDown(static_cast<uintptr_t>(Context::kBadGprBase), kPageSize);
+    protected_fault_page_.reset(MemMap::MapAnonymous("Sentinel fault page",
+                                                     reinterpret_cast<uint8_t*>(kSentinelAddr),
+                                                     kPageSize,
+                                                     PROT_NONE,
+                                                     true,
+                                                     false,
+                                                     &error_msg));
+    if (protected_fault_page_ == nullptr) {
+      LOG(WARNING) << "Could not reserve sentinel fault page: " << error_msg;
+    } else if (reinterpret_cast<uintptr_t>(protected_fault_page_->Begin()) != kSentinelAddr) {
+      LOG(WARNING) << "Could not reserve sentinel fault page at the right address.";
+      protected_fault_page_.reset();
+    }
+  }
+
   VLOG(startup) << "Runtime::Init exiting";
 
   return true;
@@ -1575,6 +1594,23 @@
   register_sun_misc_Unsafe(env);
 }
 
+std::ostream& operator<<(std::ostream& os, const DeoptimizationKind& kind) {
+  os << GetDeoptimizationKindName(kind);
+  return os;
+}
+
+void Runtime::DumpDeoptimizations(std::ostream& os) {
+  for (size_t i = 0; i <= static_cast<size_t>(DeoptimizationKind::kLast); ++i) {
+    if (deoptimization_counts_[i] != 0) {
+      os << "Number of "
+         << GetDeoptimizationKindName(static_cast<DeoptimizationKind>(i))
+         << " deoptimizations: "
+         << deoptimization_counts_[i]
+         << "\n";
+    }
+  }
+}
+
 void Runtime::DumpForSigQuit(std::ostream& os) {
   GetClassLinker()->DumpForSigQuit(os);
   GetInternTable()->DumpForSigQuit(os);
@@ -1586,6 +1622,7 @@
   } else {
     os << "Running non JIT\n";
   }
+  DumpDeoptimizations(os);
   TrackedAllocators::Dump(os);
   os << "\n";
 
@@ -1795,11 +1832,6 @@
   thread_list_->VisitRoots(visitor, flags);
 }
 
-size_t Runtime::FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
-                                gc::collector::GarbageCollector* collector) {
-  return thread_list_->FlipThreadRoots(thread_flip_visitor, flip_callback, collector);
-}
-
 void Runtime::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) {
   VisitNonConcurrentRoots(visitor, flags);
   VisitConcurrentRoots(visitor, flags);
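Illustration only: the same sentinel idea expressed with raw mmap(2). The patch goes through MemMap so the reservation is tracked like other ART mappings, but the effect is one inaccessible page at the bad-GPR address so stray dereferences fault deterministically; kSentinelAddr and kPageSize stand for the values computed above.

#include <sys/mman.h>

static void ReserveSentinelPageSketch() {
  void* page = mmap(reinterpret_cast<void*>(kSentinelAddr), kPageSize, PROT_NONE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (page == MAP_FAILED || reinterpret_cast<uintptr_t>(page) != kSentinelAddr) {
    // Hint not honored or mapping failed: give the page back and continue
    // without a sentinel, mirroring the warning path above.
    if (page != MAP_FAILED) {
      munmap(page, kPageSize);
    }
  }
}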
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 3ba0f2c..2e3b8d7 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -29,6 +29,7 @@
 #include "arch/instruction_set.h"
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "deoptimization_kind.h"
 #include "dex_file_types.h"
 #include "experimental_flags.h"
 #include "gc_root.h"
@@ -47,9 +48,6 @@
 namespace gc {
   class AbstractSystemWeakHolder;
   class Heap;
-  namespace collector {
-    class GarbageCollector;
-  }  // namespace collector
 }  // namespace gc
 
 namespace jit {
@@ -78,12 +76,12 @@
 class ArtMethod;
 class ClassHierarchyAnalysis;
 class ClassLinker;
-class Closure;
 class CompilerCallbacks;
 class DexFile;
 class InternTable;
 class JavaVMExt;
 class LinearAlloc;
+class MemMap;
 class MonitorList;
 class MonitorPool;
 class NullPointerHandler;
@@ -235,6 +233,7 @@
   // Detaches the current native thread from the runtime.
   void DetachCurrentThread() REQUIRES(!Locks::mutator_lock_);
 
+  void DumpDeoptimizations(std::ostream& os);
   void DumpForSigQuit(std::ostream& os);
   void DumpLockHolders(std::ostream& os);
 
@@ -338,11 +337,6 @@
   void VisitTransactionRoots(RootVisitor* visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Flip thread roots from from-space refs to to-space refs.
-  size_t FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
-                         gc::collector::GarbageCollector* collector)
-      REQUIRES(!Locks::mutator_lock_);
-
   // Sweep system weaks, the system weak is deleted if the visitor return null. Otherwise, the
   // system weak is updated to be the visitor's returned value.
   void SweepSystemWeaks(IsMarkedVisitor* visitor)
@@ -682,6 +676,11 @@
     dump_gc_performance_on_shutdown_ = value;
   }
 
+  void IncrementDeoptimizationCount(DeoptimizationKind kind) {
+    DCHECK_LE(kind, DeoptimizationKind::kLast);
+    deoptimization_counts_[static_cast<size_t>(kind)]++;
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -941,6 +940,11 @@
 
   std::unique_ptr<RuntimeCallbacks> callbacks_;
 
+  std::atomic<uint32_t> deoptimization_counts_[
+      static_cast<uint32_t>(DeoptimizationKind::kLast) + 1];
+
+  std::unique_ptr<MemMap> protected_fault_page_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/runtime_callbacks_test.cc b/runtime/runtime_callbacks_test.cc
index abe99e0..640f9ce 100644
--- a/runtime/runtime_callbacks_test.cc
+++ b/runtime/runtime_callbacks_test.cc
@@ -335,6 +335,9 @@
 };
 
 TEST_F(RuntimeSigQuitCallbackRuntimeCallbacksTest, SigQuit) {
+  // SigQuit induces a dump. ASAN isn't happy with libunwind reading memory.
+  TEST_DISABLED_FOR_MEMORY_TOOL_ASAN();
+
   // The runtime needs to be started for the signal handler.
   Thread* self = Thread::Current();
 
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index 3826433..faea7b3 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -115,7 +115,7 @@
 
     for (uint32_t i = 0; i < kMaxRetries; ++i) {
         std::srand(NanoTime());
-        // Sample output for PID 1234 : /data/anr-pid1234-cafeffee.txt
+        // Sample output for PID 1234 : /data/anr/anr-pid1234-cafeffee.txt
         const std::string file_name = android::base::StringPrintf(
             "%s/anr-pid%" PRId32 "-%08" PRIx32 ".txt",
             stack_trace_dir_.c_str(),
@@ -135,19 +135,19 @@
 }
 
 void SignalCatcher::Output(const std::string& s) {
-  const std::string stack_trace_file = GetStackTraceFileName();
-  if (stack_trace_file.empty()) {
+  const std::string output_file = GetStackTraceFileName();
+  if (output_file.empty()) {
     LOG(INFO) << s;
     return;
   }
 
   ScopedThreadStateChange tsc(Thread::Current(), kWaitingForSignalCatcherOutput);
-  int fd = open(stack_trace_file.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666);
+  int fd = open(output_file.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666);
   if (fd == -1) {
-    PLOG(ERROR) << "Unable to open stack trace file '" << stack_trace_file_ << "'";
+    PLOG(ERROR) << "Unable to open stack trace file '" << output_file << "'";
     return;
   }
-  std::unique_ptr<File> file(new File(fd, stack_trace_file, true));
+  std::unique_ptr<File> file(new File(fd, output_file, true));
   bool success = file->WriteFully(s.data(), s.size());
   if (success) {
     success = file->FlushCloseOrErase() == 0;
@@ -155,9 +155,9 @@
     file->Erase();
   }
   if (success) {
-    LOG(INFO) << "Wrote stack traces to '" << stack_trace_file << "'";
+    LOG(INFO) << "Wrote stack traces to '" << output_file << "'";
   } else {
-    PLOG(ERROR) << "Failed to write stack traces to '" << stack_trace_file << "'";
+    PLOG(ERROR) << "Failed to write stack traces to '" << output_file << "'";
   }
 }
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 653a9bd..6848686 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1964,7 +1964,6 @@
 Thread::Thread(bool daemon)
     : tls32_(daemon),
       wait_monitor_(nullptr),
-      interrupted_(false),
       custom_tls_(nullptr),
       can_call_into_java_(true) {
   wait_mutex_ = new Mutex("a thread wait mutex");
@@ -1976,6 +1975,7 @@
                 "art::Thread has a size which is not a multiple of 4.");
   tls32_.state_and_flags.as_struct.flags = 0;
   tls32_.state_and_flags.as_struct.state = kNative;
+  tls32_.interrupted.StoreRelaxed(false);
   memset(&tlsPtr_.held_mutexes[0], 0, sizeof(tlsPtr_.held_mutexes));
   std::fill(tlsPtr_.rosalloc_runs,
             tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBracketsInThread,
@@ -2269,24 +2269,26 @@
 
 // Implements java.lang.Thread.interrupted.
 bool Thread::Interrupted() {
-  MutexLock mu(Thread::Current(), *wait_mutex_);
-  bool interrupted = IsInterruptedLocked();
-  SetInterruptedLocked(false);
+  DCHECK_EQ(Thread::Current(), this);
+  // No other thread can concurrently reset the interrupted flag.
+  bool interrupted = tls32_.interrupted.LoadSequentiallyConsistent();
+  if (interrupted) {
+    tls32_.interrupted.StoreSequentiallyConsistent(false);
+  }
   return interrupted;
 }
 
 // Implements java.lang.Thread.isInterrupted.
 bool Thread::IsInterrupted() {
-  MutexLock mu(Thread::Current(), *wait_mutex_);
-  return IsInterruptedLocked();
+  return tls32_.interrupted.LoadSequentiallyConsistent();
 }
 
 void Thread::Interrupt(Thread* self) {
   MutexLock mu(self, *wait_mutex_);
-  if (interrupted_) {
+  if (tls32_.interrupted.LoadSequentiallyConsistent()) {
     return;
   }
-  interrupted_ = true;
+  tls32_.interrupted.StoreSequentiallyConsistent(true);
   NotifyLocked(self);
 }
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 6abde5b..a60fd58 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -487,15 +487,12 @@
   }
 
   // Implements java.lang.Thread.interrupted.
-  bool Interrupted() REQUIRES(!*wait_mutex_);
+  bool Interrupted();
   // Implements java.lang.Thread.isInterrupted.
-  bool IsInterrupted() REQUIRES(!*wait_mutex_);
-  bool IsInterruptedLocked() REQUIRES(wait_mutex_) {
-    return interrupted_;
-  }
+  bool IsInterrupted();
   void Interrupt(Thread* self) REQUIRES(!*wait_mutex_);
-  void SetInterruptedLocked(bool i) REQUIRES(wait_mutex_) {
-    interrupted_ = i;
+  void SetInterrupted(bool i) {
+    tls32_.interrupted.StoreSequentiallyConsistent(i);
   }
   void Notify() REQUIRES(!*wait_mutex_);
 
@@ -580,6 +577,13 @@
   }
 
   template<PointerSize pointer_size>
+  static ThreadOffset<pointer_size> InterruptedOffset() {
+    return ThreadOffset<pointer_size>(
+        OFFSETOF_MEMBER(Thread, tls32_) +
+        OFFSETOF_MEMBER(tls_32bit_sized_values, interrupted));
+  }
+
+  template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> ThreadFlagsOffset() {
     return ThreadOffset<pointer_size>(
         OFFSETOF_MEMBER(Thread, tls32_) +
@@ -1432,6 +1436,9 @@
     // GC roots.
     bool32_t is_gc_marking;
 
+    // Thread "interrupted" status; stays raised until queried or thrown.
+    Atomic<bool32_t> interrupted;
+
     // True if the thread is allowed to access a weak ref (Reference::GetReferent() and system
     // weaks) and to potentially mark an object alive/gray. This is used for concurrent reference
     // processing of the CC collector only. This is thread local so that we can enable/disable weak
@@ -1631,7 +1638,7 @@
     gc::accounting::AtomicStack<mirror::Object>* thread_local_mark_stack;
   } tlsPtr_;
 
-  // Guards the 'interrupted_' and 'wait_monitor_' members.
+  // Guards the 'wait_monitor_' member.
   Mutex* wait_mutex_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
   // Condition variable waited upon during a wait.
@@ -1639,9 +1646,6 @@
   // Pointer to the monitor lock we're currently waiting on or null if not waiting.
   Monitor* wait_monitor_ GUARDED_BY(wait_mutex_);
 
-  // Thread "interrupted" status; stays raised until queried or thrown.
-  bool interrupted_ GUARDED_BY(wait_mutex_);
-
   // Debug disable read barrier count, only is checked for debug builds and only in the runtime.
   uint8_t debug_disallow_read_barrier_ = 0;
 
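The thread.h/thread.cc changes above turn the interrupted flag into a lock-free atomic in tls32_ so that Thread.interrupted/isInterrupted no longer take wait_mutex_. A self-contained sketch of the same pattern with std::atomic<bool> (ART uses its own Atomic<> wrapper and sequentially consistent loads/stores, as in thread.cc):

#include <atomic>

class InterruptFlag {
 public:
  // java.lang.Thread.interrupted(): read-and-clear. Only the owning thread
  // calls this, so nobody can race on the clear.
  bool ConsumeInterrupted() {
    bool was_set = flag_.load(std::memory_order_seq_cst);
    if (was_set) {
      flag_.store(false, std::memory_order_seq_cst);
    }
    return was_set;
  }

  // java.lang.Thread.isInterrupted(): plain read, callable from any thread.
  bool IsInterrupted() const { return flag_.load(std::memory_order_seq_cst); }

  // java.lang.Thread.interrupt(): set from any thread. The waiter is still
  // notified under the wait mutex, which the flag itself no longer needs.
  void SetInterrupted() { flag_.store(true, std::memory_order_seq_cst); }

 private:
  std::atomic<bool> flag_{false};
};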
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index b63eaa4..dc2af2a 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -34,6 +34,7 @@
 #include "base/timing_logger.h"
 #include "debugger.h"
 #include "gc/collector/concurrent_copying.h"
+#include "gc/gc_pause_listener.h"
 #include "gc/reference_processor.h"
 #include "jni_internal.h"
 #include "lock_word.h"
@@ -528,7 +529,8 @@
 // invariant.
 size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
                                    Closure* flip_callback,
-                                   gc::collector::GarbageCollector* collector) {
+                                   gc::collector::GarbageCollector* collector,
+                                   gc::GcPauseListener* pause_listener) {
   TimingLogger::ScopedTiming split("ThreadListFlip", collector->GetTimings());
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertNotHeld(self);
@@ -542,6 +544,9 @@
   // pause.
   const uint64_t suspend_start_time = NanoTime();
   SuspendAllInternal(self, self, nullptr);
+  if (pause_listener != nullptr) {
+    pause_listener->StartPause();
+  }
 
   // Run the flip callback for the collector.
   Locks::mutator_lock_->ExclusiveLock(self);
@@ -549,6 +554,9 @@
   flip_callback->Run(self);
   Locks::mutator_lock_->ExclusiveUnlock(self);
   collector->RegisterPause(NanoTime() - suspend_start_time);
+  if (pause_listener != nullptr) {
+    pause_listener->EndPause();
+  }
 
   // Resume runnable threads.
   size_t runnable_thread_count = 0;
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 14bef5e..3375746 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -35,6 +35,7 @@
   namespace collector {
     class GarbageCollector;
   }  // namespace collector
+  class GcPauseListener;
 }  // namespace gc
 class Closure;
 class Thread;
@@ -121,7 +122,8 @@
   // the concurrent copying collector.
   size_t FlipThreadRoots(Closure* thread_flip_visitor,
                          Closure* flip_callback,
-                         gc::collector::GarbageCollector* collector)
+                         gc::collector::GarbageCollector* collector,
+                         gc::GcPauseListener* pause_listener)
       REQUIRES(!Locks::mutator_lock_,
                !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
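A minimal sketch (illustrative, not in the patch) of a listener that uses the new hook: FlipThreadRoots calls StartPause()/EndPause() around the flip pause, so a trivial implementation can time it. The class name and body are assumptions; only the GcPauseListener interface comes from gc/gc_pause_listener.h.

class TimingPauseListener : public gc::GcPauseListener {
 public:
  void StartPause() OVERRIDE { start_ns_ = NanoTime(); }
  void EndPause() OVERRIDE {
    LOG(INFO) << "Thread flip pause: " << PrettyDuration(NanoTime() - start_ns_);
  }

 private:
  uint64_t start_ns_ = 0u;
};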
diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc
index e93f04d..842aa04 100644
--- a/runtime/vdex_file.cc
+++ b/runtime/vdex_file.cc
@@ -28,6 +28,7 @@
 
 namespace art {
 
+constexpr uint8_t VdexFile::Header::kVdexInvalidMagic[4];
 constexpr uint8_t VdexFile::Header::kVdexMagic[4];
 constexpr uint8_t VdexFile::Header::kVdexVersion[4];
 
diff --git a/runtime/vdex_file.h b/runtime/vdex_file.h
index 9c0d9db..ece5491 100644
--- a/runtime/vdex_file.h
+++ b/runtime/vdex_file.h
@@ -61,6 +61,8 @@
     uint32_t GetQuickeningInfoSize() const { return quickening_info_size_; }
     uint32_t GetNumberOfDexFiles() const { return number_of_dex_files_; }
 
+    static constexpr uint8_t kVdexInvalidMagic[] = { 'w', 'd', 'e', 'x' };
+
    private:
     static constexpr uint8_t kVdexMagic[] = { 'v', 'd', 'e', 'x' };
     static constexpr uint8_t kVdexVersion[] = { '0', '0', '5', '\0' };  // access flags
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index cb9c605..81bf293 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -33,7 +33,6 @@
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "dex_instruction_utils.h"
-#include "dex_instruction_visitor.h"
 #include "experimental_flags.h"
 #include "gc/accounting/card_table-inl.h"
 #include "handle_scope-inl.h"
@@ -1111,8 +1110,9 @@
   GetInstructionFlags(0).SetCompileTimeInfoPoint();
 
   uint32_t insns_size = code_item_->insns_size_in_code_units_;
+  bool allow_runtime_only_instructions = !Runtime::Current()->IsAotCompiler() || verify_to_dump_;
   for (uint32_t dex_pc = 0; dex_pc < insns_size;) {
-    if (!VerifyInstruction(inst, dex_pc)) {
+    if (!VerifyInstruction(inst, dex_pc, allow_runtime_only_instructions)) {
       DCHECK_NE(failures_.size(), 0U);
       return false;
     }
@@ -1139,8 +1139,10 @@
   return true;
 }
 
-bool MethodVerifier::VerifyInstruction(const Instruction* inst, uint32_t code_offset) {
-  if (UNLIKELY(inst->IsExperimental())) {
+bool MethodVerifier::VerifyInstruction(const Instruction* inst,
+                                       uint32_t code_offset,
+                                       bool allow_runtime_only_instructions) {
+  if (Instruction::kHaveExperimentalInstructions && UNLIKELY(inst->IsExperimental())) {
     // Experimental instructions don't yet have verifier support implementation.
     // While it is possible to use them by themselves, when we try to use stable instructions
     // with a virtual register that was created by an experimental instruction,
@@ -1248,7 +1250,7 @@
       result = false;
       break;
   }
-  if (inst->GetVerifyIsRuntimeOnly() && Runtime::Current()->IsAotCompiler() && !verify_to_dump_) {
+  if (!allow_runtime_only_instructions && inst->GetVerifyIsRuntimeOnly()) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "opcode only expected at runtime " << inst->Name();
     result = false;
   }
@@ -4345,7 +4347,7 @@
     }
   }
 
-  if (method_type == METHOD_POLYMORPHIC) {
+  if (UNLIKELY(method_type == METHOD_POLYMORPHIC)) {
     // Process the signature of the calling site that is invoking the method handle.
     DexFileParameterIterator it(*dex_file_, dex_file_->GetProtoId(inst->VRegH()));
     return VerifyInvocationArgsFromIterator(&it, inst, method_type, is_range, res_method);
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 26dc15e..b34a3af 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -390,7 +390,9 @@
    * - (earlier) for each exception handler, the handler must start at a valid
    *   instruction
    */
-  bool VerifyInstruction(const Instruction* inst, uint32_t code_offset);
+  bool VerifyInstruction(const Instruction* inst,
+                         uint32_t code_offset,
+                         bool allow_runtime_only_instructions);
 
   /* Ensure that the register index is valid for this code item. */
   bool CheckRegisterIndex(uint32_t idx);
diff --git a/runtime/verifier/reg_type-inl.h b/runtime/verifier/reg_type-inl.h
index aa4a259..9245828 100644
--- a/runtime/verifier/reg_type-inl.h
+++ b/runtime/verifier/reg_type-inl.h
@@ -71,59 +71,62 @@
   if (lhs.Equals(rhs)) {
     return true;
   } else {
-    if (lhs.IsBoolean()) {
-      return rhs.IsBooleanTypes();
-    } else if (lhs.IsByte()) {
-      return rhs.IsByteTypes();
-    } else if (lhs.IsShort()) {
-      return rhs.IsShortTypes();
-    } else if (lhs.IsChar()) {
-      return rhs.IsCharTypes();
-    } else if (lhs.IsInteger()) {
-      return rhs.IsIntegralTypes();
-    } else if (lhs.IsFloat()) {
-      return rhs.IsFloatTypes();
-    } else if (lhs.IsLongLo()) {
-      return rhs.IsLongTypes();
-    } else if (lhs.IsDoubleLo()) {
-      return rhs.IsDoubleTypes();
-    } else if (lhs.IsConflict()) {
-      LOG(WARNING) << "RegType::AssignableFrom lhs is Conflict!";
-      return false;
-    } else {
-      CHECK(lhs.IsReferenceTypes())
-          << "Unexpected register type in IsAssignableFrom: '"
-          << lhs << "' := '" << rhs << "'";
-      if (rhs.IsZero()) {
-        return true;  // All reference types can be assigned null.
-      } else if (!rhs.IsReferenceTypes()) {
-        return false;  // Expect rhs to be a reference type.
-      } else if (lhs.IsUninitializedTypes() || rhs.IsUninitializedTypes()) {
-        // Uninitialized types are only allowed to be assigned to themselves.
-        // TODO: Once we have a proper "reference" super type, this needs to be extended.
+    switch (lhs.GetAssignmentType()) {
+      case AssignmentType::kBoolean:
+        return rhs.IsBooleanTypes();
+      case AssignmentType::kByte:
+        return rhs.IsByteTypes();
+      case AssignmentType::kShort:
+        return rhs.IsShortTypes();
+      case AssignmentType::kChar:
+        return rhs.IsCharTypes();
+      case AssignmentType::kInteger:
+        return rhs.IsIntegralTypes();
+      case AssignmentType::kFloat:
+        return rhs.IsFloatTypes();
+      case AssignmentType::kLongLo:
+        return rhs.IsLongTypes();
+      case AssignmentType::kDoubleLo:
+        return rhs.IsDoubleTypes();
+      case AssignmentType::kConflict:
+        LOG(WARNING) << "RegType::AssignableFrom lhs is Conflict!";
         return false;
-      } else if (lhs.IsJavaLangObject()) {
-        return true;  // All reference types can be assigned to Object.
-      } else if (!strict && !lhs.IsUnresolvedTypes() && lhs.GetClass()->IsInterface()) {
-        // If we're not strict allow assignment to any interface, see comment in ClassJoin.
-        return true;
-      } else if (lhs.IsJavaLangObjectArray()) {
-        return rhs.IsObjectArrayTypes();  // All reference arrays may be assigned to Object[]
-      } else if (lhs.HasClass() && rhs.HasClass()) {
-        // Test assignability from the Class point-of-view.
-        bool result = lhs.GetClass()->IsAssignableFrom(rhs.GetClass());
-        // Record assignability dependency. The `verifier` is null during unit tests and
-        // VerifiedMethod::GenerateSafeCastSet.
-        if (verifier != nullptr) {
-          VerifierDeps::MaybeRecordAssignability(
-              verifier->GetDexFile(), lhs.GetClass(), rhs.GetClass(), strict, result);
+      case AssignmentType::kReference:
+        if (rhs.IsZero()) {
+          return true;  // All reference types can be assigned null.
+        } else if (!rhs.IsReferenceTypes()) {
+          return false;  // Expect rhs to be a reference type.
+        } else if (lhs.IsUninitializedTypes() || rhs.IsUninitializedTypes()) {
+          // Uninitialized types are only allowed to be assigned to themselves.
+          // TODO: Once we have a proper "reference" super type, this needs to be extended.
+          return false;
+        } else if (lhs.IsJavaLangObject()) {
+          return true;  // All reference types can be assigned to Object.
+        } else if (!strict && !lhs.IsUnresolvedTypes() && lhs.GetClass()->IsInterface()) {
+          // If we're not strict allow assignment to any interface, see comment in ClassJoin.
+          return true;
+        } else if (lhs.IsJavaLangObjectArray()) {
+          return rhs.IsObjectArrayTypes();  // All reference arrays may be assigned to Object[]
+        } else if (lhs.HasClass() && rhs.HasClass()) {
+          // Test assignability from the Class point-of-view.
+          bool result = lhs.GetClass()->IsAssignableFrom(rhs.GetClass());
+          // Record assignability dependency. The `verifier` is null during unit tests and
+          // VerifiedMethod::GenerateSafeCastSet.
+          if (verifier != nullptr) {
+            VerifierDeps::MaybeRecordAssignability(
+                verifier->GetDexFile(), lhs.GetClass(), rhs.GetClass(), strict, result);
+          }
+          return result;
+        } else {
+          // Unresolved types are only assignable for null and equality.
+          return false;
         }
-        return result;
-      } else {
-        // Unresolved types are only assignable for null and equality.
-        return false;
-      }
+      case AssignmentType::kNotAssignable:
+        break;
     }
+    LOG(FATAL) << "Unexpected register type in IsAssignableFrom: '"
+               << lhs << "' := '" << rhs << "'";
+    UNREACHABLE();
   }
 }
 
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index dedf77f..25baac5 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -268,6 +268,52 @@
   static void* operator new(size_t size, ArenaAllocator* arena) = delete;
   static void* operator new(size_t size, ScopedArenaAllocator* arena);
 
+  enum class AssignmentType {
+    kBoolean,
+    kByte,
+    kShort,
+    kChar,
+    kInteger,
+    kFloat,
+    kLongLo,
+    kDoubleLo,
+    kConflict,
+    kReference,
+    kNotAssignable,
+  };
+
+  ALWAYS_INLINE
+  inline AssignmentType GetAssignmentType() const {
+    AssignmentType t = GetAssignmentTypeImpl();
+    if (kIsDebugBuild) {
+      if (IsBoolean()) {
+        CHECK(AssignmentType::kBoolean == t);
+      } else if (IsByte()) {
+        CHECK(AssignmentType::kByte == t);
+      } else if (IsShort()) {
+        CHECK(AssignmentType::kShort == t);
+      } else if (IsChar()) {
+        CHECK(AssignmentType::kChar == t);
+      } else if (IsInteger()) {
+        CHECK(AssignmentType::kInteger == t);
+      } else if (IsFloat()) {
+        CHECK(AssignmentType::kFloat == t);
+      } else if (IsLongLo()) {
+        CHECK(AssignmentType::kLongLo == t);
+      } else if (IsDoubleLo()) {
+        CHECK(AssignmentType::kDoubleLo == t);
+      } else if (IsConflict()) {
+        CHECK(AssignmentType::kConflict == t);
+      } else if (IsReferenceTypes()) {
+        CHECK(AssignmentType::kReference == t);
+      } else {
+        LOG(FATAL) << "Unreachable";
+        UNREACHABLE();
+      }
+    }
+    return t;
+  }
+
  protected:
   RegType(mirror::Class* klass,
           const StringPiece& descriptor,
@@ -285,6 +331,8 @@
     }
   }
 
+  virtual AssignmentType GetAssignmentTypeImpl() const = 0;
+
   const StringPiece descriptor_;
   mutable GcRoot<mirror::Class> klass_;  // Non-const only due to moving classes.
   const uint16_t cache_id_;
@@ -341,6 +389,10 @@
   // Destroy the singleton instance.
   static void Destroy();
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kConflict;
+  }
+
  private:
   ConflictType(mirror::Class* klass, const StringPiece& descriptor,
                uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -372,6 +424,10 @@
   // Destroy the singleton instance.
   static void Destroy();
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kNotAssignable;
+  }
+
  private:
   UndefinedType(mirror::Class* klass, const StringPiece& descriptor,
                 uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -407,6 +463,10 @@
   static const IntegerType* GetInstance() PURE;
   static void Destroy();
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kInteger;
+  }
+
  private:
   IntegerType(mirror::Class* klass, const StringPiece& descriptor,
               uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -427,6 +487,10 @@
   static const BooleanType* GetInstance() PURE;
   static void Destroy();
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kBoolean;
+  }
+
  private:
   BooleanType(mirror::Class* klass, const StringPiece& descriptor,
               uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -448,6 +512,10 @@
   static const ByteType* GetInstance() PURE;
   static void Destroy();
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kByte;
+  }
+
  private:
   ByteType(mirror::Class* klass, const StringPiece& descriptor,
            uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -468,6 +536,10 @@
   static const ShortType* GetInstance() PURE;
   static void Destroy();
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kShort;
+  }
+
  private:
   ShortType(mirror::Class* klass, const StringPiece& descriptor,
             uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -488,6 +560,10 @@
   static const CharType* GetInstance() PURE;
   static void Destroy();
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kChar;
+  }
+
  private:
   CharType(mirror::Class* klass, const StringPiece& descriptor,
            uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -508,6 +584,10 @@
   static const FloatType* GetInstance() PURE;
   static void Destroy();
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kFloat;
+  }
+
  private:
   FloatType(mirror::Class* klass, const StringPiece& descriptor,
             uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -535,6 +615,10 @@
   static const LongLoType* GetInstance() PURE;
   static void Destroy();
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kLongLo;
+  }
+
  private:
   LongLoType(mirror::Class* klass, const StringPiece& descriptor,
              uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -555,6 +639,10 @@
   static const LongHiType* GetInstance() PURE;
   static void Destroy();
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kNotAssignable;
+  }
+
  private:
   LongHiType(mirror::Class* klass, const StringPiece& descriptor,
              uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -576,6 +664,10 @@
   static const DoubleLoType* GetInstance() PURE;
   static void Destroy();
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kDoubleLo;
+  }
+
  private:
   DoubleLoType(mirror::Class* klass, const StringPiece& descriptor,
                uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -596,6 +688,10 @@
   static const DoubleHiType* GetInstance() PURE;
   static void Destroy();
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kNotAssignable;
+  }
+
  private:
   DoubleHiType(mirror::Class* klass, const StringPiece& descriptor,
                uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -658,6 +754,10 @@
   }
   virtual bool IsConstantTypes() const OVERRIDE { return true; }
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kNotAssignable;
+  }
+
  private:
   const uint32_t constant_;
 };
@@ -673,6 +773,10 @@
   bool IsPreciseConstant() const OVERRIDE { return true; }
 
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
+
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kNotAssignable;
+  }
 };
 
 class PreciseConstLoType FINAL : public ConstantType {
@@ -684,6 +788,10 @@
   }
   bool IsPreciseConstantLo() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
+
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kNotAssignable;
+  }
 };
 
 class PreciseConstHiType FINAL : public ConstantType {
@@ -695,6 +803,10 @@
   }
   bool IsPreciseConstantHi() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
+
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kNotAssignable;
+  }
 };
 
 class ImpreciseConstType FINAL : public ConstantType {
@@ -706,6 +818,10 @@
   }
   bool IsImpreciseConstant() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
+
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kNotAssignable;
+  }
 };
 
 class ImpreciseConstLoType FINAL : public ConstantType {
@@ -717,6 +833,10 @@
   }
   bool IsImpreciseConstantLo() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
+
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kNotAssignable;
+  }
 };
 
 class ImpreciseConstHiType FINAL : public ConstantType {
@@ -728,6 +848,10 @@
   }
   bool IsImpreciseConstantHi() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
+
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kNotAssignable;
+  }
 };
 
 // Common parent of all uninitialized types. Uninitialized types are created by
@@ -747,6 +871,10 @@
     return allocation_pc_;
   }
 
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kReference;
+  }
+
  private:
   const uint32_t allocation_pc_;
 };
@@ -848,6 +976,10 @@
   bool HasClassVirtual() const OVERRIDE { return true; }
 
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
+
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kReference;
+  }
 };
 
 // A type of register holding a reference to an Object of type GetClass and only
@@ -866,6 +998,10 @@
   bool HasClassVirtual() const OVERRIDE { return true; }
 
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
+
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kReference;
+  }
 };
 
 // Common parent of unresolved types.
@@ -876,6 +1012,10 @@
       : RegType(nullptr, descriptor, cache_id) {}
 
   bool IsNonZeroReferenceTypes() const OVERRIDE;
+
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kReference;
+  }
 };
 
 // Similar to ReferenceType except the Class couldn't be loaded. Assignability
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index f4799d2..df4372f 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -93,6 +93,35 @@
 static decltype(&sigaction) linked_sigaction;
 static decltype(&sigprocmask) linked_sigprocmask;
 
+__attribute__((constructor)) static void InitializeSignalChain() {
+  static std::once_flag once;
+  std::call_once(once, []() {
+    void* linked_sigaction_sym = dlsym(RTLD_NEXT, "sigaction");
+    if (linked_sigaction_sym == nullptr) {
+      linked_sigaction_sym = dlsym(RTLD_DEFAULT, "sigaction");
+      if (linked_sigaction_sym == nullptr ||
+          linked_sigaction_sym == reinterpret_cast<void*>(sigaction)) {
+        fatal("Unable to find next sigaction in signal chain");
+      }
+    }
+
+    void* linked_sigprocmask_sym = dlsym(RTLD_NEXT, "sigprocmask");
+    if (linked_sigprocmask_sym == nullptr) {
+      linked_sigprocmask_sym = dlsym(RTLD_DEFAULT, "sigprocmask");
+      if (linked_sigprocmask_sym == nullptr ||
+          linked_sigprocmask_sym == reinterpret_cast<void*>(sigprocmask)) {
+        fatal("Unable to find next sigprocmask in signal chain");
+      }
+    }
+
+    linked_sigaction =
+        reinterpret_cast<decltype(linked_sigaction)>(linked_sigaction_sym);
+    linked_sigprocmask =
+        reinterpret_cast<decltype(linked_sigprocmask)>(linked_sigprocmask_sym);
+  });
+}
+
+
 static pthread_key_t GetHandlingSignalKey() {
   static pthread_key_t key;
   static std::once_flag once;
@@ -161,10 +190,10 @@
     return action_;
   }
 
-  void AddSpecialHandler(SpecialSignalHandlerFn fn) {
-    for (SpecialSignalHandlerFn& slot : special_handlers_) {
-      if (slot == nullptr) {
-        slot = fn;
+  void AddSpecialHandler(SigchainAction* sa) {
+    for (SigchainAction& slot : special_handlers_) {
+      if (slot.sc_sigaction == nullptr) {
+        slot = *sa;
         return;
       }
     }
@@ -172,15 +201,15 @@
     fatal("too many special signal handlers");
   }
 
-  void RemoveSpecialHandler(SpecialSignalHandlerFn fn) {
+  void RemoveSpecialHandler(bool (*fn)(int, siginfo_t*, void*)) {
     // This isn't thread safe, but it's unlikely to be a real problem.
     size_t len = sizeof(special_handlers_)/sizeof(*special_handlers_);
     for (size_t i = 0; i < len; ++i) {
-      if (special_handlers_[i] == fn) {
+      if (special_handlers_[i].sc_sigaction == fn) {
         for (size_t j = i; j < len - 1; ++j) {
           special_handlers_[j] = special_handlers_[j + 1];
         }
-        special_handlers_[len - 1] = nullptr;
+        special_handlers_[len - 1].sc_sigaction = nullptr;
         return;
       }
     }
@@ -194,47 +223,37 @@
  private:
   bool claimed_;
   struct sigaction action_;
-  SpecialSignalHandlerFn special_handlers_[2];
+  SigchainAction special_handlers_[2];
 };
 
 static SignalChain chains[_NSIG];
 
-class ScopedSignalUnblocker {
- public:
-  explicit ScopedSignalUnblocker(const std::initializer_list<int>& signals) {
-    sigset_t new_mask;
-    sigemptyset(&new_mask);
-    for (int signal : signals) {
-      sigaddset(&new_mask, signal);
-    }
-    if (sigprocmask(SIG_UNBLOCK, &new_mask, &previous_mask_) != 0) {
-      fatal("failed to unblock signals: %s", strerror(errno));
-    }
-  }
-
-  ~ScopedSignalUnblocker() {
-    if (sigprocmask(SIG_SETMASK, &previous_mask_, nullptr) != 0) {
-      fatal("failed to unblock signals: %s", strerror(errno));
-    }
-  }
-
- private:
-  sigset_t previous_mask_;
-};
-
 void SignalChain::Handler(int signo, siginfo_t* siginfo, void* ucontext_raw) {
-  ScopedHandlingSignal handling_signal;
-
   // Try the special handlers first.
   // If one of them crashes, we'll reenter this handler and pass that crash onto the user handler.
   if (!GetHandlingSignal()) {
-    ScopedSignalUnblocker unblocked { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV }; // NOLINT
-    SetHandlingSignal(true);
-
     for (const auto& handler : chains[signo].special_handlers_) {
-      if (handler != nullptr && handler(signo, siginfo, ucontext_raw)) {
+      if (handler.sc_sigaction == nullptr) {
+        break;
+      }
+
+      // The native bridge signal handler might not return.
+      // Avoid setting the thread local flag in this case, since we'll never
+      // get a chance to restore it.
+      bool handler_noreturn = (handler.sc_flags & SIGCHAIN_ALLOW_NORETURN);
+      sigset_t previous_mask;
+      linked_sigprocmask(SIG_SETMASK, &handler.sc_mask, &previous_mask);
+
+      ScopedHandlingSignal restorer;
+      if (!handler_noreturn) {
+        SetHandlingSignal(true);
+      }
+
+      if (handler.sc_sigaction(signo, siginfo, ucontext_raw)) {
         return;
       }
+
+      linked_sigprocmask(SIG_SETMASK, &previous_mask, nullptr);
     }
   }
 
@@ -246,7 +265,7 @@
   if ((handler_flags & SA_NODEFER)) {
     sigdelset(&mask, signo);
   }
-  sigprocmask(SIG_SETMASK, &mask, nullptr);
+  linked_sigprocmask(SIG_SETMASK, &mask, nullptr);
 
   if ((handler_flags & SA_SIGINFO)) {
     chains[signo].action_.sa_sigaction(signo, siginfo, ucontext_raw);
@@ -263,6 +282,8 @@
 }
 
 extern "C" int sigaction(int signal, const struct sigaction* new_action, struct sigaction* old_action) {
+  InitializeSignalChain();
+
   // If this signal has been claimed as a signal chain, record the user's
   // action but don't pass it on to the kernel.
   // Note that we check that the signal number is in range here.  An out of range signal
@@ -285,11 +306,12 @@
 
   // Will only get here if the signal chain has not been claimed.  We want
   // to pass the sigaction on to the kernel via the real sigaction in libc.
-  InitializeSignalChain();
   return linked_sigaction(signal, new_action, old_action);
 }
 
 extern "C" sighandler_t signal(int signo, sighandler_t handler) {
+  InitializeSignalChain();
+
   if (signo < 0 || signo > _NSIG) {
     errno = EINVAL;
     return SIG_ERR;
@@ -311,7 +333,6 @@
 
   // Will only get here if the signal chain has not been claimed.  We want
   // to pass the sigaction on to the kernel via the real sigaction in libc.
-  InitializeSignalChain();
   if (linked_sigaction(signo, &sa, &sa) == -1) {
     return SIG_ERR;
   }
@@ -321,11 +342,15 @@
 
 #if !defined(__LP64__)
 extern "C" sighandler_t bsd_signal(int signo, sighandler_t handler) {
+  InitializeSignalChain();
+
   return signal(signo, handler);
 }
 #endif
 
 extern "C" int sigprocmask(int how, const sigset_t* bionic_new_set, sigset_t* bionic_old_set) {
+  InitializeSignalChain();
+
   // When inside a signal handler, forward directly to the actual sigprocmask.
   if (GetHandlingSignal()) {
     return linked_sigprocmask(how, bionic_new_set, bionic_old_set);
@@ -348,57 +373,24 @@
     new_set_ptr = &tmpset;
   }
 
-  InitializeSignalChain();
   return linked_sigprocmask(how, new_set_ptr, bionic_old_set);
 }
 
-extern "C" void InitializeSignalChain() {
-  // Warning.
-  // Don't call this from within a signal context as it makes calls to
-  // dlsym.  Calling into the dynamic linker will result in locks being
-  // taken and if it so happens that a signal occurs while one of these
-  // locks is already taken, dlsym will block trying to reenter a
-  // mutex and we will never get out of it.
-  static bool initialized = false;
-  if (initialized) {
-    // Don't initialize twice.
-    return;
-  }
+extern "C" void AddSpecialSignalHandlerFn(int signal, SigchainAction* sa) {
+  InitializeSignalChain();
 
-  void* linked_sigaction_sym = dlsym(RTLD_NEXT, "sigaction");
-  if (linked_sigaction_sym == nullptr) {
-    linked_sigaction_sym = dlsym(RTLD_DEFAULT, "sigaction");
-    if (linked_sigaction_sym == nullptr ||
-        linked_sigaction_sym == reinterpret_cast<void*>(sigaction)) {
-      fatal("Unable to find next sigaction in signal chain");
-    }
-  }
-
-  void* linked_sigprocmask_sym = dlsym(RTLD_NEXT, "sigprocmask");
-  if (linked_sigprocmask_sym == nullptr) {
-    linked_sigprocmask_sym = dlsym(RTLD_DEFAULT, "sigprocmask");
-    if (linked_sigprocmask_sym == nullptr ||
-        linked_sigprocmask_sym == reinterpret_cast<void*>(sigprocmask)) {
-      fatal("Unable to find next sigprocmask in signal chain");
-    }
-  }
-
-  linked_sigaction = reinterpret_cast<decltype(linked_sigaction)>(linked_sigaction_sym);
-  linked_sigprocmask = reinterpret_cast<decltype(linked_sigprocmask)>(linked_sigprocmask_sym);
-  initialized = true;
-}
-
-extern "C" void AddSpecialSignalHandlerFn(int signal, SpecialSignalHandlerFn fn) {
   if (signal <= 0 || signal >= _NSIG) {
     fatal("Invalid signal %d", signal);
   }
 
   // Set the managed_handler.
-  chains[signal].AddSpecialHandler(fn);
+  chains[signal].AddSpecialHandler(sa);
   chains[signal].Claim(signal);
 }
 
-extern "C" void RemoveSpecialSignalHandlerFn(int signal, SpecialSignalHandlerFn fn) {
+extern "C" void RemoveSpecialSignalHandlerFn(int signal, bool (*fn)(int, siginfo_t*, void*)) {
+  InitializeSignalChain();
+
   if (signal <= 0 || signal >= _NSIG) {
     fatal("Invalid signal %d", signal);
   }
@@ -407,14 +399,16 @@
 }
 
 extern "C" void EnsureFrontOfChain(int signal) {
+  InitializeSignalChain();
+
   if (signal <= 0 || signal >= _NSIG) {
     fatal("Invalid signal %d", signal);
   }
 
   // Read the current action without looking at the chain, it should be the expected action.
   struct sigaction current_action;
-  InitializeSignalChain();
   linked_sigaction(signal, nullptr, &current_action);
+
   // If the sigactions don't match then we put the current action on the chain and make ourself as
   // the main action.
   if (current_action.sa_sigaction != SignalChain::Handler) {
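A minimal sketch (not part of this change) of how a client would register a handler with the per-handler dispatch above; the handler name and call site here are hypothetical, but the struct fields and entry point are the ones this patch introduces. The sc_mask is installed with linked_sigprocmask for the duration of the handler, and handlers that may leave via siglongjmp would additionally set SIGCHAIN_ALLOW_NORETURN in sc_flags:

  #include <signal.h>
  #include "sigchain.h"  // assumed include path

  static bool HypotheticalHandler(int, siginfo_t*, void*) {
    return false;  // not handled; let the chained handlers run
  }

  static void RegisterHypotheticalHandler() {
    art::SigchainAction sa = {};
    sa.sc_sigaction = HypotheticalHandler;
    sigfillset(&sa.sc_mask);   // block everything while the handler runs
    sa.sc_flags = 0;           // or art::SIGCHAIN_ALLOW_NORETURN for noreturn handlers
    art::AddSpecialSignalHandlerFn(SIGSEGV, &sa);
  }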
diff --git a/sigchainlib/sigchain.h b/sigchainlib/sigchain.h
index 960d221..23fba03 100644
--- a/sigchainlib/sigchain.h
+++ b/sigchainlib/sigchain.h
@@ -18,14 +18,21 @@
 #define ART_SIGCHAINLIB_SIGCHAIN_H_
 
 #include <signal.h>
+#include <stdint.h>
 
 namespace art {
 
-extern "C" void InitializeSignalChain();
+// Handlers that exit without returning to their caller (e.g. via siglongjmp) must pass this flag.
+static constexpr uint64_t SIGCHAIN_ALLOW_NORETURN = 0x1UL;
 
-typedef bool (*SpecialSignalHandlerFn)(int, siginfo_t*, void*);
-extern "C" void AddSpecialSignalHandlerFn(int signal, SpecialSignalHandlerFn fn);
-extern "C" void RemoveSpecialSignalHandlerFn(int signal, SpecialSignalHandlerFn fn);
+struct SigchainAction {
+  bool (*sc_sigaction)(int, siginfo_t*, void*);
+  sigset_t sc_mask;
+  uint64_t sc_flags;
+};
+
+extern "C" void AddSpecialSignalHandlerFn(int signal, SigchainAction* sa);
+extern "C" void RemoveSpecialSignalHandlerFn(int signal, bool (*fn)(int, siginfo_t*, void*));
 
 extern "C" void EnsureFrontOfChain(int signal);
 
diff --git a/sigchainlib/sigchain_dummy.cc b/sigchainlib/sigchain_dummy.cc
index d6a5e12..edce965 100644
--- a/sigchainlib/sigchain_dummy.cc
+++ b/sigchainlib/sigchain_dummy.cc
@@ -48,24 +48,19 @@
 
 namespace art {
 
-extern "C" void InitializeSignalChain() {
-  log("InitializeSignalChain is not exported by the main executable.");
-  abort();
-}
-
 extern "C" void EnsureFrontOfChain(int signal ATTRIBUTE_UNUSED) {
   log("EnsureFrontOfChain is not exported by the main executable.");
   abort();
 }
 
 extern "C" void AddSpecialSignalHandlerFn(int signal ATTRIBUTE_UNUSED,
-                                          SpecialSignalHandlerFn fn ATTRIBUTE_UNUSED) {
+                                          SigchainAction* sa ATTRIBUTE_UNUSED) {
   log("SetSpecialSignalHandlerFn is not exported by the main executable.");
   abort();
 }
 
 extern "C" void RemoveSpecialSignalHandlerFn(int signal ATTRIBUTE_UNUSED,
-                                          SpecialSignalHandlerFn fn ATTRIBUTE_UNUSED) {
+                                             bool (*fn)(int, siginfo_t*, void*) ATTRIBUTE_UNUSED) {
   log("SetSpecialSignalHandlerFn is not exported by the main executable.");
   abort();
 }
diff --git a/sigchainlib/version-script32.txt b/sigchainlib/version-script32.txt
index f360efa..2340785 100644
--- a/sigchainlib/version-script32.txt
+++ b/sigchainlib/version-script32.txt
@@ -1,6 +1,5 @@
 {
 global:
-  InitializeSignalChain;
   EnsureFrontOfChain;
   AddSpecialSignalHandlerFn;
   RemoveSpecialSignalHandlerFn;
diff --git a/sigchainlib/version-script64.txt b/sigchainlib/version-script64.txt
index 319d1c6..acf3630 100644
--- a/sigchainlib/version-script64.txt
+++ b/sigchainlib/version-script64.txt
@@ -1,6 +1,5 @@
 {
 global:
-  InitializeSignalChain;
   EnsureFrontOfChain;
   AddSpecialSignalHandlerFn;
   RemoveSpecialSignalHandlerFn;
diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt
index 852ec2e..343a762 100644
--- a/test/115-native-bridge/expected.txt
+++ b/test/115-native-bridge/expected.txt
@@ -3,7 +3,7 @@
 Ready for native bridge tests.
 Checking for support.
 Getting trampoline for JNI_OnLoad with shorty (null).
-Test ART callbacks: all JNI function number is 11.
+Test ART callbacks: all JNI function number is 12.
     name:booleanMethod, signature:(ZZZZZZZZZZ)Z, shorty:ZZZZZZZZZZZ.
     name:byteMethod, signature:(BBBBBBBBBB)B, shorty:BBBBBBBBBBB.
     name:charMethod, signature:(CCCCCCCCCC)C, shorty:CCCCCCCCCCC.
@@ -14,6 +14,7 @@
     name:testGetMirandaMethodNative, signature:()Ljava/lang/reflect/Method;, shorty:L.
     name:testNewStringObject, signature:()V, shorty:V.
     name:testSignal, signature:()I, shorty:I.
+    name:testSignalHandlerNotReturn, signature:()V, shorty:V.
     name:testZeroLengthByteBuffers, signature:()V, shorty:V.
 trampoline_JNI_OnLoad called!
 JNI_OnLoad called
@@ -62,3 +63,18 @@
 Getting trampoline for Java_Main_testSignal with shorty I.
 NB signal handler with signal 11.
 NB signal handler with signal 4.
+Loading invalid library 'libinvalid.so' from Java, which will fail.
+Checking for support.
+Was to load 'libinvalid.so', force fail.
+getError() in native bridge.
+Catch UnsatisfiedLinkError exception as expected.
+Getting trampoline for Java_Main_testSignalHandlerNotReturn with shorty V.
+start testSignalHandlerNotReturn
+raising first SIGSEGV
+NB signal handler with signal 11.
+handling first SIGSEGV, will raise another
+unblock SIGSEGV in handler
+raising second SIGSEGV
+NB signal handler with signal 11.
+handling second SIGSEGV, will jump back to test function
+back to test from signal handler via siglongjmp(), and done!
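The trace above follows the standard pattern of leaving a signal handler through siglongjmp rather than returning; a standalone sketch of that pattern (separate from the actual test code in nativebridge.cc below):

  #include <setjmp.h>
  #include <signal.h>
  #include <stdio.h>

  static sigjmp_buf g_jump_buf;

  static void NoReturnHandler(int) {
    siglongjmp(g_jump_buf, 1);  // never returns to the interrupted code
  }

  int main() {
    signal(SIGSEGV, NoReturnHandler);
    if (sigsetjmp(g_jump_buf, 1) == 0) {
      raise(SIGSEGV);  // first pass: enter the handler
    } else {
      puts("back from the handler via siglongjmp");
    }
    return 0;
  }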
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index 87287f8..307fd9b 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -26,6 +26,7 @@
 #include "stdio.h"
 #include "unistd.h"
 #include "sys/stat.h"
+#include "setjmp.h"
 
 #include "base/macros.h"
 #include "nativebridge/native_bridge.h"
@@ -191,6 +192,19 @@
   abort();
 }
 
+static void raise_sigsegv() {
+#if defined(__arm__) || defined(__i386__) || defined(__aarch64__)
+  *go_away_compiler = 'a';
+#elif defined(__x86_64__)
+  // Cause a SEGV using an instruction known to be 2 bytes long to account for hardcoded jump
+  // in the signal handler
+  asm volatile("movl $0, %%eax;" "movb %%ah, (%%rax);" : : : "%eax");
+#else
+  // On other architectures we simulate SEGV.
+  kill(getpid(), SIGSEGV);
+#endif
+}
+
 static jint trampoline_Java_Main_testSignal(JNIEnv*, jclass) {
   // Install the sigaction handler above, which should *not* be reached as the native-bridge
   // handler should be called first. Note: we won't chain at all, if we ever get here, we'll die.
@@ -203,16 +217,7 @@
 
   // Test segv
   sigaction(SIGSEGV, &tmp, nullptr);
-#if defined(__arm__) || defined(__i386__) || defined(__aarch64__)
-  *go_away_compiler = 'a';
-#elif defined(__x86_64__)
-  // Cause a SEGV using an instruction known to be 2 bytes long to account for hardcoded jump
-  // in the signal handler
-  asm volatile("movl $0, %%eax;" "movb %%ah, (%%rax);" : : : "%eax");
-#else
-  // On other architectures we simulate SEGV.
-  kill(getpid(), SIGSEGV);
-#endif
+  raise_sigsegv();
 
   // Test sigill
   sigaction(SIGILL, &tmp, nullptr);
@@ -221,6 +226,135 @@
   return 1234;
 }
 
+// Status of the tricky control path of testSignalHandlerNotReturn.
+//
+// "kNone" is the default status; the other values are only used by
+// testSignalHandlerNotReturn.
+enum class TestStatus {
+  kNone,
+  kRaiseFirst,
+  kHandleFirst,
+  kRaiseSecond,
+  kHandleSecond,
+};
+
+// State transition helper for testSignalHandlerNotReturn.
+class SignalHandlerTestStatus {
+ public:
+  SignalHandlerTestStatus() : state_(TestStatus::kNone) {
+  }
+
+  TestStatus Get() {
+    return state_;
+  }
+
+  void Reset() {
+    Set(TestStatus::kNone);
+  }
+
+  void Set(TestStatus state) {
+    switch (state) {
+      case TestStatus::kNone:
+        AssertState(TestStatus::kHandleSecond);
+        break;
+
+      case TestStatus::kRaiseFirst:
+        AssertState(TestStatus::kNone);
+        break;
+
+      case TestStatus::kHandleFirst:
+        AssertState(TestStatus::kRaiseFirst);
+        break;
+
+      case TestStatus::kRaiseSecond:
+        AssertState(TestStatus::kHandleFirst);
+        break;
+
+      case TestStatus::kHandleSecond:
+        AssertState(TestStatus::kRaiseSecond);
+        break;
+
+      default:
+        printf("ERROR: unknown state\n");
+        abort();
+    }
+
+    state_ = state;
+  }
+
+ private:
+  TestStatus state_;
+
+  void AssertState(TestStatus expected) {
+    if (state_ != expected) {
+      printf("ERROR: unexpected state, was %d, expected %d\n",
+             static_cast<int>(state_), static_cast<int>(expected));
+    }
+  }
+};
+
+static SignalHandlerTestStatus gSignalTestStatus;
+// The context is used to jump out from signal handler.
+static sigjmp_buf gSignalTestJmpBuf;
+
+// Test whether NativeBridge can still receive signals after its handler does not return.
+//
+// Control path:
+//  1. Raise the first SIGSEGV in the test function.
+//  2. Raise another SIGSEGV from NativeBridge's signal handler while it is handling
+//     the first SIGSEGV.
+//  3. Expect NativeBridge's signal handler to be invoked again, and jump back
+//     to the test function while handling the second SIGSEGV.
+//  4. Exit the test.
+//
+// NOTE: sigchain should be aware that a "special signal handler" may not return.
+//       Pay attention to that assumption if this case fails.
+static void trampoline_Java_Main_testSignalHandlerNotReturn(JNIEnv*, jclass) {
+  if (gSignalTestStatus.Get() != TestStatus::kNone) {
+    printf("ERROR: test already started?\n");
+    return;
+  }
+  printf("start testSignalHandlerNotReturn\n");
+
+  if (sigsetjmp(gSignalTestJmpBuf, 1) == 0) {
+    gSignalTestStatus.Set(TestStatus::kRaiseFirst);
+    printf("raising first SIGSEGV\n");
+    raise_sigsegv();
+  } else {
+    // Jumped to here from the signal handler while handling the second SIGSEGV.
+    if (gSignalTestStatus.Get() != TestStatus::kHandleSecond) {
+      printf("ERROR: did not jump here from the second SIGSEGV?\n");
+      return;
+    }
+    gSignalTestStatus.Reset();
+    printf("back to test from signal handler via siglongjmp(), and done!\n");
+  }
+}
+
+// Signal handler for testSignalHandlerNotReturn.
+// This handler won't return.
+static bool NotReturnSignalHandler() {
+  if (gSignalTestStatus.Get() == TestStatus::kRaiseFirst) {
+    // handling first SIGSEGV
+    gSignalTestStatus.Set(TestStatus::kHandleFirst);
+    printf("handling first SIGSEGV, will raise another\n");
+    sigset_t set;
+    sigemptyset(&set);
+    sigaddset(&set, SIGSEGV);
+    printf("unblock SIGSEGV in handler\n");
+    sigprocmask(SIG_UNBLOCK, &set, nullptr);
+    gSignalTestStatus.Set(TestStatus::kRaiseSecond);
+    printf("raising second SIGSEGV\n");
+    raise_sigsegv();    // raise second SIGSEGV
+  } else if (gSignalTestStatus.Get() == TestStatus::kRaiseSecond) {
+    // handling second SIGSEGV
+    gSignalTestStatus.Set(TestStatus::kHandleSecond);
+    printf("handling second SIGSEGV, will jump back to test function\n");
+    siglongjmp(gSignalTestJmpBuf, 1);
+  }
+  printf("ERROR: should not reach here!\n");
+  return false;
+}
+
 NativeBridgeMethod gNativeBridgeMethods[] = {
   { "JNI_OnLoad", "", true, nullptr,
     reinterpret_cast<void*>(trampoline_JNI_OnLoad) },
@@ -246,6 +380,8 @@
     reinterpret_cast<void*>(trampoline_Java_Main_testZeroLengthByteBuffers) },
   { "testSignal", "()I", true, nullptr,
     reinterpret_cast<void*>(trampoline_Java_Main_testSignal) },
+  { "testSignalHandlerNotReturn", "()V", true, nullptr,
+    reinterpret_cast<void*>(trampoline_Java_Main_testSignalHandlerNotReturn) },
 };
 
 static NativeBridgeMethod* find_native_bridge_method(const char *name) {
@@ -285,6 +421,10 @@
 }
 
 extern "C" void* native_bridge_loadLibrary(const char* libpath, int flag) {
+  if (strstr(libpath, "libinvalid.so") != nullptr) {
+    printf("Was to load 'libinvalid.so', force fail.\n");
+    return nullptr;
+  }
   size_t len = strlen(libpath);
   char* tmp = new char[len + 10];
   strncpy(tmp, libpath, len);
@@ -300,7 +440,7 @@
     printf("Handle = nullptr!\n");
     printf("Was looking for %s.\n", libpath);
     printf("Error = %s.\n", dlerror());
-    char cwd[1024];
+    char cwd[1024] = {'\0'};
     if (getcwd(cwd, sizeof(cwd)) != nullptr) {
       printf("Current working dir: %s\n", cwd);
     }
@@ -395,10 +535,8 @@
 #endif
 #endif
 
-// A dummy special handler, continueing after the faulting location. This code comes from
-// 004-SignalTest.
-static bool nb_signalhandler(int sig, siginfo_t* info ATTRIBUTE_UNUSED, void* context) {
-  printf("NB signal handler with signal %d.\n", sig);
+static bool StandardSignalHandler(int sig, siginfo_t* info ATTRIBUTE_UNUSED,
+                                     void* context) {
   if (sig == SIGSEGV) {
 #if defined(__arm__)
     struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
@@ -423,6 +561,21 @@
   return true;
 }
 
+// A dummy special handler, continuing after the faulting location. This code comes from
+// 004-SignalTest.
+static bool nb_signalhandler(int sig, siginfo_t* info, void* context) {
+  printf("NB signal handler with signal %d.\n", sig);
+
+  if (gSignalTestStatus.Get() == TestStatus::kNone) {
+    return StandardSignalHandler(sig, info, context);
+  } else if (sig == SIGSEGV) {
+    return NotReturnSignalHandler();
+  } else {
+    printf("ERROR: should not reach here!\n");
+    return false;
+  }
+}
+
 static ::android::NativeBridgeSignalHandlerFn native_bridge_getSignalHandler(int signal) {
   // Test segv for already claimed signal, and sigill for not claimed signal
   if ((signal == SIGSEGV) || (signal == SIGILL)) {
@@ -437,8 +590,8 @@
 }
 
 extern "C" const char* native_bridge_getError() {
-  printf("dlerror() in native bridge.\n");
-  return nullptr;
+  printf("getError() in native bridge.\n");
+  return "";
 }
 
 extern "C" bool native_bridge_isPathSupported(const char* library_path ATTRIBUTE_UNUSED) {
diff --git a/test/115-native-bridge/run b/test/115-native-bridge/run
index 9290dd3..22f5c67 100644
--- a/test/115-native-bridge/run
+++ b/test/115-native-bridge/run
@@ -23,6 +23,7 @@
 ln -sf ${LIBPATH}/libnativebridgetest.so .
 touch libarttest.so
 touch libarttestd.so
+touch libinvalid.so
 ln -sf ${LIBPATH}/libarttest.so libarttest2.so
 ln -sf ${LIBPATH}/libarttestd.so libarttestd2.so
 
diff --git a/test/115-native-bridge/src/NativeBridgeMain.java b/test/115-native-bridge/src/NativeBridgeMain.java
index c298b1b..11eaa84 100644
--- a/test/115-native-bridge/src/NativeBridgeMain.java
+++ b/test/115-native-bridge/src/NativeBridgeMain.java
@@ -16,6 +16,7 @@
 
 import java.lang.reflect.Method;
 import java.lang.System;
+import java.lang.Exception;
 
 // This is named Main as it is a copy of JniTest, so that we can re-use the native implementations
 // from libarttest.
@@ -33,6 +34,8 @@
         testEnvironment();
         testNewStringObject();
         testSignalHandler();
+        testGetErrorByLoadInvalidLibrary();
+        testSignalHandlerNotReturn();
     }
 
     public static native void testFindClassOnAttachedNativeThread();
@@ -183,6 +186,22 @@
     }
 
     private static native int testSignal();
+
+    // Test the path from Java to getError() of NativeBridge.
+    //
+    // Load the invalid library 'libinvalid.so' from Java. Loading will fail since the file is
+    // invalid (empty). ART (NativeLoader, specifically) then calls getError() to dump the error
+    // message. Back in Java, catch the UnsatisfiedLinkError to confirm the failure.
+    private static void testGetErrorByLoadInvalidLibrary() {
+        System.out.println("Loading invalid library 'libinvalid.so' from Java, which will fail.");
+        try {
+            System.loadLibrary("invalid");
+        } catch (java.lang.UnsatisfiedLinkError e) {
+            System.out.println("Catch UnsatisfiedLinkError exception as expected.");
+        }
+    }
+
+    private static native void testSignalHandlerNotReturn();
 }
 
 public class NativeBridgeMain {
diff --git a/test/121-modifiers/info.txt b/test/121-modifiers/info.txt
index 129aee8..335df53 100644
--- a/test/121-modifiers/info.txt
+++ b/test/121-modifiers/info.txt
@@ -14,5 +14,5 @@
 mv NonInf.out classes/NonInf.class
 mv Main.class A.class A\$B.class A\$C.class classes/
 dx --debug --dex --output=classes.dex classes
-baksmali classes.dex
+baksmali disassemble classes.dex
 mv out/*.smali smali/
diff --git a/test/527-checker-array-access-simd/expected.txt b/test/527-checker-array-access-simd/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/527-checker-array-access-simd/expected.txt
diff --git a/test/527-checker-array-access-simd/info.txt b/test/527-checker-array-access-simd/info.txt
new file mode 100644
index 0000000..f147943
--- /dev/null
+++ b/test/527-checker-array-access-simd/info.txt
@@ -0,0 +1 @@
+Test arm- and arm64-specific array access optimization for simd loops.
diff --git a/test/527-checker-array-access-simd/src/Main.java b/test/527-checker-array-access-simd/src/Main.java
new file mode 100644
index 0000000..8af5465
--- /dev/null
+++ b/test/527-checker-array-access-simd/src/Main.java
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.checkIntCase(int[]) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:             <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:             <<Const5:i\d+>>        IntConstant 5
+  /// CHECK-DAG:             <<Repl:d\d+>>          VecReplicateScalar [<<Const5>>]
+  //  -------------- Loop
+  /// CHECK-DAG:             <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                    If
+  /// CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array>>,<<Index>>]
+  /// CHECK-DAG:             <<Add:d\d+>>           VecAdd [<<Load>>,<<Repl>>]
+  /// CHECK-DAG:                                    VecStore [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.checkIntCase(int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:             <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:             <<Const5:i\d+>>        IntConstant 5
+  /// CHECK-DAG:             <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK-DAG:             <<Const2:i\d+>>        IntConstant 2
+  /// CHECK-DAG:             <<Repl:d\d+>>          VecReplicateScalar [<<Const5>>]
+  //  -------------- Loop
+  /// CHECK-DAG:             <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                    If
+  /// CHECK-DAG:             <<Address1:i\d+>>      IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>]
+  /// CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array>>,<<Address1>>]
+  /// CHECK-DAG:             <<Add:d\d+>>           VecAdd [<<Load>>,<<Repl>>]
+  /// CHECK-DAG:             <<Address2:i\d+>>      IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>]
+  /// CHECK-DAG:                                    VecStore [<<Array>>,<<Address2>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.checkIntCase(int[]) GVN$after_arch (after)
+  /// CHECK-DAG:             <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:             <<Const5:i\d+>>        IntConstant 5
+  /// CHECK-DAG:             <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK-DAG:             <<Const2:i\d+>>        IntConstant 2
+  /// CHECK-DAG:             <<Repl:d\d+>>          VecReplicateScalar [<<Const5>>]
+  //  -------------- Loop
+  /// CHECK-DAG:             <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                    If
+  /// CHECK-DAG:             <<Address1:i\d+>>      IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>]
+  /// CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array>>,<<Address1>>]
+  /// CHECK-DAG:             <<Add:d\d+>>           VecAdd [<<Load>>,<<Repl>>]
+  /// CHECK-NOT:                                    IntermediateAddress
+  /// CHECK-DAG:                                    VecStore [<<Array>>,<<Address1>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.checkIntCase(int[]) disassembly (after)
+  /// CHECK:                                        IntermediateAddressIndex
+  /// CHECK-NEXT:                                   add w{{[0-9]+}}, w{{[0-9]+}}, w{{[0-9]+}}, lsl #2
+  public static void checkIntCase(int[] a) {
+    for (int i = 0; i < 128; i++) {
+      a[i] += 5;
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.checkByteCase(byte[]) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:             <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:             <<Const5:i\d+>>        IntConstant 5
+  /// CHECK-DAG:             <<Repl:d\d+>>          VecReplicateScalar [<<Const5>>]
+  //  -------------- Loop
+  /// CHECK-DAG:             <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                    If
+  /// CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array>>,<<Index>>]
+  /// CHECK-DAG:             <<Add:d\d+>>           VecAdd [<<Load>>,<<Repl>>]
+  /// CHECK-DAG:                                    VecStore [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.checkByteCase(byte[]) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:             <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:             <<Const0:i\d+>>        IntConstant 0
+  /// CHECK-DAG:             <<Const5:i\d+>>        IntConstant 5
+  /// CHECK-DAG:             <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK-DAG:             <<Repl:d\d+>>          VecReplicateScalar [<<Const5>>]
+  //  -------------- Loop
+  /// CHECK-DAG:             <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                    If
+  /// CHECK-DAG:             <<Address1:i\d+>>      IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const0>>]
+  /// CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array>>,<<Address1>>]
+  /// CHECK-DAG:             <<Add:d\d+>>           VecAdd [<<Load>>,<<Repl>>]
+  /// CHECK-DAG:             <<Address2:i\d+>>      IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const0>>]
+  /// CHECK-DAG:                                    VecStore [<<Array>>,<<Address2>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.checkByteCase(byte[]) GVN$after_arch (after)
+  /// CHECK-DAG:             <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:             <<Const0:i\d+>>        IntConstant 0
+  /// CHECK-DAG:             <<Const5:i\d+>>        IntConstant 5
+  /// CHECK-DAG:             <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK-DAG:             <<Repl:d\d+>>          VecReplicateScalar [<<Const5>>]
+  //  -------------- Loop
+  /// CHECK-DAG:             <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                    If
+  /// CHECK-DAG:             <<Address1:i\d+>>      IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const0>>]
+  /// CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array>>,<<Address1>>]
+  /// CHECK-DAG:             <<Add:d\d+>>           VecAdd [<<Load>>,<<Repl>>]
+  /// CHECK-NOT:                                    IntermediateAddress
+  /// CHECK-DAG:                                    VecStore [<<Array>>,<<Address1>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.checkByteCase(byte[]) disassembly (after)
+  /// CHECK:                                        IntermediateAddressIndex
+  /// CHECK-NEXT:                                   add w{{[0-9]+}}, w{{[0-9]+}}, #0x{{[0-9a-fA-F]+}}
+  /// CHECK:                                        VecLoad
+  /// CHECK-NEXT:                                   ldr q{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}]
+  /// CHECK:                                        VecStore
+  /// CHECK-NEXT:                                   str q{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}]
+  public static void checkByteCase(byte[] a) {
+    for (int i = 0; i < 128; i++) {
+      a[i] += 5;
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.checkSingleAccess(int[]) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:             <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:             <<Const5:i\d+>>        IntConstant 5
+  /// CHECK-DAG:             <<Repl:d\d+>>          VecReplicateScalar [<<Const5>>]
+  //  -------------- Loop
+  /// CHECK-DAG:             <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                    If
+  /// CHECK-DAG:                                    VecStore [<<Array>>,<<Index>>,<<Repl>>]
+
+  /// CHECK-START-ARM64: void Main.checkSingleAccess(int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:             <<Array:l\d+>>         ParameterValue
+  /// CHECK-DAG:             <<Const0:i\d+>>        IntConstant 0
+  /// CHECK-DAG:             <<Const5:i\d+>>        IntConstant 5
+  /// CHECK-DAG:             <<Repl:d\d+>>          VecReplicateScalar [<<Const5>>]
+  //  -------------- Loop
+  /// CHECK-DAG:             <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                    If
+  /// CHECK-DAG:                                    VecStore [<<Array>>,<<Index>>,<<Repl>>]
+  /// CHECK-NOT:                                    IntermediateAddress
+  public static void checkSingleAccess(int[] a) {
+    for (int i = 0; i < 128; i++) {
+      a[i] = 5;
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.checkInt2Float(int[], float[]) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:             <<Array1:l\d+>>        ParameterValue
+  /// CHECK-DAG:             <<Array2:l\d+>>        ParameterValue
+  //  -------------- Loop
+  /// CHECK-DAG:             <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                    If
+  /// CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array1>>,<<Index>>]
+  /// CHECK-DAG:             <<Cnv:d\d+>>           VecCnv [<<Load>>]
+  /// CHECK-DAG:                                    VecStore [<<Array2>>,<<Index>>,<<Cnv>>]
+
+  /// CHECK-START-ARM64: void Main.checkInt2Float(int[], float[]) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:             <<Array1:l\d+>>        ParameterValue
+  /// CHECK-DAG:             <<Array2:l\d+>>        ParameterValue
+  /// CHECK-DAG:             <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK-DAG:             <<Const2:i\d+>>        IntConstant 2
+  //  -------------- Loop
+  /// CHECK-DAG:             <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                    If
+  /// CHECK-DAG:             <<Address1:i\d+>>      IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>]
+  /// CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array1>>,<<Address1>>]
+  /// CHECK-DAG:             <<Cnv:d\d+>>           VecCnv [<<Load>>]
+  /// CHECK-DAG:             <<Address2:i\d+>>      IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>]
+  /// CHECK-DAG:                                    VecStore [<<Array2>>,<<Address2>>,<<Cnv>>]
+
+  /// CHECK-START-ARM64: void Main.checkInt2Float(int[], float[]) GVN$after_arch (after)
+  /// CHECK-DAG:             <<Array1:l\d+>>        ParameterValue
+  /// CHECK-DAG:             <<Array2:l\d+>>        ParameterValue
+  /// CHECK-DAG:             <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK-DAG:             <<Const2:i\d+>>        IntConstant 2
+  //  -------------- Loop
+  /// CHECK-DAG:             <<Index:i\d+>>         Phi
+  /// CHECK-DAG:                                    If
+  /// CHECK-DAG:             <<Address1:i\d+>>      IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>]
+  /// CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array1>>,<<Address1>>]
+  /// CHECK-DAG:             <<Cnv:d\d+>>           VecCnv [<<Load>>]
+  /// CHECK-NOT:                                    IntermediateAddress
+  /// CHECK-DAG:                                    VecStore [<<Array2>>,<<Address1>>,<<Cnv>>]
+
+  /// CHECK-START-ARM64: void Main.checkInt2Float(int[], float[]) disassembly (after)
+  /// CHECK:                                        IntermediateAddressIndex
+  /// CHECK-NEXT:                                   add w{{[0-9]+}}, w{{[0-9]+}}, w{{[0-9]+}}, lsl #2
+  public static void checkInt2Float(int[] a, float[] b) {
+    for (int i = 0; i < 128; i++) {
+      b[i] = (float) a[i];
+    }
+  }
+
+  public static final int ARRAY_SIZE = 1024;
+
+  public static int calcArraySum(int[] a, byte[] b, float[] c) {
+    int sum = 0;
+    for (int i = 0; i < 128; i++) {
+      sum += a[i] + b[i] + (int) c[i];
+    }
+    return sum;
+  }
+
+  public static void main(String[] args) {
+    byte[] ba = new byte[ARRAY_SIZE];
+    int[] ia = new int[ARRAY_SIZE];
+    float[] fa = new float[ARRAY_SIZE];
+
+    checkSingleAccess(ia);
+    checkIntCase(ia);
+    checkByteCase(ba);
+    checkInt2Float(ia, fa);
+
+    assertIntEquals(3200, calcArraySum(ia, ba, fa));
+  }
+}
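The IntermediateAddressIndex nodes checked above fold the array data offset and the element-size shift into one address computation; a sketch of that arithmetic for the int[] case, using the constants the checks expect (offset 12, shift 2) and names chosen here purely for illustration:

  #include <cstdint>

  // base + data_offset + (index << element_shift), as one intermediate address.
  uintptr_t IntArrayElementAddress(uintptr_t array_base, int index) {
    const uintptr_t kDataOffset = 12;  // int[] payload offset expected by the checks
    const int kElementShift = 2;       // log2(sizeof(int32_t))
    return array_base + kDataOffset + (static_cast<uintptr_t>(index) << kElementShift);
  }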
diff --git a/test/551-checker-shifter-operand/build b/test/551-checker-shifter-operand/build
index a78021f..027a0ea 100644
--- a/test/551-checker-shifter-operand/build
+++ b/test/551-checker-shifter-operand/build
@@ -168,7 +168,7 @@
 
 if [ "${HAS_SMALI}" = "true" ]; then
   # Compile Smali classes
-  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
+  ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
 
   # Don't bother with dexmerger if we provide our own main function in a smali file.
   if [ ${SKIP_DX_MERGER} = "false" ]; then
diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java
index e967398..bf09a6a 100644
--- a/test/551-checker-shifter-operand/src/Main.java
+++ b/test/551-checker-shifter-operand/src/Main.java
@@ -642,6 +642,123 @@
 
 
   // Each test line below should see one merge.
+  //
+  /// CHECK-START: void Main.$opt$validateShiftInt(int, int) instruction_simplifier$after_inlining (before)
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK-NOT:                        Shl
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK-NOT:                        Shl
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK-NOT:                        UShr
+  //
+  // Note: simplification after inlining removes `b << 32`, `b >> 32` and `b >>> 32`.
+  //
+  /// CHECK-START: void Main.$opt$validateShiftInt(int, int) instruction_simplifier$after_inlining (after)
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK-NOT:                        Shl
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK-NOT:                        Shl
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK-NOT:                        UShr
+  //
+  // Note: simplification followed by GVN exposes the common subexpressions between shifts with larger distance
+  //       `b << 62`, `b << 63` etc. and the equivalent smaller distances.
+  //
+  /// CHECK-START: void Main.$opt$validateShiftInt(int, int) GVN (after)
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK:                            Shl
+  /// CHECK-NOT:                        Shl
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK:                            Shr
+  /// CHECK-NOT:                        Shl
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK:                            UShr
+  /// CHECK-NOT:                        UShr
+  //
   /// CHECK-START-ARM: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm (after)
   /// CHECK:                            DataProcWithShifterOp
   /// CHECK:                            DataProcWithShifterOp
@@ -670,14 +787,7 @@
   /// CHECK:                            DataProcWithShifterOp
   /// CHECK:                            DataProcWithShifterOp
   /// CHECK:                            DataProcWithShifterOp
-  /// CHECK:                            DataProcWithShifterOp
-  /// CHECK:                            DataProcWithShifterOp
-  /// CHECK:                            DataProcWithShifterOp
-  /// CHECK:                            DataProcWithShifterOp
-  /// CHECK:                            DataProcWithShifterOp
-  /// CHECK:                            DataProcWithShifterOp
   /// CHECK-NOT:                        DataProcWithShifterOp
-  // Note: `b << 32`, `b >> 32` and `b >>> 32` are optimized away by generic simplifier.
 
   /// CHECK-START-ARM: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm (after)
   /// CHECK-NOT:                        Shl
@@ -712,14 +822,7 @@
   /// CHECK:                            DataProcWithShifterOp
   /// CHECK:                            DataProcWithShifterOp
   /// CHECK:                            DataProcWithShifterOp
-  /// CHECK:                            DataProcWithShifterOp
-  /// CHECK:                            DataProcWithShifterOp
-  /// CHECK:                            DataProcWithShifterOp
-  /// CHECK:                            DataProcWithShifterOp
-  /// CHECK:                            DataProcWithShifterOp
-  /// CHECK:                            DataProcWithShifterOp
   /// CHECK-NOT:                        DataProcWithShifterOp
-  // Note: `b << 32`, `b >> 32` and `b >>> 32` are optimized away by generic simplifier.
 
   /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after)
   /// CHECK-NOT:                        Shl
diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java
index 2d9daf1..0080ffa 100644
--- a/test/618-checker-induction/src/Main.java
+++ b/test/618-checker-induction/src/Main.java
@@ -468,6 +468,19 @@
     return sum;
   }
 
+  // Ensure double induction does not "overshoot" the subscript range.
+  private static int getIncr2(int[] arr) {
+    for (int i = 0; i < 12; ) {
+      arr[i++] = 30;
+      arr[i++] = 29;
+    }
+    int sum = 0;
+    for (int i = 0; i < 12; i++) {
+      sum += arr[i];
+    }
+    return sum;
+  }
+
   // TODO: handle as closed/empty eventually?
   static int mainIndexReturnedN(int n) {
     int i;
@@ -869,6 +882,7 @@
     expectEquals(1, periodicReturned9());
     expectEquals(0, periodicReturned10());
     expectEquals(21, getSum21());
+    expectEquals(354, getIncr2(new int[12]));
     for (int n = -4; n < 4; n++) {
       int tc = (n <= 0) ? 0 : n;
       expectEquals(tc, mainIndexReturnedN(n));
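For reference, the 354 asserted for getIncr2 above works out directly: the first loop writes six (30, 29) pairs into the twelve-element array, so the second loop sums 6 × (30 + 29) = 6 × 59 = 354.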
diff --git a/test/640-checker-int-simd/src/Main.java b/test/640-checker-int-simd/src/Main.java
index ba1e142..97048eb 100644
--- a/test/640-checker-int-simd/src/Main.java
+++ b/test/640-checker-int-simd/src/Main.java
@@ -76,6 +76,7 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START: void Main.div(int) loop_optimization (after)
+  /// CHECK-NOT: VecDiv
   //
   //  Not supported on any architecture.
   //
@@ -159,14 +160,81 @@
   // Shift sanity.
   //
 
+  // Expose constants to optimizing compiler, but not to front-end.
+  public static int $opt$inline$IntConstant32()       { return 32; }
+  public static int $opt$inline$IntConstant33()       { return 33; }
+  public static int $opt$inline$IntConstantMinus254() { return -254; }
+
+  /// CHECK-START: void Main.shr32() instruction_simplifier$after_inlining (before)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 32                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>  ArrayGet                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.shr32() instruction_simplifier$after_inlining (after)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>> ArrayGet                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              ArraySet [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shr32() loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>>      outer_loop:none
   static void shr32() {
+    // TODO: remove a[i] = a[i] altogether?
     for (int i = 0; i < 128; i++)
-      a[i] >>>= 32;  // 0, since & 31
+      a[i] >>>= $opt$inline$IntConstant32();  // 0, since & 31
   }
 
+  /// CHECK-START: void Main.shr33() instruction_simplifier$after_inlining (before)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 33                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>  ArrayGet                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.shr33() instruction_simplifier$after_inlining (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>  ArrayGet                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shr33() loop_optimization (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
   static void shr33() {
     for (int i = 0; i < 128; i++)
-      a[i] >>>= 33;  // 1, since & 31
+      a[i] >>>= $opt$inline$IntConstant33();  // 1, since & 31
+  }
+
+  /// CHECK-START: void Main.shrMinus254() instruction_simplifier$after_inlining (before)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant -254                      loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>  ArrayGet                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.shrMinus254() instruction_simplifier$after_inlining (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:i\d+>>  ArrayGet                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shrMinus254() loop_optimization (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
+  static void shrMinus254() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= $opt$inline$IntConstantMinus254();  // 2, since & 31
   }
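The masking the inline comments above rely on can be written out explicitly; a sketch with the mask Java applies implicitly (31 for int shifts, 63 for long shifts), which is why distance 32 acts as 0, 33 as 1, and -254 as 2:

  #include <cstdint>

  // Explicit masks; an unmasked over-wide shift would be undefined behavior in C++.
  int32_t UShrInt(int32_t value, int32_t dist) {
    return static_cast<int32_t>(static_cast<uint32_t>(value) >> (dist & 31));
  }

  int64_t UShrLong(int64_t value, int32_t dist) {
    return static_cast<int64_t>(static_cast<uint64_t>(value) >> (dist & 63));
  }

  // UShrInt(x, 32) == x, UShrInt(x, 33) == x >>> 1, UShrInt(x, -254) == x >>> 2.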
 
   //
@@ -240,9 +308,14 @@
     for (int i = 0; i < 128; i++) {
       expectEquals(0x1fffffff, a[i], "shr33");
     }
+    shrMinus254();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0x07ffffff, a[i], "shrMinus254");
+    }
+    // Bit-wise not operator.
     not();
     for (int i = 0; i < 128; i++) {
-      expectEquals(0xe0000000, a[i], "not");
+      expectEquals(0xf8000000, a[i], "not");
     }
     // Done.
     System.out.println("passed");
diff --git a/test/640-checker-long-simd/src/Main.java b/test/640-checker-long-simd/src/Main.java
index 5641182..e42c716 100644
--- a/test/640-checker-long-simd/src/Main.java
+++ b/test/640-checker-long-simd/src/Main.java
@@ -74,6 +74,7 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START: void Main.div(long) loop_optimization (after)
+  /// CHECK-NOT: VecDiv
   //
   //  Not supported on any architecture.
   //
@@ -157,14 +158,81 @@
   // Shift sanity.
   //
 
+  // Expose constants to optimizing compiler, but not to front-end.
+  public static int $opt$inline$IntConstant64()       { return 64; }
+  public static int $opt$inline$IntConstant65()       { return 65; }
+  public static int $opt$inline$IntConstantMinus254() { return -254; }
+
+  /// CHECK-START: void Main.shr64() instruction_simplifier$after_inlining (before)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 64                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:j\d+>>  ArrayGet                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.shr64() instruction_simplifier$after_inlining (after)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:j\d+>> ArrayGet                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              ArraySet [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shr64() loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>>      outer_loop:none
   static void shr64() {
+    // TODO: remove a[i] = a[i] altogether?
     for (int i = 0; i < 128; i++)
-      a[i] >>>= 64;  // 0, since & 63
+      a[i] >>>= $opt$inline$IntConstant64();  // 0, since & 63
   }
 
+  /// CHECK-START: void Main.shr65() instruction_simplifier$after_inlining (before)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 65                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:j\d+>>  ArrayGet                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.shr65() instruction_simplifier$after_inlining (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:j\d+>>  ArrayGet                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shr65() loop_optimization (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
   static void shr65() {
     for (int i = 0; i < 128; i++)
-      a[i] >>>= 65;  // 1, since & 63
+      a[i] >>>= $opt$inline$IntConstant65();  // 1, since & 63
+  }
+
+  /// CHECK-START: void Main.shrMinus254() instruction_simplifier$after_inlining (before)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant -254                      loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:j\d+>>  ArrayGet                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.shrMinus254() instruction_simplifier$after_inlining (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:j\d+>>  ArrayGet                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shrMinus254() loop_optimization (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
+  static void shrMinus254() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= $opt$inline$IntConstantMinus254();  // 2, since & 63
   }
 
   //
@@ -238,9 +306,14 @@
     for (int i = 0; i < 128; i++) {
       expectEquals(0x1fffffffffffffffL, a[i], "shr65");
     }
+    shrMinus254();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0x07ffffffffffffffL, a[i], "shrMinus254");
+    }
+    // Bit-wise not operator.
     not();
     for (int i = 0; i < 128; i++) {
-      expectEquals(0xe000000000000000L, a[i], "not");
+      expectEquals(0xf800000000000000L, a[i], "not");
     }
     // Done.
     System.out.println("passed");
diff --git a/test/650-checker-inline-access-thunks/expected.txt b/test/650-checker-inline-access-thunks/expected.txt
new file mode 100644
index 0000000..d81cc07
--- /dev/null
+++ b/test/650-checker-inline-access-thunks/expected.txt
@@ -0,0 +1 @@
+42
diff --git a/test/650-checker-inline-access-thunks/info.txt b/test/650-checker-inline-access-thunks/info.txt
new file mode 100644
index 0000000..e1a1eb2
--- /dev/null
+++ b/test/650-checker-inline-access-thunks/info.txt
@@ -0,0 +1 @@
+Test that access thunks for nested classes are inlined.
diff --git a/test/650-checker-inline-access-thunks/src/Main.java b/test/650-checker-inline-access-thunks/src/Main.java
new file mode 100644
index 0000000..17f5819
--- /dev/null
+++ b/test/650-checker-inline-access-thunks/src/Main.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    public static void main(String[] args) {
+        Main m = new Main();
+        Nested n = new Nested();
+        n.$noinline$setPrivateIntField(m, 42);
+        System.out.println(n.$noinline$getPrivateIntField(m));
+    }
+
+    private int privateIntField;
+
+    private static class Nested {
+        /// CHECK-START: void Main$Nested.$noinline$setPrivateIntField(Main, int) inliner (before)
+        /// CHECK:                  InvokeStaticOrDirect
+
+        /// CHECK-START: void Main$Nested.$noinline$setPrivateIntField(Main, int) inliner (before)
+        /// CHECK-NOT:              InstanceFieldSet
+
+        /// CHECK-START: void Main$Nested.$noinline$setPrivateIntField(Main, int) inliner (after)
+        /// CHECK-NOT:              InvokeStaticOrDirect
+
+        /// CHECK-START: void Main$Nested.$noinline$setPrivateIntField(Main, int) inliner (after)
+        /// CHECK:                  InstanceFieldSet
+
+        public void $noinline$setPrivateIntField(Main m, int value) {
+            m.privateIntField = value;
+        }
+
+        /// CHECK-START: int Main$Nested.$noinline$getPrivateIntField(Main) inliner (before)
+        /// CHECK:                  InvokeStaticOrDirect
+
+        /// CHECK-START: int Main$Nested.$noinline$getPrivateIntField(Main) inliner (before)
+        /// CHECK-NOT:              InstanceFieldGet
+
+        /// CHECK-START: int Main$Nested.$noinline$getPrivateIntField(Main) inliner (after)
+        /// CHECK-NOT:              InvokeStaticOrDirect
+
+        /// CHECK-START: int Main$Nested.$noinline$getPrivateIntField(Main) inliner (after)
+        /// CHECK:                  InstanceFieldGet
+
+        public int $noinline$getPrivateIntField(Main m) {
+            return m.privateIntField;
+        }
+    }
+}
diff --git a/test/651-checker-byte-simd-minmax/expected.txt b/test/651-checker-byte-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-byte-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-byte-simd-minmax/info.txt b/test/651-checker-byte-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-byte-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
new file mode 100644
index 0000000..8211ace
--- /dev/null
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO: narrow type vectorization.
+  /// CHECK-START: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecMin
+  private static void doitMin(byte[] x, byte[] y, byte[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (byte) Math.min(y[i], z[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO: narrow type vectorization.
+  /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecMax
+  private static void doitMax(byte[] x, byte[] y, byte[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (byte) Math.max(y[i], z[i]);
+    }
+  }
+
+  public static void main(String[] args) {
+    // Initialize cross-values for all possible values.
+    int total = 256 * 256;
+    byte[] x = new byte[total];
+    byte[] y = new byte[total];
+    byte[] z = new byte[total];
+    int k = 0;
+    for (int i = 0; i < 256; i++) {
+      for (int j = 0; j < 256; j++) {
+        x[k] = 0;
+        y[k] = (byte) i;
+        z[k] = (byte) j;
+        k++;
+      }
+    }
+
+    // And test.
+    doitMin(x, y, z);
+    for (int i = 0; i < total; i++) {
+      byte expected = (byte) Math.min(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+    doitMax(x, y, z);
+    for (int i = 0; i < total; i++) {
+      byte expected = (byte) Math.max(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/651-checker-char-simd-minmax/expected.txt b/test/651-checker-char-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-char-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-char-simd-minmax/info.txt b/test/651-checker-char-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-char-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java
new file mode 100644
index 0000000..5ce7b94
--- /dev/null
+++ b/test/651-checker-char-simd-minmax/src/Main.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(char[], char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO: narrow type vectorization.
+  /// CHECK-START: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-NOT: VecMin
+  private static void doitMin(char[] x, char[] y, char[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (char) Math.min(y[i], z[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(char[], char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO: narrow type vectorization.
+  /// CHECK-START: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-NOT: VecMax
+  private static void doitMax(char[] x, char[] y, char[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (char) Math.max(y[i], z[i]);
+    }
+  }
+
+  public static void main(String[] args) {
+    char[] interesting = {
+      0x0000, 0x0001, 0x007f, 0x0080, 0x0081, 0x00ff,
+      0x0100, 0x0101, 0x017f, 0x0180, 0x0181, 0x01ff,
+      0x7f00, 0x7f01, 0x7f7f, 0x7f80, 0x7f81, 0x7fff,
+      0x8000, 0x8001, 0x807f, 0x8080, 0x8081, 0x80ff,
+      0x8100, 0x8101, 0x817f, 0x8180, 0x8181, 0x81ff,
+      0xff00, 0xff01, 0xff7f, 0xff80, 0xff81, 0xffff
+    };
+    // Initialize cross-values for the interesting values.
+    int total = interesting.length * interesting.length;
+    char[] x = new char[total];
+    char[] y = new char[total];
+    char[] z = new char[total];
+    int k = 0;
+    for (int i = 0; i < interesting.length; i++) {
+      for (int j = 0; j < interesting.length; j++) {
+        x[k] = 0;
+        y[k] = interesting[i];
+        z[k] = interesting[j];
+        k++;
+      }
+    }
+
+    // And test.
+    doitMin(x, y, z);
+    for (int i = 0; i < total; i++) {
+      char expected = (char) Math.min(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+    doitMax(x, y, z);
+    for (int i = 0; i < total; i++) {
+      char expected = (char) Math.max(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/651-checker-double-simd-minmax/expected.txt b/test/651-checker-double-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-double-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-double-simd-minmax/info.txt b/test/651-checker-double-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-double-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-double-simd-minmax/src/Main.java b/test/651-checker-double-simd-minmax/src/Main.java
new file mode 100644
index 0000000..e1711ae
--- /dev/null
+++ b/test/651-checker-double-simd-minmax/src/Main.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(double[], double[], double[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinDoubleDouble loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO x86: 0.0 vs -0.0?
+  //
+  /// CHECK-START-ARM64: void Main.doitMin(double[], double[], double[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMin(double[] x, double[] y, double[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.min(y[i], z[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(double[], double[], double[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxDoubleDouble loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO x86: 0.0 vs -0.0?
+  //
+  /// CHECK-START-ARM64: void Main.doitMax(double[], double[], double[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMax(double[] x, double[] y, double[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.max(y[i], z[i]);
+    }
+  }
+
+  public static void main(String[] args) {
+    double[] interesting = {
+      -0.0,
+      +0.0,
+      -1.0,
+      +1.0,
+      -3.14,
+      +3.14,
+      -100.0,
+      +100.0,
+      -4444.44,
+      +4444.44,
+      Double.MIN_NORMAL,
+      Double.MIN_VALUE,
+      Double.MAX_VALUE,
+      Double.NEGATIVE_INFINITY,
+      Double.POSITIVE_INFINITY,
+      Double.NaN
+    };
+    // Initialize cross-values for the interesting values.
+    int total = interesting.length * interesting.length;
+    double[] x = new double[total];
+    double[] y = new double[total];
+    double[] z = new double[total];
+    int k = 0;
+    for (int i = 0; i < interesting.length; i++) {
+      for (int j = 0; j < interesting.length; j++) {
+        x[k] = 0;
+        y[k] = interesting[i];
+        z[k] = interesting[j];
+        k++;
+      }
+    }
+
+    // And test.
+    doitMin(x, y, z);
+    for (int i = 0; i < total; i++) {
+      double expected = Math.min(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+    doitMax(x, y, z);
+    for (int i = 0; i < total; i++) {
+      double expected = Math.max(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(double expected, double result) {
+    // Tests the bits directly. This distinguishes correctly between +0.0
+    // and -0.0 and returns a canonical representation for all NaN.
+    long expected_bits = Double.doubleToLongBits(expected);
+    long result_bits = Double.doubleToLongBits(result);
+    if (expected_bits != result_bits) {
+      throw new Error("Expected: " + expected +
+          "(0x" + Long.toHexString(expected_bits) + "), found: " + result +
+          "(0x" + Long.toHexString(result_bits) + ")");
+    }
+  }
+}
diff --git a/test/651-checker-float-simd-minmax/expected.txt b/test/651-checker-float-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-float-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-float-simd-minmax/info.txt b/test/651-checker-float-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-float-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-float-simd-minmax/src/Main.java b/test/651-checker-float-simd-minmax/src/Main.java
new file mode 100644
index 0000000..bd412e0
--- /dev/null
+++ b/test/651-checker-float-simd-minmax/src/Main.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(float[], float[], float[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:f\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:f\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:f\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinFloatFloat loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO x86: 0.0 vs -0.0?
+  //
+  /// CHECK-START-ARM64: void Main.doitMin(float[], float[], float[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMin(float[] x, float[] y, float[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.min(y[i], z[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(float[], float[], float[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:f\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:f\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:f\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxFloatFloat loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO x86: 0.0 vs -0.0?
+  //
+  /// CHECK-START-ARM64: void Main.doitMax(float[], float[], float[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMax(float[] x, float[] y, float[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.max(y[i], z[i]);
+    }
+  }
+
+  public static void main(String[] args) {
+    float[] interesting = {
+      -0.0f,
+      +0.0f,
+      -1.0f,
+      +1.0f,
+      -3.14f,
+      +3.14f,
+      -100.0f,
+      +100.0f,
+      -4444.44f,
+      +4444.44f,
+      Float.MIN_NORMAL,
+      Float.MIN_VALUE,
+      Float.MAX_VALUE,
+      Float.NEGATIVE_INFINITY,
+      Float.POSITIVE_INFINITY,
+      Float.NaN
+    };
+    // Initialize cross-values for the interesting values.
+    int total = interesting.length * interesting.length;
+    float[] x = new float[total];
+    float[] y = new float[total];
+    float[] z = new float[total];
+    int k = 0;
+    for (int i = 0; i < interesting.length; i++) {
+      for (int j = 0; j < interesting.length; j++) {
+        x[k] = 0;
+        y[k] = interesting[i];
+        z[k] = interesting[j];
+        k++;
+      }
+    }
+
+    // And test.
+    doitMin(x, y, z);
+    for (int i = 0; i < total; i++) {
+      float expected = Math.min(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+    doitMax(x, y, z);
+    for (int i = 0; i < total; i++) {
+      float expected = Math.max(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(float expected, float result) {
+    // Tests the bits directly. This distinguishes correctly between +0.0
+    // and -0.0 and returns a canonical representation for all NaN.
+    int expected_bits = Float.floatToIntBits(expected);
+    int result_bits = Float.floatToIntBits(result);
+    if (expected_bits != result_bits) {
+      throw new Error("Expected: " + expected +
+          "(0x" + Integer.toHexString(expected_bits) + "), found: " + result +
+          "(0x" + Integer.toHexString(result_bits) + ")");
+    }
+  }
+}
diff --git a/test/651-checker-int-simd-minmax/expected.txt b/test/651-checker-int-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-int-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-int-simd-minmax/info.txt b/test/651-checker-int-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-int-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-int-simd-minmax/src/Main.java b/test/651-checker-int-simd-minmax/src/Main.java
new file mode 100644
index 0000000..4e05a9d
--- /dev/null
+++ b/test/651-checker-int-simd-minmax/src/Main.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(int[], int[], int[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:i\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:i\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMin(int[], int[], int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMin(int[] x, int[] y, int[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.min(y[i], z[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(int[], int[], int[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:i\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:i\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMax(int[], int[], int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMax(int[] x, int[] y, int[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.max(y[i], z[i]);
+    }
+  }
+
+  public static void main(String[] args) {
+    int[] interesting = {
+      0x00000000, 0x00000001, 0x00007fff, 0x00008000, 0x00008001, 0x0000ffff,
+      0x00010000, 0x00010001, 0x00017fff, 0x00018000, 0x00018001, 0x0001ffff,
+      0x7fff0000, 0x7fff0001, 0x7fff7fff, 0x7fff8000, 0x7fff8001, 0x7fffffff,
+      0x80000000, 0x80000001, 0x80007fff, 0x80008000, 0x80008001, 0x8000ffff,
+      0x80010000, 0x80010001, 0x80017fff, 0x80018000, 0x80018001, 0x8001ffff,
+      0xffff0000, 0xffff0001, 0xffff7fff, 0xffff8000, 0xffff8001, 0xffffffff
+    };
+    // Initialize cross-values for the interesting values.
+    int total = interesting.length * interesting.length;
+    int[] x = new int[total];
+    int[] y = new int[total];
+    int[] z = new int[total];
+    int k = 0;
+    for (int i = 0; i < interesting.length; i++) {
+      for (int j = 0; j < interesting.length; j++) {
+        x[k] = 0;
+        y[k] = interesting[i];
+        z[k] = interesting[j];
+        k++;
+      }
+    }
+
+    // And test.
+    doitMin(x, y, z);
+    for (int i = 0; i < total; i++) {
+      int expected = Math.min(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+    doitMax(x, y, z);
+    for (int i = 0; i < total; i++) {
+      int expected = Math.max(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/651-checker-long-simd-minmax/expected.txt b/test/651-checker-long-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-long-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-long-simd-minmax/info.txt b/test/651-checker-long-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-long-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-long-simd-minmax/src/Main.java b/test/651-checker-long-simd-minmax/src/Main.java
new file mode 100644
index 0000000..51cf67e
--- /dev/null
+++ b/test/651-checker-long-simd-minmax/src/Main.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(long[], long[], long[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:j\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:j\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:j\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinLongLong loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  // Not directly supported for longs.
+  //
+  /// CHECK-START: void Main.doitMin(long[], long[], long[]) loop_optimization (after)
+  /// CHECK-NOT: VecMin
+  private static void doitMin(long[] x, long[] y, long[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.min(y[i], z[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(long[], long[], long[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:j\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:j\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:j\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxLongLong loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  // Not directly supported for longs.
+  //
+  /// CHECK-START: void Main.doitMax(long[], long[], long[]) loop_optimization (after)
+  /// CHECK-NOT: VecMax
+  private static void doitMax(long[] x, long[] y, long[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.max(y[i], z[i]);
+    }
+  }
+
+  public static void main(String[] args) {
+    long[] interesting = {
+      0x0000000000000000L, 0x0000000000000001L, 0x000000007fffffffL,
+      0x0000000080000000L, 0x0000000080000001L, 0x00000000ffffffffL,
+      0x0000000100000000L, 0x0000000100000001L, 0x000000017fffffffL,
+      0x0000000180000000L, 0x0000000180000001L, 0x00000001ffffffffL,
+      0x7fffffff00000000L, 0x7fffffff00000001L, 0x7fffffff7fffffffL,
+      0x7fffffff80000000L, 0x7fffffff80000001L, 0x7fffffffffffffffL,
+      0x8000000000000000L, 0x8000000000000001L, 0x800000007fffffffL,
+      0x8000000080000000L, 0x8000000080000001L, 0x80000000ffffffffL,
+      0x8000000100000000L, 0x8000000100000001L, 0x800000017fffffffL,
+      0x8000000180000000L, 0x8000000180000001L, 0x80000001ffffffffL,
+      0xffffffff00000000L, 0xffffffff00000001L, 0xffffffff7fffffffL,
+      0xffffffff80000000L, 0xffffffff80000001L, 0xffffffffffffffffL
+    };
+    // Initialize cross-values for the interesting values.
+    int total = interesting.length * interesting.length;
+    long[] x = new long[total];
+    long[] y = new long[total];
+    long[] z = new long[total];
+    int k = 0;
+    for (int i = 0; i < interesting.length; i++) {
+      for (int j = 0; j < interesting.length; j++) {
+        x[k] = 0;
+        y[k] = interesting[i];
+        z[k] = interesting[j];
+        k++;
+      }
+    }
+
+    // And test.
+    doitMin(x, y, z);
+    for (int i = 0; i < total; i++) {
+      long expected = Math.min(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+    doitMax(x, y, z);
+    for (int i = 0; i < total; i++) {
+      long expected = Math.max(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/651-checker-short-simd-minmax/expected.txt b/test/651-checker-short-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-short-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-short-simd-minmax/info.txt b/test/651-checker-short-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-short-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java
new file mode 100644
index 0000000..f34f526
--- /dev/null
+++ b/test/651-checker-short-simd-minmax/src/Main.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO: narrow type vectorization.
+  /// CHECK-START: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecMin
+  private static void doitMin(short[] x, short[] y, short[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (short) Math.min(y[i], z[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO: narrow type vectorization.
+  /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecMax
+  private static void doitMax(short[] x, short[] y, short[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (short) Math.max(y[i], z[i]);
+    }
+  }
+
+  public static void main(String[] args) {
+    short[] interesting = {
+      (short) 0x0000, (short) 0x0001, (short) 0x007f,
+      (short) 0x0080, (short) 0x0081, (short) 0x00ff,
+      (short) 0x0100, (short) 0x0101, (short) 0x017f,
+      (short) 0x0180, (short) 0x0181, (short) 0x01ff,
+      (short) 0x7f00, (short) 0x7f01, (short) 0x7f7f,
+      (short) 0x7f80, (short) 0x7f81, (short) 0x7fff,
+      (short) 0x8000, (short) 0x8001, (short) 0x807f,
+      (short) 0x8080, (short) 0x8081, (short) 0x80ff,
+      (short) 0x8100, (short) 0x8101, (short) 0x817f,
+      (short) 0x8180, (short) 0x8181, (short) 0x81ff,
+      (short) 0xff00, (short) 0xff01, (short) 0xff7f,
+      (short) 0xff80, (short) 0xff81, (short) 0xffff
+    };
+    // Initialize cross-values for the interesting values.
+    int total = interesting.length * interesting.length;
+    short[] x = new short[total];
+    short[] y = new short[total];
+    short[] z = new short[total];
+    int k = 0;
+    for (int i = 0; i < interesting.length; i++) {
+      for (int j = 0; j < interesting.length; j++) {
+        x[k] = 0;
+        y[k] = interesting[i];
+        z[k] = interesting[j];
+        k++;
+      }
+    }
+
+    // And test.
+    doitMin(x, y, z);
+    for (int i = 0; i < total; i++) {
+      short expected = (short) Math.min(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+    doitMax(x, y, z);
+    for (int i = 0; i < total; i++) {
+      short expected = (short) Math.max(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/901-hello-ti-agent/basics.cc b/test/901-hello-ti-agent/basics.cc
index 8695e0c..21dcf98 100644
--- a/test/901-hello-ti-agent/basics.cc
+++ b/test/901-hello-ti-agent/basics.cc
@@ -176,5 +176,22 @@
   return res == JVMTI_ERROR_UNATTACHED_THREAD;
 }
 
+extern "C" JNIEXPORT jstring JNICALL Java_art_Test901_getErrorName(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jint error) {
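+  // GetErrorName allocates the returned name; copy it into a jstring and release the buffer.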
+  char* name;
+  jvmtiError res = jvmti_env->GetErrorName(static_cast<jvmtiError>(error), &name);
+  if (JvmtiErrorToException(env, jvmti_env, res)) {
+    return nullptr;
+  }
+
+  jstring ret_string = env->NewStringUTF(name);
+  jvmtiError dealloc = jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(name));
+  if (JvmtiErrorToException(env, jvmti_env, dealloc)) {
+    return nullptr;
+  }
+
+  return ret_string;
+}
+
 }  // namespace Test901HelloTi
 }  // namespace art
diff --git a/test/901-hello-ti-agent/expected.txt b/test/901-hello-ti-agent/expected.txt
index eb5b6a2..4177ffc 100644
--- a/test/901-hello-ti-agent/expected.txt
+++ b/test/901-hello-ti-agent/expected.txt
@@ -10,4 +10,67 @@
 4
 8
 JVMTI_ERROR_ILLEGAL_ARGUMENT
+1 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+0 = JVMTI_ERROR_NONE
+9 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+10 = JVMTI_ERROR_INVALID_THREAD
+11 = JVMTI_ERROR_INVALID_THREAD_GROUP
+12 = JVMTI_ERROR_INVALID_PRIORITY
+13 = JVMTI_ERROR_THREAD_NOT_SUSPENDED
+14 = JVMTI_ERROR_THREAD_SUSPENDED
+15 = JVMTI_ERROR_THREAD_NOT_ALIVE
+4 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+20 = JVMTI_ERROR_INVALID_OBJECT
+21 = JVMTI_ERROR_INVALID_CLASS
+22 = JVMTI_ERROR_CLASS_NOT_PREPARED
+23 = JVMTI_ERROR_INVALID_METHODID
+24 = JVMTI_ERROR_INVALID_LOCATION
+25 = JVMTI_ERROR_INVALID_FIELDID
+5 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+31 = JVMTI_ERROR_NO_MORE_FRAMES
+32 = JVMTI_ERROR_OPAQUE_FRAME
+1 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+34 = JVMTI_ERROR_TYPE_MISMATCH
+35 = JVMTI_ERROR_INVALID_SLOT
+4 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+40 = JVMTI_ERROR_DUPLICATE
+41 = JVMTI_ERROR_NOT_FOUND
+8 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+50 = JVMTI_ERROR_INVALID_MONITOR
+51 = JVMTI_ERROR_NOT_MONITOR_OWNER
+52 = JVMTI_ERROR_INTERRUPT
+7 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+60 = JVMTI_ERROR_INVALID_CLASS_FORMAT
+61 = JVMTI_ERROR_CIRCULAR_CLASS_DEFINITION
+62 = JVMTI_ERROR_FAILS_VERIFICATION
+63 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_ADDED
+64 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_SCHEMA_CHANGED
+65 = JVMTI_ERROR_INVALID_TYPESTATE
+66 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_HIERARCHY_CHANGED
+67 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_DELETED
+68 = JVMTI_ERROR_UNSUPPORTED_VERSION
+69 = JVMTI_ERROR_NAMES_DONT_MATCH
+70 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_CLASS_MODIFIERS_CHANGED
+71 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_MODIFIERS_CHANGED
+7 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+79 = JVMTI_ERROR_UNMODIFIABLE_CLASS
+18 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+98 = JVMTI_ERROR_NOT_AVAILABLE
+99 = JVMTI_ERROR_MUST_POSSESS_CAPABILITY
+100 = JVMTI_ERROR_NULL_POINTER
+101 = JVMTI_ERROR_ABSENT_INFORMATION
+102 = JVMTI_ERROR_INVALID_EVENT_TYPE
+103 = JVMTI_ERROR_ILLEGAL_ARGUMENT
+104 = JVMTI_ERROR_NATIVE_METHOD
+1 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+106 = JVMTI_ERROR_CLASS_LOADER_UNSUPPORTED
+3 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+110 = JVMTI_ERROR_OUT_OF_MEMORY
+111 = JVMTI_ERROR_ACCESS_DENIED
+112 = JVMTI_ERROR_WRONG_PHASE
+113 = JVMTI_ERROR_INTERNAL
+1 times JVMTI_ERROR_ILLEGAL_ARGUMENT
+115 = JVMTI_ERROR_UNATTACHED_THREAD
+116 = JVMTI_ERROR_INVALID_ENVIRONMENT
+1 times JVMTI_ERROR_ILLEGAL_ARGUMENT
 VMDeath
diff --git a/test/901-hello-ti-agent/src/art/Test901.java b/test/901-hello-ti-agent/src/art/Test901.java
index eef2188..7d853a7 100644
--- a/test/901-hello-ti-agent/src/art/Test901.java
+++ b/test/901-hello-ti-agent/src/art/Test901.java
@@ -32,6 +32,8 @@
     set(2);  // CLASS
     set(4);  // JNI
     set(8);  // Error.
+
+    testErrorNames();
   }
 
   private static void set(int i) {
@@ -44,7 +46,39 @@
     }
   }
 
+  private static void testErrorNames() {
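+      // Walk error codes -1..117; runs of identical failures are collapsed into "<n> times <message>" lines.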
+      int consecutiveErrors = 0;
+      String lastError = null;
+      for (int i = -1; i <= 117; i++) {
+          String errorName = null;
+          String error = null;
+          try {
+              errorName = getErrorName(i);
+          } catch (RuntimeException e) {
+              error = e.getMessage();
+          }
+
+          if (lastError != null &&
+                  (errorName != null || (error != null && !lastError.equals(error)))) {
+              System.out.println(consecutiveErrors + " times " + lastError);
+              lastError = null;
+              consecutiveErrors = 0;
+          }
+
+          if (errorName != null) {
+              System.out.println(i + " = " + errorName);
+          } else {
+              lastError = error;
+              consecutiveErrors++;
+          }
+      }
+      if (consecutiveErrors > 0) {
+          System.out.println(consecutiveErrors + " times " + lastError);
+      }
+  }
+
   private static native boolean checkLivePhase();
   private static native void setVerboseFlag(int flag, boolean value);
   private static native boolean checkUnattached();
+  private static native String getErrorName(int error);
 }
diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt
index 702b247..b128d1c 100644
--- a/test/913-heaps/expected.txt
+++ b/test/913-heaps/expected.txt
@@ -385,3 +385,10 @@
 5@1002 --(field@10)--> 1@1000 [size=16, length=-1]
 5@1002 --(field@9)--> 6@1000 [size=16, length=-1]
 ---
+
+default
+image
+zygote
+app
+
+3
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index e319f7d..ec36ceb 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -817,5 +817,266 @@
   return result;
 }
 
+using GetObjectHeapId = jvmtiError(*)(jvmtiEnv*, jlong, jint*, ...);
+static GetObjectHeapId gGetObjectHeapIdFn = nullptr;
+
+using GetHeapName = jvmtiError(*)(jvmtiEnv*, jint, char**, ...);
+static GetHeapName gGetHeapNameFn = nullptr;
+
+using IterateThroughHeapExt = jvmtiError(*)(jvmtiEnv*,
+                                            jint,
+                                            jclass,
+                                            const jvmtiHeapCallbacks*,
+                                            const void*);
+static IterateThroughHeapExt gIterateThroughHeapExt = nullptr;
+
+
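+// Releases the strings and arrays that GetExtensionFunctions allocated for each extension entry.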
+static void FreeExtensionFunctionInfo(jvmtiExtensionFunctionInfo* extensions, jint count) {
+  for (size_t i = 0; i != static_cast<size_t>(count); ++i) {
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].id));
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].short_description));
+    for (size_t j = 0; j != static_cast<size_t>(extensions[i].param_count); ++j) {
+      jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].params[j].name));
+    }
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].params));
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].errors));
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkForExtensionApis(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) {
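+  // Resolve the ART-specific heap extension functions and validate their advertised signatures.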
+  jint extension_count;
+  jvmtiExtensionFunctionInfo* extensions;
+  jvmtiError result = jvmti_env->GetExtensionFunctions(&extension_count, &extensions);
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
+    return;
+  }
+
+  for (size_t i = 0; i != static_cast<size_t>(extension_count); ++i) {
+    if (strcmp("com.android.art.heap.get_object_heap_id", extensions[i].id) == 0) {
+      CHECK(gGetObjectHeapIdFn == nullptr);
+      gGetObjectHeapIdFn = reinterpret_cast<GetObjectHeapId>(extensions[i].func);
+
+      CHECK_EQ(extensions[i].param_count, 2);
+
+      CHECK_EQ(strcmp("tag", extensions[i].params[0].name), 0);
+      CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JLONG);
+      CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN);
+
+      CHECK_EQ(strcmp("heap_id", extensions[i].params[1].name), 0);
+      CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_JINT);
+      CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_OUT);
+      CHECK_EQ(extensions[i].params[1].null_ok, false);
+
+      CHECK_EQ(extensions[i].error_count, 1);
+      CHECK(extensions[i].errors != nullptr);
+      CHECK(extensions[i].errors[0] == JVMTI_ERROR_NOT_FOUND);
+
+      continue;
+    }
+
+    if (strcmp("com.android.art.heap.get_heap_name", extensions[i].id) == 0) {
+      CHECK(gGetHeapNameFn == nullptr);
+      gGetHeapNameFn = reinterpret_cast<GetHeapName>(extensions[i].func);
+
+      CHECK_EQ(extensions[i].param_count, 2);
+
+      CHECK_EQ(strcmp("heap_id", extensions[i].params[0].name), 0);
+      CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JINT);
+      CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN);
+
+      CHECK_EQ(strcmp("heap_name", extensions[i].params[1].name), 0);
+      CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_CCHAR);
+      CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_ALLOC_BUF);
+      CHECK_EQ(extensions[i].params[1].null_ok, false);
+
+      CHECK_EQ(extensions[i].error_count, 1);
+      CHECK(extensions[i].errors != nullptr);
+      CHECK(extensions[i].errors[0] == JVMTI_ERROR_ILLEGAL_ARGUMENT);
+    }
+
+    if (strcmp("com.android.art.heap.iterate_through_heap_ext", extensions[i].id) == 0) {
+      CHECK(gIterateThroughHeapExt == nullptr);
+      gIterateThroughHeapExt = reinterpret_cast<IterateThroughHeapExt>(extensions[i].func);
+
+      CHECK_EQ(extensions[i].param_count, 4);
+
+      CHECK_EQ(strcmp("heap_filter", extensions[i].params[0].name), 0);
+      CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JINT);
+      CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN);
+
+      CHECK_EQ(strcmp("klass", extensions[i].params[1].name), 0);
+      CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_JCLASS);
+      CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_IN);
+      CHECK_EQ(extensions[i].params[1].null_ok, true);
+
+      CHECK_EQ(strcmp("callbacks", extensions[i].params[2].name), 0);
+      CHECK_EQ(extensions[i].params[2].base_type, JVMTI_TYPE_CVOID);
+      CHECK_EQ(extensions[i].params[2].kind, JVMTI_KIND_IN_PTR);
+      CHECK_EQ(extensions[i].params[2].null_ok, false);
+
+      CHECK_EQ(strcmp("user_data", extensions[i].params[3].name), 0);
+      CHECK_EQ(extensions[i].params[3].base_type, JVMTI_TYPE_CVOID);
+      CHECK_EQ(extensions[i].params[3].kind, JVMTI_KIND_IN_PTR);
+      CHECK_EQ(extensions[i].params[3].null_ok, true);
+
+      CHECK_EQ(extensions[i].error_count, 3);
+      CHECK(extensions[i].errors != nullptr);
+      CHECK(extensions[i].errors[0] == JVMTI_ERROR_MUST_POSSESS_CAPABILITY);
+      CHECK(extensions[i].errors[1] == JVMTI_ERROR_INVALID_CLASS);
+      CHECK(extensions[i].errors[2] == JVMTI_ERROR_NULL_POINTER);
+    }
+  }
+
+  CHECK(gGetObjectHeapIdFn != nullptr);
+  CHECK(gGetHeapNameFn != nullptr);
+
+  FreeExtensionFunctionInfo(extensions, extension_count);
+}
+
+extern "C" JNIEXPORT jint JNICALL Java_art_Test913_getObjectHeapId(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag) {
+  CHECK(gGetObjectHeapIdFn != nullptr);
+  jint heap_id = -1;  // Initialize so a failed extension call does not return an indeterminate value.
+  jvmtiError result = gGetObjectHeapIdFn(jvmti_env, tag, &heap_id);
+  JvmtiErrorToException(env, jvmti_env, result);
+  return heap_id;
+}
+
+extern "C" JNIEXPORT jstring JNICALL Java_art_Test913_getHeapName(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jint heap_id) {
+  CHECK(gGetHeapNameFn != nullptr);
+  char* heap_name;
+  jvmtiError result = gGetHeapNameFn(jvmti_env, heap_id, &heap_name);
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
+    return nullptr;
+  }
+  jstring ret = env->NewStringUTF(heap_name);
+  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(heap_name));
+  return ret;
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkGetObjectHeapIdInCallback(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag, jint heap_id) {
+  CHECK(gGetObjectHeapIdFn != nullptr);
+
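+  // First, check that the extension can be called from inside a FollowReferences heap-reference callback.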
+  {
+    struct GetObjectHeapIdCallbacks {
+      static jint JNICALL FollowReferencesCallback(
+          jvmtiHeapReferenceKind reference_kind ATTRIBUTE_UNUSED,
+          const jvmtiHeapReferenceInfo* reference_info ATTRIBUTE_UNUSED,
+          jlong class_tag ATTRIBUTE_UNUSED,
+          jlong referrer_class_tag ATTRIBUTE_UNUSED,
+          jlong size ATTRIBUTE_UNUSED,
+          jlong* tag_ptr,
+          jlong* referrer_tag_ptr ATTRIBUTE_UNUSED,
+          jint length ATTRIBUTE_UNUSED,
+          void* user_data) {
+        if (*tag_ptr != 0) {
+          GetObjectHeapIdCallbacks* p = reinterpret_cast<GetObjectHeapIdCallbacks*>(user_data);
+          if (*tag_ptr == p->check_callback_tag) {
+            jint tag_heap_id;
+            jvmtiError result = gGetObjectHeapIdFn(jvmti_env, *tag_ptr, &tag_heap_id);
+            CHECK_EQ(result, JVMTI_ERROR_NONE);
+            CHECK_EQ(tag_heap_id, p->check_callback_id);
+            return JVMTI_VISIT_ABORT;
+          }
+        }
+
+        return JVMTI_VISIT_OBJECTS;  // Continue visiting.
+      }
+
+      jlong check_callback_tag;
+      jint check_callback_id;
+    };
+
+    jvmtiHeapCallbacks callbacks;
+    memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+    callbacks.heap_reference_callback = GetObjectHeapIdCallbacks::FollowReferencesCallback;
+
+    GetObjectHeapIdCallbacks ffc;
+    ffc.check_callback_tag = tag;
+    ffc.check_callback_id = heap_id;
+
+    jvmtiError ret = jvmti_env->FollowReferences(0, nullptr, nullptr, &callbacks, &ffc);
+    if (JvmtiErrorToException(env, jvmti_env, ret)) {
+      return;
+    }
+  }
+
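+  // Then check the same from inside an IterateThroughHeap iteration callback.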
+  {
+    struct GetObjectHeapIdCallbacks {
+      static jint JNICALL HeapIterationCallback(jlong class_tag ATTRIBUTE_UNUSED,
+                                                jlong size ATTRIBUTE_UNUSED,
+                                                jlong* tag_ptr,
+                                                jint length ATTRIBUTE_UNUSED,
+                                                void* user_data) {
+        if (*tag_ptr != 0) {
+          GetObjectHeapIdCallbacks* p = reinterpret_cast<GetObjectHeapIdCallbacks*>(user_data);
+          if (*tag_ptr == p->check_callback_tag) {
+            jint tag_heap_id;
+            jvmtiError result = gGetObjectHeapIdFn(jvmti_env, *tag_ptr, &tag_heap_id);
+            CHECK_EQ(result, JVMTI_ERROR_NONE);
+            CHECK_EQ(tag_heap_id, p->check_callback_id);
+            return JVMTI_VISIT_ABORT;
+          }
+        }
+
+        return 0;  // Continue visiting.
+      }
+
+      jlong check_callback_tag;
+      jint check_callback_id;
+    };
+
+    jvmtiHeapCallbacks callbacks;
+    memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+    callbacks.heap_iteration_callback = GetObjectHeapIdCallbacks::HeapIterationCallback;
+
+    GetObjectHeapIdCallbacks ffc;
+    ffc.check_callback_tag = tag;
+    ffc.check_callback_id = heap_id;
+
+    jvmtiError ret = jvmti_env->IterateThroughHeap(0, nullptr, &callbacks, &ffc);
+    if (JvmtiErrorToException(env, jvmti_env, ret)) {
+      return;
+    }
+  }
+}
+
+static bool gFoundExt = false;
+
+static jint JNICALL HeapIterationExtCallback(jlong class_tag ATTRIBUTE_UNUSED,
+                                             jlong size ATTRIBUTE_UNUSED,
+                                             jlong* tag_ptr,
+                                             jint length ATTRIBUTE_UNUSED,
+                                             void* user_data ATTRIBUTE_UNUSED,
+                                             jint heap_id) {
+  // We expect some tagged objects at or above the threshold, where the expected heap id is
+  // encoded as the offset above the threshold.
+  constexpr jlong kThreshold = 30000000;
+  jlong tag = *tag_ptr;
+  if (tag >= kThreshold) {
+    jint expected_heap_id = static_cast<jint>(tag - kThreshold);
+    CHECK_EQ(expected_heap_id, heap_id);
+    gFoundExt = true;
+  }
+  return 0;
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_Test913_iterateThroughHeapExt(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) {
+  CHECK(gIterateThroughHeapExt != nullptr);
+
+  jvmtiHeapCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
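+  // HeapIterationExtCallback takes an extra trailing heap_id parameter, so it is cast to the
+  // standard heap_iteration_callback type here.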
+  callbacks.heap_iteration_callback =
+      reinterpret_cast<decltype(callbacks.heap_iteration_callback)>(HeapIterationExtCallback);
+
+  jvmtiError ret = gIterateThroughHeapExt(jvmti_env, 0, nullptr, &callbacks, nullptr);
+  JvmtiErrorToException(env, jvmti_env, ret);
+  CHECK(gFoundExt);
+}
+
 }  // namespace Test913Heaps
 }  // namespace art
diff --git a/test/913-heaps/src/art/Test913.java b/test/913-heaps/src/art/Test913.java
index 8800b1a..97f48ee 100644
--- a/test/913-heaps/src/art/Test913.java
+++ b/test/913-heaps/src/art/Test913.java
@@ -16,6 +16,9 @@
 
 package art;
 
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -44,6 +47,8 @@
     };
     t.start();
     cdl1.await();
+
+    doExtensionTests();
   }
 
   public static void runFollowReferences() throws Exception {
@@ -215,6 +220,59 @@
     System.out.println(getTag(floatObject));
   }
 
+  static ArrayList<Object> extensionTestHolder;
+
+  private static void doExtensionTests() {
+    checkForExtensionApis();
+
+    extensionTestHolder = new ArrayList<>();
+    System.out.println();
+
+    try {
+      getHeapName(-1);
+      System.out.println("Expected failure for -1");
+    } catch (Exception e) {
+    }
+    System.out.println(getHeapName(0));
+    System.out.println(getHeapName(1));
+    System.out.println(getHeapName(2));
+    System.out.println(getHeapName(3));
+    try {
+      getHeapName(4);
+      System.out.println("Expected failure for 4");
+    } catch (Exception e) {
+    }
+
+    System.out.println();
+
+    setTag(Object.class, 100000);
+    int objectClassHeapId = getObjectHeapId(100000);
+    int objClassExpectedHeapId = hasImage() ? 1 : 3;
+    if (objectClassHeapId != objClassExpectedHeapId) {
+      throw new RuntimeException("Expected object class in heap " + objClassExpectedHeapId +
+          " but received " + objectClassHeapId);
+    }
+
+    A a = new A();
+    extensionTestHolder.add(a);
+    setTag(a, 100001);
+    System.out.println(getObjectHeapId(100001));
+
+    checkGetObjectHeapIdInCallback(100000, objClassExpectedHeapId);
+    checkGetObjectHeapIdInCallback(100001, 3);
+
+    long baseTag = 30000000;
+    setTag(Object.class, baseTag + objClassExpectedHeapId);
+    setTag(Class.class, baseTag + objClassExpectedHeapId);
+    Object o = new Object();
+    extensionTestHolder.add(o);
+    setTag(o, baseTag + 3);
+
+    iterateThroughHeapExt();
+
+    extensionTestHolder = null;
+  }
+
   private static void runGc() {
     clearStats();
     forceGarbageCollection();
@@ -233,6 +291,24 @@
     System.out.println((s > 0) + " " + (f > 0));
   }
 
+  private static boolean hasImage() {
+    try {
+      int pid = Integer.parseInt(new File("/proc/self").getCanonicalFile().getName());
+      BufferedReader reader = new BufferedReader(new FileReader("/proc/" + pid + "/maps"));
+      String line;
+      while ((line = reader.readLine()) != null) {
+        if (line.endsWith(".art")) {
+          reader.close();
+          return true;
+        }
+      }
+      reader.close();
+      return false;
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
   private static class TestConfig {
     private Class<?> klass = null;
     private int heapFilter = 0;
@@ -642,9 +718,16 @@
   private static native int getGcFinishes();
   private static native void forceGarbageCollection();
 
+  private static native void checkForExtensionApis();
+  private static native int getObjectHeapId(long tag);
+  private static native String getHeapName(int heapId);
+  private static native void checkGetObjectHeapIdInCallback(long tag, int heapId);
+
   public static native String[] followReferences(int heapFilter, Class<?> klassFilter,
       Object initialObject, int stopAfter, int followSet, Object jniRef);
   public static native String[] followReferencesString(Object initialObject);
   public static native String followReferencesPrimitiveArray(Object initialObject);
   public static native String followReferencesPrimitiveFields(Object initialObject);
+
+  private static native void iterateThroughHeapExt();
 }
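
The hasImage() helper added to Test913.java decides whether Object.class should be reported in the boot image heap (id 1) or the app heap (id 3) by scanning the process's own memory maps for a mapped .art file. A rough Python equivalent of that check, assuming /proc/self/maps is readable:

    # Sketch: detect a mapped boot image the same way Test913.hasImage() does.
    def has_art_image(maps_path='/proc/self/maps'):
        with open(maps_path) as maps:
            return any(line.rstrip().endswith('.art') for line in maps)
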
diff --git a/test/etc/default-build b/test/etc/default-build
index 744c38b..0508b85 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -82,9 +82,9 @@
 JACK_EXPERIMENTAL_ARGS[${DEFAULT_EXPERIMENT}]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24"
 
 declare -A SMALI_EXPERIMENTAL_ARGS
-SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api-level 24"
-SMALI_EXPERIMENTAL_ARGS["method-handles"]="--api-level 26"
-SMALI_EXPERIMENTAL_ARGS["agents"]="--api-level 26"
+SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api 24"
+SMALI_EXPERIMENTAL_ARGS["method-handles"]="--api 26"
+SMALI_EXPERIMENTAL_ARGS["agents"]="--api 26"
 
 declare -A JAVAC_EXPERIMENTAL_ARGS
 JAVAC_EXPERIMENTAL_ARGS["default-methods"]="-source 1.8 -target 1.8"
@@ -275,7 +275,7 @@
 
 if [ "${HAS_SMALI}" = "true" -a ${NEED_DEX} = "true" ]; then
   # Compile Smali classes
-  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
+  ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
 
   # Don't bother with dexmerger if we provide our own main function in a smali file.
   if [ ${SKIP_DX_MERGER} = "false" ]; then
@@ -287,7 +287,7 @@
 
 if [ "${HAS_SMALI_MULTIDEX}" = "true" -a ${NEED_DEX} = "true" ]; then
   # Compile Smali classes
-  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes2.dex `find smali-multidex -name '*.smali'`
+  ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes2.dex `find smali-multidex -name '*.smali'`
 
   # Don't bother with dexmerger if we provide our own main function in a smali file.
   if [ ${HAS_SRC_MULTIDEX} = "true" ]; then
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index f750556..c6ef8ed 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -668,7 +668,7 @@
     adb root > /dev/null
     adb wait-for-device
     if [ "$QUIET" = "n" ]; then
-      adb shell rm -r $DEX_LOCATION
+      adb shell rm -rf $DEX_LOCATION
       adb shell mkdir -p $DEX_LOCATION
       adb push $TEST_NAME.jar $DEX_LOCATION
       adb push $TEST_NAME-ex.jar $DEX_LOCATION
@@ -736,6 +736,7 @@
 
     rm -f $cmdfile
 else
+    # Host run.
     export ANDROID_PRINTF_LOG=brief
 
     # By default, and for prebuild dex2oat, we are interested in errors being logged. In dev mode
@@ -792,7 +793,12 @@
 
     cd $ANDROID_BUILD_TOP
 
+    # Make sure we delete any existing compiler artifacts.
+    # This enables tests to call the RUN script multiple times in a row
+    # without worrying about interference.
+    rm -rf ${DEX_LOCATION}/oat
     rm -rf ${DEX_LOCATION}/dalvik-cache/
+
     mkdir -p ${mkdir_locations} || exit 1
     $profman_cmdline || { echo "Profman failed." >&2 ; exit 2; }
     $dex2oat_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 0a7089a..4b44df7 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -336,11 +336,6 @@
         "variant": "no-image & jit"
     },
     {
-        "tests": ["597-deopt-new-string"],
-        "bug": "http://b/36467228",
-        "variant": "no-image & jit"
-    },
-    {
         "tests": ["530-checker-lse",
                   "530-checker-lse2",
                   "030-bad-finalizer",
@@ -562,6 +557,8 @@
         "tests": [
             "097-duplicate-method",
             "138-duplicate-classes-check2",
+            "159-app-image-fields",
+            "649-vdex-duplicate-method",
             "804-class-extends-itself",
             "921-hello-failure"
         ],
@@ -578,6 +575,7 @@
             "087-gc-after-link",
             "626-const-class-linking",
             "629-vdex-speed",
+            "647-jni-get-field-id",
             "944-transform-classloaders"
         ],
         "description": [
@@ -677,5 +675,12 @@
         "variant": "debug",
         "description": "Test disabled in debug mode because of dex2oatd timeouts.",
         "bug": "b/33650497"
+    },
+    {
+        "tests": "640-checker-integer-valueof",
+        "description": [
+            "The java.lang.Integer.valueOf intrinsic is not supported in PIC mode."
+        ],
+        "variant": "optimizing & pictest | speed-profile & pictest"
     }
 ]
diff --git a/test/run-test b/test/run-test
index f60f766..933a7fe 100755
--- a/test/run-test
+++ b/test/run-test
@@ -46,7 +46,7 @@
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
 export USE_JACK="true"
-export SMALI_ARGS="--experimental"
+export SMALI_ARGS=""
 
 # If dx was not set by the environment variable, assume it is in the path.
 if [ -z "$DX" ]; then
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index a809246..c99159f 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -497,7 +497,11 @@
       test_skipped = True
     else:
       test_skipped = False
-      proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT, stdout=subprocess.PIPE, universal_newlines=True)
+      if gdb:
+        proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT, universal_newlines=True)
+      else:
+        proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT, stdout=subprocess.PIPE,
+                                universal_newlines=True)
       script_output = proc.communicate(timeout=timeout)[0]
       test_passed = not proc.wait()
 
@@ -824,7 +828,15 @@
     adb_command = 'adb shell cat /sys/devices/system/cpu/present'
     cpu_info_proc = subprocess.Popen(adb_command.split(), stdout=subprocess.PIPE)
     cpu_info = cpu_info_proc.stdout.read()
-    return int(cpu_info.split('-')[1])
+    if type(cpu_info) is bytes:
+      cpu_info = cpu_info.decode('utf-8')
+    cpu_info_regex = r'\d*-(\d*)'
+    match = re.match(cpu_info_regex, cpu_info)
+    if match:
+      return int(match.group(1))
+    else:
+      raise ValueError('Unable to determine the concurrency for the target. '
+                       'Is the device connected?')
   else:
     return multiprocessing.cpu_count()
 
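
The rewritten concurrency probe above reads /sys/devices/system/cpu/present, which holds a range such as 0-7 (or a bare 0 on a single-core device), and takes the upper bound as the target's concurrency. A standalone sketch of the same parsing, with a hypothetical helper name:

    # Sketch of parsing the 'present' CPU range, e.g. b'0-7\n' -> 7.
    import re

    def max_present_cpu(cpu_info):
        if isinstance(cpu_info, bytes):
            cpu_info = cpu_info.decode('utf-8')
        match = re.match(r'\d*-(\d*)', cpu_info)
        if not match:  # e.g. a bare '0' on a single-core device, or no device output
            raise ValueError('Unable to determine the concurrency for the target. '
                             'Is the device connected?')
        return int(match.group(1))
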
diff --git a/tools/dexfuzz/src/dexfuzz/DexFuzz.java b/tools/dexfuzz/src/dexfuzz/DexFuzz.java
index 18db4c1..3b28754 100644
--- a/tools/dexfuzz/src/dexfuzz/DexFuzz.java
+++ b/tools/dexfuzz/src/dexfuzz/DexFuzz.java
@@ -61,11 +61,14 @@
     multipleListener.addListener(statusListener);
 
     if (Options.repeat > 1 && Options.execute) {
-      // Add the live updating listener, but only if we're not printing out lots of logs.
-      if (!Log.likelyToLog()) {
+      // If executing repeatedly, take care of reporting progress to the user.
+      if (Options.quiet) {
+        // Nothing if quiet is requested.
+      } else if (!Log.likelyToLog()) {
+        // Add the live updating listener if we're not printing out lots of logs.
         multipleListener.addListener(new UpdatingConsoleListener());
       } else {
-        // If we are dumping out lots of logs, then use the ConsoleLogger instead.
+        // If we are dumping out lots of logs, then use the console logger instead.
         multipleListener.addListener(new ConsoleLoggerListener());
       }
       // Add the file logging listener.
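
The listener selection above boils down to: report nothing when --quiet is passed, use the live-updating console listener when logging is sparse, and fall back to the plain console logger when verbose logs would overwrite the progress line. A condensed sketch of that decision (names are stand-ins for the dexfuzz classes):

    # Sketch of the progress-listener choice in DexFuzz.main (illustrative names).
    def pick_progress_listener(quiet, likely_to_log):
        if quiet:
            return None                       # --quiet: no progress reporting
        if not likely_to_log:
            return 'UpdatingConsoleListener'  # live-updating progress line
        return 'ConsoleLoggerListener'        # plain logging when output is verbose
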
diff --git a/tools/dexfuzz/src/dexfuzz/Options.java b/tools/dexfuzz/src/dexfuzz/Options.java
index af8a05c..d1d8172 100644
--- a/tools/dexfuzz/src/dexfuzz/Options.java
+++ b/tools/dexfuzz/src/dexfuzz/Options.java
@@ -80,6 +80,7 @@
   public static boolean dumpMutations;
   public static boolean loadMutations;
   public static boolean runBisectionSearch;
+  public static boolean quiet;
 
   /**
    * Print out usage information about dexfuzz, and then exit.
@@ -144,6 +145,7 @@
     Log.always("  --unique-db=<file>     : Use <file> store results about unique programs");
     Log.always("                           (Default: unique_progs.db)");
     Log.always("  --bisection-search     : Run bisection search for divergences");
+    Log.always("  --quiet                : Disables progress log");
     Log.always("");
     System.exit(0);
   }
@@ -203,6 +205,8 @@
       maxMethods = 1;
     } else if (flag.equals("bisection-search")) {
       runBisectionSearch = true;
+    } else if (flag.equals("quiet")) {
+      quiet = true;
     } else if (flag.equals("help")) {
       usage();
     } else {
diff --git a/tools/jfuzz/run_dex_fuzz_test.py b/tools/jfuzz/run_dex_fuzz_test.py
index 34a92f6..c1d2e4f 100755
--- a/tools/jfuzz/run_dex_fuzz_test.py
+++ b/tools/jfuzz/run_dex_fuzz_test.py
@@ -91,7 +91,7 @@
   def Run(self):
     """Feeds JFuzz programs into DexFuzz testing."""
     print()
-    print('**\n**** JFuzz Testing\n**')
+    print('**\n**** J/DexFuzz Testing\n**')
     print()
     print('#Tests    :', self._num_tests)
     print('Device    :', self._device)
@@ -111,9 +111,11 @@
     for i in range(1, self._num_inputs + 1):
       jack_args = ['-cp', GetJackClassPath(), '--output-dex', '.', 'Test.java']
       if RunCommand(['jfuzz'], out='Test.java', err=None) != RetCode.SUCCESS:
+        print('Unexpected error while running JFuzz')
         raise FatalError('Unexpected error while running JFuzz')
       if RunCommand(['jack'] + jack_args, out=None, err='jackerr.txt',
                     timeout=30) != RetCode.SUCCESS:
+        print('Unexpected error while running Jack')
         raise FatalError('Unexpected error while running Jack')
       shutil.move('Test.java', '../Test' + str(i) + '.java')
       shutil.move('classes.dex', 'classes' + str(i) + '.dex')
@@ -126,8 +128,11 @@
                     '--execute',
                     '--execute-class=Test',
                     '--repeat=' + str(self._num_tests),
-                    '--dump-output', '--dump-verify',
-                    '--interpreter', '--optimizing',
+                    '--quiet',
+                    '--dump-output',
+                    '--dump-verify',
+                    '--interpreter',
+                    '--optimizing',
                     '--bisection-search']
     if self._device is not None:
       dexfuzz_args += ['--device=' + self._device, '--allarm']
diff --git a/tools/jfuzz/run_jfuzz_test.py b/tools/jfuzz/run_jfuzz_test.py
index b5f856f..7e72aa1 100755
--- a/tools/jfuzz/run_jfuzz_test.py
+++ b/tools/jfuzz/run_jfuzz_test.py
@@ -470,12 +470,20 @@
         self._num_not_compiled += 1
       else:
         self._num_not_run += 1
-    elif self._true_divergence_only and RetCode.TIMEOUT in (retc1, retc2):
-      # When only true divergences are requested, any divergence in return
-      # code where one is a time out is treated as a regular time out.
-      self._num_timed_out += 1
     else:
       # Divergence in return code.
+      if self._true_divergence_only:
+        # When only true divergences are requested, any divergence in return
+        # code where one is a time out is treated as a regular time out.
+        if RetCode.TIMEOUT in (retc1, retc2):
+          self._num_timed_out += 1
+          return
+        # When only true divergences are requested, a runtime crash in just
+        # the RI is treated as if not run at all.
+        if retc1 == RetCode.ERROR and retc2 == RetCode.SUCCESS:
+          if self._runner1.GetBisectionSearchArgs() is None:
+            self._num_not_run += 1
+            return
       self.ReportDivergence(retc1, retc2, is_output_divergence=False)
 
   def GetCurrentDivergenceDir(self):
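
With the restructured branch above, a return-code divergence is only reported when the stricter filters do not apply: when only true divergences are requested, a timeout on either runner is booked as a plain timeout, and an RI-only crash (ERROR vs SUCCESS) with no bisection search configured is booked as not run. A condensed sketch of that decision, using stand-in names for the script's RetCode values and counters:

    # Sketch of the true-divergence filtering; names mirror but do not reuse the script's.
    def classify_divergence(retc1, retc2, true_divergence_only, bisection_args):
        if true_divergence_only:
            if 'TIMEOUT' in (retc1, retc2):
                return 'timed_out'   # any timeout counts as a regular timeout
            if retc1 == 'ERROR' and retc2 == 'SUCCESS' and bisection_args is None:
                return 'not_run'     # RI-only crash without bisection: treat as not run
        return 'divergence'          # otherwise, report a return-code divergence
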
diff --git a/tools/jfuzz/run_jfuzz_test_nightly.py b/tools/jfuzz/run_jfuzz_test_nightly.py
index a9f8365..e6c216d 100755
--- a/tools/jfuzz/run_jfuzz_test_nightly.py
+++ b/tools/jfuzz/run_jfuzz_test_nightly.py
@@ -26,8 +26,9 @@
 from tempfile import mkdtemp
 from tempfile import TemporaryFile
 
-# run_jfuzz_test.py success string.
+# run_jfuzz_test.py success/failure strings.
 SUCCESS_STRING = 'success (no divergences)'
+FAILURE_STRING = 'FAILURE (divergences)'
 
 # Constant returned by string find() method when search fails.
 NOT_FOUND = -1
@@ -43,7 +44,10 @@
   (args, unknown_args) = parser.parse_known_args()
   # Run processes.
   cmd = cmd + unknown_args
-  print('\n**** Running ****\n\n', cmd, '\n')
+  print()
+  print('**\n**** Nightly JFuzz Testing\n**')
+  print()
+  print('**** Running ****\n\n', cmd, '\n')
   output_files = [TemporaryFile('wb+') for _ in range(args.num_proc)]
   processes = []
   for i, output_file in enumerate(output_files):
@@ -69,7 +73,7 @@
     if directory_match:
       output_dirs.append(directory_match.group(1))
     if output_str.find(SUCCESS_STRING) == NOT_FOUND:
-      print('Tester', i, output_str)
+      print('Tester', i, FAILURE_STRING)
     else:
       print('Tester', i, SUCCESS_STRING)
   # Gather divergences.