Merge "ARM: Use r4 for stack overflow check to reduce code size."
diff --git a/benchmark/type-check/info.txt b/benchmark/type-check/info.txt
new file mode 100644
index 0000000..d14fb96
--- /dev/null
+++ b/benchmark/type-check/info.txt
@@ -0,0 +1 @@
+Benchmarks for repeating check-cast and instance-of instructions in a loop.
diff --git a/benchmark/type-check/src/TypeCheckBenchmark.java b/benchmark/type-check/src/TypeCheckBenchmark.java
new file mode 100644
index 0000000..96904d9
--- /dev/null
+++ b/benchmark/type-check/src/TypeCheckBenchmark.java
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TypeCheckBenchmark {
+    public void timeCheckCastLevel1ToLevel1(int count) {
+        Object[] arr = arr1;
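+        // i & 1023 wraps the index into the 1024-element array (power-of-two mask).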
+        for (int i = 0; i < count; ++i) {
+            Level1 l1 = (Level1) arr[i & 1023];
+        }
+    }
+
+    public void timeCheckCastLevel2ToLevel1(int count) {
+        Object[] arr = arr2;
+        for (int i = 0; i < count; ++i) {
+            Level1 l1 = (Level1) arr[i & 1023];
+        }
+    }
+
+    public void timeCheckCastLevel3ToLevel1(int count) {
+        Object[] arr = arr3;
+        for (int i = 0; i < count; ++i) {
+            Level1 l1 = (Level1) arr[i & 1023];
+        }
+    }
+
+    public void timeCheckCastLevel9ToLevel1(int count) {
+        Object[] arr = arr9;
+        for (int i = 0; i < count; ++i) {
+            Level1 l1 = (Level1) arr[i & 1023];
+        }
+    }
+
+    public void timeCheckCastLevel9ToLevel2(int count) {
+        Object[] arr = arr9;
+        for (int i = 0; i < count; ++i) {
+            Level2 l2 = (Level2) arr[i & 1023];
+        }
+    }
+
+    public void timeInstanceOfLevel1ToLevel1(int count) {
+        int sum = 0;
+        Object[] arr = arr1;
+        for (int i = 0; i < count; ++i) {
+            if (arr[i & 1023] instanceof Level1) {
+                ++sum;
+            }
+        }
+        result = sum;
+    }
+
+    public void timeInstanceOfLevel2ToLevel1(int count) {
+        int sum = 0;
+        Object[] arr = arr2;
+        for (int i = 0; i < count; ++i) {
+            if (arr[i & 1023] instanceof Level1) {
+                ++sum;
+            }
+        }
+        result = sum;
+    }
+
+    public void timeInstanceOfLevel3ToLevel1(int count) {
+        int sum = 0;
+        Object[] arr = arr3;
+        for (int i = 0; i < count; ++i) {
+            if (arr[i & 1023] instanceof Level1) {
+                ++sum;
+            }
+        }
+        result = sum;
+    }
+
+    public void timeInstanceOfLevel9ToLevel1(int count) {
+        int sum = 0;
+        Object[] arr = arr9;
+        for (int i = 0; i < count; ++i) {
+            if (arr[i & 1023] instanceof Level1) {
+                ++sum;
+            }
+        }
+        result = sum;
+    }
+
+    public void timeInstanceOfLevel9ToLevel2(int count) {
+        int sum = 0;
+        Object[] arr = arr9;
+        for (int i = 0; i < count; ++i) {
+            if (arr[i & 1023] instanceof Level2) {
+                ++sum;
+            }
+        }
+        result = sum;
+    }
+
+    public static Object[] createArray(int level) {
+        try {
+            Class<?>[] ls = {
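+                    // Slot 0 is a placeholder so the levels can be indexed 1-based.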
+                    null,
+                    Level1.class,
+                    Level2.class,
+                    Level3.class,
+                    Level4.class,
+                    Level5.class,
+                    Level6.class,
+                    Level7.class,
+                    Level8.class,
+                    Level9.class,
+            };
+            Class<?> l = ls[level];
+            Object[] array = new Object[1024];
+            for (int i = 0; i < array.length; ++i) {
+                array[i] = l.newInstance();
+            }
+            return array;
+        } catch (Exception unexpected) {
+            throw new Error("Initialization failure!", unexpected);
+        }
+    }
+    Object[] arr1 = createArray(1);
+    Object[] arr2 = createArray(2);
+    Object[] arr3 = createArray(3);
+    Object[] arr9 = createArray(9);
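+    // Sink for the instance-of loop counts, keeping them observable.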
+    int result;
+}
+
+class Level1 { }
+class Level2 extends Level1 { }
+class Level3 extends Level2 { }
+class Level4 extends Level3 { }
+class Level5 extends Level4 { }
+class Level6 extends Level5 { }
+class Level7 extends Level6 { }
+class Level8 extends Level7 { }
+class Level9 extends Level8 { }
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 4f5df03..1f210e1 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -451,13 +451,27 @@
 
   ART_TEST_HOST_GTEST_DEPENDENCIES += $$(gtest_deps)
 
+.PHONY: $$(gtest_rule)
+ifeq (,$(SANITIZE_HOST))
+$$(gtest_rule): $$(gtest_exe) $$(gtest_deps)
+	$(hide) ($$(call ART_TEST_SKIP,$$@) && $$< && \
+		$$(call ART_TEST_PASSED,$$@)) || $$(call ART_TEST_FAILED,$$@)
+else
 # Note: envsetup currently exports ASAN_OPTIONS=detect_leaks=0 to suppress leak detection, as some
 #       build tools (e.g., ninja) intentionally leak. We want leak checks when we run our tests, so
 #       override ASAN_OPTIONS. b/37751350
-.PHONY: $$(gtest_rule)
+# Note 2: Under sanitization, also capture the output, and run it through the stack tool on failure
+# (with the x86-64 ABI, as this allows symbolization of both x86 and x86-64). We don't do this in
+# general as it loses all the color output, and we have our own symbolization step when not running
+# under ASAN.
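+# Note 3: "set -o pipefail" makes the pipeline fail when the test binary fails, even though tee
+# succeeds.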
 $$(gtest_rule): $$(gtest_exe) $$(gtest_deps)
-	$(hide) ($$(call ART_TEST_SKIP,$$@) && ASAN_OPTIONS=detect_leaks=1 $$< && \
-		$$(call ART_TEST_PASSED,$$@)) || $$(call ART_TEST_FAILED,$$@)
+	$(hide) ($$(call ART_TEST_SKIP,$$@) && set -o pipefail && \
+		ASAN_OPTIONS=detect_leaks=1 $$< 2>&1 | tee $$<.tmp.out >&2 && \
+		{ $$(call ART_TEST_PASSED,$$@) ; rm $$<.tmp.out ; }) || \
+		( grep -q AddressSanitizer $$<.tmp.out && \
+			{ echo "ABI: 'x86_64'" | cat - $$<.tmp.out | development/scripts/stack | tail -n 3000 ; } ; \
+		rm $$<.tmp.out ; $$(call ART_TEST_FAILED,$$@))
+endif
 
   ART_TEST_HOST_GTEST$$($(3)ART_PHONY_TEST_HOST_SUFFIX)_RULES += $$(gtest_rule)
   ART_TEST_HOST_GTEST_RULES += $$(gtest_rule)
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 01f761b..2e60e7d 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -89,6 +89,7 @@
         "optimizing/ssa_liveness_analysis.cc",
         "optimizing/ssa_phi_elimination.cc",
         "optimizing/stack_map_stream.cc",
+        "optimizing/superblock_cloner.cc",
         "trampolines/trampoline_compiler.cc",
         "utils/assembler.cc",
         "utils/jni_macro_assembler.cc",
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 05fdc97..8af29d4 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -23,7 +23,6 @@
 
 #include "common_runtime_test.h"
 #include "compiler.h"
-#include "jit/profile_compilation_info.h"
 #include "oat_file.h"
 
 namespace art {
@@ -34,6 +33,7 @@
 class CompilerDriver;
 class CompilerOptions;
 class CumulativeLogger;
+class ProfileCompilationInfo;
 class VerificationResults;
 
 template<class T> class Handle;
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index e413718..0f69dba 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -159,4 +159,10 @@
   storage->ReleaseMethodInfo(method_info_);
 }
 
+void CompiledMethod::ReleaseVMapTable() {
+  CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage();
+  storage->ReleaseVMapTable(vmap_table_);
+  vmap_table_ = nullptr;
+}
+
 }  // namespace art
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index acdce26..4e8f3ef 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -168,6 +168,10 @@
 
   ArrayRef<const linker::LinkerPatch> GetPatches() const;
 
+  // The compiler sometimes unquickens shared code items. In that case, we need to clear the vmap
+  // table to avoid writing the quicken info to the vdex file.
+  void ReleaseVMapTable();
+
  private:
   static constexpr size_t kIsIntrinsicLsb = kNumberOfCompiledCodePackedBits;
   static constexpr size_t kIsIntrinsicSize = 1u;
@@ -186,7 +190,7 @@
   // For quick code, method specific information that is not very dedupe friendly (method indices).
   const LengthPrefixedArray<uint8_t>* const method_info_;
   // For quick code, holds code infos which contain stack maps, inline information, and etc.
-  const LengthPrefixedArray<uint8_t>* const vmap_table_;
+  const LengthPrefixedArray<uint8_t>* vmap_table_;
   // For quick code, a FDE entry for the debug_frame section.
   const LengthPrefixedArray<uint8_t>* const cfi_info_;
   // For quick code, linker patches needed by the method.
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc
index bb2a214..df5bb37 100644
--- a/compiler/debug/elf_debug_writer.cc
+++ b/compiler/debug/elf_debug_writer.cc
@@ -137,10 +137,17 @@
     InstructionSet isa,
     const InstructionSetFeatures* features,
     bool mini_debug_info,
-    const MethodDebugInfo& mi) {
-  CHECK_EQ(mi.is_code_address_text_relative, false);
+    ArrayRef<const MethodDebugInfo> method_infos) {
+  CHECK_GT(method_infos.size(), 0u);
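+  // Compute the address range spanning all the methods' code.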
+  uint64_t min_address = std::numeric_limits<uint64_t>::max();
+  uint64_t max_address = 0;
+  for (const MethodDebugInfo& mi : method_infos) {
+    CHECK_EQ(mi.is_code_address_text_relative, false);
+    min_address = std::min(min_address, mi.code_address);
+    max_address = std::max(max_address, mi.code_address + mi.code_size);
+  }
   DebugInfo debug_info{};
-  debug_info.compiled_methods = ArrayRef<const debug::MethodDebugInfo>(&mi, 1);
+  debug_info.compiled_methods = method_infos;
   std::vector<uint8_t> buffer;
   buffer.reserve(KB);
   linker::VectorOutputStream out("Debug ELF file", &buffer);
@@ -151,14 +158,14 @@
   if (mini_debug_info) {
     std::vector<uint8_t> mdi = MakeMiniDebugInfo(isa,
                                                  features,
-                                                 mi.code_address,
-                                                 mi.code_size,
+                                                 min_address,
+                                                 max_address - min_address,
                                                  /* dex_section_address */ 0,
                                                  /* dex_section_size */ 0,
                                                  debug_info);
     builder->WriteSection(".gnu_debugdata", &mdi);
   } else {
-    builder->GetText()->AllocateVirtualMemory(mi.code_address, mi.code_size);
+    builder->GetText()->AllocateVirtualMemory(min_address, max_address - min_address);
     WriteDebugInfo(builder.get(),
                    debug_info,
                    dwarf::DW_DEBUG_FRAME_FORMAT,
@@ -173,11 +180,11 @@
     InstructionSet isa,
     const InstructionSetFeatures* features,
     bool mini_debug_info,
-    const MethodDebugInfo& method_info) {
+    ArrayRef<const MethodDebugInfo> method_infos) {
   if (Is64BitInstructionSet(isa)) {
-    return MakeElfFileForJITInternal<ElfTypes64>(isa, features, mini_debug_info, method_info);
+    return MakeElfFileForJITInternal<ElfTypes64>(isa, features, mini_debug_info, method_infos);
   } else {
-    return MakeElfFileForJITInternal<ElfTypes32>(isa, features, mini_debug_info, method_info);
+    return MakeElfFileForJITInternal<ElfTypes32>(isa, features, mini_debug_info, method_infos);
   }
 }
 
diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h
index 8ad0c42..e442e00 100644
--- a/compiler/debug/elf_debug_writer.h
+++ b/compiler/debug/elf_debug_writer.h
@@ -54,7 +54,7 @@
     InstructionSet isa,
     const InstructionSetFeatures* features,
     bool mini_debug_info,
-    const MethodDebugInfo& method_info);
+    ArrayRef<const MethodDebugInfo> method_infos);
 
 std::vector<uint8_t> WriteDebugElfFileForClasses(
     InstructionSet isa,
diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h
index a853714..9c9e8b3 100644
--- a/compiler/debug/elf_symtab_writer.h
+++ b/compiler/debug/elf_symtab_writer.h
@@ -72,8 +72,8 @@
       continue;  // Add symbol only for the first instance.
     }
     size_t name_offset;
-    if (!info.trampoline_name.empty()) {
-      name_offset = strtab->Write(info.trampoline_name);
+    if (!info.custom_name.empty()) {
+      name_offset = strtab->Write(info.custom_name);
     } else {
       DCHECK(info.dex_file != nullptr);
       std::string name = info.dex_file->PrettyMethod(info.dex_method_index, !mini_debug_info);
diff --git a/compiler/debug/method_debug_info.h b/compiler/debug/method_debug_info.h
index 43c8de2..d0b03ec 100644
--- a/compiler/debug/method_debug_info.h
+++ b/compiler/debug/method_debug_info.h
@@ -27,7 +27,7 @@
 namespace debug {
 
 struct MethodDebugInfo {
-  std::string trampoline_name;
+  std::string custom_name;
   const DexFile* dex_file;  // Native methods (trampolines) do not reference dex file.
   size_t class_def_index;
   uint32_t dex_method_index;
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 308e75d..28c7fe2 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -28,6 +28,7 @@
 #include "compiled_method.h"
 #include "dex/dex_file-inl.h"
 #include "dex/dex_instruction-inl.h"
+#include "dex_to_dex_decompiler.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "mirror/dex_cache.h"
@@ -44,81 +45,106 @@
 // Control check-cast elision.
 const bool kEnableCheckCastEllision = true;
 
-struct QuickenedInfo {
-  QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {}
+DexToDexCompiler::DexToDexCompiler(CompilerDriver* driver)
+    : driver_(driver),
+      lock_("Quicken lock", kDexToDexCompilerLock) {
+  DCHECK(driver != nullptr);
+}
 
-  uint32_t dex_pc;
-  uint16_t dex_member_index;
-};
+void DexToDexCompiler::ClearState() {
+  MutexLock lock(Thread::Current(), lock_);
+  active_dex_file_ = nullptr;
+  active_bit_vector_ = nullptr;
+  seen_code_items_.clear();
+  should_quicken_.clear();
+  shared_code_items_.clear();
+  blacklisted_code_items_.clear();
+  shared_code_item_quicken_info_.clear();
+}
 
-class DexCompiler {
- public:
-  DexCompiler(art::CompilerDriver& compiler,
-              const DexCompilationUnit& unit,
-              DexToDexCompilationLevel dex_to_dex_compilation_level)
-    : driver_(compiler),
+size_t DexToDexCompiler::NumUniqueCodeItems(Thread* self) const {
+  MutexLock lock(self, lock_);
+  return seen_code_items_.size();
+}
+
+BitVector* DexToDexCompiler::GetOrAddBitVectorForDex(const DexFile* dex_file) {
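+  // Single-entry cache: methods are marked for one dex file at a time.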
+  if (active_dex_file_ != dex_file) {
+    active_dex_file_ = dex_file;
+    auto inserted = should_quicken_.emplace(dex_file,
+                                            BitVector(dex_file->NumMethodIds(),
+                                                      /*expandable*/ false,
+                                                      Allocator::GetMallocAllocator()));
+    active_bit_vector_ = &inserted.first->second;
+  }
+  return active_bit_vector_;
+}
+
+void DexToDexCompiler::MarkForCompilation(Thread* self,
+                                          const MethodReference& method_ref,
+                                          const DexFile::CodeItem* code_item) {
+  MutexLock lock(self, lock_);
+  BitVector* const bitmap = GetOrAddBitVectorForDex(method_ref.dex_file);
+  DCHECK(bitmap != nullptr);
+  DCHECK(!bitmap->IsBitSet(method_ref.index));
+  bitmap->SetBit(method_ref.index);
+  // Detect the shared code items.
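+  // (insert().second is false when the code item was already seen, i.e. it is shared.)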
+  if (!seen_code_items_.insert(code_item).second) {
+    shared_code_items_.insert(code_item);
+  }
+}
+
+DexToDexCompiler::CompilationState::CompilationState(DexToDexCompiler* compiler,
+                                                     const DexCompilationUnit& unit,
+                                                     const CompilationLevel compilation_level,
+                                                     const std::vector<uint8_t>* quicken_data)
+    : compiler_(compiler),
+      driver_(*compiler->GetDriver()),
       unit_(unit),
-      dex_to_dex_compilation_level_(dex_to_dex_compilation_level) {}
+      compilation_level_(compilation_level),
+      already_quickened_(quicken_data != nullptr),
+      existing_quicken_info_(already_quickened_
+          ? ArrayRef<const uint8_t>(*quicken_data) : ArrayRef<const uint8_t>()) {}
 
-  ~DexCompiler() {}
-
-  void Compile();
-
-  const std::vector<QuickenedInfo>& GetQuickenedInfo() const {
-    return quickened_info_;
+uint16_t DexToDexCompiler::CompilationState::NextIndex() {
+  DCHECK(already_quickened_);
+  if (kIsDebugBuild && quicken_index_ >= existing_quicken_info_.NumIndices()) {
+    for (const DexInstructionPcPair& pair : unit_.GetCodeItemAccessor()) {
+      LOG(ERROR) << pair->DumpString(nullptr);
+    }
+    LOG(FATAL) << "Mismatched number of quicken slots.";
   }
+  const uint16_t ret = existing_quicken_info_.GetData(quicken_index_);
+  quicken_index_++;
+  return ret;
+}
 
- private:
-  const DexFile& GetDexFile() const {
-    return *unit_.GetDexFile();
+uint16_t DexToDexCompiler::CompilationState::GetIndexForInstruction(const Instruction* inst,
+                                                                    uint32_t index) {
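+  // With already-quickened input, a quickened instruction reads its original index from the
+  // quicken table; all other instructions keep the index encoded in the instruction.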
+  if (UNLIKELY(already_quickened_)) {
+    return inst->IsQuickened() ? NextIndex() : index;
   }
+  DCHECK(!inst->IsQuickened());
+  return index;
+}
 
-  // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where
-  // a barrier is required.
-  void CompileReturnVoid(Instruction* inst, uint32_t dex_pc);
+bool DexToDexCompiler::ShouldCompileMethod(const MethodReference& ref) {
+  // TODO: It's probably safe to avoid the lock here if the active_dex_file_ matches since we
+  // only call ShouldCompileMethod on one dex at a time.
+  MutexLock lock(Thread::Current(), lock_);
+  return GetOrAddBitVectorForDex(ref.dex_file)->IsBitSet(ref.index);
+}
 
-  // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In
-  // this case, returns the second NOP instruction pointer. Otherwise, returns
-  // the given "inst".
-  Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc);
-
-  // Compiles a field access into a quick field access.
-  // The field index is replaced by an offset within an Object where we can read
-  // from / write to this field. Therefore, this does not involve any resolution
-  // at runtime.
-  // Since the field index is encoded with 16 bits, we can replace it only if the
-  // field offset can be encoded with 16 bits too.
-  void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc,
-                                  Instruction::Code new_opcode, bool is_put);
-
-  // Compiles a virtual method invocation into a quick virtual method invocation.
-  // The method index is replaced by the vtable index where the corresponding
-  // Executable can be found. Therefore, this does not involve any resolution
-  // at runtime.
-  // Since the method index is encoded with 16 bits, we can replace it only if the
-  // vtable index can be encoded with 16 bits too.
-  void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc,
-                            Instruction::Code new_opcode, bool is_range);
-
-  CompilerDriver& driver_;
-  const DexCompilationUnit& unit_;
-  const DexToDexCompilationLevel dex_to_dex_compilation_level_;
-
-  // Filled by the compiler when quickening, in order to encode that information
-  // in the .oat file. The runtime will use that information to get to the original
-  // opcodes.
-  std::vector<QuickenedInfo> quickened_info_;
-
-  DISALLOW_COPY_AND_ASSIGN(DexCompiler);
-};
-
-void DexCompiler::Compile() {
-  DCHECK_EQ(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kOptimize);
-  IterationRange<DexInstructionIterator> instructions(unit_.GetCodeItemAccessor().begin(),
-                                                      unit_.GetCodeItemAccessor().end());
+std::vector<uint8_t> DexToDexCompiler::CompilationState::Compile() {
+  DCHECK_EQ(compilation_level_, CompilationLevel::kOptimize);
+  const CodeItemDataAccessor& instructions = unit_.GetCodeItemAccessor();
   for (DexInstructionIterator it = instructions.begin(); it != instructions.end(); ++it) {
     const uint32_t dex_pc = it.DexPc();
     Instruction* inst = const_cast<Instruction*>(&it.Inst());
+
+    if (!already_quickened_) {
+      DCHECK(!inst->IsQuickened());
+    }
+
     switch (inst->Opcode()) {
       case Instruction::RETURN_VOID:
         CompileReturnVoid(inst, dex_pc);
@@ -134,84 +160,147 @@
         break;
 
       case Instruction::IGET:
+      case Instruction::IGET_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_QUICK, false);
         break;
 
       case Instruction::IGET_WIDE:
+      case Instruction::IGET_WIDE_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_WIDE_QUICK, false);
         break;
 
       case Instruction::IGET_OBJECT:
+      case Instruction::IGET_OBJECT_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_OBJECT_QUICK, false);
         break;
 
       case Instruction::IGET_BOOLEAN:
+      case Instruction::IGET_BOOLEAN_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BOOLEAN_QUICK, false);
         break;
 
       case Instruction::IGET_BYTE:
+      case Instruction::IGET_BYTE_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BYTE_QUICK, false);
         break;
 
       case Instruction::IGET_CHAR:
+      case Instruction::IGET_CHAR_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_CHAR_QUICK, false);
         break;
 
       case Instruction::IGET_SHORT:
+      case Instruction::IGET_SHORT_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_SHORT_QUICK, false);
         break;
 
       case Instruction::IPUT:
+      case Instruction::IPUT_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_QUICK, true);
         break;
 
       case Instruction::IPUT_BOOLEAN:
+      case Instruction::IPUT_BOOLEAN_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BOOLEAN_QUICK, true);
         break;
 
       case Instruction::IPUT_BYTE:
+      case Instruction::IPUT_BYTE_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BYTE_QUICK, true);
         break;
 
       case Instruction::IPUT_CHAR:
+      case Instruction::IPUT_CHAR_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_CHAR_QUICK, true);
         break;
 
       case Instruction::IPUT_SHORT:
+      case Instruction::IPUT_SHORT_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_SHORT_QUICK, true);
         break;
 
       case Instruction::IPUT_WIDE:
+      case Instruction::IPUT_WIDE_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_WIDE_QUICK, true);
         break;
 
       case Instruction::IPUT_OBJECT:
+      case Instruction::IPUT_OBJECT_QUICK:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_OBJECT_QUICK, true);
         break;
 
       case Instruction::INVOKE_VIRTUAL:
+      case Instruction::INVOKE_VIRTUAL_QUICK:
         CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_QUICK, false);
         break;
 
       case Instruction::INVOKE_VIRTUAL_RANGE:
+      case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
         CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_RANGE_QUICK, true);
         break;
 
       case Instruction::NOP:
-        // We need to differentiate between check cast inserted NOP and normal NOP, put an invalid
-        // index in the map for normal nops. This should be rare in real code.
-        quickened_info_.push_back(QuickenedInfo(dex_pc, DexFile::kDexNoIndex16));
+        if (already_quickened_) {
+          const uint16_t reference_index = NextIndex();
+          quickened_info_.push_back(QuickenedInfo(dex_pc, reference_index));
+          if (reference_index == DexFile::kDexNoIndex16) {
+            // This means it was a normal nop and not a check-cast.
+            break;
+          }
+          const uint16_t type_index = NextIndex();
+          if (driver_.IsSafeCast(&unit_, dex_pc)) {
+            quickened_info_.push_back(QuickenedInfo(dex_pc, type_index));
+          }
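+          // Advance past the second NOP of the original check-cast pair.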
+          ++it;
+        } else {
+          // We need to differentiate between a check-cast inserted NOP and a normal NOP; put an
+          // invalid index in the map for normal NOPs. This should be rare in real code.
+          quickened_info_.push_back(QuickenedInfo(dex_pc, DexFile::kDexNoIndex16));
+        }
         break;
 
       default:
-        DCHECK(!inst->IsQuickened());
         // Nothing to do.
         break;
     }
   }
+
+  if (already_quickened_) {
+    DCHECK_EQ(quicken_index_, existing_quicken_info_.NumIndices());
+  }
+
+  if (GetQuickenedInfo().empty()) {
+    // No need to create a CompiledMethod if there are no quickened opcodes.
+    return std::vector<uint8_t>();
+  }
+
+  std::vector<uint8_t> quicken_data;
+  if (kIsDebugBuild) {
+    // Double check that the counts line up with the size of the quicken info.
+    size_t quicken_count = 0;
+    for (const DexInstructionPcPair& pair : instructions) {
+      if (QuickenInfoTable::NeedsIndexForInstruction(&pair.Inst())) {
+        ++quicken_count;
+      }
+    }
+    CHECK_EQ(quicken_count, GetQuickenedInfo().size());
+  }
+
+  QuickenInfoTable::Builder builder(&quicken_data, GetQuickenedInfo().size());
+  // Length is encoded by the constructor.
+  for (const CompilationState::QuickenedInfo& info : GetQuickenedInfo()) {
+    // Dex pc is not serialized, only used for checking the instructions. Since we access the
+    // array based on the index of the quickened instruction, the indexes must line up perfectly.
+    // The reader side uses the NeedsIndexForInstruction function too.
+    const Instruction& inst = instructions.InstructionAt(info.dex_pc);
+    CHECK(QuickenInfoTable::NeedsIndexForInstruction(&inst)) << inst.Opcode();
+    builder.AddIndex(info.dex_member_index);
+  }
+  DCHECK(!quicken_data.empty());
+  return quicken_data;
 }
 
-void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) {
+void DexToDexCompiler::CompilationState::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) {
   DCHECK_EQ(inst->Opcode(), Instruction::RETURN_VOID);
   if (unit_.IsConstructor()) {
     // Are we compiling a non clinit constructor which needs a barrier ?
@@ -229,7 +318,8 @@
   inst->SetOpcode(Instruction::RETURN_VOID_NO_BARRIER);
 }
 
-Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) {
+Instruction* DexToDexCompiler::CompilationState::CompileCheckCast(Instruction* inst,
+                                                                  uint32_t dex_pc) {
   if (!kEnableCheckCastEllision) {
     return inst;
   }
@@ -246,27 +336,30 @@
                  << " by replacing it with 2 NOPs at dex pc "
                  << StringPrintf("0x%x", dex_pc) << " in method "
                  << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true);
-  quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegA_21c()));
-  quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegB_21c()));
-  // We are modifying 4 consecutive bytes.
-  inst->SetOpcode(Instruction::NOP);
-  inst->SetVRegA_10x(0u);  // keep compliant with verifier.
-  // Get to next instruction which is the second half of check-cast and replace
-  // it by a NOP.
-  inst = const_cast<Instruction*>(inst->Next());
-  inst->SetOpcode(Instruction::NOP);
-  inst->SetVRegA_10x(0u);  // keep compliant with verifier.
+  if (!already_quickened_) {
+    quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegA_21c()));
+    quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegB_21c()));
+
+    // We are modifying 4 consecutive bytes.
+    inst->SetOpcode(Instruction::NOP);
+    inst->SetVRegA_10x(0u);  // keep compliant with verifier.
+    // Get to next instruction which is the second half of check-cast and replace
+    // it by a NOP.
+    inst = const_cast<Instruction*>(inst->Next());
+    inst->SetOpcode(Instruction::NOP);
+    inst->SetVRegA_10x(0u);  // keep compliant with verifier.
+  }
   return inst;
 }
 
-void DexCompiler::CompileInstanceFieldAccess(Instruction* inst,
-                                             uint32_t dex_pc,
-                                             Instruction::Code new_opcode,
-                                             bool is_put) {
+void DexToDexCompiler::CompilationState::CompileInstanceFieldAccess(Instruction* inst,
+                                                                    uint32_t dex_pc,
+                                                                    Instruction::Code new_opcode,
+                                                                    bool is_put) {
   if (!kEnableQuickening) {
     return;
   }
-  uint32_t field_idx = inst->VRegC_22c();
+  uint32_t field_idx = GetIndexForInstruction(inst, inst->VRegC_22c());
   MemberOffset field_offset(0u);
   bool is_volatile;
   bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, is_put,
@@ -278,20 +371,29 @@
                    << " by field offset " << field_offset.Int32Value()
                    << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method "
                    << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true);
-    // We are modifying 4 consecutive bytes.
-    inst->SetOpcode(new_opcode);
-    // Replace field index by field offset.
-    inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value()));
+    if (!already_quickened_) {
+      // We are modifying 4 consecutive bytes.
+      inst->SetOpcode(new_opcode);
+      // Replace field index by field offset.
+      inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value()));
+    }
     quickened_info_.push_back(QuickenedInfo(dex_pc, field_idx));
   }
 }
 
-void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc,
-                                       Instruction::Code new_opcode, bool is_range) {
+const DexFile& DexToDexCompiler::CompilationState::GetDexFile() const {
+  return *unit_.GetDexFile();
+}
+
+void DexToDexCompiler::CompilationState::CompileInvokeVirtual(Instruction* inst,
+                                                              uint32_t dex_pc,
+                                                              Instruction::Code new_opcode,
+                                                              bool is_range) {
   if (!kEnableQuickening) {
     return;
   }
-  uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
+  uint32_t method_idx = GetIndexForInstruction(inst,
+                                               is_range ? inst->VRegB_3rc() : inst->VRegB_35c());
   ScopedObjectAccess soa(Thread::Current());
 
   ClassLinker* class_linker = unit_.GetClassLinker();
@@ -318,19 +420,20 @@
                  << " by vtable index " << vtable_idx
                  << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method "
                  << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true);
-  // We are modifying 4 consecutive bytes.
-  inst->SetOpcode(new_opcode);
-  // Replace method index by vtable index.
-  if (is_range) {
-    inst->SetVRegB_3rc(static_cast<uint16_t>(vtable_idx));
-  } else {
-    inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx));
+  if (!already_quickened_) {
+    // We are modifying 4 consecutive bytes.
+    inst->SetOpcode(new_opcode);
+    // Replace method index by vtable index.
+    if (is_range) {
+      inst->SetVRegB_3rc(static_cast<uint16_t>(vtable_idx));
+    } else {
+      inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx));
+    }
   }
   quickened_info_.push_back(QuickenedInfo(dex_pc, method_idx));
 }
 
-CompiledMethod* ArtCompileDEX(
-    CompilerDriver* driver,
+CompiledMethod* DexToDexCompiler::CompileMethod(
     const DexFile::CodeItem* code_item,
     uint32_t access_flags,
     InvokeType invoke_type ATTRIBUTE_UNUSED,
@@ -338,69 +441,122 @@
     uint32_t method_idx,
     Handle<mirror::ClassLoader> class_loader,
     const DexFile& dex_file,
-    DexToDexCompilationLevel dex_to_dex_compilation_level) {
-  DCHECK(driver != nullptr);
-  if (dex_to_dex_compilation_level != DexToDexCompilationLevel::kDontDexToDexCompile) {
-    ScopedObjectAccess soa(Thread::Current());
-    StackHandleScope<1> hs(soa.Self());
-    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    art::DexCompilationUnit unit(
-        class_loader,
-        class_linker,
-        dex_file,
-        code_item,
-        class_def_idx,
-        method_idx,
-        access_flags,
-        driver->GetVerifiedMethod(&dex_file, method_idx),
-        hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)));
-    art::optimizer::DexCompiler dex_compiler(*driver, unit, dex_to_dex_compilation_level);
-    dex_compiler.Compile();
-    if (dex_compiler.GetQuickenedInfo().empty()) {
-      // No need to create a CompiledMethod if there are no quickened opcodes.
+    CompilationLevel compilation_level) {
+  if (compilation_level == CompilationLevel::kDontDexToDexCompile) {
+    return nullptr;
+  }
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  art::DexCompilationUnit unit(
+      class_loader,
+      class_linker,
+      dex_file,
+      code_item,
+      class_def_idx,
+      method_idx,
+      access_flags,
+      driver_->GetVerifiedMethod(&dex_file, method_idx),
+      hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)));
+
+  std::vector<uint8_t> quicken_data;
+  // If the code item is shared with multiple different method ids, make sure that we quicken only
+  // once and verify that all the dequicken maps match.
+  if (UNLIKELY(shared_code_items_.find(code_item) != shared_code_items_.end())) {
+    // For shared code items, use a lock to prevent races.
+    MutexLock mu(soa.Self(), lock_);
+    // Blacklisted means there was a quickening conflict previously; bail early.
+    if (blacklisted_code_items_.find(code_item) != blacklisted_code_items_.end()) {
       return nullptr;
     }
+    auto existing = shared_code_item_quicken_info_.find(code_item);
+    const bool already_quickened = existing != shared_code_item_quicken_info_.end();
+    {
+      CompilationState state(this,
+                             unit,
+                             compilation_level,
+                             already_quickened ? &existing->second.quicken_data_ : nullptr);
+      quicken_data = state.Compile();
+    }
 
-    // Create a `CompiledMethod`, with the quickened information in the vmap table.
-    if (kIsDebugBuild) {
-      // Double check that the counts line up with the size of the quicken info.
-      size_t quicken_count = 0;
-      for (const DexInstructionPcPair& pair : unit.GetCodeItemAccessor()) {
-        if (QuickenInfoTable::NeedsIndexForInstruction(&pair.Inst())) {
-          ++quicken_count;
+    // Already quickened, check that the data matches what was previously seen.
+    MethodReference method_ref(&dex_file, method_idx);
+    if (already_quickened) {
+      QuickenState* const existing_data = &existing->second;
+      if (existing_data->quicken_data_ != quicken_data) {
+        VLOG(compiler) << "Quicken data mismatch, dequickening method "
+                       << dex_file.PrettyMethod(method_idx);
+        // Unquicken using the existing quicken data.
+        optimizer::ArtDecompileDEX(dex_file,
+                                   *code_item,
+                                   ArrayRef<const uint8_t>(existing_data->quicken_data_),
+                                   /* decompile_return_instruction */ false);
+        // Clear the vmap tables of all the methods that were already quickened, to avoid
+        // writing them out during oat writing.
+        for (const MethodReference& ref : existing_data->methods_) {
+          CompiledMethod* method = driver_->GetCompiledMethod(ref);
+          DCHECK(method != nullptr);
+          method->ReleaseVMapTable();
         }
+        // Blacklist the method to never attempt to quicken it in the future.
+        blacklisted_code_items_.insert(code_item);
+        shared_code_item_quicken_info_.erase(existing);
+        return nullptr;
       }
-      CHECK_EQ(quicken_count, dex_compiler.GetQuickenedInfo().size());
+      existing_data->methods_.push_back(method_ref);
+    } else {
+      QuickenState new_state;
+      new_state.methods_.push_back(method_ref);
+      new_state.quicken_data_ = quicken_data;
+      bool inserted = shared_code_item_quicken_info_.emplace(code_item, new_state).second;
+      CHECK(inserted) << "Failed to insert " << dex_file.PrettyMethod(method_idx);
     }
-    std::vector<uint8_t> quicken_data;
-    QuickenInfoTable::Builder builder(&quicken_data, dex_compiler.GetQuickenedInfo().size());
-    // Length is encoded by the constructor.
-    for (QuickenedInfo info : dex_compiler.GetQuickenedInfo()) {
-      // Dex pc is not serialized, only used for checking the instructions. Since we access the
-      // array based on the index of the quickened instruction, the indexes must line up perfectly.
-      // The reader side uses the NeedsIndexForInstruction function too.
-      const Instruction& inst = unit.GetCodeItemAccessor().InstructionAt(info.dex_pc);
-      CHECK(QuickenInfoTable::NeedsIndexForInstruction(&inst)) << inst.Opcode();
-      builder.AddIndex(info.dex_member_index);
+
+    // An easy sanity check is to re-quicken using the newly produced quicken data and verify
+    // that the result matches.
+    // Note that this needs to be behind the lock for this case since we may unquicken in another
+    // thread.
+    if (kIsDebugBuild) {
+      CompilationState state2(this, unit, compilation_level, &quicken_data);
+      std::vector<uint8_t> new_data = state2.Compile();
+      CHECK(new_data == quicken_data) << "Mismatch producing new quicken data";
     }
-    InstructionSet instruction_set = driver->GetInstructionSet();
-    if (instruction_set == InstructionSet::kThumb2) {
-      // Don't use the thumb2 instruction set to avoid the one off code delta.
-      instruction_set = InstructionSet::kArm;
+  } else {
+    CompilationState state(this, unit, compilation_level, /*quicken_data*/ nullptr);
+    quicken_data = state.Compile();
+
+    // An easy sanity check is to re-quicken using the newly produced quicken data and verify
+    // that the result matches.
+    if (kIsDebugBuild) {
+      CompilationState state2(this, unit, compilation_level, &quicken_data);
+      std::vector<uint8_t> new_data = state2.Compile();
+      CHECK(new_data == quicken_data) << "Mismatch producing new quicken data";
     }
-    return CompiledMethod::SwapAllocCompiledMethod(
-        driver,
-        instruction_set,
-        ArrayRef<const uint8_t>(),                   // no code
-        0,
-        0,
-        0,
-        ArrayRef<const uint8_t>(),                   // method_info
-        ArrayRef<const uint8_t>(quicken_data),       // vmap_table
-        ArrayRef<const uint8_t>(),                   // cfi data
-        ArrayRef<const linker::LinkerPatch>());
   }
-  return nullptr;
+
+  if (quicken_data.empty()) {
+    return nullptr;
+  }
+
+  // Create a `CompiledMethod`, with the quickened information in the vmap table.
+  InstructionSet instruction_set = driver_->GetInstructionSet();
+  if (instruction_set == InstructionSet::kThumb2) {
+    // Don't use the thumb2 instruction set to avoid the one off code delta.
+    instruction_set = InstructionSet::kArm;
+  }
+  CompiledMethod* ret = CompiledMethod::SwapAllocCompiledMethod(
+      driver_,
+      instruction_set,
+      ArrayRef<const uint8_t>(),                   // no code
+      0,
+      0,
+      0,
+      ArrayRef<const uint8_t>(),                   // method_info
+      ArrayRef<const uint8_t>(quicken_data),       // vmap_table
+      ArrayRef<const uint8_t>(),                   // cfi data
+      ArrayRef<const linker::LinkerPatch>());
+  return ret;
 }
 
 }  // namespace optimizer
diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h
index 80b94d2..abd0481 100644
--- a/compiler/dex/dex_to_dex_compiler.h
+++ b/compiler/dex/dex_to_dex_compiler.h
@@ -17,14 +17,22 @@
 #ifndef ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_
 #define ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_
 
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "base/bit_vector.h"
 #include "dex/dex_file.h"
 #include "handle.h"
 #include "invoke_type.h"
+#include "method_reference.h"
+#include "quicken_info.h"
 
 namespace art {
 
 class CompiledMethod;
 class CompilerDriver;
+class DexCompilationUnit;
 
 namespace mirror {
 class ClassLoader;
@@ -32,21 +40,144 @@
 
 namespace optimizer {
 
-enum class DexToDexCompilationLevel {
-  kDontDexToDexCompile,   // Only meaning wrt image time interpretation.
-  kOptimize               // Perform peep-hole optimizations.
-};
-std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs);
+class DexToDexCompiler {
+ public:
+  enum class CompilationLevel {
+    kDontDexToDexCompile,   // Only meaning wrt image time interpretation.
+    kOptimize               // Perform peep-hole optimizations.
+  };
 
-CompiledMethod* ArtCompileDEX(CompilerDriver* driver,
-                              const DexFile::CodeItem* code_item,
-                              uint32_t access_flags,
-                              InvokeType invoke_type,
-                              uint16_t class_def_idx,
-                              uint32_t method_idx,
-                              Handle<mirror::ClassLoader> class_loader,
-                              const DexFile& dex_file,
-                              DexToDexCompilationLevel dex_to_dex_compilation_level);
+  explicit DexToDexCompiler(CompilerDriver* driver);
+
+  CompiledMethod* CompileMethod(const DexFile::CodeItem* code_item,
+                                uint32_t access_flags,
+                                InvokeType invoke_type,
+                                uint16_t class_def_idx,
+                                uint32_t method_idx,
+                                Handle<mirror::ClassLoader> class_loader,
+                                const DexFile& dex_file,
+                                const CompilationLevel compilation_level) WARN_UNUSED;
+
+  void MarkForCompilation(Thread* self,
+                          const MethodReference& method_ref,
+                          const DexFile::CodeItem* code_item);
+
+  void ClearState();
+
+  CompilerDriver* GetDriver() {
+    return driver_;
+  }
+
+  bool ShouldCompileMethod(const MethodReference& ref);
+
+  size_t NumUniqueCodeItems(Thread* self) const;
+
+ private:
+  // Holds the state for compiling a single method.
+  struct CompilationState {
+    struct QuickenedInfo {
+      QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {}
+
+      uint32_t dex_pc;
+      uint16_t dex_member_index;
+    };
+
+    CompilationState(DexToDexCompiler* compiler,
+                     const DexCompilationUnit& unit,
+                     const CompilationLevel compilation_level,
+                     const std::vector<uint8_t>* quicken_data);
+
+    const std::vector<QuickenedInfo>& GetQuickenedInfo() const {
+      return quickened_info_;
+    }
+
+    // Returns the quickening info, or an empty array if it was not quickened.
+    // If already_quickened is true, then don't change anything but still return what the quicken
+    // data would have been.
+    std::vector<uint8_t> Compile();
+
+    const DexFile& GetDexFile() const;
+
+    // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where
+    // a barrier is required.
+    void CompileReturnVoid(Instruction* inst, uint32_t dex_pc);
+
+    // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In
+    // this case, returns the second NOP instruction pointer. Otherwise, returns
+    // the given "inst".
+    Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc);
+
+    // Compiles a field access into a quick field access.
+    // The field index is replaced by an offset within an Object where we can read
+    // from / write to this field. Therefore, this does not involve any resolution
+    // at runtime.
+    // Since the field index is encoded with 16 bits, we can replace it only if the
+    // field offset can be encoded with 16 bits too.
+    void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc,
+                                    Instruction::Code new_opcode, bool is_put);
+
+    // Compiles a virtual method invocation into a quick virtual method invocation.
+    // The method index is replaced by the vtable index where the corresponding
+    // executable can be found. Therefore, this does not involve any resolution
+    // at runtime.
+    // Since the method index is encoded with 16 bits, we can replace it only if the
+    // vtable index can be encoded with 16 bits too.
+    void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc,
+                              Instruction::Code new_opcode, bool is_range);
+
+    // Return the next index.
+    uint16_t NextIndex();
+
+    // Returns the dequickened index if an instruction is quickened; otherwise returns the index.
+    uint16_t GetIndexForInstruction(const Instruction* inst, uint32_t index);
+
+    DexToDexCompiler* const compiler_;
+    CompilerDriver& driver_;
+    const DexCompilationUnit& unit_;
+    const CompilationLevel compilation_level_;
+
+    // Filled by the compiler when quickening, in order to encode that information
+    // in the .oat file. The runtime will use that information to get to the original
+    // opcodes.
+    std::vector<QuickenedInfo> quickened_info_;
+
+    // If the code item was already quickened previously.
+    const bool already_quickened_;
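+    // Quicken data from the previous pass and the current read position within it.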
+    const QuickenInfoTable existing_quicken_info_;
+    uint32_t quicken_index_ = 0u;
+
+    DISALLOW_COPY_AND_ASSIGN(CompilationState);
+  };
+
+  struct QuickenState {
+    std::vector<MethodReference> methods_;
+    std::vector<uint8_t> quicken_data_;
+  };
+
+  BitVector* GetOrAddBitVectorForDex(const DexFile* dex_file) REQUIRES(lock_);
+
+  CompilerDriver* const driver_;
+
+  // State for adding methods (should this be in its own class?).
+  const DexFile* active_dex_file_ = nullptr;
+  BitVector* active_bit_vector_ = nullptr;
+
+  // Lock that guards duplicate code items and the bitmap.
+  mutable Mutex lock_;
+  // Record what method references are going to get quickened.
+  std::unordered_map<const DexFile*, BitVector> should_quicken_;
+  // Record what code items are already seen to detect when multiple methods have the same code
+  // item.
+  std::unordered_set<const DexFile::CodeItem*> seen_code_items_ GUARDED_BY(lock_);
+  // Guarded by lock_ during writing, accessed without a lock during quickening.
+  // This is safe because no thread is adding to the shared code items during the quickening phase.
+  std::unordered_set<const DexFile::CodeItem*> shared_code_items_;
+  std::unordered_set<const DexFile::CodeItem*> blacklisted_code_items_ GUARDED_BY(lock_);
+  std::unordered_map<const DexFile::CodeItem*, QuickenState> shared_code_item_quicken_info_
+      GUARDED_BY(lock_);
+};
+
+std::ostream& operator<<(std::ostream& os, const DexToDexCompiler::CompilationLevel& rhs);
 
 }  // namespace optimizer
 
diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc
index 540bd0c..baf97a8 100644
--- a/compiler/dex/quick_compiler_callbacks.cc
+++ b/compiler/dex/quick_compiler_callbacks.cc
@@ -17,6 +17,10 @@
 #include "quick_compiler_callbacks.h"
 
 #include "driver/compiler_driver.h"
+#include "mirror/class-inl.h"
+#include "mirror/object.h"
+#include "obj_ptr-inl.h"
+#include "thread-current-inl.h"
 #include "verification_results.h"
 #include "verifier/method_verifier-inl.h"
 
@@ -54,4 +58,15 @@
   }
 }
 
+bool QuickCompilerCallbacks::CanUseOatStatusForVerification(mirror::Class* klass) {
+  // No dex files: conservatively false.
+  if (dex_files_ == nullptr) {
+    return false;
+  }
+
+  // If the class isn't from one of the dex files, accept oat file data.
+  const DexFile* dex_file = &klass->GetDexFile();
+  return std::find(dex_files_->begin(), dex_files_->end(), dex_file) == dex_files_->end();
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
index 6d22f95..8a07e9c 100644
--- a/compiler/dex/quick_compiler_callbacks.h
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -23,12 +23,13 @@
 namespace art {
 
 class CompilerDriver;
+class DexFile;
 class VerificationResults;
 
 class QuickCompilerCallbacks FINAL : public CompilerCallbacks {
  public:
   explicit QuickCompilerCallbacks(CompilerCallbacks::CallbackMode mode)
-      : CompilerCallbacks(mode) {}
+      : CompilerCallbacks(mode), dex_files_(nullptr) {}
 
   ~QuickCompilerCallbacks() { }
 
@@ -65,11 +66,19 @@
 
   void UpdateClassState(ClassReference ref, ClassStatus state) OVERRIDE;
 
+  bool CanUseOatStatusForVerification(mirror::Class* klass) OVERRIDE
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  void SetDexFiles(const std::vector<const DexFile*>* dex_files) {
+    dex_files_ = dex_files;
+  }
+
  private:
   VerificationResults* verification_results_ = nullptr;
   bool does_class_unloading_ = false;
   CompilerDriver* compiler_driver_ = nullptr;
   std::unique_ptr<verifier::VerifierDeps> verifier_deps_;
+  const std::vector<const DexFile*>* dex_files_;
 };
 
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 8698659..70cbb01 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -56,6 +56,7 @@
 #include "gc/space/space.h"
 #include "handle_scope-inl.h"
 #include "intrinsics_enum.h"
+#include "jit/profile_compilation_info.h"
 #include "jni_internal.h"
 #include "linker/linker_patch.h"
 #include "mirror/class-inl.h"
@@ -255,24 +256,6 @@
   DISALLOW_COPY_AND_ASSIGN(AOTCompilationStats);
 };
 
-class CompilerDriver::DexFileMethodSet {
- public:
-  explicit DexFileMethodSet(const DexFile& dex_file)
-    : dex_file_(dex_file),
-      method_indexes_(dex_file.NumMethodIds(), false, Allocator::GetMallocAllocator()) {
-  }
-  DexFileMethodSet(DexFileMethodSet&& other) = default;
-
-  const DexFile& GetDexFile() const { return dex_file_; }
-
-  BitVector& GetMethodIndexes() { return method_indexes_; }
-  const BitVector& GetMethodIndexes() const { return method_indexes_; }
-
- private:
-  const DexFile& dex_file_;
-  BitVector method_indexes_;
-};
-
 CompilerDriver::CompilerDriver(
     const CompilerOptions* compiler_options,
     VerificationResults* verification_results,
@@ -306,9 +289,8 @@
       compiled_method_storage_(swap_fd),
       profile_compilation_info_(profile_compilation_info),
       max_arena_alloc_(0),
-      dex_to_dex_references_lock_("dex-to-dex references lock"),
-      dex_to_dex_references_(),
-      current_dex_to_dex_methods_(nullptr) {
+      compiling_dex_to_dex_(false),
+      dex_to_dex_compiler_(this) {
   DCHECK(compiler_options_ != nullptr);
 
   compiler_->Init();
@@ -398,7 +380,7 @@
   FreeThreadPools();
 }
 
-static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
+static optimizer::DexToDexCompiler::CompilationLevel GetDexToDexCompilationLevel(
     Thread* self, const CompilerDriver& driver, Handle<mirror::ClassLoader> class_loader,
     const DexFile& dex_file, const DexFile::ClassDef& class_def)
     REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -410,7 +392,7 @@
   if (klass == nullptr) {
     CHECK(self->IsExceptionPending());
     self->ClearException();
-    return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
+    return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile;
   }
   // DexToDex at the kOptimize level may introduce quickened opcodes, which replace symbolic
   // references with actual offsets. We cannot re-verify such instructions.
@@ -418,22 +400,23 @@
   // We store the verification information in the class status in the oat file, which the linker
   // can validate (checksums) and use to skip load-time verification. It is thus safe to
   // optimize when a class has been fully verified before.
-  optimizer::DexToDexCompilationLevel max_level = optimizer::DexToDexCompilationLevel::kOptimize;
+  optimizer::DexToDexCompiler::CompilationLevel max_level =
+      optimizer::DexToDexCompiler::CompilationLevel::kOptimize;
   if (driver.GetCompilerOptions().GetDebuggable()) {
     // We are debuggable so definitions of classes might be changed. We don't want to do any
     // optimizations that could break that.
-    max_level = optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
+    max_level = optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile;
   }
   if (klass->IsVerified()) {
     // Class is verified so we can enable DEX-to-DEX compilation for performance.
     return max_level;
   } else {
     // Class verification has failed: do not run DEX-to-DEX optimizations.
-    return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
+    return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile;
   }
 }
 
-static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
+static optimizer::DexToDexCompiler::CompilationLevel GetDexToDexCompilationLevel(
     Thread* self,
     const CompilerDriver& driver,
     jobject jclass_loader,
@@ -470,7 +453,7 @@
                           uint32_t method_idx,
                           Handle<mirror::ClassLoader> class_loader,
                           const DexFile& dex_file,
-                          optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level,
+                          optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level,
                           bool compilation_enabled,
                           Handle<mirror::DexCache> dex_cache) {
   DCHECK(driver != nullptr);
@@ -478,18 +461,18 @@
   uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0;
   MethodReference method_ref(&dex_file, method_idx);
 
-  if (driver->GetCurrentDexToDexMethods() != nullptr) {
+  if (driver->GetCompilingDexToDex()) {
+    optimizer::DexToDexCompiler* const compiler = &driver->GetDexToDexCompiler();
     // This is the second pass when we dex-to-dex compile previously marked methods.
     // TODO: Refactor the compilation to avoid having to distinguish the two passes
     // here. That should be done on a higher level. http://b/29089975
-    if (driver->GetCurrentDexToDexMethods()->IsBitSet(method_idx)) {
+    if (compiler->ShouldCompileMethod(method_ref)) {
       VerificationResults* results = driver->GetVerificationResults();
       DCHECK(results != nullptr);
       const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref);
       // Do not optimize if a VerifiedMethod is missing. SafeCast elision,
       // for example, relies on it.
-      compiled_method = optimizer::ArtCompileDEX(
-          driver,
+      compiled_method = compiler->CompileMethod(
           code_item,
           access_flags,
           invoke_type,
@@ -499,7 +482,7 @@
           dex_file,
           (verified_method != nullptr)
               ? dex_to_dex_compilation_level
-              : optimizer::DexToDexCompilationLevel::kDontDexToDexCompile);
+              : optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile);
     }
   } else if ((access_flags & kAccNative) != 0) {
     // Are we extracting only and have support for generic JNI down calls?
@@ -524,7 +507,7 @@
     bool compile = compilation_enabled &&
         // Basic checks, e.g., not <clinit>.
         results->IsCandidateForCompilation(method_ref, access_flags) &&
        // Did not fail to create VerifiedMethod metadata.
         verified_method != nullptr &&
         // Do not have failures that should punt to the interpreter.
         !verified_method->HasRuntimeThrow() &&
@@ -546,10 +529,12 @@
                                                        dex_cache);
     }
     if (compiled_method == nullptr &&
-        dex_to_dex_compilation_level != optimizer::DexToDexCompilationLevel::kDontDexToDexCompile) {
+        dex_to_dex_compilation_level !=
+            optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile) {
       DCHECK(!Runtime::Current()->UseJitCompilation());
+      DCHECK(!driver->GetCompilingDexToDex());
       // TODO: add a command-line option to disable DEX-to-DEX compilation ?
-      driver->MarkForDexToDexCompilation(self, method_ref);
+      driver->GetDexToDexCompiler().MarkForCompilation(self, method_ref, code_item);
     }
   }
   if (kTimeCompileMethod) {
@@ -616,14 +601,14 @@
   PreCompile(jclass_loader, dex_files, timings);
 
   // Can we run DEX-to-DEX compiler on this class ?
-  optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
+  optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level =
       GetDexToDexCompilationLevel(self,
                                   *this,
                                   jclass_loader,
                                   *dex_file,
                                   dex_file->GetClassDef(class_def_idx));
 
-  DCHECK(current_dex_to_dex_methods_ == nullptr);
+  DCHECK(!compiling_dex_to_dex_);
   CompileMethod(self,
                 this,
                 code_item,
@@ -637,19 +622,10 @@
                 true,
                 dex_cache);
 
-  ArrayRef<DexFileMethodSet> dex_to_dex_references;
-  {
-    // From this point on, we shall not modify dex_to_dex_references_, so
-    // just grab a reference to it that we use without holding the mutex.
-    MutexLock lock(Thread::Current(), dex_to_dex_references_lock_);
-    dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_);
-  }
-  if (!dex_to_dex_references.empty()) {
-    DCHECK_EQ(dex_to_dex_references.size(), 1u);
-    DCHECK(&dex_to_dex_references[0].GetDexFile() == dex_file);
-    current_dex_to_dex_methods_ = &dex_to_dex_references.front().GetMethodIndexes();
-    DCHECK(current_dex_to_dex_methods_->IsBitSet(method_idx));
-    DCHECK_EQ(current_dex_to_dex_methods_->NumSetBits(), 1u);
+  const size_t num_methods = dex_to_dex_compiler_.NumUniqueCodeItems(self);
+  if (num_methods != 0) {
+    DCHECK_EQ(num_methods, 1u);
+    compiling_dex_to_dex_ = true;
     CompileMethod(self,
                   this,
                   code_item,
@@ -662,7 +638,8 @@
                   dex_to_dex_compilation_level,
                   true,
                   dex_cache);
-    current_dex_to_dex_methods_ = nullptr;
+    compiling_dex_to_dex_ = false;
+    dex_to_dex_compiler_.ClearState();
   }
 
   FreeThreadPools();
@@ -697,7 +674,8 @@
 // TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a
 //       stable order.
 
-static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache,
+static void ResolveConstStrings(ClassLinker* class_linker,
+                                Handle<mirror::DexCache> dex_cache,
                                 const DexFile& dex_file,
                                 const DexFile::CodeItem* code_item)
       REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -706,7 +684,6 @@
     return;
   }
 
-  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
   for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(dex_file, code_item)) {
     switch (inst->Opcode()) {
       case Instruction::CONST_STRING:
@@ -754,22 +731,105 @@
           dex_file->StringByTypeIdx(class_def.class_idx_));
       if (!compilation_enabled) {
         // Compilation is skipped, do not resolve const-string in code of this class.
-        // TODO: Make sure that inlining honors this.
+        // FIXME: Make sure that inlining honors this. b/26687569
         continue;
       }
 
       // Direct and virtual methods.
-      int64_t previous_method_idx = -1;
       while (it.HasNextMethod()) {
-        uint32_t method_idx = it.GetMemberIndex();
-        if (method_idx == previous_method_idx) {
-          // smali can create dex files with two encoded_methods sharing the same method_idx
-          // http://code.google.com/p/smali/issues/detail?id=119
-          it.Next();
-          continue;
+        ResolveConstStrings(class_linker, dex_cache, *dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      DCHECK(!it.HasNext());
+    }
+  }
+}
+
+// Initialize type check bit strings for check-cast and instance-of in the code. Done to have
+// deterministic allocation behavior. Right now this is single-threaded for simplicity.
+// TODO: Collect the relevant type indices in parallel, then process them sequentially in a
+//       stable order.
+
+static void InitializeTypeCheckBitstrings(CompilerDriver* driver,
+                                          ClassLinker* class_linker,
+                                          Handle<mirror::DexCache> dex_cache,
+                                          const DexFile& dex_file,
+                                          const DexFile::CodeItem* code_item)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (code_item == nullptr) {
+    // Abstract or native method.
+    return;
+  }
+
+  for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(dex_file, code_item)) {
+    switch (inst->Opcode()) {
+      case Instruction::CHECK_CAST:
+      case Instruction::INSTANCE_OF: {
+        dex::TypeIndex type_index(
+            (inst->Opcode() == Instruction::CHECK_CAST) ? inst->VRegB_21c() : inst->VRegC_22c());
+        const char* descriptor = dex_file.StringByTypeIdx(type_index);
+        // We currently do not use the bitstring type check for array or final (including
+        // primitive) classes. We may reconsider this in the future if it's deemed beneficial.
+        // And we cannot use it for classes outside the boot image as we do not know the runtime
+        // value of their bitstring when compiling (it may not even get assigned at runtime).
+        if (descriptor[0] == 'L' && driver->IsImageClass(descriptor)) {
+          ObjPtr<mirror::Class> klass =
+              class_linker->LookupResolvedType(type_index,
+                                               dex_cache.Get(),
+                                               /* class_loader */ nullptr);
+          CHECK(klass != nullptr) << descriptor << " should have been previously resolved.";
+          // Now assign the bitstring if the class is not final. Keep this in sync with sharpening.
+          if (!klass->IsFinal()) {
+            MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+            SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass);
+          }
         }
-        previous_method_idx = method_idx;
-        ResolveConstStrings(dex_cache, *dex_file, it.GetMethodCodeItem());
+        break;
+      }
+
+      default:
+        break;
+    }
+  }
+}
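
For readers new to the bitstring scheme, here is a minimal sketch (plain C++ with hypothetical names; the real machinery is ART's SubtypeCheck) of the invariant that EnsureAssigned establishes: every assigned class carries a path-to-root bitstring in its status word, and a type check against a target class reduces to one mask and one compare, which is exactly what the code generators further down emit.

    #include <cstdint>

    // Hypothetical stand-in for the assigned bitstring data; in ART these are
    // the GetBitstringPathToRoot()/GetBitstringMask() values that the code
    // generators read off the HTypeCheckInstruction.
    struct BitstringTarget {
      uint32_t path_to_root;  // Bits every subtype's status word must start with.
      uint32_t mask;          // Covers the path bits; mask + 1 is a power of two.
    };

    // `status` is the candidate class' status word (its low bits hold the
    // candidate's own path-to-root). The candidate is a subtype of the target
    // iff the masked bits equal the target's path.
    inline bool IsSubtypeOf(uint32_t status, const BitstringTarget& target) {
      return (status & target.mask) == target.path_to_root;
    }
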
+
+static void InitializeTypeCheckBitstrings(CompilerDriver* driver,
+                                          const std::vector<const DexFile*>& dex_files,
+                                          TimingLogger* timings) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr));
+
+  for (const DexFile* dex_file : dex_files) {
+    dex_cache.Assign(class_linker->FindDexCache(soa.Self(), *dex_file));
+    TimingLogger::ScopedTiming t("Initialize type check bitstrings", timings);
+
+    size_t class_def_count = dex_file->NumClassDefs();
+    for (size_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) {
+      const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
+
+      const uint8_t* class_data = dex_file->GetClassData(class_def);
+      if (class_data == nullptr) {
+        // empty class, probably a marker interface
+        continue;
+      }
+
+      ClassDataItemIterator it(*dex_file, class_data);
+      it.SkipAllFields();
+
+      bool compilation_enabled = driver->IsClassToCompile(
+          dex_file->StringByTypeIdx(class_def.class_idx_));
+      if (!compilation_enabled) {
+        // Compilation is skipped, do not look for type checks in code of this class.
+        // FIXME: Make sure that inlining honors this. b/26687569
+        continue;
+      }
+
+      // Direct and virtual methods.
+      while (it.HasNextMethod()) {
+        InitializeTypeCheckBitstrings(
+            driver, class_linker, dex_cache, *dex_file, it.GetMethodCodeItem());
         it.Next();
       }
       DCHECK(!it.HasNext());
@@ -871,6 +931,13 @@
 
   UpdateImageClasses(timings);
   VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString(false);
+
+  if (GetCompilerOptions().IsForceDeterminism() && GetCompilerOptions().IsBootImage()) {
+    // Initialize type check bitstrings used by check-cast and instance-of.
+    // Do this now to have a deterministic image.
+    // Note: This is done after UpdateImageClasses() as it relies on the image classes being final.
+    InitializeTypeCheckBitstrings(this, dex_files, timings);
+  }
 }
 
 bool CompilerDriver::IsImageClass(const char* descriptor) const {
@@ -1280,17 +1347,6 @@
   return IsImageClass(descriptor);
 }
 
-void CompilerDriver::MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref) {
-  MutexLock lock(self, dex_to_dex_references_lock_);
-  // Since we're compiling one dex file at a time, we need to look for the
-  // current dex file entry only at the end of dex_to_dex_references_.
-  if (dex_to_dex_references_.empty() ||
-      &dex_to_dex_references_.back().GetDexFile() != method_ref.dex_file) {
-    dex_to_dex_references_.emplace_back(*method_ref.dex_file);
-  }
-  dex_to_dex_references_.back().GetMethodIndexes().SetBit(method_ref.index);
-}
-
 bool CompilerDriver::CanAccessTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class,
                                                 ObjPtr<mirror::Class> resolved_class) {
   if (resolved_class == nullptr) {
@@ -2612,14 +2668,8 @@
             : profile_compilation_info_->DumpInfo(&dex_files));
   }
 
-  current_dex_to_dex_methods_ = nullptr;
-  Thread* const self = Thread::Current();
-  {
-    // Clear in case we aren't the first call to Compile.
-    MutexLock mu(self, dex_to_dex_references_lock_);
-    dex_to_dex_references_.clear();
-  }
-
+  dex_to_dex_compiler_.ClearState();
+  compiling_dex_to_dex_ = false;
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
     CompileDexFile(class_loader,
@@ -2634,23 +2684,21 @@
     Runtime::Current()->ReclaimArenaPoolMemory();
   }
 
-  ArrayRef<DexFileMethodSet> dex_to_dex_references;
-  {
-    // From this point on, we shall not modify dex_to_dex_references_, so
-    // just grab a reference to it that we use without holding the mutex.
-    MutexLock lock(self, dex_to_dex_references_lock_);
-    dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_);
+  if (dex_to_dex_compiler_.NumUniqueCodeItems(Thread::Current()) > 0u) {
+    compiling_dex_to_dex_ = true;
+    // TODO: Avoid visiting all of the dex files; it's probably rare that only one of them
+    // would have quickened methods.
+    for (const DexFile* dex_file : dex_files) {
+      CompileDexFile(class_loader,
+                     *dex_file,
+                     dex_files,
+                     parallel_thread_pool_.get(),
+                     parallel_thread_count_,
+                     timings);
+    }
+    dex_to_dex_compiler_.ClearState();
+    compiling_dex_to_dex_ = false;
   }
-  for (const auto& method_set : dex_to_dex_references) {
-    current_dex_to_dex_methods_ = &method_set.GetMethodIndexes();
-    CompileDexFile(class_loader,
-                   method_set.GetDexFile(),
-                   dex_files,
-                   parallel_thread_pool_.get(),
-                   parallel_thread_count_,
-                   timings);
-  }
-  current_dex_to_dex_methods_ = nullptr;
 
   VLOG(compiler) << "Compile: " << GetMemoryUsageString(false);
 }
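
To summarize the refactor, the sketch below models the two passes with toy stand-ins (none of these types are ART's): pass one compiles normally and only marks methods that want quickening, then pass two revisits the dex files and compiles exactly the marked set while compiling_dex_to_dex_ is raised.

    #include <cstdio>
    #include <set>

    // Toy model of the two-pass scheme; MethodRef and ToyDexToDexCompiler are
    // illustrative stand-ins, not ART types.
    struct MethodRef {
      int index;
      bool operator<(const MethodRef& other) const { return index < other.index; }
    };

    struct ToyDexToDexCompiler {
      std::set<MethodRef> marked_;
      void MarkForCompilation(MethodRef m) { marked_.insert(m); }
      bool ShouldCompileMethod(MethodRef m) const { return marked_.count(m) != 0u; }
      void ClearState() { marked_.clear(); }
    };

    int main() {
      ToyDexToDexCompiler compiler;
      const MethodRef methods[] = {{0}, {1}, {2}};

      // Pass 1: optimizing compilation; pretend method 1 declines and is
      // marked for dex-to-dex instead.
      for (MethodRef m : methods) {
        if (m.index == 1) {
          compiler.MarkForCompilation(m);
        }
      }

      // Pass 2 (compiling_dex_to_dex_ == true): revisit all methods, but only
      // the marked ones are dex-to-dex compiled.
      for (MethodRef m : methods) {
        if (compiler.ShouldCompileMethod(m)) {
          std::printf("dex-to-dex compiling method %d\n", m.index);
        }
      }
      compiler.ClearState();
      return 0;
    }
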
@@ -2701,7 +2749,7 @@
     CompilerDriver* const driver = manager_->GetCompiler();
 
     // Can we run DEX-to-DEX compiler on this class ?
-    optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
+    optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level =
         GetDexToDexCompilationLevel(soa.Self(), *driver, jclass_loader, dex_file, class_def);
 
     ClassDataItemIterator it(dex_file, class_data);
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index ef16212..4b5916d 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -35,8 +35,8 @@
 #include "compiler.h"
 #include "dex/dex_file.h"
 #include "dex/dex_file_types.h"
+#include "dex/dex_to_dex_compiler.h"
 #include "driver/compiled_method_storage.h"
-#include "jit/profile_compilation_info.h"
 #include "method_reference.h"
 #include "os.h"
 #include "safe_map.h"
@@ -69,6 +69,7 @@
 class MemberOffset;
 template<class MirrorType> class ObjPtr;
 class ParallelCompilationManager;
+class ProfileCompilationInfo;
 class ScopedObjectAccess;
 template <class Allocator> class SrcMap;
 class TimingLogger;
@@ -76,6 +77,9 @@
 class VerificationResults;
 class VerifiedMethod;
 
+// Compile-time flag to enable/disable bitstring type checks.
+static constexpr bool kUseBitstringTypeCheck = true;
+
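This follows ART's usual constexpr-flag convention: the flag is read in an ordinary if, so both branches stay compiled and type-checked while the dead one folds away. A hedged sketch of a consumer (the toy enum and helper below are illustrative; the real decision that selects the bitstring check lives in the sharpening/builder code):

    // Toy enum standing in for ART's TypeCheckKind, for this sketch only.
    enum class ToyTypeCheckKind { kClassHierarchyCheck, kBitstringCheck };

    static constexpr bool kUseBitstringTypeCheckSketch = true;  // Mirrors the flag above.

    ToyTypeCheckKind ChooseKind(bool bitstring_usable) {
      // Reading a constexpr flag in a plain `if` keeps both branches compiled
      // while letting the compiler eliminate the disabled path.
      if (kUseBitstringTypeCheckSketch && bitstring_usable) {
        return ToyTypeCheckKind::kBitstringCheck;
      }
      return ToyTypeCheckKind::kClassHierarchyCheck;
    }
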
 enum EntryPointCallingConvention {
   // ABI of invocations to a method's interpreter entry point.
   kInterpreterAbi,
@@ -120,12 +124,11 @@
   void CompileAll(jobject class_loader,
                   const std::vector<const DexFile*>& dex_files,
                   TimingLogger* timings)
-      REQUIRES(!Locks::mutator_lock_, !dex_to_dex_references_lock_);
+      REQUIRES(!Locks::mutator_lock_);
 
   // Compile a single Method.
   void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_to_dex_references_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   VerificationResults* GetVerificationResults() const;
 
@@ -362,13 +365,6 @@
     return true;
   }
 
-  void MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref)
-      REQUIRES(!dex_to_dex_references_lock_);
-
-  const BitVector* GetCurrentDexToDexMethods() const {
-    return current_dex_to_dex_methods_;
-  }
-
   const ProfileCompilationInfo* GetProfileCompilationInfo() const {
     return profile_compilation_info_;
   }
@@ -381,6 +377,14 @@
         || android::base::EndsWith(boot_image_filename, "core-optimizing.art");
   }
 
+  bool GetCompilingDexToDex() const {
+    return compiling_dex_to_dex_;
+  }
+
+  optimizer::DexToDexCompiler& GetDexToDexCompiler() {
+    return dex_to_dex_compiler_;
+  }
+
  private:
   void PreCompile(jobject class_loader,
                   const std::vector<const DexFile*>& dex_files,
@@ -447,7 +451,7 @@
 
   void Compile(jobject class_loader,
                const std::vector<const DexFile*>& dex_files,
-               TimingLogger* timings) REQUIRES(!dex_to_dex_references_lock_);
+               TimingLogger* timings);
   void CompileDexFile(jobject class_loader,
                       const DexFile& dex_file,
                       const std::vector<const DexFile*>& dex_files,
@@ -539,14 +543,9 @@
 
   size_t max_arena_alloc_;
 
-  // Data for delaying dex-to-dex compilation.
-  Mutex dex_to_dex_references_lock_;
-  // In the first phase, dex_to_dex_references_ collects methods for dex-to-dex compilation.
-  class DexFileMethodSet;
-  std::vector<DexFileMethodSet> dex_to_dex_references_ GUARDED_BY(dex_to_dex_references_lock_);
-  // In the second phase, current_dex_to_dex_methods_ points to the BitVector with method
-  // indexes for dex-to-dex compilation in the current dex file.
-  const BitVector* current_dex_to_dex_methods_;
+  // Compiler for dex to dex (quickening).
+  bool compiling_dex_to_dex_;
+  optimizer::DexToDexCompiler dex_to_dex_compiler_;
 
   friend class CompileClassVisitor;
   friend class DexToDexDecompilerTest;
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 88e3e5b..2c62095 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -76,6 +76,7 @@
     const ArrayRef<mirror::Class*> types_array(types, count);
     std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForClasses(
         kRuntimeISA, jit_compiler->GetCompilerDriver()->GetInstructionSetFeatures(), types_array);
+    MutexLock mu(Thread::Current(), g_jit_debug_mutex);
     CreateJITCodeEntry(std::move(elf_file));
   }
 }
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index cedbe5d..6e0286a 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -250,12 +250,12 @@
     for (size_t i = start, num = data.NumberOfThunks(); i != num; ++i) {
       debug::MethodDebugInfo info = {};
       if (i == 0u) {
-        info.trampoline_name = base_name;
+        info.custom_name = base_name;
       } else {
         // Add a disambiguating tag for subsequent identical thunks. Since the `thunks_`
         // keeps records also for thunks in previous oat files, names based on the thunk
         // index shall be unique across the whole multi-oat output.
-        info.trampoline_name = base_name + "_" + std::to_string(i);
+        info.custom_name = base_name + "_" + std::to_string(i);
       }
       info.isa = instruction_set_;
       info.is_code_address_text_relative = true;
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 147df1e..d893cc8 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -836,9 +836,23 @@
       ValueRange array_range(&allocator_, lower, upper);
       // Try index range obtained by dominator-based analysis.
       ValueRange* index_range = LookupValueRange(index, block);
-      if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceInstruction(bounds_check, index);
-        return;
+      if (index_range != nullptr) {
+        if (index_range->FitsIn(&array_range)) {
+          ReplaceInstruction(bounds_check, index);
+          return;
+        } else if (index_range->IsConstantValueRange()) {
+          // If the non-constant index turns out to have a constant range,
+          // make one more attempt to get a constant in the array range.
+          ValueRange* existing_range = LookupValueRange(array_length, block);
+          if (existing_range != nullptr &&
+              existing_range->IsConstantValueRange()) {
+            ValueRange constant_array_range(&allocator_, lower, existing_range->GetLower());
+            if (index_range->FitsIn(&constant_array_range)) {
+              ReplaceInstruction(bounds_check, index);
+              return;
+            }
+          }
+        }
       }
       // Try index range obtained by induction variable analysis.
       // Disables dynamic bce if OOB is certain.
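
The new branch covers the case where the index is not a literal constant but dominator analysis pinned it to a constant range, and the array length likewise has a known constant range; the retry compares the index against a range capped at the length's constant lower bound. A self-contained sketch of that arithmetic (ConstRange is a stand-in, not ART's ValueRange/ValueBound):

    #include <cstdio>

    // Stand-in for a range with constant inclusive bounds.
    struct ConstRange {
      int lower;
      int upper;
      bool FitsIn(const ConstRange& outer) const {
        return lower >= outer.lower && upper <= outer.upper;
      }
    };

    int main() {
      // The index is not a constant, but its dominator-based range is,
      // e.g. from `int i = flag ? 3 : 11;`.
      const ConstRange index_range{3, 11};
      // The array length has the constant range [16, 16], so any index in
      // [0, length_lower - 1] is provably in bounds.
      const ConstRange constant_array_range{0, 16 - 1};
      std::printf("bounds check removable: %s\n",
                  index_range.FitsIn(constant_array_range) ? "yes" : "no");
      return 0;
    }
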
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 3c5a37f..2dafbf7 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -438,6 +438,8 @@
       case TypeCheckKind::kArrayCheck:
       case TypeCheckKind::kUnresolvedCheck:
         return false;
+      case TypeCheckKind::kBitstringCheck:
+        return true;
     }
     LOG(FATAL) << "Unreachable";
     UNREACHABLE();
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 13bbffa..b47a5cf 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2112,6 +2112,26 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
+    HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  if (mask_bits == 16u) {
+    // Load only the bitstring part of the status word.
+    __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
+    // Extract the bitstring bits.
+    __ Ubfx(temp, temp, 0, mask_bits);
+  }
+  // Compare the bitstring bits to `path_to_root`.
+  __ Cmp(temp, path_to_root);
+}
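
Both branches compute the same predicate; restated as plain C++ (a sketch over the same path_to_root/mask_bits values, assuming mask_bits < 32 as is the case in practice):

    #include <cstdint>

    // What the generated ARM64 code checks, restated in C++ (sketch).
    bool BitstringMatches(uint32_t status, uint32_t path_to_root, size_t mask_bits) {
      uint32_t bits = (mask_bits == 16u)
          ? (status & 0xffffu)                    // LDRH reads the low halfword.
          : (status & ((1u << mask_bits) - 1u));  // UBFX extracts the low mask_bits.
      return bits == path_to_root;                // CMP temp, #path_to_root.
    }
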
+
 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
   BarrierType type = BarrierAll;
 
@@ -3840,6 +3860,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -3848,7 +3870,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   // The "out" register is used as a temporary, so it overlaps with the inputs.
   // Note that TypeCheckSlowPathARM64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -3861,7 +3889,9 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
-  Register cls = InputRegisterAt(instruction, 1);
+  Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+      ? Register()
+      : InputRegisterAt(instruction, 1);
   Location out_loc = locations->Out();
   Register out = OutputRegister(instruction);
   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -4047,6 +4077,23 @@
       }
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out);
+      __ Cset(out, eq);
+      if (zero.IsLinked()) {
+        __ B(&done);
+      }
+      break;
+    }
   }
 
   if (zero.IsLinked()) {
@@ -4069,7 +4116,13 @@
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
@@ -4079,7 +4132,9 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
-  Register cls = InputRegisterAt(instruction, 1);
+  Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+      ? Register()
+      : InputRegisterAt(instruction, 1);
   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
   DCHECK_GE(num_temps, 1u);
   DCHECK_LE(num_temps, 3u);
@@ -4260,6 +4315,20 @@
       __ B(ne, &start_loop);
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp);
+      __ B(ne, type_check_slow_path->GetEntryLabel());
+      break;
+    }
   }
   __ Bind(&done);
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index f92c94f..cc369de 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -264,6 +264,8 @@
  private:
   void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
                                         vixl::aarch64::Register class_reg);
+  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                         vixl::aarch64::Register temp);
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* instr);
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 6cbde72..504c647 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -7195,6 +7195,67 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
+    HTypeCheckInstruction* check,
+    vixl32::Register temp,
+    vixl32::FlagsUpdate flags_update) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  // Note that HInstanceOf checks for a zero value in `temp` while HCheckCast needs
+  // the Z flag for BNE. This is indicated by the `flags_update` parameter.
+  if (mask_bits == 16u) {
+    // Load only the bitstring part of the status word.
+    __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
+    // Check if the bitstring bits are equal to `path_to_root`.
+    if (flags_update == SetFlags) {
+      __ Cmp(temp, path_to_root);
+    } else {
+      __ Sub(temp, temp, path_to_root);
+    }
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
+    if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
+      // Compare the bitstring bits using SUB.
+      __ Sub(temp, temp, path_to_root);
+      // Shift out bits that do not contribute to the comparison.
+      __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+    } else if (IsUint<16>(path_to_root)) {
+      if (temp.IsLow()) {
+        // Note: Optimized for size but contains one more dependent instruction than necessary.
+        //       MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the
+        //       macro assembler would use the high reg IP for the constant by default.
+        // Compare the bitstring bits using SUB.
+        __ Sub(temp, temp, path_to_root & 0x00ffu);  // 16-bit SUB (immediate) T2
+        __ Sub(temp, temp, path_to_root & 0xff00u);  // 32-bit SUB (immediate) T3
+        // Shift out bits that do not contribute to the comparison.
+        __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+      } else {
+        // Extract the bitstring bits.
+        __ Ubfx(temp, temp, 0, mask_bits);
+        // Check if the bitstring bits are equal to `path_to_root`.
+        if (flags_update == SetFlags) {
+          __ Cmp(temp, path_to_root);
+        } else {
+          __ Sub(temp, temp, path_to_root);
+        }
+      }
+    } else {
+      // Shift out bits that do not contribute to the comparison.
+      __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+      // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
+      if (flags_update == SetFlags) {
+        __ Cmp(temp, path_to_root << (32u - mask_bits));
+      } else {
+        __ Sub(temp, temp, path_to_root << (32u - mask_bits));
+      }
+    }
+  }
+}
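
The SUB-then-LSL variants all test the same condition without materializing the mask: after subtracting path_to_root, the low mask_bits are zero exactly when the bitstring matches, and shifting left by 32 - mask_bits discards the unrelated high bits while updating the flags. A sketch of that equivalence (assumes path_to_root < 2^mask_bits, i.e. the path fits within the mask):

    #include <cstdint>

    // Sketch: why SUB + LSL(S) is equivalent to comparing the masked bits.
    // (status - path_to_root) mod 2^mask_bits == 0 iff the low mask_bits of
    // `status` equal `path_to_root`.
    bool SubShiftMatches(uint32_t status, uint32_t path_to_root, size_t mask_bits) {
      uint32_t diff = status - path_to_root;         // SUB temp, temp, #path_to_root
      uint32_t shifted = diff << (32u - mask_bits);  // LSLS temp, temp, #(32 - mask_bits)
      return shifted == 0u;                          // Z flag after LSLS.
    }
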
+
 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
@@ -7386,6 +7447,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -7394,7 +7457,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   // The "out" register is used as a temporary, so it overlaps with the inputs.
   // Note that TypeCheckSlowPathARM uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -7409,7 +7478,9 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   vixl32::Register obj = InputRegisterAt(instruction, 0);
-  vixl32::Register cls = InputRegisterAt(instruction, 1);
+  vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+      ? vixl32::Register()
+      : InputRegisterAt(instruction, 1);
   Location out_loc = locations->Out();
   vixl32::Register out = OutputRegister(instruction);
   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -7649,6 +7720,26 @@
       __ B(slow_path->GetEntryLabel());
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
+      // If `out` is a low reg and we would have another low reg temp, we could
+      // optimize this as RSBS+ADC, see GenerateConditionWithZero().
+      //
+      // Also, in some cases when `out` is a low reg and we're loading a constant to IP
+      // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
+      // would be the same and we would have fewer direct data dependencies.
+      codegen_->GenerateConditionWithZero(kCondEQ, out, out);  // CLZ+LSR
+      break;
+    }
   }
 
   if (done.IsReferenced()) {
@@ -7666,7 +7757,13 @@
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
@@ -7675,7 +7772,9 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   vixl32::Register obj = InputRegisterAt(instruction, 0);
-  vixl32::Register cls = InputRegisterAt(instruction, 1);
+  vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+      ? vixl32::Register()
+      : InputRegisterAt(instruction, 1);
   Location temp_loc = locations->GetTemp(0);
   vixl32::Register temp = RegisterFrom(temp_loc);
   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -7860,6 +7959,20 @@
       __ B(ne, &start_loop, /* far_target */ false);
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
+      __ B(ne, type_check_slow_path->GetEntryLabel());
+      break;
+    }
   }
   if (done.IsReferenced()) {
     __ Bind(&done);
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 38570bb..bd815f4 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -322,6 +322,9 @@
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path,
                                         vixl32::Register class_reg);
+  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                         vixl::aarch32::Register temp,
+                                         vixl::aarch32::FlagsUpdate flags_update);
   void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
   void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
   void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5c8e46e..2ed0ab7 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1929,6 +1929,34 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                                                     Register temp) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  if (mask_bits == 16u) {
+    // Load only the bitstring part of the status word.
+    __ LoadFromOffset(
+        kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value());
+    // Compare the bitstring bits using XOR.
+    __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value());
+    // Compare the bitstring bits using XOR.
+    if (IsUint<16>(path_to_root)) {
+      __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+    } else {
+      __ LoadConst32(TMP, path_to_root);
+      __ Xor(temp, temp, TMP);
+    }
+    // Shift out bits that do not contribute to the comparison.
+    __ Sll(temp, temp, 32 - mask_bits);
+  }
+}
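
MIPS has no condition flags, so the result must end up as a zero/non-zero register value that the callers then branch on or feed to Sltiu; XOR clears exactly the matching bits and SLL throws away everything above mask_bits. A sketch:

    #include <cstdint>

    // Sketch of the MIPS variant: the returned value is 0 exactly when the
    // relevant bitstring bits equal path_to_root. Callers test the register
    // directly (Bnez for check-cast, Sltiu for instance-of).
    uint32_t XorShiftResidue(uint32_t status, uint32_t path_to_root, size_t mask_bits) {
      uint32_t diff = status ^ path_to_root;  // XOR / XORI
      return (mask_bits == 16u)
          ? (diff & 0xffffu)                  // Halfword load already dropped high bits.
          : (diff << (32u - mask_bits));      // SLL temp, temp, 32 - mask_bits.
    }
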
+
 void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {
   __ Sync(0);  // Only stype 0 is supported.
 }
@@ -3289,12 +3317,20 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
@@ -3303,7 +3339,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
-  Register cls = locations->InAt(1).AsRegister<Register>();
+  Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -3353,7 +3389,7 @@
                                         kWithoutReadBarrier);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ Bne(temp, cls, slow_path->GetEntryLabel());
+      __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel());
       break;
     }
 
@@ -3379,7 +3415,7 @@
       // exception.
       __ Beqz(temp, slow_path->GetEntryLabel());
       // Otherwise, compare the classes.
-      __ Bne(temp, cls, &loop);
+      __ Bne(temp, cls.AsRegister<Register>(), &loop);
       break;
     }
 
@@ -3394,7 +3430,7 @@
       // Walk over the class hierarchy to find a match.
       MipsLabel loop;
       __ Bind(&loop);
-      __ Beq(temp, cls, &done);
+      __ Beq(temp, cls.AsRegister<Register>(), &done);
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction,
                                        temp_loc,
@@ -3417,7 +3453,7 @@
                                         maybe_temp2_loc,
                                         kWithoutReadBarrier);
       // Do an exact check.
-      __ Beq(temp, cls, &done);
+      __ Beq(temp, cls.AsRegister<Register>(), &done);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
       GenerateReferenceLoadOneRegister(instruction,
@@ -3476,7 +3512,21 @@
       // Go to next interface.
       __ Addiu(TMP, TMP, -2);
       // Compare the classes and continue the loop if they do not match.
-      __ Bne(AT, cls, &loop);
+      __ Bne(AT, cls.AsRegister<Register>(), &loop);
+      break;
+    }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp);
+      __ Bnez(temp, slow_path->GetEntryLabel());
       break;
     }
   }
@@ -7207,6 +7257,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -7215,7 +7267,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   // The output does overlap inputs.
   // Note that TypeCheckSlowPathMIPS uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -7227,7 +7285,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
-  Register cls = locations->InAt(1).AsRegister<Register>();
+  Location cls = locations->InAt(1);
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -7257,7 +7315,7 @@
                                         maybe_temp_loc,
                                         kCompilerReadBarrierOption);
       // Classes must be equal for the instanceof to succeed.
-      __ Xor(out, out, cls);
+      __ Xor(out, out, cls.AsRegister<Register>());
       __ Sltiu(out, out, 1);
       break;
     }
@@ -7282,7 +7340,7 @@
                                        kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Beqz(out, &done);
-      __ Bne(out, cls, &loop);
+      __ Bne(out, cls.AsRegister<Register>(), &loop);
       __ LoadConst32(out, 1);
       break;
     }
@@ -7298,7 +7356,7 @@
       // Walk over the class hierarchy to find a match.
       MipsLabel loop, success;
       __ Bind(&loop);
-      __ Beq(out, cls, &success);
+      __ Beq(out, cls.AsRegister<Register>(), &success);
       // /* HeapReference<Class> */ out = out->super_class_
       GenerateReferenceLoadOneRegister(instruction,
                                        out_loc,
@@ -7323,7 +7381,7 @@
                                         kCompilerReadBarrierOption);
       // Do an exact check.
       MipsLabel success;
-      __ Beq(out, cls, &success);
+      __ Beq(out, cls.AsRegister<Register>(), &success);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
       GenerateReferenceLoadOneRegister(instruction,
@@ -7355,7 +7413,7 @@
       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS(
           instruction, /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
-      __ Bne(out, cls, slow_path->GetEntryLabel());
+      __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel());
       __ LoadConst32(out, 1);
       break;
     }
@@ -7387,6 +7445,20 @@
       __ B(slow_path->GetEntryLabel());
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out);
+      __ Sltiu(out, out, 1);
+      break;
+    }
   }
 
   __ Bind(&done);
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 32b3e42..ffeb3b0 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -237,6 +237,7 @@
  private:
   void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
+  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
   void HandleBinaryOp(HBinaryOperation* operation);
   void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index bcfe051..3ae8a30 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1775,6 +1775,34 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                                                       GpuRegister temp) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  if (mask_bits == 16u) {
+    // Load only the bitstring part of the status word.
+    __ LoadFromOffset(
+        kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value());
+    // Compare the bitstring bits using XOR.
+    __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value());
+    // Compare the bitstring bits using XOR.
+    if (IsUint<16>(path_to_root)) {
+      __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+    } else {
+      __ LoadConst32(TMP, path_to_root);
+      __ Xor(temp, temp, TMP);
+    }
+    // Shift out bits that do not contribute to the comparison.
+    __ Sll(temp, temp, 32 - mask_bits);
+  }
+}
+
 void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {
   __ Sync(0);  // only stype 0 is supported
 }
@@ -2844,12 +2872,20 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
@@ -2858,7 +2894,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
-  GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
+  Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   GpuRegister temp = temp_loc.AsRegister<GpuRegister>();
   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -2908,7 +2944,7 @@
                                         kWithoutReadBarrier);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ Bnec(temp, cls, slow_path->GetEntryLabel());
+      __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
       break;
     }
 
@@ -2934,7 +2970,7 @@
       // exception.
       __ Beqzc(temp, slow_path->GetEntryLabel());
       // Otherwise, compare the classes.
-      __ Bnec(temp, cls, &loop);
+      __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop);
       break;
     }
 
@@ -2949,7 +2985,7 @@
       // Walk over the class hierarchy to find a match.
       Mips64Label loop;
       __ Bind(&loop);
-      __ Beqc(temp, cls, &done);
+      __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done);
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction,
                                        temp_loc,
@@ -2972,7 +3008,7 @@
                                         maybe_temp2_loc,
                                         kWithoutReadBarrier);
       // Do an exact check.
-      __ Beqc(temp, cls, &done);
+      __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
       GenerateReferenceLoadOneRegister(instruction,
@@ -3031,7 +3067,21 @@
       __ Daddiu(temp, temp, 2 * kHeapReferenceSize);
       __ Addiu(TMP, TMP, -2);
       // Compare the classes and continue the loop if they do not match.
-      __ Bnec(AT, cls, &loop);
+      __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop);
+      break;
+    }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp);
+      __ Bnezc(temp, slow_path->GetEntryLabel());
       break;
     }
   }
@@ -5524,6 +5574,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -5532,7 +5584,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   // The output does overlap inputs.
   // Note that TypeCheckSlowPathMIPS64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -5544,7 +5602,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
-  GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
+  Location cls = locations->InAt(1);
   Location out_loc = locations->Out();
   GpuRegister out = out_loc.AsRegister<GpuRegister>();
   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -5574,7 +5632,7 @@
                                         maybe_temp_loc,
                                         kCompilerReadBarrierOption);
       // Classes must be equal for the instanceof to succeed.
-      __ Xor(out, out, cls);
+      __ Xor(out, out, cls.AsRegister<GpuRegister>());
       __ Sltiu(out, out, 1);
       break;
     }
@@ -5599,7 +5657,7 @@
                                        kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Beqzc(out, &done);
-      __ Bnec(out, cls, &loop);
+      __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop);
       __ LoadConst32(out, 1);
       break;
     }
@@ -5615,7 +5673,7 @@
       // Walk over the class hierarchy to find a match.
       Mips64Label loop, success;
       __ Bind(&loop);
-      __ Beqc(out, cls, &success);
+      __ Beqc(out, cls.AsRegister<GpuRegister>(), &success);
       // /* HeapReference<Class> */ out = out->super_class_
       GenerateReferenceLoadOneRegister(instruction,
                                        out_loc,
@@ -5640,7 +5698,7 @@
                                         kCompilerReadBarrierOption);
       // Do an exact check.
       Mips64Label success;
-      __ Beqc(out, cls, &success);
+      __ Beqc(out, cls.AsRegister<GpuRegister>(), &success);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
       GenerateReferenceLoadOneRegister(instruction,
@@ -5672,7 +5730,7 @@
       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64(
           instruction, /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
-      __ Bnec(out, cls, slow_path->GetEntryLabel());
+      __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
       __ LoadConst32(out, 1);
       break;
     }
@@ -5704,6 +5762,20 @@
       __ Bc(slow_path->GetEntryLabel());
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out);
+      __ Sltiu(out, out, 1);
+      break;
+    }
   }
 
   __ Bind(&done);
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index d479410..87d5a9c 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -233,6 +233,7 @@
 
  private:
   void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg);
+  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
   void HandleCondition(HCondition* instruction);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index cbe9e0a..e85f900 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6234,6 +6234,27 @@
   // No need for memory fence, thanks to the X86 memory model.
 }
 
+void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                                                    Register temp) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  if ((false) && mask_bits == 16u) {
+    // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552
+    // Compare the bitstring in memory.
+    __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
+    // Compare the bitstring bits using SUB.
+    __ subl(temp, Immediate(path_to_root));
+    // Shift out bits that do not contribute to the comparison.
+    __ shll(temp, Immediate(32u - mask_bits));
+  }
+}
+
 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
@@ -6426,6 +6447,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -6434,7 +6457,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::Any());
+  }
   // Note that TypeCheckSlowPathX86 uses this "out" register too.
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for some cases.
@@ -6655,6 +6684,21 @@
       }
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out);
+      __ j(kNotEqual, &zero);
+      __ movl(out, Immediate(1));
+      __ jmp(&done);
+      break;
+    }
   }
 
   if (zero.IsLinked()) {
@@ -6681,6 +6725,10 @@
     // Require a register for the interface check since there is a loop that compares the class to
     // a memory address.
     locations->SetInAt(1, Location::RequiresRegister());
+  } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
   } else {
     locations->SetInAt(1, Location::Any());
   }
@@ -6900,6 +6948,19 @@
       __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp);
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+      break;
+    }
   }
   __ Bind(&done);
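The helper above tests `(status & mask) == path_to_root` without a separate AND: subtracting `path_to_root` and shifting left by `32 - mask_bits` discards everything above the mask, so the result (and the zero flag) is zero exactly on a match. A minimal sketch of that equivalence, assuming only that `mask + 1` is a power of two and `path_to_root <= mask`:

    #include <cassert>
    #include <cstdint>

    // Reference semantics of the subl + shll sequence emitted by
    // GenerateBitstringTypeCheckCompare.
    bool BitstringMatches(uint32_t status, uint32_t path_to_root, uint32_t mask_bits) {
      uint32_t temp = status - path_to_root;  // subl
      temp <<= (32u - mask_bits);             // shll: drop bits above the mask
      return temp == 0u;                      // what j(kNotEqual, ...) branches on
    }

    int main() {
      const uint32_t mask_bits = 12u;
      const uint32_t mask = (1u << mask_bits) - 1u;
      for (uint32_t status : {0x00000ABCu, 0xFFFF0ABCu, 0x00000ABDu}) {
        assert(BitstringMatches(status, 0x0ABCu, mask_bits) ==
               ((status & mask) == 0x0ABCu));
      }
      return 0;
    }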
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 0082853..2d14d4c 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -211,6 +211,7 @@
   // the suspend call.
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
+  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 510eec4..9f8b1bb 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5440,6 +5440,27 @@
   // No need for memory fence, thanks to the x86-64 memory model.
 }
 
+void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                                                       CpuRegister temp) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  if ((false) && mask_bits == 16u) {
+    // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552
+    // Compare the bitstring in memory.
+    __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
+    // Compare the bitstring bits using SUB.
+    __ subl(temp, Immediate(path_to_root));
+    // Shift out bits that do not contribute to the comparison.
+    __ shll(temp, Immediate(32u - mask_bits));
+  }
+}
+
 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
     HLoadClass::LoadKind desired_class_load_kind) {
   switch (desired_class_load_kind) {
@@ -5812,6 +5833,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -5820,7 +5843,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::Any());
+  }
   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for
@@ -6049,6 +6078,27 @@
       }
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out);
+      if (zero.IsLinked()) {
+        __ j(kNotEqual, &zero);
+        __ movl(out, Immediate(1));
+        __ jmp(&done);
+      } else {
+        __ setcc(kEqual, out);
+        // setcc only sets the low byte.
+        __ andl(out, Immediate(1));
+      }
+      break;
+    }
   }
 
   if (zero.IsLinked()) {
@@ -6075,6 +6125,10 @@
     // Require a register for the interface check since there is a loop that compares the class to
     // a memory address.
     locations->SetInAt(1, Location::RequiresRegister());
+  } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
   } else {
     locations->SetInAt(1, Location::Any());
   }
@@ -6261,7 +6315,7 @@
       break;
     }
 
-    case TypeCheckKind::kInterfaceCheck:
+    case TypeCheckKind::kInterfaceCheck: {
       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
       // We can not get false positives by doing this.
       // /* HeapReference<Class> */ temp = obj->klass_
@@ -6297,6 +6351,20 @@
       // If `cls` was poisoned above, unpoison it.
       __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
       break;
+    }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp);
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+      break;
+    }
   }
 
   if (done.IsLinked()) {
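One x86-64 detail in the instance-of path above: `setcc` writes only the low byte of its destination register, so the `andl(out, Immediate(1))` that follows is required to clear whatever the upper 24 bits held. A small model of the idiom:

    #include <cstdint>

    // setcc(kEqual, out) followed by andl(out, 1), modeled on plain integers.
    uint32_t MaterializeWithSetcc(uint32_t out_before, bool equal) {
      uint32_t out = (out_before & ~0xFFu) | (equal ? 1u : 0u);  // setcc: low byte only
      out &= 1u;  // andl: discard the stale upper bits
      return out;
    }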
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index e86123e..97f8ec7 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -208,6 +208,7 @@
   // the suspend call.
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
+  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void GenerateRemFP(HRem* rem);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index c88baa8..fbcbe36 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -25,6 +25,11 @@
 #include "base/bit_vector-inl.h"
 #include "base/scoped_arena_allocator.h"
 #include "base/scoped_arena_containers.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "obj_ptr-inl.h"
+#include "scoped_thread_state_change-inl.h"
+#include "subtype_check.h"
 
 namespace art {
 
@@ -548,30 +553,85 @@
   }
 }
 
-void GraphChecker::VisitCheckCast(HCheckCast* check) {
-  VisitInstruction(check);
-  HInstruction* input = check->InputAt(1);
-  if (!input->IsLoadClass()) {
-    AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
+void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
+                                                size_t input_pos,
+                                                bool check_value,
+                                                uint32_t expected_value,
+                                                const char* name) {
+  if (!check->InputAt(input_pos)->IsIntConstant()) {
+    AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.",
                           check->DebugName(),
                           check->GetId(),
-                          input->DebugName(),
-                          input->GetId()));
+                          input_pos,
+                          name,
+                          check->InputAt(input_pos)->DebugName(),
+                          check->InputAt(input_pos)->GetId()));
+  } else if (check_value) {
+    uint32_t actual_value =
+        static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue());
+    if (actual_value != expected_value) {
+      AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.",
+                            check->DebugName(),
+                            check->GetId(),
+                            name,
+                            actual_value,
+                            expected_value));
+    }
   }
 }
 
-void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
-  VisitInstruction(instruction);
-  HInstruction* input = instruction->InputAt(1);
-  if (!input->IsLoadClass()) {
-    AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
-                          instruction->DebugName(),
-                          instruction->GetId(),
-                          input->DebugName(),
-                          input->GetId()));
+void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
+  VisitInstruction(check);
+  HInstruction* input = check->InputAt(1);
+  if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+    if (!input->IsNullConstant()) {
+      AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.",
+                            check->DebugName(),
+                            check->GetId(),
+                            input->DebugName(),
+                            input->GetId()));
+    }
+    bool check_values = false;
+    BitString::StorageType expected_path_to_root = 0u;
+    BitString::StorageType expected_mask = 0u;
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      ObjPtr<mirror::Class> klass = check->GetClass().Get();
+      MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+      SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass);
+      if (state == SubtypeCheckInfo::kAssigned) {
+        expected_path_to_root =
+            SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass);
+        expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass);
+        check_values = true;
+      } else {
+        AddError(StringPrintf("%s:%d (bitstring) references a class with an unassigned bitstring.",
+                              check->DebugName(),
+                              check->GetId()));
+      }
+    }
+    CheckTypeCheckBitstringInput(
+        check, /* input_pos */ 2, check_values, expected_path_to_root, "path_to_root");
+    CheckTypeCheckBitstringInput(check, /* input_pos */ 3, check_values, expected_mask, "mask");
+  } else {
+    if (!input->IsLoadClass()) {
+      AddError(StringPrintf("%s:%d (classic) expects a HLoadClass as second input, not %s:%d.",
+                            check->DebugName(),
+                            check->GetId(),
+                            input->DebugName(),
+                            input->GetId()));
+    }
   }
 }
 
+void GraphChecker::VisitCheckCast(HCheckCast* check) {
+  HandleTypeCheckInstruction(check);
+}
+
+void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
+  HandleTypeCheckInstruction(instruction);
+}
+
 void GraphChecker::HandleLoop(HBasicBlock* loop_header) {
   int id = loop_header->GetBlockId();
   HLoopInformation* loop_information = loop_header->GetLoopInformation();
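The checks above rely on the fixed input layout of a bitstring-mode HTypeCheckInstruction. A compact summary (illustrative enum, not ART code):

    // Input slots of HInstanceOf/HCheckCast when the kind is kBitstringCheck.
    enum BitstringCheckInput {
      kInputObject = 0,       // the value being tested
      kInputClassOrNull = 1,  // HNullConstant here; HLoadClass for other kinds
      kInputPathToRoot = 2,   // HIntConstant; must match GetEncodedPathToRootForTarget()
      kInputMask = 3,         // HIntConstant; must match GetEncodedPathToRootMask()
    };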
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 0f0b49d..dbedc40 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -71,6 +71,12 @@
   void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
   void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
 
+  void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
+                                    size_t input_pos,
+                                    bool check_value,
+                                    uint32_t expected_value,
+                                    const char* name);
+  void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction);
   void HandleLoop(HBasicBlock* loop_header);
   void HandleBooleanInput(HInstruction* instruction, size_t input_index);
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 12c6988..5519121 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -389,16 +389,23 @@
     StartAttributeStream("load_kind") << load_string->GetLoadKind();
   }
 
-  void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
-    StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind();
+  void HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
+    StartAttributeStream("check_kind") << check->GetTypeCheckKind();
     StartAttributeStream("must_do_null_check") << std::boolalpha
-        << check_cast->MustDoNullCheck() << std::noboolalpha;
+        << check->MustDoNullCheck() << std::noboolalpha;
+    if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+      StartAttributeStream("path_to_root") << std::hex
+          << "0x" << check->GetBitstringPathToRoot() << std::dec;
+      StartAttributeStream("mask") << std::hex << "0x" << check->GetBitstringMask() << std::dec;
+    }
+  }
+
+  void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
+    HandleTypeCheckInstruction(check_cast);
   }
 
   void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE {
-    StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind();
-    StartAttributeStream("must_do_null_check") << std::boolalpha
-        << instance_of->MustDoNullCheck() << std::noboolalpha;
+    HandleTypeCheckInstruction(instance_of);
   }
 
   void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
@@ -648,20 +655,32 @@
           << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha;
     }
 
+    // For the builder and the inliner, we want to add extra information on HInstructions
+        // that have reference types, and also HInstanceOf/HCheckCast.
     if ((IsPass(HGraphBuilder::kBuilderPassName)
         || IsPass(HInliner::kInlinerPassName))
-        && (instruction->GetType() == DataType::Type::kReference)) {
-      ReferenceTypeInfo info = instruction->IsLoadClass()
-        ? instruction->AsLoadClass()->GetLoadedClassRTI()
-        : instruction->GetReferenceTypeInfo();
+        && (instruction->GetType() == DataType::Type::kReference ||
+            instruction->IsInstanceOf() ||
+            instruction->IsCheckCast())) {
+      ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference)
+          ? instruction->IsLoadClass()
+              ? instruction->AsLoadClass()->GetLoadedClassRTI()
+              : instruction->GetReferenceTypeInfo()
+          : instruction->IsInstanceOf()
+              ? instruction->AsInstanceOf()->GetTargetClassRTI()
+              : instruction->AsCheckCast()->GetTargetClassRTI();
       ScopedObjectAccess soa(Thread::Current());
       if (info.IsValid()) {
         StartAttributeStream("klass")
             << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get());
-        StartAttributeStream("can_be_null")
-            << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+        if (instruction->GetType() == DataType::Type::kReference) {
+          StartAttributeStream("can_be_null")
+              << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+        }
         StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
-      } else if (instruction->IsLoadClass()) {
+      } else if (instruction->IsLoadClass() ||
+                 instruction->IsInstanceOf() ||
+                 instruction->IsCheckCast()) {
         StartAttributeStream("klass") << "unresolved";
       } else {
         // The NullConstant may be added to the graph during other passes that happen between
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 64a1ecc..0205c6a 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1811,29 +1811,6 @@
   }
 }
 
-static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  if (cls == nullptr) {
-    return TypeCheckKind::kUnresolvedCheck;
-  } else if (cls->IsInterface()) {
-    return TypeCheckKind::kInterfaceCheck;
-  } else if (cls->IsArrayClass()) {
-    if (cls->GetComponentType()->IsObjectClass()) {
-      return TypeCheckKind::kArrayObjectCheck;
-    } else if (cls->CannotBeAssignedFromOtherTypes()) {
-      return TypeCheckKind::kExactCheck;
-    } else {
-      return TypeCheckKind::kArrayCheck;
-    }
-  } else if (cls->IsFinal()) {
-    return TypeCheckKind::kExactCheck;
-  } else if (cls->IsAbstract()) {
-    return TypeCheckKind::kAbstractClassCheck;
-  } else {
-    return TypeCheckKind::kClassHierarchyCheck;
-  }
-}
-
 void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) {
   HLoadString* load_string =
       new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc);
@@ -1848,22 +1825,8 @@
 HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
-  Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
-  Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass(
-      soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_));
-
-  bool needs_access_check = true;
-  if (klass != nullptr) {
-    if (klass->IsPublic()) {
-      needs_access_check = false;
-    } else {
-      ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
-      if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) {
-        needs_access_check = false;
-      }
-    }
-  }
-
+  Handle<mirror::Class> klass = ResolveClass(soa, type_index);
+  bool needs_access_check = LoadClassNeedsAccessCheck(klass);
   return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
 }
 
@@ -1908,25 +1871,83 @@
   return load_class;
 }
 
+Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa,
+                                                        dex::TypeIndex type_index) {
+  Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
+  ObjPtr<mirror::Class> klass = compiler_driver_->ResolveClass(
+      soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_);
+  // TODO: Avoid creating excessive handles if the method references the same class repeatedly.
+  // (Use a map on the local_allocator_.)
+  return handles_->NewHandle(klass);
+}
+
+bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) {
+  if (klass == nullptr) {
+    return true;
+  } else if (klass->IsPublic()) {
+    return false;
+  } else {
+    ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
+    return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get());
+  }
+}
+
 void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
                                          uint8_t destination,
                                          uint8_t reference,
                                          dex::TypeIndex type_index,
                                          uint32_t dex_pc) {
   HInstruction* object = LoadLocal(reference, DataType::Type::kReference);
-  HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
 
   ScopedObjectAccess soa(Thread::Current());
-  TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass());
+  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+  Handle<mirror::Class> klass = ResolveClass(soa, type_index);
+  bool needs_access_check = LoadClassNeedsAccessCheck(klass);
+  TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind(
+      klass.Get(), code_generator_, compiler_driver_, needs_access_check);
+
+  HInstruction* class_or_null = nullptr;
+  HIntConstant* bitstring_path_to_root = nullptr;
+  HIntConstant* bitstring_mask = nullptr;
+  if (check_kind == TypeCheckKind::kBitstringCheck) {
+    // TODO: Allow using the bitstring check also if we need an access check.
+    DCHECK(!needs_access_check);
+    class_or_null = graph_->GetNullConstant(dex_pc);
+    MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+    uint32_t path_to_root =
+        SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get());
+    uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get());
+    bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc);
+    bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc);
+  } else {
+    class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
+  }
+  DCHECK(class_or_null != nullptr);
+
   if (instruction.Opcode() == Instruction::INSTANCE_OF) {
-    AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc));
+    AppendInstruction(new (allocator_) HInstanceOf(object,
+                                                   class_or_null,
+                                                   check_kind,
+                                                   klass,
+                                                   dex_pc,
+                                                   allocator_,
+                                                   bitstring_path_to_root,
+                                                   bitstring_mask));
     UpdateLocal(destination, current_block_->GetLastInstruction());
   } else {
     DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
     // We emit a CheckCast followed by a BoundType. CheckCast is a statement
     // which may throw. If it succeeds BoundType sets the new type of `object`
     // for all subsequent uses.
-    AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc));
+    AppendInstruction(
+        new (allocator_) HCheckCast(object,
+                                    class_or_null,
+                                    check_kind,
+                                    klass,
+                                    dex_pc,
+                                    allocator_,
+                                    bitstring_path_to_root,
+                                    bitstring_mask));
     AppendInstruction(new (allocator_) HBoundType(object, dex_pc));
     UpdateLocal(reference, current_block_->GetLastInstruction());
   }
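So for a bitstring check the builder emits no HLoadClass at all: the object's class is compared against two compile-time constants. Purely for illustration, assuming a hypothetical encoding with 4 bits per hierarchy level packed root-first into the low bits (ART's real BitString layout differs), a class two levels below the root occupying slots 3 and 5 would get:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Hypothetical 4-bits-per-level encoding, root first in the low bits.
    uint32_t EncodePathToRoot(const uint32_t* slots, size_t depth) {
      uint32_t path = 0u;
      for (size_t i = 0; i < depth; ++i) {
        path |= slots[i] << (4u * i);
      }
      return path;
    }

    int main() {
      const uint32_t slots[] = {3u, 5u};  // assumed slots along the superclass chain
      const uint32_t path_to_root = EncodePathToRoot(slots, 2);  // 0x53
      const uint32_t mask = (1u << (4u * 2)) - 1u;               // 0xff, covers both levels
      std::printf("path_to_root=0x%x mask=0x%x\n", path_to_root, mask);
      return 0;
    }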
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 4428c53..f788292 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -39,6 +39,7 @@
 class HBasicBlockBuilder;
 class Instruction;
 class OptimizingCompilerStats;
+class ScopedObjectAccess;
 class SsaBuilder;
 class VariableSizedHandleScope;
 
@@ -232,6 +233,12 @@
                              bool needs_access_check)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Returns the outer-most compiling method's class.
   ObjPtr<mirror::Class> GetOutermostCompilingClass() const;
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index a42a85d..2538fa3 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -576,7 +576,9 @@
 
 // Returns whether doing a type test between the class of `object` against `klass` has
 // a statically known outcome. The result of the test is stored in `outcome`.
-static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) {
+static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti,
+                                     HInstruction* object,
+                                     /*out*/bool* outcome) {
   DCHECK(!object->IsNullConstant()) << "Null constants should be special cased";
   ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo();
   ScopedObjectAccess soa(Thread::Current());
@@ -586,7 +588,6 @@
     return false;
   }
 
-  ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI();
   if (!class_rti.IsValid()) {
     // Happens when the loaded class is unresolved.
     return false;
@@ -611,8 +612,8 @@
 
 void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
   HInstruction* object = check_cast->InputAt(0);
-  HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
-  if (load_class->NeedsAccessCheck()) {
+  if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck &&
+      check_cast->GetTargetClass()->NeedsAccessCheck()) {
     // If we need to perform an access check we cannot remove the instruction.
     return;
   }
@@ -630,15 +631,18 @@
   // Note: The `outcome` is initialized to please valgrind - the compiler can reorder
   // the return value check with the `outcome` check, b/27651442 .
   bool outcome = false;
-  if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+  if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) {
     if (outcome) {
       check_cast->GetBlock()->RemoveInstruction(check_cast);
       MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast);
-      if (!load_class->HasUses()) {
-        // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
-        // However, here we know that it cannot because the checkcast was successfull, hence
-        // the class was already loaded.
-        load_class->GetBlock()->RemoveInstruction(load_class);
+      if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) {
+        HLoadClass* load_class = check_cast->GetTargetClass();
+        if (!load_class->HasUses()) {
+          // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+          // However, here we know that it cannot because the checkcast was successful, hence
+          // the class was already loaded.
+          load_class->GetBlock()->RemoveInstruction(load_class);
+        }
       }
     } else {
       // Don't do anything for exceptional cases for now. Ideally we should remove
@@ -649,8 +653,8 @@
 
 void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
   HInstruction* object = instruction->InputAt(0);
-  HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass();
-  if (load_class->NeedsAccessCheck()) {
+  if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck &&
+      instruction->GetTargetClass()->NeedsAccessCheck()) {
     // If we need to perform an access check we cannot remove the instruction.
     return;
   }
@@ -673,7 +677,7 @@
   // Note: The `outcome` is initialized to please valgrind - the compiler can reorder
   // the return value check with the `outcome` check, b/27651442 .
   bool outcome = false;
-  if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+  if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) {
     MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf);
     if (outcome && can_be_null) {
       // Type test will succeed, we just need a null test.
@@ -686,11 +690,14 @@
     }
     RecordSimplification();
     instruction->GetBlock()->RemoveInstruction(instruction);
-    if (outcome && !load_class->HasUses()) {
-      // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
-      // However, here we know that it cannot because the instanceof check was successfull, hence
-      // the class was already loaded.
-      load_class->GetBlock()->RemoveInstruction(load_class);
+    if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) {
+      HLoadClass* load_class = instruction->GetTargetClass();
+      if (!load_class->HasUses()) {
+        // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+        // However, here we know that it cannot because the instanceof check was successful, hence
+        // the class was already loaded.
+        load_class->GetBlock()->RemoveInstruction(load_class);
+      }
     }
   }
 }
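With TypeCheckHasKnownOutcome now taking a ReferenceTypeInfo instead of an HLoadClass, the same folding applies to bitstring checks, which have no HLoadClass input to consult. A self-contained sketch of the folding rule under a toy subtype model (ART's RTI carries class handles and an exactness bit, modeled loosely here):

    // Toy class hierarchy: subtype iff reachable via the super chain.
    struct Klass { const Klass* super; };

    static bool IsSupertypeOf(const Klass* a, const Klass* b) {
      for (const Klass* k = b; k != nullptr; k = k->super) {
        if (k == a) return true;
      }
      return false;
    }

    struct Rti { const Klass* klass; bool valid; bool exact; };

    // Shape of the decision: returns true iff the outcome is statically known.
    static bool HasKnownOutcome(Rti class_rti, Rti obj_rti, bool* outcome) {
      if (!class_rti.valid || !obj_rti.valid) return false;
      if (IsSupertypeOf(class_rti.klass, obj_rti.klass)) { *outcome = true; return true; }
      if (obj_rti.exact) { *outcome = false; return true; }  // exact and not a subtype
      return false;
    }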
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index ca1b451..2f8e33f 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2011,6 +2011,14 @@
   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
 }
 
+void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
   CreateFPFPToFPCallLocations(allocator_, invoke);
 }
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 99b8b5d..830d040 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2811,6 +2811,14 @@
   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
 }
 
+void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
+}
+
 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
   CreateFPFPToFPCallLocations(allocator_, invoke);
 }
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 113c9de..cafa522 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2835,6 +2835,15 @@
   GenFPFPToFPCall(invoke, codegen_, kQuickAtan2);
 }
 
+// static double java.lang.Math.pow(double a, double b)
+void IntrinsicLocationsBuilderMIPS::VisitMathPow(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathPow(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
 // static double java.lang.Math.cbrt(double a)
 void IntrinsicLocationsBuilderMIPS::VisitMathCbrt(HInvoke* invoke) {
   CreateFPToFPCallLocations(allocator_, invoke);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 521bad2..89f1818 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -2416,6 +2416,15 @@
   GenFPFPToFPCall(invoke, codegen_, kQuickAtan2);
 }
 
+// static double java.lang.Math.pow(double a, double b)
+void IntrinsicLocationsBuilderMIPS64::VisitMathPow(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathPow(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
 // static double java.lang.Math.cbrt(double a)
 void IntrinsicLocationsBuilderMIPS64::VisitMathCbrt(HInvoke* invoke) {
   CreateFPToFPCallLocations(allocator_, invoke);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index baa410b..46b7f3f 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1105,6 +1105,14 @@
   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
 }
 
+void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
   CreateFPFPToFPCallLocations(allocator_, invoke);
 }
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 6dd8b8e..6483b7c 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -897,6 +897,14 @@
   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
 }
 
+void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
 void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
   CreateFPFPToFPCallLocations(allocator_, invoke);
 }
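All six backends now recognize Math.pow and lower it through the same FP-FP-to-FP call pattern as atan2/hypot, i.e. a call into the kQuickPow runtime entrypoint rather than inline code. Conceptually the entrypoint just forwards to libm; the symbol name below is a stand-in, not ART's actual entrypoint:

    #include <cmath>

    // Stand-in for the kQuickPow entrypoint the generated code calls.
    extern "C" double QuickPowModel(double x, double y) {
      return std::pow(x, y);
    }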
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 727431a..5587f87 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -865,6 +865,15 @@
   graph->SetHasLoops(true);
 }
 
+void HLoopInformation::PopulateInnerLoopUpwards(HLoopInformation* inner_loop) {
+  DCHECK(inner_loop->GetPreHeader()->GetLoopInformation() == this);
+  blocks_.Union(&inner_loop->blocks_);
+  HLoopInformation* outer_loop = GetPreHeader()->GetLoopInformation();
+  if (outer_loop != nullptr) {
+    outer_loop->PopulateInnerLoopUpwards(this);
+  }
+}
+
 HBasicBlock* HLoopInformation::GetPreHeader() const {
   HBasicBlock* block = header_->GetPredecessors()[0];
   DCHECK(irreducible_ || (block == header_->GetDominator()));
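PopulateInnerLoopUpwards folds an inner loop's (possibly enlarged) block set into every enclosing loop by walking pre-headers outward. A minimal model, with explicit parent pointers standing in for GetPreHeader()->GetLoopInformation():

    #include <set>

    struct LoopInfo {
      std::set<int> blocks;
      LoopInfo* outer = nullptr;  // stand-in for the pre-header's loop info

      void PopulateInnerLoopUpwards(LoopInfo* inner) {
        blocks.insert(inner->blocks.begin(), inner->blocks.end());  // blocks_.Union(...)
        if (outer != nullptr) {
          outer->PopulateInnerLoopUpwards(this);  // propagate the enlarged set outward
        }
      }
    };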
@@ -3096,6 +3105,8 @@
       return os << "array_object_check";
     case TypeCheckKind::kArrayCheck:
       return os << "array_check";
+    case TypeCheckKind::kBitstringCheck:
+      return os << "bitstring_check";
     default:
       LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs);
       UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 2047954..b0657d6 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -826,6 +826,10 @@
   // Finds blocks that are part of this loop.
   void Populate();
 
+  // Updates the block population of this loop and, recursively, of all its outer
+  // loops after the population of the inner loop is updated.
+  void PopulateInnerLoopUpwards(HLoopInformation* inner_loop);
+
   // Returns whether this loop information contains `block`.
   // Note that this loop information *must* be populated before entering this function.
   bool Contains(const HBasicBlock& block) const;
@@ -856,6 +860,12 @@
 
   bool HasExitEdge() const;
 
+  // Resets back edge and blocks-in-loop data.
+  void ResetBasicBlockData() {
+    back_edges_.clear();
+    ClearAllBlocks();
+  }
+
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
@@ -998,6 +1008,18 @@
     loop_information_->AddBackEdge(back_edge);
   }
 
+  // Registers a back edge; if the block was not a loop header before the call, associates a
+  // newly created loop info with it.
+  //
+  // Used in SuperblockCloner to preserve the LoopInformation object instead of resetting
+  // loop info for all blocks during back edge recalculation.
+  void AddBackEdgeWhileUpdating(HBasicBlock* back_edge) {
+    if (loop_information_ == nullptr || loop_information_->GetHeader() != this) {
+      loop_information_ = new (graph_->GetAllocator()) HLoopInformation(this, graph_);
+    }
+    loop_information_->AddBackEdge(back_edge);
+  }
+
   HGraph* GetGraph() const { return graph_; }
   void SetGraph(HGraph* graph) { graph_ = graph; }
 
@@ -5929,8 +5951,7 @@
         special_input_(HUserRecord<HInstruction*>(current_method)),
         type_index_(type_index),
         dex_file_(dex_file),
-        klass_(klass),
-        loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
+        klass_(klass) {
     // Referrers class should not need access check. We never inline unverified
     // methods so we can't possibly end up in this situation.
     DCHECK(!is_referrers_class || !needs_access_check);
@@ -5940,6 +5961,7 @@
     SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
     SetPackedFlag<kFlagIsInBootImage>(false);
     SetPackedFlag<kFlagGenerateClInitCheck>(false);
+    SetPackedFlag<kFlagValidLoadedClassRTI>(false);
   }
 
   bool IsClonable() const OVERRIDE { return true; }
@@ -5988,13 +6010,18 @@
   }
 
   ReferenceTypeInfo GetLoadedClassRTI() {
-    return loaded_class_rti_;
+    if (GetPackedFlag<kFlagValidLoadedClassRTI>()) {
+      // Note: The is_exact flag from the return value should not be used.
+      return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true);
+    } else {
+      return ReferenceTypeInfo::CreateInvalid();
+    }
   }
 
-  void SetLoadedClassRTI(ReferenceTypeInfo rti) {
-    // Make sure we only set exact types (the loaded class should never be merged).
-    DCHECK(rti.IsExact());
-    loaded_class_rti_ = rti;
+  // Loaded class RTI is marked as valid by RTP if the klass_ is admissible.
+  void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(klass_ != nullptr);
+    SetPackedFlag<kFlagValidLoadedClassRTI>(true);
   }
 
   dex::TypeIndex GetTypeIndex() const { return type_index_; }
@@ -6047,7 +6074,8 @@
   static constexpr size_t kFieldLoadKind           = kFlagGenerateClInitCheck + 1;
   static constexpr size_t kFieldLoadKindSize =
       MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
-  static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize;
+  static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize;
+  static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1;
   static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields.");
   using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
 
@@ -6075,8 +6103,6 @@
   const DexFile& dex_file_;
 
   Handle<mirror::Class> klass_;
-
-  ReferenceTypeInfo loaded_class_rti_;
 };
 std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs);
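The change above trades a stored ReferenceTypeInfo member for a single packed validity bit, reconstructing the RTI from klass_ on demand; HTypeCheckInstruction below applies the same pattern. A schematic of the idea (types are stand-ins for ART's):

    #include <cstdint>

    class LoadClassModel {
     public:
      // RTP sets the bit once klass_ is known to be admissible.
      void SetValidLoadedClassRTI() { flags_ |= kValidRti; }

      // The RTI is rebuilt from klass_ instead of being cached as an object.
      bool HasValidLoadedClassRTI() const { return (flags_ & kValidRti) != 0u; }

     private:
      static constexpr uint32_t kValidRti = 1u << 0;
      uint32_t flags_ = 0u;
      const void* klass_ = nullptr;  // stand-in for Handle<mirror::Class>
    };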
 
@@ -6604,49 +6630,143 @@
   kInterfaceCheck,        // No optimization yet when checking against an interface.
   kArrayObjectCheck,      // Can just check if the array is not primitive.
   kArrayCheck,            // No optimization yet when checking against a generic array.
+  kBitstringCheck,        // Compare the type check bitstring.
   kLast = kArrayCheck
 };
 
 std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs);
 
-class HInstanceOf FINAL : public HExpression<2> {
+// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an
+// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.)
+class HTypeCheckInstruction : public HVariableInputSizeInstruction {
  public:
-  HInstanceOf(HInstruction* object,
-              HLoadClass* target_class,
-              TypeCheckKind check_kind,
-              uint32_t dex_pc)
-      : HExpression(DataType::Type::kBool,
-                    SideEffectsForArchRuntimeCalls(check_kind),
-                    dex_pc) {
+  HTypeCheckInstruction(HInstruction* object,
+                        HInstruction* target_class_or_null,
+                        TypeCheckKind check_kind,
+                        Handle<mirror::Class> klass,
+                        uint32_t dex_pc,
+                        ArenaAllocator* allocator,
+                        HIntConstant* bitstring_path_to_root,
+                        HIntConstant* bitstring_mask,
+                        SideEffects side_effects)
+      : HVariableInputSizeInstruction(
+          side_effects,
+          dex_pc,
+          allocator,
+          /* number_of_inputs */ check_kind == TypeCheckKind::kBitstringCheck ? 4u : 2u,
+          kArenaAllocTypeCheckInputs),
+        klass_(klass) {
     SetPackedField<TypeCheckKindField>(check_kind);
     SetPackedFlag<kFlagMustDoNullCheck>(true);
+    SetPackedFlag<kFlagValidTargetClassRTI>(false);
     SetRawInputAt(0, object);
-    SetRawInputAt(1, target_class);
+    SetRawInputAt(1, target_class_or_null);
+    DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr);
+    DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr);
+    if (check_kind == TypeCheckKind::kBitstringCheck) {
+      DCHECK(target_class_or_null->IsNullConstant());
+      SetRawInputAt(2, bitstring_path_to_root);
+      SetRawInputAt(3, bitstring_mask);
+    } else {
+      DCHECK(target_class_or_null->IsLoadClass());
+    }
   }
 
   HLoadClass* GetTargetClass() const {
+    DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
     HInstruction* load_class = InputAt(1);
     DCHECK(load_class->IsLoadClass());
     return load_class->AsLoadClass();
   }
 
+  uint32_t GetBitstringPathToRoot() const {
+    DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
+    HInstruction* path_to_root = InputAt(2);
+    DCHECK(path_to_root->IsIntConstant());
+    return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue());
+  }
+
+  uint32_t GetBitstringMask() const {
+    DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
+    HInstruction* mask = InputAt(3);
+    DCHECK(mask->IsIntConstant());
+    return static_cast<uint32_t>(mask->AsIntConstant()->GetValue());
+  }
+
   bool IsClonable() const OVERRIDE { return true; }
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
-    return true;
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName();
+    return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields();
   }
 
-  bool NeedsEnvironment() const OVERRIDE {
-    return CanCallRuntime(GetTypeCheckKind());
-  }
-
-  // Used only in code generation.
   bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
   void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
   TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
   bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
 
+  ReferenceTypeInfo GetTargetClassRTI() {
+    if (GetPackedFlag<kFlagValidTargetClassRTI>()) {
+      // Note: The is_exact flag from the return value should not be used.
+      return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true);
+    } else {
+      return ReferenceTypeInfo::CreateInvalid();
+    }
+  }
+
+  // Target class RTI is marked as valid by RTP if the klass_ is admissible.
+  void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(klass_ != nullptr);
+    SetPackedFlag<kFlagValidTargetClassRTI>(true);
+  }
+
+  Handle<mirror::Class> GetClass() const {
+    return klass_;
+  }
+
+ protected:
+  DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction);
+
+ private:
+  static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldTypeCheckKindSize =
+      MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
+  static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
+  static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1;
+  static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1;
+  static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
+
+  Handle<mirror::Class> klass_;
+};
+
+class HInstanceOf FINAL : public HTypeCheckInstruction {
+ public:
+  HInstanceOf(HInstruction* object,
+              HInstruction* target_class_or_null,
+              TypeCheckKind check_kind,
+              Handle<mirror::Class> klass,
+              uint32_t dex_pc,
+              ArenaAllocator* allocator,
+              HIntConstant* bitstring_path_to_root,
+              HIntConstant* bitstring_mask)
+      : HTypeCheckInstruction(object,
+                              target_class_or_null,
+                              check_kind,
+                              klass,
+                              dex_pc,
+                              allocator,
+                              bitstring_path_to_root,
+                              bitstring_mask,
+                              SideEffectsForArchRuntimeCalls(check_kind)) {}
+
+  DataType::Type GetType() const OVERRIDE { return DataType::Type::kBool; }
+
+  bool NeedsEnvironment() const OVERRIDE {
+    return CanCallRuntime(GetTypeCheckKind());
+  }
+
   static bool CanCallRuntime(TypeCheckKind check_kind) {
     // Mips currently does runtime calls for any other checks.
     return check_kind != TypeCheckKind::kExactCheck;
@@ -6660,15 +6780,6 @@
 
  protected:
   DEFAULT_COPY_CONSTRUCTOR(InstanceOf);
-
- private:
-  static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits;
-  static constexpr size_t kFieldTypeCheckKindSize =
-      MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
-  static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
-  static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1;
-  static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
-  using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
 };
 
 class HBoundType FINAL : public HExpression<1> {
@@ -6718,31 +6829,25 @@
   ReferenceTypeInfo upper_bound_;
 };
 
-class HCheckCast FINAL : public HTemplateInstruction<2> {
+class HCheckCast FINAL : public HTypeCheckInstruction {
  public:
   HCheckCast(HInstruction* object,
-             HLoadClass* target_class,
+             HInstruction* target_class_or_null,
              TypeCheckKind check_kind,
-             uint32_t dex_pc)
-      : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
-    SetPackedField<TypeCheckKindField>(check_kind);
-    SetPackedFlag<kFlagMustDoNullCheck>(true);
-    SetRawInputAt(0, object);
-    SetRawInputAt(1, target_class);
-  }
-
-  HLoadClass* GetTargetClass() const {
-    HInstruction* load_class = InputAt(1);
-    DCHECK(load_class->IsLoadClass());
-    return load_class->AsLoadClass();
-  }
-
-  bool IsClonable() const OVERRIDE { return true; }
-  bool CanBeMoved() const OVERRIDE { return true; }
-
-  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
-    return true;
-  }
+             Handle<mirror::Class> klass,
+             uint32_t dex_pc,
+             ArenaAllocator* allocator,
+             HIntConstant* bitstring_path_to_root,
+             HIntConstant* bitstring_mask)
+      : HTypeCheckInstruction(object,
+                              target_class_or_null,
+                              check_kind,
+                              klass,
+                              dex_pc,
+                              allocator,
+                              bitstring_path_to_root,
+                              bitstring_mask,
+                              SideEffects::CanTriggerGC()) {}
 
   bool NeedsEnvironment() const OVERRIDE {
     // Instruction may throw a CheckCastError.
@@ -6751,24 +6856,10 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
-  bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
-  void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
-  TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
-  bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
-
   DECLARE_INSTRUCTION(CheckCast);
 
  protected:
   DEFAULT_COPY_CONSTRUCTOR(CheckCast);
-
- private:
-  static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
-  static constexpr size_t kFieldTypeCheckKindSize =
-      MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
-  static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
-  static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1;
-  static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
-  using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
 };
 
 /**
@@ -7309,19 +7400,19 @@
 class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor {
  public:
   explicit CloneAndReplaceInstructionVisitor(HGraph* graph)
-      : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count(0) {}
+      : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {}
 
   void VisitInstruction(HInstruction* instruction) OVERRIDE {
     if (instruction->IsClonable()) {
       ReplaceInstrOrPhiByClone(instruction);
-      instr_replaced_by_clones_count++;
+      instr_replaced_by_clones_count_++;
     }
   }
 
-  size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count; }
+  size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count_; }
 
  private:
-  size_t instr_replaced_by_clones_count;
+  size_t instr_replaced_by_clones_count_;
 
   DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor);
 };
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index a3b1f0c..c35c490 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -382,6 +382,8 @@
                             PassObserver* pass_observer,
                             VariableSizedHandleScope* handles) const;
 
+  void GenerateJitDebugInfo(debug::MethodDebugInfo method_debug_info);
+
   std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
 
   std::unique_ptr<std::ostream> visualizer_output_;
@@ -1230,7 +1232,7 @@
       const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
       const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
       debug::MethodDebugInfo info = {};
-      DCHECK(info.trampoline_name.empty());
+      DCHECK(info.custom_name.empty());
       info.dex_file = dex_file;
       info.class_def_index = class_def_idx;
       info.dex_method_index = method_idx;
@@ -1246,14 +1248,7 @@
       info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
       info.code_info = nullptr;
       info.cfi = jni_compiled_method.GetCfi();
-      // If both flags are passed, generate full debug info.
-      const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
-      std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
-          GetCompilerDriver()->GetInstructionSet(),
-          GetCompilerDriver()->GetInstructionSetFeatures(),
-          mini_debug_info,
-          info);
-      CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
+      GenerateJitDebugInfo(info);
     }
 
     Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
@@ -1361,7 +1356,7 @@
     const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
     const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
     debug::MethodDebugInfo info = {};
-    DCHECK(info.trampoline_name.empty());
+    DCHECK(info.custom_name.empty());
     info.dex_file = dex_file;
     info.class_def_index = class_def_idx;
     info.dex_method_index = method_idx;
@@ -1377,14 +1372,7 @@
     info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
     info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
     info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
-    // If both flags are passed, generate full debug info.
-    const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
-    std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
-        GetCompilerDriver()->GetInstructionSet(),
-        GetCompilerDriver()->GetInstructionSetFeatures(),
-        mini_debug_info,
-        info);
-    CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
+    GenerateJitDebugInfo(info);
   }
 
   Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
@@ -1408,4 +1396,22 @@
   return true;
 }
 
+void OptimizingCompiler::GenerateJitDebugInfo(debug::MethodDebugInfo info) {
+  const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
+  DCHECK(compiler_options.GenerateAnyDebugInfo());
+
+  // If both flags are passed, generate full debug info.
+  const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
+
+  // Create entry for the single method that we just compiled.
+  std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
+      GetCompilerDriver()->GetInstructionSet(),
+      GetCompilerDriver()->GetInstructionSetFeatures(),
+      mini_debug_info,
+      ArrayRef<const debug::MethodDebugInfo>(&info, 1));
+  MutexLock mu(Thread::Current(), g_jit_debug_mutex);
+  JITCodeEntry* entry = CreateJITCodeEntry(elf_file);
+  IncrementJITCodeEntryRefcount(entry, info.code_address);
+}
+
 }  // namespace art
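The refactored GenerateJitDebugInfo above centralizes the mini-vs-full decision that was previously duplicated at both call sites. A minimal standalone sketch of just that flag logic (plain C++, not ART code; GenerateAnyDebugInfo() is modeled as the OR of the two flags):

    #include <cassert>

    // Mirrors `mini_debug_info = !GetGenerateDebugInfo()` under the
    // DCHECK(GenerateAnyDebugInfo()) precondition seen above.
    bool UseMiniDebugInfo(bool generate_debug_info, bool generate_mini_debug_info) {
      assert(generate_debug_info || generate_mini_debug_info);  // GenerateAnyDebugInfo().
      // If both flags are passed, full debug info wins.
      return !generate_debug_info;
    }

    int main() {
      assert(!UseMiniDebugInfo(/*generate_debug_info=*/ true, /*generate_mini_debug_info=*/ false));
      assert(UseMiniDebugInfo(/*generate_debug_info=*/ false, /*generate_mini_debug_info=*/ true));
      assert(!UseMiniDebugInfo(/*generate_debug_info=*/ true, /*generate_mini_debug_info=*/ true));
      return 0;
    }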
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 0023265..a6a2f46 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -99,6 +99,7 @@
   kConstructorFenceRemovedLSE,
   kConstructorFenceRemovedPFRA,
   kConstructorFenceRemovedCFRE,
+  kBitstringTypeCheck,
   kJitOutOfMemoryForCommit,
   kLastStat
 };
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index f843c00..5973339 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -34,6 +34,20 @@
   }
 }
 
+void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) {
+  // Record only those bitstring type checks that make it to the codegen stage.
+  if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+    MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck);
+  }
+}
+
+void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) {
+  // Record only those bitstring type checks that make it to the codegen stage.
+  if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+    MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck);
+  }
+}
+
 void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) {
   check->ReplaceWith(check->InputAt(0));
 }
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 2c64f01..f6e4d3e 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -40,6 +40,8 @@
       "prepare_for_register_allocation";
 
  private:
+  void VisitCheckCast(HCheckCast* check_cast) OVERRIDE;
+  void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE;
   void VisitNullCheck(HNullCheck* check) OVERRIDE;
   void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE;
   void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 8bb124e..178d7fd 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -87,6 +87,7 @@
   void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE;
   void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
+  void VisitInstanceOf(HInstanceOf* instr) OVERRIDE;
   void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE;
   void VisitLoadString(HLoadString* instr) OVERRIDE;
   void VisitLoadException(HLoadException* instr) OVERRIDE;
@@ -171,6 +172,12 @@
                 << "NullCheck " << instr->GetReferenceTypeInfo()
                 << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo();
           }
+        } else if (instr->IsInstanceOf()) {
+          HInstanceOf* iof = instr->AsInstanceOf();
+          DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact());
+        } else if (instr->IsCheckCast()) {
+          HCheckCast* check = instr->AsCheckCast();
+          DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact());
         }
       }
     }
@@ -499,8 +506,7 @@
     return;
   }
 
-  HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
-  ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+  ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI();
   if (!class_rti.IsValid()) {
     // We have loaded an unresolved class. Don't bother bounding the type.
     return;
@@ -644,15 +650,20 @@
 
 void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) {
   ScopedObjectAccess soa(Thread::Current());
-  Handle<mirror::Class> resolved_class = instr->GetClass();
-  if (IsAdmissible(resolved_class.Get())) {
-    instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(
-        resolved_class, /* is_exact */ true));
+  if (IsAdmissible(instr->GetClass().Get())) {
+    instr->SetValidLoadedClassRTI();
   }
   instr->SetReferenceTypeInfo(
       ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true));
 }
 
+void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) {
+  ScopedObjectAccess soa(Thread::Current());
+  if (IsAdmissible(instr->GetClass().Get())) {
+    instr->SetValidTargetClassRTI();
+  }
+}
+
 void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) {
   instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo());
 }
@@ -720,8 +731,6 @@
 }
 
 void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
-  HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
-  ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
   HBoundType* bound_type = check_cast->GetNext()->AsBoundType();
   if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) {
     // The next instruction is not an uninitialized BoundType. This must be
@@ -730,12 +739,14 @@
   }
   DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0));
 
-  if (class_rti.IsValid()) {
+  ScopedObjectAccess soa(Thread::Current());
+  Handle<mirror::Class> klass = check_cast->GetClass();
+  if (IsAdmissible(klass.Get())) {
     DCHECK(is_first_run_);
-    ScopedObjectAccess soa(Thread::Current());
+    check_cast->SetValidTargetClassRTI();
     // This is the first run of RTP and class is resolved.
-    bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes();
-    bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact),
+    bool is_exact = klass->CannotBeAssignedFromOtherTypes();
+    bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact),
                               /* CheckCast succeeds for nulls. */ true);
   } else {
     // This is the first run of RTP and class is unresolved. Remove the binding.
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 1e49411..dffef17 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -236,6 +236,75 @@
   return load_kind;
 }
 
+static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass,
+                                            CodeGenerator* codegen,
+                                            CompilerDriver* compiler_driver)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(!klass->IsProxyClass());
+  DCHECK(!klass->IsArrayClass());
+
+  if (Runtime::Current()->UseJitCompilation()) {
+    // If we're JITting, try to assign a type check bitstring (fall through).
+  } else if (codegen->GetCompilerOptions().IsBootImage()) {
+    const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex());
+    if (!compiler_driver->IsImageClass(descriptor)) {
+      return false;
+    }
+    // If the target is a boot image class, try to assign a type check bitstring (fall through).
+    // (If --force-determinism, this was already done; repeating is OK and yields the same result.)
+  } else {
+    // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring
+    // already assigned in the boot image.
+    return false;
+  }
+
+  // Try to assign a type check bitstring.
+  MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+  if ((false) &&  // FIXME: Inliner does not respect compiler_driver->IsClassToCompile()
+                  // and we're hitting an unassigned bitstring in dex2oat_image_test. b/26687569
+      kIsDebugBuild &&
+      codegen->GetCompilerOptions().IsBootImage() &&
+      codegen->GetCompilerOptions().IsForceDeterminism()) {
+    SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass);
+    CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed)
+        << klass->PrettyDescriptor() << "/" << old_state
+        << " in " << codegen->GetGraph()->PrettyMethod();
+  }
+  SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass);
+  return state == SubtypeCheckInfo::kAssigned;
+}
+
+TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass,
+                                                CodeGenerator* codegen,
+                                                CompilerDriver* compiler_driver,
+                                                bool needs_access_check) {
+  if (klass == nullptr) {
+    return TypeCheckKind::kUnresolvedCheck;
+  } else if (klass->IsInterface()) {
+    return TypeCheckKind::kInterfaceCheck;
+  } else if (klass->IsArrayClass()) {
+    if (klass->GetComponentType()->IsObjectClass()) {
+      return TypeCheckKind::kArrayObjectCheck;
+    } else if (klass->CannotBeAssignedFromOtherTypes()) {
+      return TypeCheckKind::kExactCheck;
+    } else {
+      return TypeCheckKind::kArrayCheck;
+    }
+  } else if (klass->IsFinal()) {  // TODO: Consider using bitstring for final classes.
+    return TypeCheckKind::kExactCheck;
+  } else if (kUseBitstringTypeCheck &&
+             !needs_access_check &&
+             CanUseTypeCheckBitstring(klass, codegen, compiler_driver)) {
+    // TODO: We should not need the `!needs_access_check` check but getting rid of that
+    // requires rewriting some optimizations in instruction simplifier.
+    return TypeCheckKind::kBitstringCheck;
+  } else if (klass->IsAbstract()) {
+    return TypeCheckKind::kAbstractClassCheck;
+  } else {
+    return TypeCheckKind::kClassHierarchyCheck;
+  }
+}
+
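For context on what kBitstringCheck buys: the check compiles down to a single masked compare - each suitably assigned class carries a bitstring encoding its path from the root of the hierarchy, and a target class tests only the prefix it owns. A hedged, self-contained sketch with made-up encodings (the real layout lives in ART's SubtypeCheck/SubtypeCheckInfo and is not reproduced here):

    #include <cassert>
    #include <cstdint>

    // "obj instanceof Target" as a masked prefix compare.
    bool BitstringInstanceOf(uint32_t obj_bitstring,
                             uint32_t target_bitstring,
                             uint32_t target_mask) {
      return (obj_bitstring & target_mask) == target_bitstring;
    }

    int main() {
      // Hypothetical assignment: Level1 owns the low 4 bits (0x1); Level2
      // extends Level1 and owns the next 4 bits (0x2), giving 0x21.
      const uint32_t level1 = 0x01, level1_mask = 0x0F;
      const uint32_t level2 = 0x21, level2_mask = 0xFF;
      assert(BitstringInstanceOf(level2, level1, level1_mask));   // A Level2 is a Level1.
      assert(!BitstringInstanceOf(level1, level2, level2_mask));  // A Level1 is not a Level2.
      return 0;
    }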
 void HSharpening::ProcessLoadString(
     HLoadString* load_string,
     CodeGenerator* codegen,
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 6df7d6d..fa3e948 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -44,12 +44,10 @@
 
   static constexpr const char* kSharpeningPassName = "sharpening";
 
-  // Used by the builder.
-  static void ProcessLoadString(HLoadString* load_string,
-                                CodeGenerator* codegen,
-                                CompilerDriver* compiler_driver,
-                                const DexCompilationUnit& dex_compilation_unit,
-                                VariableSizedHandleScope* handles);
+  // Used by Sharpening and InstructionSimplifier.
+  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+                                          CodeGenerator* codegen,
+                                          CompilerDriver* compiler_driver);
 
   // Used by the builder and the inliner.
   static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class,
@@ -58,10 +56,19 @@
                                                    const DexCompilationUnit& dex_compilation_unit)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Used by Sharpening and InstructionSimplifier.
-  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
-                                          CodeGenerator* codegen,
-                                          CompilerDriver* compiler_driver);
+  // Used by the builder.
+  static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass,
+                                            CodeGenerator* codegen,
+                                            CompilerDriver* compiler_driver,
+                                            bool needs_access_check)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Used by the builder.
+  static void ProcessLoadString(HLoadString* load_string,
+                                CodeGenerator* codegen,
+                                CompilerDriver* compiler_driver,
+                                const DexCompilationUnit& dex_compilation_unit,
+                                VariableSizedHandleScope* handles);
 
  private:
   CodeGenerator* codegen_;
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
new file mode 100644
index 0000000..a7c23be
--- /dev/null
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -0,0 +1,704 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "superblock_cloner.h"
+
+#include "common_dominator.h"
+#include "graph_checker.h"
+
+#include <iostream>
+
+namespace art {
+
+using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
+using HInstructionMap = SuperblockCloner::HInstructionMap;
+using HBasicBlockSet = SuperblockCloner::HBasicBlockSet;
+using HEdgeSet = SuperblockCloner::HEdgeSet;
+
+void HEdge::Dump(std::ostream& stream) const {
+  stream << "(" << from_ << "->" << to_ << ")";
+}
+
+//
+// Static helper methods.
+//
+
+// Returns whether the instruction has any uses (regular or environmental) outside the region
+// defined by the basic block set.
+static bool IsUsedOutsideRegion(const HInstruction* instr, const HBasicBlockSet& bb_set) {
+  auto& uses = instr->GetUses();
+  for (auto use_node = uses.begin(), e = uses.end(); use_node != e; ++use_node) {
+    HInstruction* user = use_node->GetUser();
+    if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) {
+      return true;
+    }
+  }
+
+  auto& env_uses = instr->GetEnvUses();
+  for (auto use_node = env_uses.begin(), e = env_uses.end(); use_node != e; ++use_node) {
+    HInstruction* user = use_node->GetUser()->GetHolder();
+    if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Returns whether the phi's inputs are the same HInstruction.
+static bool ArePhiInputsTheSame(const HPhi* phi) {
+  HInstruction* first_input = phi->InputAt(0);
+  for (size_t i = 1, e = phi->InputCount(); i < e; i++) {
+    if (phi->InputAt(i) != first_input) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Returns a common ancestor of loop1 and loop2 in the loop tree or nullptr if it is the whole
+// graph.
+static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
+  if (loop1 == nullptr || loop2 == nullptr) {
+    return nullptr;
+  }
+
+  if (loop1->IsIn(*loop2)) {
+    return loop2;
+  } else if (loop2->IsIn(*loop1)) {
+    return loop1;
+  }
+  HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader());
+  return block->GetLoopInformation();
+}
+
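FindCommonLoop resolves containment via IsIn and otherwise falls back to the common dominator of the two headers. The containment step, sketched on a hypothetical parent-pointer Loop type (ART's HLoopInformation::IsIn is implemented differently, but likewise treats a loop as being in itself):

    #include <cassert>

    struct Loop {
      const Loop* parent = nullptr;
      // Returns whether this loop is nested in `other` (or is `other` itself).
      bool IsIn(const Loop& other) const {
        for (const Loop* l = this; l != nullptr; l = l->parent) {
          if (l == &other) return true;
        }
        return false;
      }
    };

    int main() {
      Loop outer, inner;
      inner.parent = &outer;
      assert(inner.IsIn(outer));   // The common loop of (inner, outer) is outer.
      assert(!outer.IsIn(inner));
      return 0;
    }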
+// Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph.
+static void OrderLoopsHeadersPredecessors(HGraph* graph) {
+  for (HBasicBlock* block : graph->GetPostOrder()) {
+    if (block->IsLoopHeader()) {
+      graph->OrderLoopHeaderPredecessors(block);
+    }
+  }
+}
+
+//
+// Helpers for CloneBasicBlock.
+//
+
+void SuperblockCloner::ReplaceInputsWithCopies(HInstruction* copy_instr) {
+  DCHECK(!copy_instr->IsPhi());
+  for (size_t i = 0, e = copy_instr->InputCount(); i < e; i++) {
+    // Copy instruction holds the same input as the original instruction holds.
+    HInstruction* orig_input = copy_instr->InputAt(i);
+    if (!IsInOrigBBSet(orig_input->GetBlock())) {
+      // Defined outside the subgraph.
+      continue;
+    }
+    HInstruction* copy_input = GetInstrCopy(orig_input);
+    // copy_instr will be registered as a user of copy_input after returning from this function:
+    // 'copy_block->AddInstruction(copy_instr)'.
+    copy_instr->SetRawInputAt(i, copy_input);
+  }
+}
+
+void SuperblockCloner::DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr,
+                                                         const HEnvironment* orig_env) {
+  if (orig_env->GetParent() != nullptr) {
+    DeepCloneEnvironmentWithRemapping(copy_instr, orig_env->GetParent());
+  }
+  HEnvironment* copy_env = new (arena_) HEnvironment(arena_, *orig_env, copy_instr);
+
+  for (size_t i = 0; i < orig_env->Size(); i++) {
+    HInstruction* env_input = orig_env->GetInstructionAt(i);
+    if (env_input != nullptr && IsInOrigBBSet(env_input->GetBlock())) {
+      env_input = GetInstrCopy(env_input);
+      DCHECK(env_input != nullptr && env_input->GetBlock() != nullptr);
+    }
+    copy_env->SetRawEnvAt(i, env_input);
+    if (env_input != nullptr) {
+      env_input->AddEnvUseAt(copy_env, i);
+    }
+  }
+  // InsertRawEnvironment assumes that the instruction already has an environment; that's why we
+  // use SetRawEnvironment in the 'else' case. As this function calls itself recursively with the
+  // same copy_instr, copy_instr may already have a partially copied chain of HEnvironments.
+  if (copy_instr->HasEnvironment()) {
+    copy_instr->InsertRawEnvironment(copy_env);
+  } else {
+    copy_instr->SetRawEnvironment(copy_env);
+  }
+}
+
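The recursion above copies the environment chain parent-first, so every copied HEnvironment can be linked underneath the copies produced by the earlier recursive calls. The same shape on a stripped-down hypothetical Env type (ownership via a vector only to keep the sketch self-contained; ART allocates from an arena):

    #include <memory>
    #include <vector>

    struct Env {
      std::vector<int> values;
      Env* parent = nullptr;
    };

    // Returns the innermost copy; parents are copied before children, so the
    // copied chain mirrors the original outer-to-inner linkage.
    Env* DeepCopy(const Env* orig, std::vector<std::unique_ptr<Env>>* storage) {
      Env* copy_parent = (orig->parent != nullptr) ? DeepCopy(orig->parent, storage) : nullptr;
      storage->push_back(std::make_unique<Env>());
      Env* copy = storage->back().get();
      copy->values = orig->values;
      copy->parent = copy_parent;
      return copy;
    }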
+//
+// Helpers for RemapEdgesSuccessors.
+//
+
+void SuperblockCloner::RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block,
+                                                       HBasicBlock* orig_succ) {
+  DCHECK(IsInOrigBBSet(orig_succ));
+  HBasicBlock* copy_succ = GetBlockCopy(orig_succ);
+
+  size_t this_index = orig_succ->GetPredecessorIndexOf(orig_block);
+  size_t phi_input_count = 0;
+  // This flag reflects whether the original successor has at least one phi that has already been
+  // processed in the loop. It is used in a DCHECK to validate that, in the end, all of the phis
+  // in the copy successor have the same number of inputs - the number of the copy successor's
+  // predecessors.
+  bool first_phi_met = false;
+  for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* orig_phi = it.Current()->AsPhi();
+    HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+    HInstruction* orig_phi_input = orig_phi->InputAt(this_index);
+    // Remove corresponding input for original phi.
+    orig_phi->RemoveInputAt(this_index);
+    // Copy phi doesn't yet have either orig_block as predecessor or the input that corresponds
+    // to orig_block, so add the input at the end of the list.
+    copy_phi->AddInput(orig_phi_input);
+    if (!first_phi_met) {
+      phi_input_count = copy_phi->InputCount();
+      first_phi_met = true;
+    } else {
+      DCHECK_EQ(phi_input_count, copy_phi->InputCount());
+    }
+  }
+  // orig_block will be put at the end of copy_succ's predecessors list; this corresponds to the
+  // position of the previously added phi inputs.
+  orig_block->ReplaceSuccessor(orig_succ, copy_succ);
+  DCHECK(!first_phi_met || copy_succ->GetPredecessors().size() == phi_input_count);
+}
+
+void SuperblockCloner::AddCopyInternalEdge(HBasicBlock* orig_block,
+                                           HBasicBlock* orig_succ) {
+  DCHECK(IsInOrigBBSet(orig_succ));
+  HBasicBlock* copy_block = GetBlockCopy(orig_block);
+  HBasicBlock* copy_succ = GetBlockCopy(orig_succ);
+  copy_block->AddSuccessor(copy_succ);
+
+  size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block);
+  for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* orig_phi = it.Current()->AsPhi();
+    HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+    HInstruction* orig_phi_input = orig_phi->InputAt(orig_index);
+    copy_phi->AddInput(orig_phi_input);
+  }
+}
+
+void SuperblockCloner::RemapCopyInternalEdge(HBasicBlock* orig_block,
+                                             HBasicBlock* orig_succ) {
+  DCHECK(IsInOrigBBSet(orig_succ));
+  HBasicBlock* copy_block = GetBlockCopy(orig_block);
+  copy_block->AddSuccessor(orig_succ);
+  DCHECK(copy_block->HasSuccessor(orig_succ));
+
+  size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block);
+  for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* orig_phi = it.Current()->AsPhi();
+    HInstruction* orig_phi_input = orig_phi->InputAt(orig_index);
+    orig_phi->AddInput(orig_phi_input);
+  }
+}
+
+//
+// Local versions of CF calculation/adjustment routines.
+//
+
+// TODO: merge with the original version in nodes.cc. The concern is that we don't want to affect
+// the performance of the base version by checking the local set.
+// TODO: this version works when updating the back edges info for natural loop-based local_set.
+// Check exactly which types of subgraphs can be analysed, or rename it to
+// FindBackEdgesInTheNaturalLoop.
+void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set) {
+  ArenaBitVector visited(arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks.
+  DCHECK_EQ(visited.GetHighestBitSet(), -1);
+
+  // Nodes that we're currently visiting, indexed by block id.
+  ArenaBitVector visiting(arena_, graph_->GetBlocks().size(), false, kArenaAllocGraphBuilder);
+  // Number of successors visited from a given node, indexed by block id.
+  ArenaVector<size_t> successors_visited(graph_->GetBlocks().size(),
+                                         0u,
+                                         arena_->Adapter(kArenaAllocGraphBuilder));
+  // Stack of nodes that we're currently visiting (same as marked in "visiting" above).
+  ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder));
+  constexpr size_t kDefaultWorklistSize = 8;
+  worklist.reserve(kDefaultWorklistSize);
+
+  visited.SetBit(entry_block->GetBlockId());
+  visiting.SetBit(entry_block->GetBlockId());
+  worklist.push_back(entry_block);
+
+  while (!worklist.empty()) {
+    HBasicBlock* current = worklist.back();
+    uint32_t current_id = current->GetBlockId();
+    if (successors_visited[current_id] == current->GetSuccessors().size()) {
+      visiting.ClearBit(current_id);
+      worklist.pop_back();
+    } else {
+      HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
+      uint32_t successor_id = successor->GetBlockId();
+      if (!local_set->IsBitSet(successor_id)) {
+        continue;
+      }
+
+      if (visiting.IsBitSet(successor_id)) {
+        DCHECK(ContainsElement(worklist, successor));
+        successor->AddBackEdgeWhileUpdating(current);
+      } else if (!visited.IsBitSet(successor_id)) {
+        visited.SetBit(successor_id);
+        visiting.SetBit(successor_id);
+        worklist.push_back(successor);
+      }
+    }
+  }
+}
+
+void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) {
+  // TODO: DCHECK that after the transformation the graph is connected.
+  HBasicBlock* block_entry = nullptr;
+
+  if (outer_loop_ == nullptr) {
+    for (auto block : graph_->GetBlocks()) {
+      if (block != nullptr) {
+        outer_loop_bb_set->SetBit(block->GetBlockId());
+        HLoopInformation* info = block->GetLoopInformation();
+        if (info != nullptr) {
+          info->ResetBasicBlockData();
+        }
+      }
+    }
+    block_entry = graph_->GetEntryBlock();
+  } else {
+    outer_loop_bb_set->Copy(&outer_loop_bb_set_);
+    block_entry = outer_loop_->GetHeader();
+
+    // Add newly created copy blocks.
+    for (auto entry : *bb_map_) {
+      outer_loop_bb_set->SetBit(entry.second->GetBlockId());
+    }
+
+    // Clear loop_info for the whole outer loop.
+    for (uint32_t idx : outer_loop_bb_set->Indexes()) {
+      HBasicBlock* block = GetBlockById(idx);
+      HLoopInformation* info = block->GetLoopInformation();
+      if (info != nullptr) {
+        info->ResetBasicBlockData();
+      }
+    }
+  }
+
+  FindBackEdgesLocal(block_entry, outer_loop_bb_set);
+
+  for (uint32_t idx : outer_loop_bb_set->Indexes()) {
+    HBasicBlock* block = GetBlockById(idx);
+    HLoopInformation* info = block->GetLoopInformation();
+    // Reset LoopInformation for regular blocks and old headers which are no longer loop headers.
+    if (info != nullptr &&
+        (info->GetHeader() != block || info->NumberOfBackEdges() == 0)) {
+      block->SetLoopInformation(nullptr);
+    }
+  }
+}
+
+// This is a modified version of HGraph::AnalyzeLoops.
+GraphAnalysisResult SuperblockCloner::AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set) {
+  // We iterate post order to ensure we visit inner loops before outer loops.
+  // `PopulateRecursive` needs this guarantee to know whether a natural loop
+  // contains an irreducible loop.
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
+    if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) {
+      continue;
+    }
+    if (block->IsLoopHeader()) {
+      if (block->IsCatchBlock()) {
+        // TODO: Dealing with exceptional back edges could be tricky because
+        //       they only approximate the real control flow. Bail out for now.
+        return kAnalysisFailThrowCatchLoop;
+      }
+      block->GetLoopInformation()->Populate();
+    }
+  }
+
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
+    if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) {
+      continue;
+    }
+    if (block->IsLoopHeader()) {
+      HLoopInformation* cur_loop = block->GetLoopInformation();
+      HLoopInformation* outer_loop = cur_loop->GetPreHeader()->GetLoopInformation();
+      if (outer_loop != nullptr) {
+        outer_loop->PopulateInnerLoopUpwards(cur_loop);
+      }
+    }
+  }
+
+  return kAnalysisSuccess;
+}
+
+void SuperblockCloner::CleanUpControlFlow() {
+  // TODO: full control flow clean up for now, optimize it.
+  graph_->ClearDominanceInformation();
+
+  ArenaBitVector outer_loop_bb_set(
+      arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  RecalculateBackEdgesInfo(&outer_loop_bb_set);
+
+  // TODO: do it locally.
+  graph_->SimplifyCFG();
+  graph_->ComputeDominanceInformation();
+
+  // AnalyzeLoopsLocally requires correct post-order information, which was calculated just
+  // before by ComputeDominanceInformation.
+  GraphAnalysisResult result = AnalyzeLoopsLocally(&outer_loop_bb_set);
+  DCHECK_EQ(result, kAnalysisSuccess);
+
+  // TODO: do it locally
+  OrderLoopsHeadersPredecessors(graph_);
+
+  graph_->ComputeTryBlockInformation();
+}
+
+//
+// Helpers for ResolveDataFlow
+//
+
+void SuperblockCloner::ResolvePhi(HPhi* phi) {
+  HBasicBlock* phi_block = phi->GetBlock();
+  for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
+    HInstruction* input = phi->InputAt(i);
+    HBasicBlock* input_block = input->GetBlock();
+
+    // Originally defined outside the region.
+    if (!IsInOrigBBSet(input_block)) {
+      continue;
+    }
+    HBasicBlock* corresponding_block = phi_block->GetPredecessors()[i];
+    if (!IsInOrigBBSet(corresponding_block)) {
+      phi->ReplaceInput(GetInstrCopy(input), i);
+    }
+  }
+}
+
+//
+// Main algorithm methods.
+//
+
+void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) {
+  DCHECK(exits->empty());
+  for (uint32_t block_id : orig_bb_set_.Indexes()) {
+    HBasicBlock* block = GetBlockById(block_id);
+    for (HBasicBlock* succ : block->GetSuccessors()) {
+      if (!IsInOrigBBSet(succ)) {
+        exits->push_back(succ);
+      }
+    }
+  }
+}
+
+void SuperblockCloner::FindAndSetLocalAreaForAdjustments() {
+  DCHECK(outer_loop_ == nullptr);
+  ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+  SearchForSubgraphExits(&exits);
+
+  // For a reducible graph we need to update back-edge and dominance information only for
+  // the outermost loop which is affected by the transformation - it can be found by picking
+  // the outermost common loop of the loops to which the subgraph exit blocks belong.
+  // Note: it can be a loop or the whole graph (outer_loop_ will be nullptr in this case).
+  for (HBasicBlock* exit : exits) {
+    HLoopInformation* loop_exit_loop_info = exit->GetLoopInformation();
+    if (loop_exit_loop_info == nullptr) {
+      outer_loop_ = nullptr;
+      break;
+    }
+    outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info);
+  }
+
+  if (outer_loop_ != nullptr) {
+    // Save the loop population info as it will be changed later.
+    outer_loop_bb_set_.Copy(&outer_loop_->GetBlocks());
+  }
+}
+
+void SuperblockCloner::RemapEdgesSuccessors() {
+  // Redirect incoming edges.
+  for (HEdge e : *remap_incoming_) {
+    HBasicBlock* orig_block = GetBlockById(e.GetFrom());
+    HBasicBlock* orig_succ = GetBlockById(e.GetTo());
+    RemapOrigInternalOrIncomingEdge(orig_block, orig_succ);
+  }
+
+  // Redirect internal edges.
+  for (uint32_t orig_block_id : orig_bb_set_.Indexes()) {
+    HBasicBlock* orig_block = GetBlockById(orig_block_id);
+
+    for (HBasicBlock* orig_succ : orig_block->GetSuccessors()) {
+      uint32_t orig_succ_id = orig_succ->GetBlockId();
+
+      // Check for outgoing edge.
+      if (!IsInOrigBBSet(orig_succ)) {
+        HBasicBlock* copy_block = GetBlockCopy(orig_block);
+        copy_block->AddSuccessor(orig_succ);
+        continue;
+      }
+
+      auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id));
+      auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id));
+
+      // By construction, all successors of the copied block were set to the original ones.
+      if (copy_redir != remap_copy_internal_->end()) {
+        RemapCopyInternalEdge(orig_block, orig_succ);
+      } else {
+        AddCopyInternalEdge(orig_block, orig_succ);
+      }
+
+      if (orig_redir != remap_orig_internal_->end()) {
+        RemapOrigInternalOrIncomingEdge(orig_block, orig_succ);
+      }
+    }
+  }
+}
+
+void SuperblockCloner::AdjustControlFlowInfo() {
+  ArenaBitVector outer_loop_bb_set(
+      arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  RecalculateBackEdgesInfo(&outer_loop_bb_set);
+
+  graph_->ClearDominanceInformation();
+  // TODO: Do it locally.
+  graph_->ComputeDominanceInformation();
+}
+
+// TODO: Current FastCase restriction guarantees that instructions' inputs are already mapped to
+// the valid values; only phis' inputs must be adjusted.
+void SuperblockCloner::ResolveDataFlow() {
+  for (auto entry : *bb_map_) {
+    HBasicBlock* orig_block = entry.first;
+
+    for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* orig_phi = it.Current()->AsPhi();
+      HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+      ResolvePhi(orig_phi);
+      ResolvePhi(copy_phi);
+    }
+    if (kIsDebugBuild) {
+      // Inputs of instruction copies must already be mapped to the corresponding input copies.
+      for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) {
+        CheckInstructionInputsRemapping(it.Current());
+      }
+    }
+  }
+}
+
+//
+// Debug and logging methods.
+//
+
+void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) {
+  DCHECK(!orig_instr->IsPhi());
+  HInstruction* copy_instr = GetInstrCopy(orig_instr);
+  for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) {
+    HInstruction* orig_input = orig_instr->InputAt(i);
+    DCHECK(orig_input->GetBlock()->Dominates(orig_instr->GetBlock()));
+
+    // If original input is defined outside the region then it will remain for both original
+    // instruction and the copy after the transformation.
+    if (!IsInOrigBBSet(orig_input->GetBlock())) {
+      continue;
+    }
+    HInstruction* copy_input = GetInstrCopy(orig_input);
+    DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock()));
+  }
+
+  // Resolve environment.
+  if (orig_instr->HasEnvironment()) {
+    HEnvironment* orig_env = orig_instr->GetEnvironment();
+
+    for (size_t i = 0, e = orig_env->Size(); i < e; ++i) {
+      HInstruction* orig_input = orig_env->GetInstructionAt(i);
+
+      // If original input is defined outside the region then it will remain for both original
+      // instruction and the copy after the transformation.
+      if (orig_input == nullptr || !IsInOrigBBSet(orig_input->GetBlock())) {
+        continue;
+      }
+
+      HInstruction* copy_input = GetInstrCopy(orig_input);
+      DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock()));
+    }
+  }
+}
+
+//
+// Public methods.
+//
+
+SuperblockCloner::SuperblockCloner(HGraph* graph,
+                                   const HBasicBlockSet* orig_bb_set,
+                                   HBasicBlockMap* bb_map,
+                                   HInstructionMap* hir_map)
+  : graph_(graph),
+    arena_(graph->GetAllocator()),
+    orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
+    remap_orig_internal_(nullptr),
+    remap_copy_internal_(nullptr),
+    remap_incoming_(nullptr),
+    bb_map_(bb_map),
+    hir_map_(hir_map),
+    outer_loop_(nullptr),
+    outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) {
+  orig_bb_set_.Copy(orig_bb_set);
+}
+
+void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
+                                                 const HEdgeSet* remap_copy_internal,
+                                                 const HEdgeSet* remap_incoming) {
+  remap_orig_internal_ = remap_orig_internal;
+  remap_copy_internal_ = remap_copy_internal;
+  remap_incoming_ = remap_incoming;
+}
+
+bool SuperblockCloner::IsSubgraphClonable() const {
+  // TODO: Support irreducible graphs and graphs with try-catch.
+  if (graph_->HasIrreducibleLoops() || graph_->HasTryCatch()) {
+    return false;
+  }
+
+  // Check that there are no instructions defined in the subgraph and used outside.
+  // TODO: Improve this by accepting graphs with such uses but only one exit.
+  for (uint32_t idx : orig_bb_set_.Indexes()) {
+    HBasicBlock* block = GetBlockById(idx);
+
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (!instr->IsClonable() ||
+          IsUsedOutsideRegion(instr, orig_bb_set_)) {
+        return false;
+      }
+    }
+
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (!instr->IsClonable() ||
+          IsUsedOutsideRegion(instr, orig_bb_set_)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+void SuperblockCloner::Run() {
+  DCHECK(bb_map_ != nullptr);
+  DCHECK(hir_map_ != nullptr);
+  DCHECK(remap_orig_internal_ != nullptr &&
+         remap_copy_internal_ != nullptr &&
+         remap_incoming_ != nullptr);
+  DCHECK(IsSubgraphClonable());
+
+  // Find an area in the graph for which control flow information should be adjusted.
+  FindAndSetLocalAreaForAdjustments();
+  // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be
+  // adjusted.
+  CloneBasicBlocks();
+  // Connect the blocks together/remap successors and fix phis which are directly affected by the
+  // remapping.
+  RemapEdgesSuccessors();
+  // Recalculate dominance and backedge information which is required by the next stage.
+  AdjustControlFlowInfo();
+  // Fix data flow of the graph.
+  ResolveDataFlow();
+}
+
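End to end, a client transformation drives the cloner roughly like the unit test added in superblock_cloner_test.cc below; this sketch elides building the remap edge sets and compiles only inside ART (graph, loop_info and the remap_* HEdgeSets are assumed to be in scope):

    // Clone all blocks of a loop, modeled on SuperblockClonerTest::CloneBasicBlocks.
    ArenaAllocator* arena = graph->GetAllocator();
    ArenaBitVector orig_bb_set(
        arena, graph->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
    orig_bb_set.Union(&loop_info->GetBlocks());

    SuperblockCloner::HBasicBlockMap bb_map(
        std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
    SuperblockCloner::HInstructionMap hir_map(
        std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));

    SuperblockCloner cloner(graph, &orig_bb_set, &bb_map, &hir_map);
    if (cloner.IsSubgraphClonable()) {
      // The remap_* edge sets encode the desired transformation (peeling, unrolling, ...).
      cloner.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
      cloner.Run();
      cloner.CleanUp();
    }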
+void SuperblockCloner::CleanUp() {
+  CleanUpControlFlow();
+
+  // Remove phis which have all inputs the same.
+  // When a block has a single predecessor it must not have any phis. However, after the
+  // transformation such a block can end up with a phi with a single input. As that case must be
+  // handled anyway, we also simplify phis with multiple identical inputs here.
+  for (auto entry : *bb_map_) {
+    HBasicBlock* orig_block = entry.first;
+    for (HInstructionIterator inst_it(orig_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+      HPhi* phi = inst_it.Current()->AsPhi();
+      if (ArePhiInputsTheSame(phi)) {
+        phi->ReplaceWith(phi->InputAt(0));
+        orig_block->RemovePhi(phi);
+      }
+    }
+
+    HBasicBlock* copy_block = GetBlockCopy(orig_block);
+    for (HInstructionIterator inst_it(copy_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+      HPhi* phi = inst_it.Current()->AsPhi();
+      if (ArePhiInputsTheSame(phi)) {
+        phi->ReplaceWith(phi->InputAt(0));
+        copy_block->RemovePhi(phi);
+      }
+    }
+  }
+}
+
+HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) {
+  HGraph* graph = orig_block->GetGraph();
+  HBasicBlock* copy_block = new (arena_) HBasicBlock(graph, orig_block->GetDexPc());
+  graph->AddBlock(copy_block);
+
+  // Clone all the phis and add them to the map.
+  for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
+    HInstruction* orig_instr = it.Current();
+    HInstruction* copy_instr = orig_instr->Clone(arena_);
+    copy_block->AddPhi(copy_instr->AsPhi());
+    copy_instr->AsPhi()->RemoveAllInputs();
+    DCHECK(!orig_instr->HasEnvironment());
+    hir_map_->Put(orig_instr, copy_instr);
+  }
+
+  // Clone all the instructions and add them to the map.
+  for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) {
+    HInstruction* orig_instr = it.Current();
+    HInstruction* copy_instr = orig_instr->Clone(arena_);
+    ReplaceInputsWithCopies(copy_instr);
+    copy_block->AddInstruction(copy_instr);
+    if (orig_instr->HasEnvironment()) {
+      DeepCloneEnvironmentWithRemapping(copy_instr, orig_instr->GetEnvironment());
+    }
+    hir_map_->Put(orig_instr, copy_instr);
+  }
+
+  return copy_block;
+}
+
+void SuperblockCloner::CloneBasicBlocks() {
+  // By this time ReversePostOrder must be valid: in 'CloneBasicBlock' inputs of the copied
+  // instructions might be replaced by copies of the original inputs (depending on where those
+  // inputs are defined). So the definitions of the original inputs must be visited before their
+  // original uses. The property of reducible graphs "if 'A' dom 'B' then
+  // rpo_num('A') <= rpo_num('B')" guarantees that.
+  for (HBasicBlock* orig_block : graph_->GetReversePostOrder()) {
+    if (!IsInOrigBBSet(orig_block)) {
+      continue;
+    }
+    HBasicBlock* copy_block = CloneBasicBlock(orig_block);
+    bb_map_->Put(orig_block, copy_block);
+    if (kSuperblockClonerLogging) {
+      std::cout << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId() <<
+                   std::endl;
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
new file mode 100644
index 0000000..23de692
--- /dev/null
+++ b/compiler/optimizing/superblock_cloner.h
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
+#define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
+
+#include "base/arena_bit_vector.h"
+#include "base/arena_containers.h"
+#include "base/bit_vector-inl.h"
+#include "nodes.h"
+
+namespace art {
+
+static const bool kSuperblockClonerLogging = false;
+static const bool kSuperblockClonerVerify = false;
+
+// Represents an edge between two HBasicBlocks.
+//
+// Note: objects of this class are small - pass them by value.
+class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
+ public:
+  HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
+    DCHECK_NE(to_, kInvalidBlockId);
+    DCHECK_NE(from_, kInvalidBlockId);
+  }
+  HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
+    DCHECK_NE(to_, kInvalidBlockId);
+    DCHECK_NE(from_, kInvalidBlockId);
+  }
+  HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
+
+  uint32_t GetFrom() const { return from_; }
+  uint32_t GetTo() const { return to_; }
+
+  bool operator==(const HEdge& other) const {
+    return this->from_ == other.from_ && this->to_ == other.to_;
+  }
+
+  bool operator!=(const HEdge& other) const { return !operator==(other); }
+  void Dump(std::ostream& stream) const;
+
+  // Returns whether the edge has valid block ids; whether the from_ block actually has the
+  // to_ block as a successor is checked separately by IsEdgeValid.
+  bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
+
+ private:
+  // Predecessor block id.
+  uint32_t from_;
+  // Successor block id.
+  uint32_t to_;
+};
+
+// Returns whether a HEdge edge corresponds to an existing edge in the graph.
+inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
+  if (!edge.IsValid()) {
+    return false;
+  }
+  uint32_t from = edge.GetFrom();
+  uint32_t to = edge.GetTo();
+  if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
+    return false;
+  }
+
+  HBasicBlock* block_from = graph->GetBlocks()[from];
+  HBasicBlock* block_to = graph->GetBlocks()[to];
+  if (block_from == nullptr || block_to == nullptr) {
+    return false;
+  }
+
+  return block_from->HasSuccessor(block_to, 0);
+}
+
+// SuperblockCloner provides a feature of cloning subgraphs in a smart, high-level way without
+// fine-grained manipulation of the IR; data flow and graph properties are resolved/adjusted
+// automatically. The clone transformation is defined by specifying a set of basic blocks to copy
+// and a set of rules for how to treat edges and remap their successors. With this approach such
+// optimizations as Branch Target Expansion, Loop Peeling and Loop Unrolling can be implemented.
+//
+// The idea of the transformation is based on the "superblock cloning" technique described in the
+// book "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University,
+// Houston, Texas, Morgan Kaufmann. The original paper is "The Superblock: An Effective Technique
+// for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al.
+// J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
+//
+// There are two states of the IR graph: original graph (before the transformation) and
+// copy graph (after).
+//
+// Before the transformation:
+// Defining a set of basic blocks to copy (orig_bb_set) partitions all of the edges in the
+// original graph into 4 categories/sets (using the notation "(pred, succ)" for an edge,
+// where pred and succ are basic blocks):
+//  - internal - pred, succ are members of ‘orig_bb_set’.
+//  - outside  - pred, succ are not members of ‘orig_bb_set’.
+//  - incoming - pred is not a member of ‘orig_bb_set’, succ is.
+//  - outgoing - pred is a member of ‘orig_bb_set’, succ is not.
+//
+// Transformation:
+//
+// 1. Initial cloning:
+//   1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks
+//        form ‘copy_bb_set’.
+//   1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the
+//        copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge
+//        set.
+//   1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the
+//        copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge
+//        set.
+// 2. Successors remapping.
+//   2.1. ‘remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should
+//        be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)).
+//   2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors
+//        should be remapped to original nodes: ((X_1, Y_1) will be transformed into (X_1, Y)).
+//   2.3. ‘remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original
+//        graph whose successors should be remapped to copy nodes: ((X, Y) will be transformed
+//        into (X, Y_1)).
+// 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
+// 4. Fix/resolve data flow.
+// 5. Do cleanups (DCE, critical edges splitting, etc).
+//
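The four edge categories depend only on endpoint membership in orig_bb_set; as a quick illustration (plain C++ with a std::set of block ids standing in for the ArenaBitVector the cloner uses):

    #include <cstdint>
    #include <set>
    #include <string>

    // Classifies an edge (pred, succ) against a block set, per the categories above.
    std::string ClassifyEdge(const std::set<uint32_t>& bb_set, uint32_t pred, uint32_t succ) {
      const bool p = bb_set.count(pred) != 0;
      const bool s = bb_set.count(succ) != 0;
      if (p && s) return "internal";
      if (!p && !s) return "outside";
      return p ? "outgoing" : "incoming";
    }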
+class SuperblockCloner : public ValueObject {
+ public:
+  // TODO: Investigate optimal types for the containers.
+  using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
+  using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
+  using HBasicBlockSet = ArenaBitVector;
+  using HEdgeSet = ArenaHashSet<HEdge>;
+
+  SuperblockCloner(HGraph* graph,
+                   const HBasicBlockSet* orig_bb_set,
+                   HBasicBlockMap* bb_map,
+                   HInstructionMap* hir_map);
+
+  // Sets edge successor remapping info specified by corresponding edge sets.
+  void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
+                                 const HEdgeSet* remap_copy_internal,
+                                 const HEdgeSet* remap_incoming);
+
+  // Returns whether the specified subgraph is copyable.
+  // TODO: Start from small range of graph patterns then extend it.
+  bool IsSubgraphClonable() const;
+
+  // Runs the copy algorithm according to the description.
+  void Run();
+
+  // Cleans up the graph after transformation: splits critical edges, recalculates control flow
+  // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
+  void CleanUp();
+
+  // Returns a clone of a basic block (orig_block).
+  //
+  //  - The copy block will have no successors/predecessors; they should be set up manually.
+  //  - For each instruction in the orig_block a copy is created and inserted into the copy block;
+  //    this correspondence is recorded in the map (old instruction, new instruction).
+  //  - The graph HIR is not valid after this transformation: all of the copied instructions keep
+  //    the same inputs as in the original block, phis do not reflect a correct correspondence
+  //    between values and predecessors (as the copy block has no predecessors yet), etc.
+  HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
+
+  // Creates a clone for each basic block in orig_bb_set, adding corresponding entries into bb_map_
+  // and hir_map_.
+  void CloneBasicBlocks();
+
+  HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
+    auto copy_input_iter = hir_map_->find(orig_instr);
+    DCHECK(copy_input_iter != hir_map_->end());
+    return copy_input_iter->second;
+  }
+
+  HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
+    HBasicBlock* block = bb_map_->Get(orig_block);
+    DCHECK(block != nullptr);
+    return block;
+  }
+
+  HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
+    for (auto it : *hir_map_) {
+      if (it.second == copy_instr) {
+        return it.first;
+      }
+    }
+    return nullptr;
+  }
+
+  bool IsInOrigBBSet(uint32_t block_id) const {
+    return orig_bb_set_.IsBitSet(block_id);
+  }
+
+  bool IsInOrigBBSet(const HBasicBlock* block) const {
+    return IsInOrigBBSet(block->GetBlockId());
+  }
+
+ private:
+  // Fills the 'exits' vector with the subgraph exits.
+  void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
+
+  // Finds and records information about the area in the graph for which control-flow (back edges,
+  // loops, dominators) needs to be adjusted.
+  void FindAndSetLocalAreaForAdjustments();
+
+  // Remaps edges' successors according to the info specified in the edges sets.
+  //
+  // Only edge successors/predecessors and phis' input records (to have a correspondence between
+  // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
+  // phis' nor instructions' inputs values are resolved.
+  void RemapEdgesSuccessors();
+
+  // Adjusts control-flow (back edges, loops, dominators) for the local area defined by
+  // FindAndSetLocalAreaForAdjustments.
+  void AdjustControlFlowInfo();
+
+  // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in
+  // the SSA form.
+  void ResolveDataFlow();
+
+  //
+  // Helpers for CloneBasicBlock.
+  //
+
+  // Adjusts the copy instruction's inputs: if an input of the original instruction is defined in
+  // the orig_bb_set, replaces it with the corresponding copy; otherwise leaves it as in the
+  // original.
+  void ReplaceInputsWithCopies(HInstruction* copy_instr);
+
+  // Recursively clones the environment for the copy instruction. If an input of the original
+  // environment is defined in the orig_bb_set, replaces it with the corresponding copy; otherwise
+  // leaves it as in the original.
+  void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
+
+  //
+  // Helpers for RemapEdgesSuccessors.
+  //
+
+  // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and
+  // copy_succ.
+  void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+  // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
+  void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+  // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
+  void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+  //
+  // Local versions of control flow calculation/adjustment routines.
+  //
+
+  void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
+  void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
+  GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
+  void CleanUpControlFlow();
+
+  //
+  // Helpers for ResolveDataFlow
+  //
+
+  // Resolves the inputs of the phi.
+  void ResolvePhi(HPhi* phi);
+
+  //
+  // Debug and logging methods.
+  //
+  void CheckInstructionInputsRemapping(HInstruction* orig_instr);
+
+  HBasicBlock* GetBlockById(uint32_t block_id) const {
+    DCHECK(block_id < graph_->GetBlocks().size());
+    HBasicBlock* block = graph_->GetBlocks()[block_id];
+    DCHECK(block != nullptr);
+    return block;
+  }
+
+  HGraph* const graph_;
+  ArenaAllocator* const arena_;
+
+  // Set of basic blocks in the original graph to be copied.
+  HBasicBlockSet orig_bb_set_;
+
+  // Sets of edges which require successors remapping.
+  const HEdgeSet* remap_orig_internal_;
+  const HEdgeSet* remap_copy_internal_;
+  const HEdgeSet* remap_incoming_;
+
+  // Correspondence map for blocks: (original block, copy block).
+  HBasicBlockMap* bb_map_;
+  // Correspondence map for instructions: (original HInstruction, copy HInstruction).
+  HInstructionMap* hir_map_;
+  // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted.
+  HLoopInformation* outer_loop_;
+  HBasicBlockSet outer_loop_bb_set_;
+
+  ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
+
+  DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
+};
+
+}  // namespace art
+
+namespace std {
+
+template <>
+struct hash<art::HEdge> {
+  size_t operator()(art::HEdge const& x) const noexcept {
+    // Use Cantor pairing function as the hash function.
+    uint32_t a = x.GetFrom();
+    uint32_t b = x.GetTo();
+    return (a + b) * (a + b + 1) / 2 + b;
+  }
+};
+
+}  // namespace std
+
+#endif  // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
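A note on the hash above: the Cantor pairing function is a bijection on pairs of naturals, so distinct edges receive distinct values before any overflow; for example (from=2, to=3) hashes to (5*6)/2 + 3 = 18, while the reversed edge (3, 2) hashes to 17. A standalone check (uint64_t sidesteps the uint32_t wrap-around the in-tree version tolerates, which is harmless for hashing):

    #include <cassert>
    #include <cstdint>

    uint64_t CantorPair(uint32_t a, uint32_t b) {
      const uint64_t s = static_cast<uint64_t>(a) + b;
      return s * (s + 1) / 2 + b;
    }

    int main() {
      assert(CantorPair(2, 3) == 18);
      assert(CantorPair(3, 2) == 17);
      assert(CantorPair(0, 0) == 0);
      return 0;
    }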
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index fd77eb8..f1b7bff 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -17,11 +17,15 @@
 #include "graph_checker.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
+#include "superblock_cloner.h"
 
 #include "gtest/gtest.h"
 
 namespace art {
 
+using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
+using HInstructionMap = SuperblockCloner::HInstructionMap;
+
 // This class provides methods and helpers for testing various cloning and copying routines:
 // individual instruction cloning and cloning of the more coarse-grain structures.
 class SuperblockClonerTest : public OptimizingUnitTest {
@@ -182,4 +186,121 @@
   EXPECT_NE(new_suspend_check, nullptr);
 }
 
+// Tests SuperblockCloner::CloneBasicBlocks - check instruction cloning and initial remapping of
+// instructions' inputs.
+TEST_F(SuperblockClonerTest, CloneBasicBlocks) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+  ArenaAllocator* arena = graph_->GetAllocator();
+
+  CreateBasicLoopControlFlow(&header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  graph_->BuildDominatorTree();
+  ASSERT_TRUE(CheckGraph());
+
+  ArenaBitVector orig_bb_set(
+      arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+  HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+
+  HLoopInformation* loop_info = header->GetLoopInformation();
+  orig_bb_set.Union(&loop_info->GetBlocks());
+
+  SuperblockCloner cloner(graph_,
+                          &orig_bb_set,
+                          &bb_map,
+                          &hir_map);
+  EXPECT_TRUE(cloner.IsSubgraphClonable());
+
+  cloner.CloneBasicBlocks();
+
+  EXPECT_EQ(bb_map.size(), 2u);
+  EXPECT_EQ(hir_map.size(), 12u);
+
+  for (auto it : hir_map) {
+    HInstruction* orig_instr = it.first;
+    HInstruction* copy_instr = it.second;
+
+    EXPECT_EQ(cloner.GetBlockCopy(orig_instr->GetBlock()), copy_instr->GetBlock());
+    EXPECT_EQ(orig_instr->GetKind(), copy_instr->GetKind());
+    EXPECT_EQ(orig_instr->GetType(), copy_instr->GetType());
+
+    if (orig_instr->IsPhi()) {
+      continue;
+    }
+
+    EXPECT_EQ(orig_instr->InputCount(), copy_instr->InputCount());
+
+    // Check that inputs match.
+    for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) {
+      HInstruction* orig_input = orig_instr->InputAt(i);
+      HInstruction* copy_input = copy_instr->InputAt(i);
+      if (cloner.IsInOrigBBSet(orig_input->GetBlock())) {
+        EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input);
+      } else {
+        EXPECT_EQ(orig_input, copy_input);
+      }
+    }
+
+    EXPECT_EQ(orig_instr->HasEnvironment(), copy_instr->HasEnvironment());
+
+    // Check that environments match.
+    if (orig_instr->HasEnvironment()) {
+      HEnvironment* orig_env = orig_instr->GetEnvironment();
+      HEnvironment* copy_env = copy_instr->GetEnvironment();
+
+      EXPECT_EQ(copy_env->GetParent(), nullptr);
+      EXPECT_EQ(orig_env->Size(), copy_env->Size());
+
+      for (size_t i = 0, e = orig_env->Size(); i < e; i++) {
+        HInstruction* orig_input = orig_env->GetInstructionAt(i);
+        HInstruction* copy_input = copy_env->GetInstructionAt(i);
+        if (cloner.IsInOrigBBSet(orig_input->GetBlock())) {
+          EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input);
+        } else {
+          EXPECT_EQ(orig_input, copy_input);
+        }
+      }
+    }
+  }
+}
+
+// Tests SuperblockCloner::CleanUpControlFlow - checks the algorithms for local adjustment of the
+// control flow.
+TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+  ArenaAllocator* arena = graph_->GetAllocator();
+
+  CreateBasicLoopControlFlow(&header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  graph_->BuildDominatorTree();
+  ASSERT_TRUE(CheckGraph());
+
+  ArenaBitVector orig_bb_set(
+      arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+
+  HLoopInformation* loop_info = header->GetLoopInformation();
+  orig_bb_set.Union(&loop_info->GetBlocks());
+
+  SuperblockCloner cloner(graph_,
+                          &orig_bb_set,
+                          nullptr,
+                          nullptr);
+  EXPECT_TRUE(cloner.IsSubgraphClonable());
+
+  cloner.FindAndSetLocalAreaForAdjustments();
+  cloner.CleanUpControlFlow();
+
+  EXPECT_TRUE(CheckGraph());
+
+  EXPECT_TRUE(entry_block_->Dominates(header));
+  EXPECT_TRUE(entry_block_->Dominates(exit_block_));
+
+  EXPECT_EQ(header->GetLoopInformation(), loop_info);
+  EXPECT_EQ(loop_info->GetHeader(), header);
+  EXPECT_TRUE(loop_info->Contains(*loop_body));
+  EXPECT_TRUE(loop_info->IsBackEdge(*loop_body));
+}
+
 }  // namespace art
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 0a09435..674dc9a 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -153,7 +153,7 @@
   " 21c:	f8d9 8034 	ldr.w	r8, [r9, #52]	; 0x34\n",
   " 220:	4770      	bx	lr\n",
   " 222:	4660      	mov	r0, ip\n",
-  " 224:	f8d9 c2c0 	ldr.w	ip, [r9, #704]	; 0x2c0\n",
+  " 224:	f8d9 c2c4 	ldr.w	ip, [r9, #708]	; 0x2c4\n",
   " 228:	47e0      	blx	ip\n",
   nullptr
 };
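The expected disassembly shifts this load by one pointer (0x2c0/704 to 0x2c4/708) because the change inserts the new pPow slot earlier in QuickEntryPoints, moving every later entrypoint down; the oat_writer_test hunk below bumps the slot count from 161 to 162 for the same reason. A minimal sketch of the arithmetic, with illustrative constant names:

    #include <cstdint>

    // On Thumb2 each QuickEntryPoints slot is a 4-byte pointer, so inserting
    // one slot before a given entrypoint moves its Thread offset by 4.
    constexpr uint32_t kPointerSize = 4;
    constexpr uint32_t kOldOffset = 0x2c0;  // 704, before pPow existed
    constexpr uint32_t kNewOffset = kOldOffset + kPointerSize;
    static_assert(kNewOffset == 0x2c4, "matches the updated expected output");
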
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 7796b3a..34ba4b3 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1829,6 +1829,7 @@
     jobject class_loader = nullptr;
     if (!IsBootImage()) {
       class_loader = class_loader_context_->CreateClassLoader(dex_files_);
+      callbacks_->SetDexFiles(&dex_files);
     }
 
     // Register dex caches and key them to the class loader so that they only unload when the
diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc
index 849887c..f790db2 100644
--- a/dex2oat/linker/oat_writer.cc
+++ b/dex2oat/linker/oat_writer.cc
@@ -46,6 +46,7 @@
 #include "gc/space/space.h"
 #include "handle_scope-inl.h"
 #include "image_writer.h"
+#include "jit/profile_compilation_info.h"
 #include "linker/buffered_output_stream.h"
 #include "linker/file_output_stream.h"
 #include "linker/index_bss_mapping_encoder.h"
@@ -1336,7 +1337,7 @@
       bool has_code_info = method_header->IsOptimized();
       // Record debug information for this function if we are doing that.
       debug::MethodDebugInfo& info = writer_->method_info_[debug_info_idx];
-      DCHECK(info.trampoline_name.empty());
+      DCHECK(info.custom_name.empty());
       info.dex_file = method_ref.dex_file;
       info.class_def_index = class_def_index;
       info.dex_method_index = method_ref.index;
@@ -2420,7 +2421,7 @@
       (field) = compiler_driver_->Create ## fn_name();                      \
       if (generate_debug_info) {                                            \
         debug::MethodDebugInfo info = {};                                   \
-        info.trampoline_name = #fn_name;                                    \
+        info.custom_name = #fn_name;                                        \
         info.isa = instruction_set;                                         \
         info.is_code_address_text_relative = true;                          \
         /* Use the code offset rather than the `adjusted_offset`. */        \
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index c45166b..4e5fd72 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -32,6 +32,7 @@
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "jit/profile_compilation_info.h"
 #include "linker/buffered_output_stream.h"
 #include "linker/elf_writer.h"
 #include "linker/elf_writer_quick.h"
@@ -485,7 +486,7 @@
   EXPECT_EQ(76U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(161 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+  EXPECT_EQ(162 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
             sizeof(QuickEntryPoints));
 }
 
diff --git a/dexdump/Android.bp b/dexdump/Android.bp
index 4916d64..eca0844 100644
--- a/dexdump/Android.bp
+++ b/dexdump/Android.bp
@@ -14,33 +14,38 @@
 
 // TODO(ajcbik): rename dexdump2 into dexdump when Dalvik version is removed
 
-art_cc_binary {
-    name: "dexdump2",
-    host_supported: true,
+
+cc_defaults {
+    name: "dexdump_defaults",
     srcs: [
         "dexdump_cfg.cc",
         "dexdump_main.cc",
         "dexdump.cc",
     ],
     cflags: ["-Wall", "-Werror"],
+    // TODO: fix b/72216369 and remove the need for this.
+    include_dirs: [
+        "art/runtime"  // dex utils.
+    ],
+}
+
+art_cc_binary {
+    name: "dexdump2",
+    defaults: ["dexdump_defaults"],
+    host_supported: true,
     shared_libs: [
-        "libart",
+        "libdexfile",
         "libbase",
     ],
 }
 
 art_cc_binary {
     name: "dexdumps",
+    defaults: ["dexdump_defaults"],
     host_supported: true,
     device_supported: false,
-    srcs: [
-        "dexdump_cfg.cc",
-        "dexdump_main.cc",
-        "dexdump.cc",
-    ],
-    cflags: ["-Wall", "-Werror"],
     static_libs: [
-        "libart",
+        "libdexfile",
         "libbase",
     ] + art_static_dependencies,
     target: {
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 1518e1d..16cb302 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -34,8 +34,13 @@
 
 #include "dexdump.h"
 
+#include <fcntl.h>
 #include <inttypes.h>
 #include <stdio.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
 
 #include <iostream>
 #include <memory>
@@ -44,7 +49,6 @@
 
 #include "android-base/stringprintf.h"
 
-#include "dex/art_dex_file_loader.h"
 #include "dex/code_item_accessors-no_art-inl.h"
 #include "dex/dex_file-inl.h"
 #include "dex/dex_file_exception_helpers.h"
@@ -1868,6 +1872,34 @@
   }
 }
 
+static bool openAndMapFile(const char* fileName,
+                           const uint8_t** base,
+                           size_t* size,
+                           std::string* error_msg) {
+  int fd = open(fileName, O_RDONLY);
+  if (fd < 0) {
+    *error_msg = "open failed";
+    return false;
+  }
+  struct stat st;
+  if (fstat(fd, &st) < 0) {
+    *error_msg = "stat failed";
+    return false;
+  }
+  *size = st.st_size;
+  if (*size == 0) {
+    *error_msg = "size == 0";
+    return false;
+  }
+  void* addr = mmap(nullptr /*addr*/, *size, PROT_READ, MAP_PRIVATE, fd, 0 /*offset*/);
+  if (addr == MAP_FAILED) {
+    *error_msg = "mmap failed";
+    return false;
+  }
+  *base = reinterpret_cast<const uint8_t*>(addr);
+  return true;
+}
+
 /*
  * Processes a single file (either direct .dex or indirect .zip/.jar/.apk).
  */
@@ -1879,12 +1911,18 @@
   // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
   // all of which are Zip archives with "classes.dex" inside.
   const bool kVerifyChecksum = !gOptions.ignoreBadChecksum;
+  const uint8_t* base = nullptr;
+  size_t size = 0;
   std::string error_msg;
-  // TODO: Use DexFileLoader when that is implemented.
-  const ArtDexFileLoader dex_file_loader;
+  if (!openAndMapFile(fileName, &base, &size, &error_msg)) {
+    fputs(error_msg.c_str(), stderr);
+    fputc('\n', stderr);
+    return -1;
+  }
+  const DexFileLoader dex_file_loader;
   std::vector<std::unique_ptr<const DexFile>> dex_files;
-  if (!dex_file_loader.Open(
-        fileName, fileName, /* verify */ true, kVerifyChecksum, &error_msg, &dex_files)) {
+  if (!dex_file_loader.OpenAll(
+        base, size, fileName, /*verify*/ true, kVerifyChecksum, &error_msg, &dex_files)) {
     // Display returned error message to user. Note that this error behavior
     // differs from the error messages shown by the original Dalvik dexdump.
     fputs(error_msg.c_str(), stderr);
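With the runtime-backed ArtDexFileLoader gone, the tool maps the file itself and lets DexFileLoader::OpenAll sniff the container format from the magic bytes. A condensed sketch of the resulting call pattern (error handling trimmed, checksum verification hard-coded for brevity):

    const uint8_t* base = nullptr;
    size_t size = 0;
    std::string error_msg;
    if (!openAndMapFile(fileName, &base, &size, &error_msg)) {
      return -1;  // error_msg describes the open/stat/mmap failure
    }
    const DexFileLoader dex_file_loader;
    std::vector<std::unique_ptr<const DexFile>> dex_files;
    // OpenAll() checks the magic: "PK" means a zip containing classes.dex
    // entries, a dex magic means a single in-memory dex file.
    bool ok = dex_file_loader.OpenAll(base, size, fileName, /*verify*/ true,
                                      /*verify_checksum*/ true, &error_msg,
                                      &dex_files);
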
diff --git a/dexdump/dexdump_main.cc b/dexdump/dexdump_main.cc
index 382b551..2247e7a 100644
--- a/dexdump/dexdump_main.cc
+++ b/dexdump/dexdump_main.cc
@@ -28,12 +28,6 @@
 #include <string.h>
 #include <unistd.h>
 
-#include <android-base/logging.h>
-
-#include <base/logging.h>  // For InitLogging.
-#include "mem_map.h"
-#include "runtime.h"
-
 namespace art {
 
 static const char* gProgName = "dexdump";
@@ -61,10 +55,6 @@
  * Main driver of the dexdump utility.
  */
 int dexdumpDriver(int argc, char** argv) {
-  // Art specific set up.
-  InitLogging(argv, Runtime::Abort);
-  MemMap::Init();
-
   // Reset options.
   bool wantUsage = false;
   memset(&gOptions, 0, sizeof(gOptions));
diff --git a/dexlist/Android.bp b/dexlist/Android.bp
index 8ecff42..2703732 100644
--- a/dexlist/Android.bp
+++ b/dexlist/Android.bp
@@ -17,7 +17,11 @@
     host_supported: true,
     srcs: ["dexlist.cc"],
     cflags: ["-Wall", "-Werror"],
-    shared_libs: ["libart", "libbase"],
+    shared_libs: ["libdexfile", "libbase"],
+    // TODO: fix b/72216369 and remove the need for this.
+    include_dirs: [
+        "art/runtime"  // dex utils.
+    ],
 }
 
 art_cc_test {
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index 1ced8ca..8daaef1 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -23,16 +23,18 @@
  * List all methods in all concrete classes in one or more DEX files.
  */
 
+#include <fcntl.h>
+#include <inttypes.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
 
-#include "base/logging.h"  // For InitLogging.
-#include "dex/art_dex_file_loader.h"
 #include "dex/code_item_accessors-no_art-inl.h"
 #include "dex/dex_file-inl.h"
 #include "dex/dex_file_loader.h"
-#include "mem_map.h"
-#include "runtime.h"
 
 namespace art {
 
@@ -166,6 +168,34 @@
   }
 }
 
+static bool openAndMapFile(const char* fileName,
+                           const uint8_t** base,
+                           size_t* size,
+                           std::string* error_msg) {
+  int fd = open(fileName, O_RDONLY);
+  if (fd < 0) {
+    *error_msg = "open failed";
+    return false;
+  }
+  struct stat st;
+  if (fstat(fd, &st) < 0) {
+    *error_msg = "stat failed";
+    return false;
+  }
+  *size = st.st_size;
+  if (*size == 0) {
+    *error_msg = "size == 0";
+    return false;
+  }
+  void* addr = mmap(nullptr /*addr*/, *size, PROT_READ, MAP_PRIVATE, fd, 0 /*offset*/);
+  if (addr == MAP_FAILED) {
+    *error_msg = "mmap failed";
+    return false;
+  }
+  *base = reinterpret_cast<const uint8_t*>(addr);
+  return true;
+}
+
 /*
  * Processes a single file (either direct .dex or indirect .zip/.jar/.apk).
  */
@@ -173,11 +203,18 @@
   // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
   // all of which are Zip archives with "classes.dex" inside.
   static constexpr bool kVerifyChecksum = true;
+  const uint8_t* base = nullptr;
+  size_t size = 0;
   std::string error_msg;
+  if (!openAndMapFile(fileName, &base, &size, &error_msg)) {
+    fputs(error_msg.c_str(), stderr);
+    fputc('\n', stderr);
+    return -1;
+  }
   std::vector<std::unique_ptr<const DexFile>> dex_files;
-  const ArtDexFileLoader dex_file_loader;
-  if (!dex_file_loader.Open(
-        fileName, fileName, /* verify */ true, kVerifyChecksum, &error_msg, &dex_files)) {
+  const DexFileLoader dex_file_loader;
+  if (!dex_file_loader.OpenAll(
+        base, size, fileName, /*verify*/ true, kVerifyChecksum, &error_msg, &dex_files)) {
     fputs(error_msg.c_str(), stderr);
     fputc('\n', stderr);
     return -1;
@@ -209,10 +246,6 @@
  * Main driver of the dexlist utility.
  */
 int dexlistDriver(int argc, char** argv) {
-  // Art specific set up.
-  InitLogging(argv, Runtime::Abort);
-  MemMap::Init();
-
   // Reset options.
   bool wantUsage = false;
   memset(&gOptions, 0, sizeof(gOptions));
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 2b90614..fcbf2f1 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -180,7 +180,7 @@
     #define DO_TRAMPOLINE(fn_name)                                                \
       if (oat_header.Get ## fn_name ## Offset() != 0) {                           \
         debug::MethodDebugInfo info = {};                                         \
-        info.trampoline_name = #fn_name;                                          \
+        info.custom_name = #fn_name;                                              \
         info.isa = oat_header.GetInstructionSet();                                \
         info.is_code_address_text_relative = true;                                \
         size_t code_offset = oat_header.Get ## fn_name ## Offset();               \
@@ -308,7 +308,7 @@
     const void* code_address = EntryPointToCodePointer(reinterpret_cast<void*>(entry_point));
 
     debug::MethodDebugInfo info = {};
-    DCHECK(info.trampoline_name.empty());
+    DCHECK(info.custom_name.empty());
     info.dex_file = &dex_file;
     info.class_def_index = class_def_index;
     info.dex_method_index = dex_method_index;
diff --git a/openjdkjvmti/events-inl.h b/openjdkjvmti/events-inl.h
index 007669b..74ffb84 100644
--- a/openjdkjvmti/events-inl.h
+++ b/openjdkjvmti/events-inl.h
@@ -187,7 +187,7 @@
 template <ArtJvmtiEvent kEvent, typename ...Args>
 inline std::vector<impl::EventHandlerFunc<kEvent>> EventHandler::CollectEvents(art::Thread* thread,
                                                                                Args... args) const {
-  art::MutexLock mu(thread, envs_lock_);
+  art::ReaderMutexLock mu(thread, envs_lock_);
   std::vector<impl::EventHandlerFunc<kEvent>> handlers;
   for (ArtJvmTiEnv* env : envs) {
     if (ShouldDispatch<kEvent>(env, thread, args...)) {
@@ -527,7 +527,7 @@
 }
 
 inline void EventHandler::RecalculateGlobalEventMask(ArtJvmtiEvent event) {
-  art::MutexLock mu(art::Thread::Current(), envs_lock_);
+  art::WriterMutexLock mu(art::Thread::Current(), envs_lock_);
   RecalculateGlobalEventMaskLocked(event);
 }
 
diff --git a/openjdkjvmti/events.cc b/openjdkjvmti/events.cc
index d98fda5..62b73c0 100644
--- a/openjdkjvmti/events.cc
+++ b/openjdkjvmti/events.cc
@@ -196,12 +196,12 @@
 }
 
 void EventHandler::RegisterArtJvmTiEnv(ArtJvmTiEnv* env) {
-  art::MutexLock mu(art::Thread::Current(), envs_lock_);
+  art::WriterMutexLock mu(art::Thread::Current(), envs_lock_);
   envs.push_back(env);
 }
 
 void EventHandler::RemoveArtJvmTiEnv(ArtJvmTiEnv* env) {
-  art::MutexLock mu(art::Thread::Current(), envs_lock_);
+  art::WriterMutexLock mu(art::Thread::Current(), envs_lock_);
   // Since we might be currently iterating over the envs list we cannot actually erase elements.
   // Instead we will simply replace them with 'nullptr' and skip them manually.
   auto it = std::find(envs.begin(), envs.end(), env);
@@ -1143,7 +1143,7 @@
   {
     // Change the event masks atomically.
     art::Thread* self = art::Thread::Current();
-    art::MutexLock mu(self, envs_lock_);
+    art::WriterMutexLock mu(self, envs_lock_);
     art::WriterMutexLock mu_env_info(self, env->event_info_mutex_);
     old_state = global_mask.Test(event);
     if (mode == JVMTI_ENABLE) {
diff --git a/openjdkjvmti/events.h b/openjdkjvmti/events.h
index 81edb93..8141eff 100644
--- a/openjdkjvmti/events.h
+++ b/openjdkjvmti/events.h
@@ -283,7 +283,7 @@
   ALWAYS_INLINE
   inline void RecalculateGlobalEventMask(ArtJvmtiEvent event) REQUIRES(!envs_lock_);
   ALWAYS_INLINE
-  inline void RecalculateGlobalEventMaskLocked(ArtJvmtiEvent event) REQUIRES(envs_lock_);
+  inline void RecalculateGlobalEventMaskLocked(ArtJvmtiEvent event) REQUIRES_SHARED(envs_lock_);
 
   template <ArtJvmtiEvent kEvent>
   ALWAYS_INLINE inline void DispatchClassFileLoadHookEvent(art::Thread* thread,
@@ -310,7 +310,8 @@
   std::list<ArtJvmTiEnv*> envs GUARDED_BY(envs_lock_);
 
   // Top level lock. Nothing at all should be held when we lock this.
-  mutable art::Mutex envs_lock_ ACQUIRED_BEFORE(art::Locks::instrument_entrypoints_lock_);
+  mutable art::ReaderWriterMutex envs_lock_
+      ACQUIRED_BEFORE(art::Locks::instrument_entrypoints_lock_);
 
   // A union of all enabled events, anywhere.
   EventMask global_mask;
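Switching envs_lock_ from art::Mutex to art::ReaderWriterMutex lets the hot dispatch path (CollectEvents) take the lock shared while registration, removal, and mask updates still take it exclusively. A generic sketch of the same discipline using standard C++ primitives instead of ART's annotated mutexes:

    #include <list>
    #include <mutex>
    #include <shared_mutex>

    struct Env { /* per-environment state */ };

    class EventHandlerSketch {
     public:
      void Register(Env* env) {
        std::unique_lock<std::shared_mutex> lock(envs_lock_);  // writer side
        envs_.push_back(env);
      }
      template <typename Fn>
      void ForEachEnv(Fn fn) const {
        std::shared_lock<std::shared_mutex> lock(envs_lock_);  // reader side
        for (Env* env : envs_) {
          if (env != nullptr) {  // removed envs are nulled out, not erased
            fn(env);
          }
        }
      }
     private:
      mutable std::shared_mutex envs_lock_;
      std::list<Env*> envs_;
    };
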
diff --git a/profman/boot_image_profile.cc b/profman/boot_image_profile.cc
index a750105..3d003a7 100644
--- a/profman/boot_image_profile.cc
+++ b/profman/boot_image_profile.cc
@@ -19,6 +19,7 @@
 
 #include "boot_image_profile.h"
 #include "dex/dex_file-inl.h"
+#include "jit/profile_compilation_info.h"
 #include "method_reference.h"
 #include "type_reference.h"
 
diff --git a/profman/boot_image_profile.h b/profman/boot_image_profile.h
index eb43b7c..99e5a75 100644
--- a/profman/boot_image_profile.h
+++ b/profman/boot_image_profile.h
@@ -22,10 +22,11 @@
 #include <vector>
 
 #include "dex/dex_file.h"
-#include "jit/profile_compilation_info.h"
 
 namespace art {
 
+class ProfileCompilationInfo;
+
 struct BootImageOptions {
  public:
   // Threshold for classes that may be dirty or clean. The threshold specifies how
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 07764b8..aba2b0e 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -41,12 +41,17 @@
     target: {
         android: {
             static_libs: [
+                "libziparchive",
                 "libz",
                 "libbase",
             ],
+            shared_libs: [
+                "libutils",
+            ],
         },
         host: {
             shared_libs: [
+                "libziparchive",
                 "libz",
             ],
         },
@@ -709,6 +714,7 @@
     ],
     shared_libs: [
         "libbacktrace",
+        "libziparchive",
     ],
     header_libs: [
         "art_cmdlineparser_headers", // For parsed_options_test.
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 78b9e46..80080e9 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -144,6 +144,7 @@
   qpoints->pAsin = asin;
   qpoints->pAtan = atan;
   qpoints->pAtan2 = atan2;
+  qpoints->pPow = pow;
   qpoints->pCbrt = cbrt;
   qpoints->pCosh = cosh;
   qpoints->pExp = exp;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 737d2a8..1671a24 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -794,6 +794,9 @@
     .extern artInstanceOfFromCode
     .extern artThrowClassCastExceptionForObject
 ENTRY art_quick_check_instance_of
+    // The bit string type check passes null as the target class. In that case just throw.
+    cbz r1, .Lthrow_class_cast_exception_for_bitstring_check
+
     push {r0-r2, lr}                    @ save arguments, padding (r2) and link register
     .cfi_adjust_cfa_offset 16
     .cfi_rel_offset r0, 0
@@ -812,6 +815,7 @@
     .cfi_restore r2
     .cfi_restore lr
 
+.Lthrow_class_cast_exception_for_bitstring_check:
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
     bl  artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 80bf3ab..4c43b7e 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -168,6 +168,7 @@
   qpoints->pAsin = asin;
   qpoints->pAtan = atan;
   qpoints->pAtan2 = atan2;
+  qpoints->pPow = pow;
   qpoints->pCbrt = cbrt;
   qpoints->pCosh = cosh;
   qpoints->pExp = exp;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index b0e7b0a..0614118 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1333,6 +1333,9 @@
     .extern artInstanceOfFromCode
     .extern artThrowClassCastExceptionForObject
 ENTRY art_quick_check_instance_of
+    // The bit string type check passes null as the target class. In that case just throw.
+    cbz x1, .Lthrow_class_cast_exception_for_bitstring_check
+
     // Store arguments and link register
     // Stack needs to be 16B aligned on calls.
     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
@@ -1358,6 +1361,7 @@
     // Restore
     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
 
+.Lthrow_class_cast_exception_for_bitstring_check:
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
     bl artThrowClassCastExceptionForObject     // (Object*, Class*, Thread*)
diff --git a/runtime/arch/mips/entrypoints_direct_mips.h b/runtime/arch/mips/entrypoints_direct_mips.h
index 1020781..3a6625f 100644
--- a/runtime/arch/mips/entrypoints_direct_mips.h
+++ b/runtime/arch/mips/entrypoints_direct_mips.h
@@ -54,6 +54,7 @@
       entrypoint == kQuickAsin ||
       entrypoint == kQuickAtan ||
       entrypoint == kQuickAtan2 ||
+      entrypoint == kQuickPow ||
       entrypoint == kQuickCbrt ||
       entrypoint == kQuickCosh ||
       entrypoint == kQuickExp ||
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 209f367..badee59 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -348,6 +348,8 @@
   static_assert(IsDirectEntrypoint(kQuickAtan), "Direct C stub marked non-direct.");
   qpoints->pAtan2 = atan2;
   static_assert(IsDirectEntrypoint(kQuickAtan2), "Direct C stub marked non-direct.");
+  qpoints->pPow = pow;
+  static_assert(IsDirectEntrypoint(kQuickPow), "Direct C stub marked non-direct.");
   qpoints->pCbrt = cbrt;
   static_assert(IsDirectEntrypoint(kQuickCbrt), "Direct C stub marked non-direct.");
   qpoints->pCosh = cosh;
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index b2f7e10..d8fe480 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1423,6 +1423,10 @@
     .extern artInstanceOfFromCode
     .extern artThrowClassCastExceptionForObject
 ENTRY art_quick_check_instance_of
+    // The bit string type check passes null as the target class. In that case just throw.
+    beqz   $a1, .Lthrow_class_cast_exception_for_bitstring_check
+    nop
+
     addiu  $sp, $sp, -32
     .cfi_adjust_cfa_offset 32
     sw     $gp, 16($sp)
@@ -1441,12 +1445,15 @@
     jalr   $zero, $ra
     addiu  $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
+
 .Lthrow_class_cast_exception:
     lw     $t9, 8($sp)
     lw     $a1, 4($sp)
     lw     $a0, 0($sp)
     addiu  $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
+
+.Lthrow_class_cast_exception_for_bitstring_check:
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowClassCastExceptionForObject
     jalr $zero, $t9                 # artThrowClassCastException (Object*, Class*, Thread*)
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 35cbd1d..bdfb942 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -165,6 +165,7 @@
   qpoints->pAsin = asin;
   qpoints->pAtan = atan;
   qpoints->pAtan2 = atan2;
+  qpoints->pPow = pow;
   qpoints->pCbrt = cbrt;
   qpoints->pCosh = cosh;
   qpoints->pExp = exp;
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 63f4f6c..a5edc1f 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1364,6 +1364,9 @@
     .extern artInstanceOfFromCode
     .extern artThrowClassCastExceptionForObject
 ENTRY art_quick_check_instance_of
+    // The bit string type check passes null as the target class. In that case just throw.
+    beqzc  $a1, .Lthrow_class_cast_exception_for_bitstring_check
+
     daddiu $sp, $sp, -32
     .cfi_adjust_cfa_offset 32
     sd     $ra, 24($sp)
@@ -1379,12 +1382,15 @@
     jalr   $zero, $ra
     daddiu $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
+
 .Lthrow_class_cast_exception:
     ld     $t9, 16($sp)
     ld     $a1, 8($sp)
     ld     $a0, 0($sp)
     daddiu $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
+
+.Lthrow_class_cast_exception_for_bitstring_check:
     SETUP_GP
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowClassCastExceptionForObject
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 24bf9cc..ffb0c94 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -68,6 +68,7 @@
   qpoints->pAsin = asin;
   qpoints->pAtan = atan;
   qpoints->pAtan2 = atan2;
+  qpoints->pPow = pow;
   qpoints->pCbrt = cbrt;
   qpoints->pCosh = cosh;
   qpoints->pExp = exp;
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 5a28120..d64e2fd 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1431,6 +1431,10 @@
 END_FUNCTION art_quick_instance_of
 
 DEFINE_FUNCTION art_quick_check_instance_of
+    // The bit string type check passes null as the target class. In that case just throw.
+    testl %ecx, %ecx
+    jz .Lthrow_class_cast_exception_for_bitstring_check
+
     PUSH eax                              // alignment padding
     PUSH ecx                              // pass arg2 - checked class
     PUSH eax                              // pass arg1 - obj
@@ -1448,6 +1452,7 @@
     addl LITERAL(4), %esp
     CFI_ADJUST_CFA_OFFSET(-4)
 
+.Lthrow_class_cast_exception_for_bitstring_check:
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
     PUSH eax                              // alignment padding
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 3656f83..6bae69c 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -91,6 +91,7 @@
   qpoints->pAsin = asin;
   qpoints->pAtan = atan;
   qpoints->pAtan2 = atan2;
+  qpoints->pPow = pow;
   qpoints->pCbrt = cbrt;
   qpoints->pCosh = cosh;
   qpoints->pExp = exp;
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 781ade9..81ad780 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1402,6 +1402,10 @@
 END_FUNCTION art_quick_unlock_object_no_inline
 
 DEFINE_FUNCTION art_quick_check_instance_of
+    // The bit string type check passes null as the target class. In that case just throw.
+    testl %esi, %esi
+    jz .Lthrow_class_cast_exception_for_bitstring_check
+
     // We could check the super classes here but that is usually already checked in the caller.
     PUSH rdi                          // Save args for exc
     PUSH rsi
@@ -1425,6 +1429,7 @@
     POP rsi                           // Pop arguments
     POP rdi
 
+.Lthrow_class_cast_exception_for_bitstring_check:
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
     call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
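All six architecture stubs now open with the same guard: a null target class marks a bitstring type check, whose throw path must re-resolve the class itself, so the stub skips the artInstanceOfFromCode call entirely. The shared control flow, as a C-like sketch rather than ART source (the function name is hypothetical):

    // Hypothetical C rendering of art_quick_check_instance_of after this change.
    void CheckInstanceOf(Object* obj, Class* dest_type) {
      if (dest_type == nullptr) {
        // Bitstring check: the compiled code never materialized the class;
        // the throw entrypoint recovers it from the CHECK_CAST at the
        // caller's dex pc (see quick_throw_entrypoints.cc below).
        artThrowClassCastExceptionForObject(obj, nullptr, Thread::Current());
      }
      if (!artInstanceOfFromCode(obj, dest_type)) {
        artThrowClassCastExceptionForObject(obj, dest_type, Thread::Current());
      }
    }
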
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 3cf2b93..2f7d6ab 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -73,7 +73,7 @@
 
 // Offset of field Thread::tlsPtr_.mterp_current_ibase.
 #define THREAD_CURRENT_IBASE_OFFSET \
-    (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__ + (1 + 161) * __SIZEOF_POINTER__)
+    (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__ + (1 + 162) * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET,
             art::Thread::MterpCurrentIBaseOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_default_ibase.
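The hand-maintained constant tracks the same entrypoint count: tlsPtr_ lays out one size_t followed by 1 + 162 pointers before mterp_current_ibase, and ADD_TEST_EQ keeps the macro honest against the real Thread layout. A quick check of the arithmetic for a 64-bit target (the base offset is left at zero purely for illustration):

    #include <cstdint>

    constexpr uint64_t kThreadLocalObjectsOffset = 0;  // illustrative anchor
    constexpr uint64_t kSizeofSizeT = 8;               // 64-bit target
    constexpr uint64_t kSizeofPointer = 8;
    constexpr uint64_t kCurrentIBaseOffset =
        kThreadLocalObjectsOffset + kSizeofSizeT + (1 + 162) * kSizeofPointer;
    static_assert(kCurrentIBaseOffset == 1312, "163 pointers past the size_t");
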
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index cc413c5..0fcf394 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -56,6 +56,7 @@
   "CtorFenceIns ",
   "InvokeInputs ",
   "PhiInputs    ",
+  "TypeCheckIns ",
   "LoopInfo     ",
   "LIBackEdges  ",
   "TryCatchInf  ",
@@ -95,6 +96,7 @@
   "CHA          ",
   "Scheduler    ",
   "Profile      ",
+  "SBCloner     ",
 };
 
 template <bool kCount>
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 9e03658..5f3fc02a 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -62,6 +62,7 @@
   kArenaAllocConstructorFenceInputs,
   kArenaAllocInvokeInputs,
   kArenaAllocPhiInputs,
+  kArenaAllocTypeCheckInputs,
   kArenaAllocLoopInfo,
   kArenaAllocLoopInfoBackEdges,
   kArenaAllocTryCatchInfo,
@@ -101,6 +102,7 @@
   kArenaAllocCHA,
   kArenaAllocScheduler,
   kArenaAllocProfile,
+  kArenaAllocSuperblockCloner,
   kNumArenaAllocKinds
 };
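The string table in arena_allocator.cc and the ArenaAllocKind enum in this header must grow in lock-step: each new kind needs a name at the matching index, which is why both hunks add entries at the same positions. A toy guard for that pattern (illustrative, not quoted from the file):

    enum ArenaAllocKind { kArenaAllocA, kArenaAllocB, kNumArenaAllocKinds };
    static const char* const kAllocNames[] = { "A            ", "B            " };
    static_assert(sizeof(kAllocNames) / sizeof(kAllocNames[0]) == kNumArenaAllocKinds,
                  "every ArenaAllocKind needs a matching name entry");
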
 
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 7077298..d541b79 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -101,6 +101,7 @@
   kAllocatedThreadIdsLock,
   kMonitorPoolLock,
   kClassLinkerClassesLock,  // TODO rename.
+  kDexToDexCompilerLock,
   kJitCodeCacheLock,
   kCHALock,
   kSubtypeCheckLock,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index b61fb4a..e7ee9f2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -4252,17 +4252,16 @@
                                           ClassStatus& oat_file_class_status) {
   // If we're compiling, we can only verify the class using the oat file if
   // we are not compiling the image or if the class we're verifying is not part of
-  // the app.  In other words, we will only check for preverification of bootclasspath
-  // classes.
+  // the compilation unit (app - dependencies). We will let the compiler callback
+  // tell us about the latter.
   if (Runtime::Current()->IsAotCompiler()) {
+    CompilerCallbacks* callbacks = Runtime::Current()->GetCompilerCallbacks();
     // Are we compiling the bootclasspath?
-    if (Runtime::Current()->GetCompilerCallbacks()->IsBootImage()) {
+    if (callbacks->IsBootImage()) {
       return false;
     }
     // We are compiling an app (not the image).
-
-    // Is this an app class? (I.e. not a bootclasspath class)
-    if (klass->GetClassLoader() != nullptr) {
+    if (!callbacks->CanUseOatStatusForVerification(klass.Ptr())) {
       return false;
     }
   }
@@ -4483,6 +4482,14 @@
 
   Runtime::Current()->GetRuntimeCallbacks()->ClassPrepare(temp_klass, klass);
 
+  // SubtypeCheckInfo::Initialized must happen-before any new-instance for that type.
+  // See also ClassLinker::EnsureInitialized().
+  {
+    MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+    SubtypeCheck<ObjPtr<mirror::Class>>::EnsureInitialized(klass.Get());
+    // TODO: Avoid taking subtype_check_lock_ if SubtypeCheck for j.l.r.Proxy is already assigned.
+  }
+
   {
     // Lock on klass is released. Lock new class object.
     ObjectLock<mirror::Class> initialization_lock(self, klass);
diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h
index 4560bca..8395966 100644
--- a/runtime/compiler_callbacks.h
+++ b/runtime/compiler_callbacks.h
@@ -25,6 +25,12 @@
 
 class CompilerDriver;
 
+namespace mirror {
+
+class Class;
+
+}  // namespace mirror
+
 namespace verifier {
 
 class MethodVerifier;
@@ -68,6 +74,11 @@
   virtual void UpdateClassState(ClassReference ref ATTRIBUTE_UNUSED,
                                 ClassStatus state ATTRIBUTE_UNUSED) {}
 
+  virtual bool CanUseOatStatusForVerification(mirror::Class* klass ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    return false;
+  }
+
  protected:
   explicit CompilerCallbacks(CallbackMode mode) : mode_(mode) { }
 
diff --git a/runtime/dex/art_dex_file_loader.h b/runtime/dex/art_dex_file_loader.h
index 8c12bf3..b31d1e9 100644
--- a/runtime/dex/art_dex_file_loader.h
+++ b/runtime/dex/art_dex_file_loader.h
@@ -68,7 +68,7 @@
                                       std::unique_ptr<MemMap> mem_map,
                                       bool verify,
                                       bool verify_checksum,
-                                      std::string* error_msg) const OVERRIDE;
+                                      std::string* error_msg) const;
 
   // Opens all .dex files found in the file, guessing the container format based on file extension.
   bool Open(const char* filename,
@@ -76,7 +76,7 @@
             bool verify,
             bool verify_checksum,
             std::string* error_msg,
-            std::vector<std::unique_ptr<const DexFile>>* dex_files) const OVERRIDE;
+            std::vector<std::unique_ptr<const DexFile>>* dex_files) const;
 
   // Open a single dex file from an fd. This function closes the fd.
   std::unique_ptr<const DexFile> OpenDex(int fd,
@@ -84,7 +84,7 @@
                                          bool verify,
                                          bool verify_checksum,
                                          bool mmap_shared,
-                                         std::string* error_msg) const OVERRIDE;
+                                         std::string* error_msg) const;
 
   // Opens dex files from within a .jar, .zip, or .apk file
   bool OpenZip(int fd,
@@ -92,7 +92,7 @@
                bool verify,
                bool verify_checksum,
                std::string* error_msg,
-               std::vector<std::unique_ptr<const DexFile>>* dex_files) const OVERRIDE;
+               std::vector<std::unique_ptr<const DexFile>>* dex_files) const;
 
  private:
   std::unique_ptr<const DexFile> OpenFile(int fd,
@@ -100,7 +100,7 @@
                                           bool verify,
                                           bool verify_checksum,
                                           bool mmap_shared,
-                                          std::string* error_msg) const OVERRIDE;
+                                          std::string* error_msg) const;
 
   // Open all classesXXX.dex files from a zip archive.
   bool OpenAllDexFilesFromZip(const ZipArchive& zip_archive,
@@ -108,8 +108,7 @@
                               bool verify,
                               bool verify_checksum,
                               std::string* error_msg,
-                              std::vector<std::unique_ptr<const DexFile>>* dex_files)
-      const OVERRIDE;
+                              std::vector<std::unique_ptr<const DexFile>>* dex_files) const;
 
   // Opens .dex file from the entry_name in a zip archive. error_code is undefined when non-null
   // return.
@@ -119,7 +118,7 @@
                                                        bool verify,
                                                        bool verify_checksum,
                                                        std::string* error_msg,
-                                                       ZipOpenErrorCode* error_code) const OVERRIDE;
+                                                       ZipOpenErrorCode* error_code) const;
 };
 
 }  // namespace art
diff --git a/runtime/dex/dex_file_loader.cc b/runtime/dex/dex_file_loader.cc
index c80ea19..ccad19f 100644
--- a/runtime/dex/dex_file_loader.cc
+++ b/runtime/dex/dex_file_loader.cc
@@ -16,25 +16,143 @@
 
 #include "dex_file_loader.h"
 
-// #include <sys/mman.h>  // For the PROT_* and MAP_* constants.
-// #include <sys/stat.h>
-
 #include "android-base/stringprintf.h"
 
-#include "base/file_magic.h"
 #include "base/stl_util.h"
-// #include "base/systrace.h"
-// #include "base/unix_file/fd_file.h"
 #include "compact_dex_file.h"
 #include "dex_file.h"
 #include "dex_file_verifier.h"
 #include "standard_dex_file.h"
-// #include "zip_archive.h"
+#include "ziparchive/zip_archive.h"
+
+// system/core/zip_archive definitions.
+struct ZipEntry;
+typedef void* ZipArchiveHandle;
 
 namespace art {
 
+namespace {
+
+class VectorContainer : public DexFileContainer {
+ public:
+  explicit VectorContainer(std::vector<uint8_t>&& vector) : vector_(std::move(vector)) { }
+  virtual ~VectorContainer() OVERRIDE { }
+
+  int GetPermissions() OVERRIDE {
+    return 0;
+  }
+
+  bool IsReadOnly() OVERRIDE {
+    return true;
+  }
+
+  bool EnableWrite() OVERRIDE {
+    return false;
+  }
+
+  bool DisableWrite() OVERRIDE {
+    return false;
+  }
+
+ private:
+  std::vector<uint8_t> vector_;
+  DISALLOW_COPY_AND_ASSIGN(VectorContainer);
+};
+
+}  // namespace
+
 using android::base::StringPrintf;
 
+class DexZipArchive;
+
+class DexZipEntry {
+ public:
+  // Extract this entry to memory.
+  // Returns an empty vector on failure and sets error_msg.
+  std::vector<uint8_t> Extract(std::string* error_msg) {
+    std::vector<uint8_t> map(GetUncompressedLength());
+    if (map.size() == 0) {
+      *error_msg = "Zero length zip entry";
+      return map;
+    }
+    const int32_t error = ExtractToMemory(handle_, zip_entry_, map.data(), map.size());
+    if (error) {
+      *error_msg = std::string(ErrorCodeString(error));
+    }
+    return map;
+  }
+
+  virtual ~DexZipEntry() {
+    delete zip_entry_;
+  }
+
+  uint32_t GetUncompressedLength() {
+    return zip_entry_->uncompressed_length;
+  }
+
+  uint32_t GetCrc32() {
+    return zip_entry_->crc32;
+  }
+
+ private:
+  DexZipEntry(ZipArchiveHandle handle,
+              ::ZipEntry* zip_entry,
+              const std::string& entry_name)
+    : handle_(handle), zip_entry_(zip_entry), entry_name_(entry_name) {}
+
+  ZipArchiveHandle handle_;
+  ::ZipEntry* const zip_entry_;
+  std::string const entry_name_;
+
+  friend class DexZipArchive;
+  DISALLOW_COPY_AND_ASSIGN(DexZipEntry);
+};
+
+class DexZipArchive {
+ public:
+  // Returns a new DexZipArchive instance on success, null on error.
+  static DexZipArchive* Open(const uint8_t* base, size_t size, std::string* error_msg) {
+    ZipArchiveHandle handle;
+    uint8_t* nonconst_base = const_cast<uint8_t*>(base);
+    const int32_t error = OpenArchiveFromMemory(nonconst_base, size, "ZipArchiveMemory", &handle);
+    if (error) {
+      *error_msg = std::string(ErrorCodeString(error));
+      CloseArchive(handle);
+      return nullptr;
+    }
+    return new DexZipArchive(handle);
+  }
+
+  DexZipEntry* Find(const char* name, std::string* error_msg) const {
+    DCHECK(name != nullptr);
+    // Resist the urge to delete the space. <: is a digraph sequence.
+    std::unique_ptr< ::ZipEntry> zip_entry(new ::ZipEntry);
+    const int32_t error = FindEntry(handle_, ZipString(name), zip_entry.get());
+    if (error) {
+      *error_msg = std::string(ErrorCodeString(error));
+      return nullptr;
+    }
+    return new DexZipEntry(handle_, zip_entry.release(), name);
+  }
+
+  ~DexZipArchive() {
+    CloseArchive(handle_);
+  }
+
+
+ private:
+  explicit DexZipArchive(ZipArchiveHandle handle) : handle_(handle) {}
+  ZipArchiveHandle handle_;
+
+  friend class DexZipEntry;
+  DISALLOW_COPY_AND_ASSIGN(DexZipArchive);
+};
+
+static bool IsZipMagic(uint32_t magic) {
+  return (('P' == ((magic >> 0) & 0xff)) &&
+          ('K' == ((magic >> 8) & 0xff)));
+}
+
 bool DexFileLoader::IsMagicValid(uint32_t magic) {
   return IsMagicValid(reinterpret_cast<uint8_t*>(&magic));
 }
@@ -114,80 +232,47 @@
                     /*verify_result*/ nullptr);
 }
 
-std::unique_ptr<const DexFile> DexFileLoader::Open(const std::string& location ATTRIBUTE_UNUSED,
-                                                   uint32_t location_checksum ATTRIBUTE_UNUSED,
-                                                   std::unique_ptr<MemMap> map ATTRIBUTE_UNUSED,
-                                                   bool verify ATTRIBUTE_UNUSED,
-                                                   bool verify_checksum ATTRIBUTE_UNUSED,
-                                                   std::string* error_msg) const {
-  *error_msg = "UNIMPLEMENTED";
-  return nullptr;
-}
-
-bool DexFileLoader::Open(
-    const char* filename ATTRIBUTE_UNUSED,
-    const std::string& location ATTRIBUTE_UNUSED,
-    bool verify ATTRIBUTE_UNUSED,
-    bool verify_checksum ATTRIBUTE_UNUSED,
+bool DexFileLoader::OpenAll(
+    const uint8_t* base,
+    size_t size,
+    const std::string& location,
+    bool verify,
+    bool verify_checksum,
     std::string* error_msg,
-    std::vector<std::unique_ptr<const DexFile>>* dex_files ATTRIBUTE_UNUSED) const {
-  *error_msg = "UNIMPLEMENTED";
-  return false;
-}
-
-std::unique_ptr<const DexFile> DexFileLoader::OpenDex(
-    int fd ATTRIBUTE_UNUSED,
-    const std::string& location ATTRIBUTE_UNUSED,
-    bool verify ATTRIBUTE_UNUSED,
-    bool verify_checksum ATTRIBUTE_UNUSED,
-    bool mmap_shared ATTRIBUTE_UNUSED,
-    std::string* error_msg) const {
-  *error_msg = "UNIMPLEMENTED";
-  return nullptr;
-}
-
-bool DexFileLoader::OpenZip(
-    int fd ATTRIBUTE_UNUSED,
-    const std::string& location ATTRIBUTE_UNUSED,
-    bool verify ATTRIBUTE_UNUSED,
-    bool verify_checksum ATTRIBUTE_UNUSED,
-    std::string* error_msg,
-    std::vector<std::unique_ptr<const DexFile>>* dex_files ATTRIBUTE_UNUSED) const {
-  *error_msg = "UNIMPLEMENTED";
-  return false;
-}
-
-std::unique_ptr<const DexFile> DexFileLoader::OpenFile(
-    int fd ATTRIBUTE_UNUSED,
-    const std::string& location ATTRIBUTE_UNUSED,
-    bool verify ATTRIBUTE_UNUSED,
-    bool verify_checksum ATTRIBUTE_UNUSED,
-    bool mmap_shared ATTRIBUTE_UNUSED,
-    std::string* error_msg) const {
-  *error_msg = "UNIMPLEMENTED";
-  return nullptr;
-}
-
-std::unique_ptr<const DexFile> DexFileLoader::OpenOneDexFileFromZip(
-    const ZipArchive& zip_archive ATTRIBUTE_UNUSED,
-    const char* entry_name ATTRIBUTE_UNUSED,
-    const std::string& location ATTRIBUTE_UNUSED,
-    bool verify ATTRIBUTE_UNUSED,
-    bool verify_checksum ATTRIBUTE_UNUSED,
-    std::string* error_msg,
-    ZipOpenErrorCode* error_code ATTRIBUTE_UNUSED) const {
-  *error_msg = "UNIMPLEMENTED";
-  return nullptr;
-}
-
-bool DexFileLoader::OpenAllDexFilesFromZip(
-    const ZipArchive& zip_archive ATTRIBUTE_UNUSED,
-    const std::string& location ATTRIBUTE_UNUSED,
-    bool verify ATTRIBUTE_UNUSED,
-    bool verify_checksum ATTRIBUTE_UNUSED,
-    std::string* error_msg,
-    std::vector<std::unique_ptr<const DexFile>>* dex_files ATTRIBUTE_UNUSED) const {
-  *error_msg = "UNIMPLEMENTED";
+    std::vector<std::unique_ptr<const DexFile>>* dex_files) const {
+  DCHECK(dex_files != nullptr) << "DexFile::Open: out-param is nullptr";
+  uint32_t magic = *reinterpret_cast<const uint32_t*>(base);
+  if (IsZipMagic(magic)) {
+    std::unique_ptr<DexZipArchive> zip_archive(DexZipArchive::Open(base, size, error_msg));
+    if (zip_archive.get() == nullptr) {
+      DCHECK(!error_msg->empty());
+      return false;
+    }
+    return OpenAllDexFilesFromZip(*zip_archive.get(),
+                                  location,
+                                  verify,
+                                  verify_checksum,
+                                  error_msg,
+                                  dex_files);
+  }
+  if (IsMagicValid(magic)) {
+    const DexFile::Header* dex_header = reinterpret_cast<const DexFile::Header*>(base);
+    std::unique_ptr<const DexFile> dex_file(Open(base,
+                                                 size,
+                                                 location,
+                                                 dex_header->checksum_,
+                                                 /*oat_dex_file*/ nullptr,
+                                                 verify,
+                                                 verify_checksum,
+                                                 error_msg));
+    if (dex_file.get() != nullptr) {
+      dex_files->push_back(std::move(dex_file));
+      return true;
+    } else {
+      return false;
+    }
+  }
+  *error_msg = "Expected valid zip or dex file";
   return false;
 }
 
@@ -238,4 +323,125 @@
   return dex_file;
 }
 
+std::unique_ptr<const DexFile> DexFileLoader::OpenOneDexFileFromZip(
+    const DexZipArchive& zip_archive,
+    const char* entry_name,
+    const std::string& location,
+    bool verify,
+    bool verify_checksum,
+    std::string* error_msg,
+    ZipOpenErrorCode* error_code) const {
+  CHECK(!location.empty());
+  std::unique_ptr<DexZipEntry> zip_entry(zip_archive.Find(entry_name, error_msg));
+  if (zip_entry == nullptr) {
+    *error_code = ZipOpenErrorCode::kEntryNotFound;
+    return nullptr;
+  }
+  if (zip_entry->GetUncompressedLength() == 0) {
+    *error_msg = StringPrintf("Dex file '%s' has zero length", location.c_str());
+    *error_code = ZipOpenErrorCode::kDexFileError;
+    return nullptr;
+  }
+
+  std::vector<uint8_t> map(zip_entry->Extract(error_msg));
+  if (map.size() == 0) {
+    *error_msg = StringPrintf("Failed to extract '%s' from '%s': %s", entry_name, location.c_str(),
+                              error_msg->c_str());
+    *error_code = ZipOpenErrorCode::kExtractToMemoryError;
+    return nullptr;
+  }
+  VerifyResult verify_result;
+  std::unique_ptr<const DexFile> dex_file = OpenCommon(map.data(),
+                                                       map.size(),
+                                                       location,
+                                                       zip_entry->GetCrc32(),
+                                                       /*oat_dex_file*/ nullptr,
+                                                       verify,
+                                                       verify_checksum,
+                                                       error_msg,
+                                                       new VectorContainer(std::move(map)),
+                                                       &verify_result);
+  if (dex_file == nullptr) {
+    if (verify_result == VerifyResult::kVerifyNotAttempted) {
+      *error_code = ZipOpenErrorCode::kDexFileError;
+    } else {
+      *error_code = ZipOpenErrorCode::kVerifyError;
+    }
+    return nullptr;
+  }
+  if (verify_result != VerifyResult::kVerifySucceeded) {
+    *error_code = ZipOpenErrorCode::kVerifyError;
+    return nullptr;
+  }
+  *error_code = ZipOpenErrorCode::kNoError;
+  return dex_file;
+}
+
+// Technically we do not have a limitation with respect to the number of dex files that can be in a
+// multidex APK. However, it's bad practice, as each dex file requires its own tables for symbols
+// (types, classes, methods, ...) and dex caches. So warn the user when we open a zip with what
+// seems an excessive number of dex files.
+static constexpr size_t kWarnOnManyDexFilesThreshold = 100;
+
+bool DexFileLoader::OpenAllDexFilesFromZip(
+    const DexZipArchive& zip_archive,
+    const std::string& location,
+    bool verify,
+    bool verify_checksum,
+    std::string* error_msg,
+    std::vector<std::unique_ptr<const DexFile>>* dex_files) const {
+  DCHECK(dex_files != nullptr) << "DexFile::OpenFromZip: out-param is nullptr";
+  ZipOpenErrorCode error_code;
+  std::unique_ptr<const DexFile> dex_file(OpenOneDexFileFromZip(zip_archive,
+                                                                kClassesDex,
+                                                                location,
+                                                                verify,
+                                                                verify_checksum,
+                                                                error_msg,
+                                                                &error_code));
+  if (dex_file.get() == nullptr) {
+    return false;
+  } else {
+    // Had at least classes.dex.
+    dex_files->push_back(std::move(dex_file));
+
+    // Now try some more.
+
+    // We could try to avoid std::string allocations by working on a char array directly. As we
+    // do not expect a lot of iterations, this seems too involved and brittle.
+
+    for (size_t i = 1; ; ++i) {
+      std::string name = GetMultiDexClassesDexName(i);
+      std::string fake_location = GetMultiDexLocation(i, location.c_str());
+      std::unique_ptr<const DexFile> next_dex_file(OpenOneDexFileFromZip(zip_archive,
+                                                                         name.c_str(),
+                                                                         fake_location,
+                                                                         verify,
+                                                                         verify_checksum,
+                                                                         error_msg,
+                                                                         &error_code));
+      if (next_dex_file.get() == nullptr) {
+        if (error_code != ZipOpenErrorCode::kEntryNotFound) {
+          LOG(WARNING) << "Zip open failed: " << *error_msg;
+        }
+        break;
+      } else {
+        dex_files->push_back(std::move(next_dex_file));
+      }
+
+      if (i == kWarnOnManyDexFilesThreshold) {
+        LOG(WARNING) << location << " has in excess of " << kWarnOnManyDexFilesThreshold
+                     << " dex files. Please consider coalescing and shrinking the number to "
+                        "avoid runtime overhead.";
+      }
+
+      if (i == std::numeric_limits<size_t>::max()) {
+        LOG(ERROR) << "Overflow in number of dex files!";
+        break;
+      }
+    }
+
+    return true;
+  }
+}
 }  // namespace art
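The enumeration loop above leans on the multidex naming convention: entry 0 is classes.dex and entry i (for i >= 1) is classes{i + 1}.dex, matching the contract of GetMultiDexClassesDexName. An illustrative reimplementation of that rule:

    #include <cstdio>
    #include <string>

    // Toy version of the naming rule; the real logic lives in
    // DexFileLoader::GetMultiDexClassesDexName.
    std::string MultiDexName(size_t index) {
      return index == 0 ? "classes.dex"
                        : "classes" + std::to_string(index + 1) + ".dex";
    }

    int main() {
      for (size_t i = 0; i < 3; ++i) {
        std::printf("%zu -> %s\n", i, MultiDexName(i).c_str());
      }
      // Prints: 0 -> classes.dex, 1 -> classes2.dex, 2 -> classes3.dex
    }
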
diff --git a/runtime/dex/dex_file_loader.h b/runtime/dex/dex_file_loader.h
index 4e45fb0..05a51d0 100644
--- a/runtime/dex/dex_file_loader.h
+++ b/runtime/dex/dex_file_loader.h
@@ -28,7 +28,8 @@
 class DexFileContainer;
 class MemMap;
 class OatDexFile;
-class ZipArchive;
+
+class DexZipArchive;
 
 // Class that is used to open dex files and deal with corresponding multidex and location logic.
 class DexFileLoader {
@@ -46,68 +47,10 @@
   // Return true if the corresponding version and magic is valid.
   static bool IsVersionAndMagicValid(const uint8_t* magic);
 
-  virtual ~DexFileLoader() { }
-
-  // Returns the checksums of a file for comparison with GetLocationChecksum().
-  // For .dex files, this is the single header checksum.
-  // For zip files, this is the zip entry CRC32 checksum for classes.dex and
-  // each additional multidex entry classes2.dex, classes3.dex, etc.
-  // If a valid zip_fd is provided the file content will be read directly from
-  // the descriptor and `filename` will be used as alias for error logging. If
-  // zip_fd is -1, the method will try to open the `filename` and read the
-  // content from it.
-  // Return true if the checksums could be found, false otherwise.
-  virtual bool GetMultiDexChecksums(const char* filename,
-                                    std::vector<uint32_t>* checksums,
-                                    std::string* error_msg,
-                                    int zip_fd = -1) const;
-
   // Check whether a location denotes a multidex dex file. This is a very simple check: returns
   // whether the string contains the separator character.
   static bool IsMultiDexLocation(const char* location);
 
-  // Opens .dex file, backed by existing memory
-  virtual std::unique_ptr<const DexFile> Open(const uint8_t* base,
-                                              size_t size,
-                                              const std::string& location,
-                                              uint32_t location_checksum,
-                                              const OatDexFile* oat_dex_file,
-                                              bool verify,
-                                              bool verify_checksum,
-                                              std::string* error_msg) const;
-
-  // Opens .dex file that has been memory-mapped by the caller.
-  virtual std::unique_ptr<const DexFile> Open(const std::string& location,
-                                              uint32_t location_checkum,
-                                              std::unique_ptr<MemMap> mem_map,
-                                              bool verify,
-                                              bool verify_checksum,
-                                              std::string* error_msg) const;
-
-  // Opens all .dex files found in the file, guessing the container format based on file extension.
-  virtual bool Open(const char* filename,
-                    const std::string& location,
-                    bool verify,
-                    bool verify_checksum,
-                    std::string* error_msg,
-                    std::vector<std::unique_ptr<const DexFile>>* dex_files) const;
-
-  // Open a single dex file from an fd. This function closes the fd.
-  virtual std::unique_ptr<const DexFile> OpenDex(int fd,
-                                                 const std::string& location,
-                                                 bool verify,
-                                                 bool verify_checksum,
-                                                 bool mmap_shared,
-                                                 std::string* error_msg) const;
-
-  // Opens dex files from within a .jar, .zip, or .apk file
-  virtual bool OpenZip(int fd,
-                       const std::string& location,
-                       bool verify,
-                       bool verify_checksum,
-                       std::string* error_msg,
-                       std::vector<std::unique_ptr<const DexFile>>* dex_files) const;
-
   // Return the name of the index-th classes.dex in a multidex zip file. This is classes.dex for
   // index == 0, and classes{index + 1}.dex else.
   static std::string GetMultiDexClassesDexName(size_t index);
@@ -151,6 +94,42 @@
     return (pos == std::string::npos) ? std::string() : location.substr(pos);
   }
 
+  virtual ~DexFileLoader() { }
+
+  // Returns the checksums of a file for comparison with GetLocationChecksum().
+  // For .dex files, this is the single header checksum.
+  // For zip files, this is the zip entry CRC32 checksum for classes.dex and
+  // each additional multidex entry classes2.dex, classes3.dex, etc.
+  // If a valid zip_fd is provided the file content will be read directly from
+  // the descriptor and `filename` will be used as alias for error logging. If
+  // zip_fd is -1, the method will try to open the `filename` and read the
+  // content from it.
+  // Return true if the checksums could be found, false otherwise.
+  virtual bool GetMultiDexChecksums(const char* filename,
+                                    std::vector<uint32_t>* checksums,
+                                    std::string* error_msg,
+                                    int zip_fd = -1) const;
+
+  // Opens .dex file, backed by existing memory
+  virtual std::unique_ptr<const DexFile> Open(const uint8_t* base,
+                                              size_t size,
+                                              const std::string& location,
+                                              uint32_t location_checksum,
+                                              const OatDexFile* oat_dex_file,
+                                              bool verify,
+                                              bool verify_checksum,
+                                              std::string* error_msg) const;
+
+  // Opens all .dex files found in the memory map, guessing the container format based on file
+  // extension.
+  virtual bool OpenAll(const uint8_t* base,
+                       size_t size,
+                       const std::string& location,
+                       bool verify,
+                       bool verify_checksum,
+                       std::string* error_msg,
+                       std::vector<std::unique_ptr<const DexFile>>* dex_files) const;
+
  protected:
   enum class ZipOpenErrorCode {
     kNoError,
@@ -179,30 +158,23 @@
                                              VerifyResult* verify_result);
 
  private:
-  virtual std::unique_ptr<const DexFile> OpenFile(int fd,
-                                                  const std::string& location,
-                                                  bool verify,
-                                                  bool verify_checksum,
-                                                  bool mmap_shared,
-                                                  std::string* error_msg) const;
-
   // Open all classesXXX.dex files from a zip archive.
-  virtual bool OpenAllDexFilesFromZip(const ZipArchive& zip_archive,
-                                      const std::string& location,
-                                      bool verify,
-                                      bool verify_checksum,
-                                      std::string* error_msg,
-                                      std::vector<std::unique_ptr<const DexFile>>* dex_files) const;
+  bool OpenAllDexFilesFromZip(const DexZipArchive& zip_archive,
+                              const std::string& location,
+                              bool verify,
+                              bool verify_checksum,
+                              std::string* error_msg,
+                              std::vector<std::unique_ptr<const DexFile>>* dex_files) const;
 
   // Opens the .dex file at the given entry_name in a zip archive. error_code is undefined
   // when a non-null dex file is returned.
-  virtual std::unique_ptr<const DexFile> OpenOneDexFileFromZip(const ZipArchive& zip_archive,
-                                                               const char* entry_name,
-                                                               const std::string& location,
-                                                               bool verify,
-                                                               bool verify_checksum,
-                                                               std::string* error_msg,
-                                                               ZipOpenErrorCode* error_code) const;
+  std::unique_ptr<const DexFile> OpenOneDexFileFromZip(const DexZipArchive& zip_archive,
+                                                       const char* entry_name,
+                                                       const std::string& location,
+                                                       bool verify,
+                                                       bool verify_checksum,
+                                                       std::string* error_msg,
+                                                       ZipOpenErrorCode* error_code) const;
 };
 
 }  // namespace art
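A minimal sketch, not part of the patch, of how the loader entry points declared above might be driven; `map_base` and `map_size` are assumed to come from the caller's mapping of the file:

  DexFileLoader loader;
  std::vector<std::unique_ptr<const DexFile>> dex_files;
  std::string error_msg;
  // OpenAll() guesses the container format (raw dex vs. zip) from the location.
  if (!loader.OpenAll(map_base, map_size, "/data/app/base.apk",
                      /*verify=*/true, /*verify_checksum=*/true,
                      &error_msg, &dex_files)) {
    LOG(ERROR) << "Failed to open dex files: " << error_msg;
  }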
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 74e7c18..48a56f2 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -91,6 +91,7 @@
   V(Asin, double, double) \
   V(Atan, double, double) \
   V(Atan2, double, double, double) \
+  V(Pow, double, double, double) \
   V(Cbrt, double, double) \
   V(Cosh, double, double) \
   V(Exp, double, double) \
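The list above is an X-macro: each consumer defines V and includes the list, so the new Pow row automatically flows into every table built from it. A minimal sketch, assuming the list macro is named QUICK_ENTRYPOINT_LIST as in this header and using an illustrative struct name:

  #define SKETCH_FIELD(name, rettype, ...) rettype (*p##name)(__VA_ARGS__);
  struct QuickEntryPointsSketch {
    QUICK_ENTRYPOINT_LIST(SKETCH_FIELD)  // The Pow row expands to: double (*pPow)(double, double);
  };
  #undef SKETCH_FIELD

This is why the entrypoints_order_test.cc hunk below only needs to split the pAtan2/pCbrt offset check around the new pPow slot.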
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index 565b4ed..4b26bee 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -15,8 +15,11 @@
  */
 
 #include "callee_save_frame.h"
+#include "dex/code_item_accessors-inl.h"
+#include "dex/dex_instruction-inl.h"
 #include "common_throws.h"
 #include "mirror/object-inl.h"
+#include "nth_caller_visitor.h"
 #include "thread.h"
 #include "well_known_classes.h"
 
@@ -111,6 +114,21 @@
                                                      Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
+  if (dest_type == nullptr) {
+    // Find the target class of the check-cast when the bitstring check was used (dest_type == null).
+    NthCallerVisitor visitor(self, 0u);
+    visitor.WalkStack();
+    DCHECK(visitor.caller != nullptr);
+    uint32_t dex_pc = visitor.GetDexPc();
+    CodeItemDataAccessor accessor(visitor.caller);
+    const Instruction& check_cast = accessor.InstructionAt(dex_pc);
+    DCHECK_EQ(check_cast.Opcode(), Instruction::CHECK_CAST);
+    dex::TypeIndex type_index(check_cast.VRegB_21c());
+    ClassLinker* linker = Runtime::Current()->GetClassLinker();
+    dest_type = linker->LookupResolvedType(type_index, visitor.caller).Ptr();
+    CHECK(dest_type != nullptr) << "Target class should have been previously resolved: "
+        << visitor.caller->GetDexFile()->PrettyType(type_index);
+  }
   DCHECK(!dest_type->IsAssignableFrom(src_type));
   ThrowClassCastException(dest_type, src_type);
   self->QuickDeliverException();
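For reference, the slow path above leans on the dex encoding of the instruction it walks back to (format 21c, which the accessors used above decode):

  // check-cast vAA, type@BBBB   (Instruction::CHECK_CAST, format 21c)
  //   VRegA_21c(): register holding the object being cast
  //   VRegB_21c(): type index of the cast target, as read above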
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 7c912d0..1fdf439 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -238,7 +238,8 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAcos, pAsin, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAsin, pAtan, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan, pAtan2, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan2, pCbrt, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan2, pPow, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pPow, pCbrt, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCbrt, pCosh, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCosh, pExp, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pExp, pExpm1, sizeof(void*));
diff --git a/runtime/image.cc b/runtime/image.cc
index dd0c148..8e3615f 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -26,7 +26,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '5', '3', '\0' };  // ClassStatus in high bits.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '5', '4', '\0' };  // Math.pow() intrinsic.
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
index 99a4f76..681a582 100644
--- a/runtime/interpreter/interpreter_intrinsics.cc
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -478,6 +478,7 @@
     UNIMPLEMENTED_CASE(MathLog /* (D)D */)
     UNIMPLEMENTED_CASE(MathLog10 /* (D)D */)
     UNIMPLEMENTED_CASE(MathNextAfter /* (DD)D */)
+    UNIMPLEMENTED_CASE(MathPow /* (DD)D */)
     UNIMPLEMENTED_CASE(MathSinh /* (D)D */)
     INTRINSIC_CASE(MathTan)
     UNIMPLEMENTED_CASE(MathTanh /* (D)D */)
diff --git a/runtime/intrinsics_list.h b/runtime/intrinsics_list.h
index d007728..da08793 100644
--- a/runtime/intrinsics_list.h
+++ b/runtime/intrinsics_list.h
@@ -136,6 +136,7 @@
   V(MathAsin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "asin", "(D)D") \
   V(MathAtan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "atan", "(D)D") \
   V(MathAtan2, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "atan2", "(DD)D") \
+  V(MathPow, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "pow", "(DD)D") \
   V(MathCbrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "cbrt", "(D)D") \
   V(MathCosh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "cosh", "(D)D") \
   V(MathExp, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "exp", "(D)D") \
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
index 4d1c85a..0e295e2 100644
--- a/runtime/jit/debugger_interface.cc
+++ b/runtime/jit/debugger_interface.cc
@@ -42,6 +42,7 @@
     JITCodeEntry* prev_;
     const uint8_t *symfile_addr_;
     uint64_t symfile_size_;
+    uint32_t ref_count;  // ART internal field.
   };
 
   struct JITDescriptor {
@@ -69,10 +70,11 @@
   JITDescriptor __jit_debug_descriptor = { 1, JIT_NOACTION, nullptr, nullptr };
 }
 
-static Mutex g_jit_debug_mutex("JIT debug interface lock", kJitDebugInterfaceLock);
+Mutex g_jit_debug_mutex("JIT debug interface lock", kJitDebugInterfaceLock);
 
-static JITCodeEntry* CreateJITCodeEntryInternal(std::vector<uint8_t> symfile)
-    REQUIRES(g_jit_debug_mutex) {
+static size_t g_jit_debug_mem_usage = 0;
+
+JITCodeEntry* CreateJITCodeEntry(const std::vector<uint8_t>& symfile) {
   DCHECK_NE(symfile.size(), 0u);
 
   // Make a copy of the buffer. We want to shrink it anyway.
@@ -85,20 +87,20 @@
   entry->symfile_addr_ = symfile_copy;
   entry->symfile_size_ = symfile.size();
   entry->prev_ = nullptr;
-
+  entry->ref_count = 0;
   entry->next_ = __jit_debug_descriptor.first_entry_;
   if (entry->next_ != nullptr) {
     entry->next_->prev_ = entry;
   }
+  g_jit_debug_mem_usage += sizeof(JITCodeEntry) + entry->symfile_size_;
   __jit_debug_descriptor.first_entry_ = entry;
   __jit_debug_descriptor.relevant_entry_ = entry;
-
   __jit_debug_descriptor.action_flag_ = JIT_REGISTER_FN;
   (*__jit_debug_register_code_ptr)();
   return entry;
 }
 
-static void DeleteJITCodeEntryInternal(JITCodeEntry* entry) REQUIRES(g_jit_debug_mutex) {
+void DeleteJITCodeEntry(JITCodeEntry* entry) {
   if (entry->prev_ != nullptr) {
     entry->prev_->next_ = entry->next_;
   } else {
@@ -109,6 +111,7 @@
     entry->next_->prev_ = entry->prev_;
   }
 
+  g_jit_debug_mem_usage -= sizeof(JITCodeEntry) + entry->symfile_size_;
   __jit_debug_descriptor.relevant_entry_ = entry;
   __jit_debug_descriptor.action_flag_ = JIT_UNREGISTER_FN;
   (*__jit_debug_register_code_ptr)();
@@ -116,41 +119,33 @@
   delete entry;
 }
 
-JITCodeEntry* CreateJITCodeEntry(std::vector<uint8_t> symfile) {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, g_jit_debug_mutex);
-  return CreateJITCodeEntryInternal(std::move(symfile));
+// Mapping from code address to entry. Used to manage life-time of the entries.
+static std::unordered_map<uintptr_t, JITCodeEntry*> g_jit_code_entries
+    GUARDED_BY(g_jit_debug_mutex);
+
+void IncrementJITCodeEntryRefcount(JITCodeEntry* entry, uintptr_t code_address) {
+  DCHECK(entry != nullptr);
+  DCHECK_EQ(g_jit_code_entries.count(code_address), 0u);
+  entry->ref_count++;
+  g_jit_code_entries.emplace(code_address, entry);
 }
 
-void DeleteJITCodeEntry(JITCodeEntry* entry) {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, g_jit_debug_mutex);
-  DeleteJITCodeEntryInternal(entry);
-}
-
-// Mapping from address to entry.  It takes ownership of the entries
-// so that the user of the JIT interface does not have to store them.
-static std::unordered_map<uintptr_t, JITCodeEntry*> g_jit_code_entries;
-
-void CreateJITCodeEntryForAddress(uintptr_t address, std::vector<uint8_t> symfile) {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, g_jit_debug_mutex);
-  DCHECK_NE(address, 0u);
-  DCHECK(g_jit_code_entries.find(address) == g_jit_code_entries.end());
-  JITCodeEntry* entry = CreateJITCodeEntryInternal(std::move(symfile));
-  g_jit_code_entries.emplace(address, entry);
-}
-
-bool DeleteJITCodeEntryForAddress(uintptr_t address) {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, g_jit_debug_mutex);
-  const auto it = g_jit_code_entries.find(address);
-  if (it == g_jit_code_entries.end()) {
-    return false;
+void DecrementJITCodeEntryRefcount(JITCodeEntry* entry, uintptr_t code_address) {
+  DCHECK(entry != nullptr);
+  DCHECK(g_jit_code_entries[code_address] == entry);
+  if (--entry->ref_count == 0) {
+    DeleteJITCodeEntry(entry);
   }
-  DeleteJITCodeEntryInternal(it->second);
-  g_jit_code_entries.erase(it);
-  return true;
+  g_jit_code_entries.erase(code_address);
+}
+
+JITCodeEntry* GetJITCodeEntry(uintptr_t code_address) {
+  auto it = g_jit_code_entries.find(code_address);
+  return it == g_jit_code_entries.end() ? nullptr : it->second;
+}
+
+size_t GetJITCodeEntryMemUsage() {
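+  // Approximation: tracked symfile bytes plus map bookkeeping, counting each
+  // map entry as two pointers.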
+  return g_jit_debug_mem_usage + g_jit_code_entries.size() * 2 * sizeof(void*);
 }
 
 }  // namespace art
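A minimal sketch of the new calling convention, with `symfile` and `code_ptr` assumed from the caller's context: callers now take g_jit_debug_mutex themselves and tie an entry's lifetime to a code address:

  MutexLock mu(Thread::Current(), g_jit_debug_mutex);
  JITCodeEntry* entry = CreateJITCodeEntry(symfile);
  IncrementJITCodeEntryRefcount(entry, reinterpret_cast<uintptr_t>(code_ptr));
  // ... later, under the same mutex:
  DecrementJITCodeEntryRefcount(entry, reinterpret_cast<uintptr_t>(code_ptr));
  // The entry itself is freed once its ref_count drops to zero.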
diff --git a/runtime/jit/debugger_interface.h b/runtime/jit/debugger_interface.h
index d9bf331..9aec988 100644
--- a/runtime/jit/debugger_interface.h
+++ b/runtime/jit/debugger_interface.h
@@ -21,28 +21,45 @@
 #include <memory>
 #include <vector>
 
+#include "base/array_ref.h"
+#include "base/mutex.h"
+
 namespace art {
 
 extern "C" {
   struct JITCodeEntry;
 }
 
+extern Mutex g_jit_debug_mutex;
+
 // Notify native debugger about new JITed code by passing in-memory ELF.
 // It takes ownership of the in-memory ELF file.
-JITCodeEntry* CreateJITCodeEntry(std::vector<uint8_t> symfile);
+JITCodeEntry* CreateJITCodeEntry(const std::vector<uint8_t>& symfile)
+    REQUIRES(g_jit_debug_mutex);
 
 // Notify native debugger that JITed code has been removed.
 // It also releases the associated in-memory ELF file.
-void DeleteJITCodeEntry(JITCodeEntry* entry);
+void DeleteJITCodeEntry(JITCodeEntry* entry)
+    REQUIRES(g_jit_debug_mutex);
 
-// Notify native debugger about new JITed code by passing in-memory ELF.
-// The address is used only to uniquely identify the entry.
-// It takes ownership of the in-memory ELF file.
-void CreateJITCodeEntryForAddress(uintptr_t address, std::vector<uint8_t> symfile);
+// Helper method to track life-time of JITCodeEntry.
+// It registers the given code address as being described by the given entry.
+void IncrementJITCodeEntryRefcount(JITCodeEntry* entry, uintptr_t code_address)
+    REQUIRES(g_jit_debug_mutex);
 
-// Notify native debugger that JITed code has been removed.
-// Returns false if entry for the given address was not found.
-bool DeleteJITCodeEntryForAddress(uintptr_t address);
+// Helper method to track life-time of JITCodeEntry.
+// It de-registers given code address as being described by the given entry.
+void DecrementJITCodeEntryRefcount(JITCodeEntry* entry, uintptr_t code_address)
+    REQUIRES(g_jit_debug_mutex);
+
+// Find the registered JITCodeEntry for given code address.
+// There can be only one entry per address at any given time.
+JITCodeEntry* GetJITCodeEntry(uintptr_t code_address)
+    REQUIRES(g_jit_debug_mutex);
+
+// Returns approximate memory used by all JITCodeEntries.
+size_t GetJITCodeEntryMemUsage()
+    REQUIRES(g_jit_debug_mutex);
 
 }  // namespace art
 
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 791c338..6d27cfe 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -23,7 +23,6 @@
 #include "base/timing_logger.h"
 #include "jit/profile_saver_options.h"
 #include "obj_ptr.h"
-#include "profile_compilation_info.h"
 #include "thread_pool.h"
 
 namespace art {
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 659c55a..e41667a 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -549,7 +549,11 @@
   uintptr_t allocation = FromCodeToAllocation(code_ptr);
   // Notify native debugger that we are about to remove the code.
   // It does nothing if we are not using native debugger.
-  DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
+  MutexLock mu(Thread::Current(), g_jit_debug_mutex);
+  JITCodeEntry* entry = GetJITCodeEntry(reinterpret_cast<uintptr_t>(code_ptr));
+  if (entry != nullptr) {
+    DecrementJITCodeEntryRefcount(entry, reinterpret_cast<uintptr_t>(code_ptr));
+  }
   if (OatQuickMethodHeader::FromCodePointer(code_ptr)->IsOptimized()) {
     FreeData(GetRootTable(code_ptr));
   }  // else this is a JNI stub without any data.
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index 74bf237..33fa0d6 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -47,6 +47,7 @@
 #include "os.h"
 #include "safe_map.h"
 #include "utils.h"
+#include "zip_archive.h"
 
 namespace art {
 
@@ -56,6 +57,10 @@
 // before corresponding method_encodings and class_ids.
 const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '1', '0', '\0' };
 
+// The name of the profile entry in the dex metadata file.
+// DO NOT CHANGE THIS! (it is the analogue of classes.dex in apk files).
+const char* ProfileCompilationInfo::kDexMetadataProfileEntry = "primary.prof";
+
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
 // Debug flag to ignore checksums when testing if a method or a class is present in the profile.
@@ -194,7 +199,7 @@
 
   int fd = profile_file->Fd();
 
-  ProfileLoadSatus status = LoadInternal(fd, &error);
+  ProfileLoadStatus status = LoadInternal(fd, &error);
   if (status == kProfileLoadSuccess) {
     return true;
   }
@@ -225,7 +230,7 @@
 
   int fd = profile_file->Fd();
 
-  ProfileLoadSatus status = LoadInternal(fd, &error);
+  ProfileLoadStatus status = LoadInternal(fd, &error);
   if (status == kProfileLoadSuccess) {
     return true;
   }
@@ -883,25 +888,13 @@
   return false;
 }
 
-ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::SafeBuffer::FillFromFd(
-      int fd,
-      const std::string& source,
-      /*out*/std::string* error) {
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::SafeBuffer::Fill(
+      ProfileSource& source,
+      const std::string& debug_stage,
+      /*out*/ std::string* error) {
   size_t byte_count = (ptr_end_ - ptr_current_) * sizeof(*ptr_current_);
   uint8_t* buffer = ptr_current_;
-  while (byte_count > 0) {
-    int bytes_read = TEMP_FAILURE_RETRY(read(fd, buffer, byte_count));
-    if (bytes_read == 0) {
-      *error += "Profile EOF reached prematurely for " + source;
-      return kProfileLoadBadData;
-    } else if (bytes_read < 0) {
-      *error += "Profile IO error for " + source + strerror(errno);
-      return kProfileLoadIOError;
-    }
-    byte_count -= bytes_read;
-    buffer += bytes_read;
-  }
-  return kProfileLoadSuccess;
+  return source.Read(buffer, byte_count, debug_stage, error);
 }
 
 size_t ProfileCompilationInfo::SafeBuffer::CountUnreadBytes() {
@@ -916,8 +909,8 @@
   ptr_current_ += data_size;
 }
 
-ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileHeader(
-      int fd,
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::ReadProfileHeader(
+      ProfileSource& source,
       /*out*/uint8_t* number_of_dex_files,
       /*out*/uint32_t* uncompressed_data_size,
       /*out*/uint32_t* compressed_data_size,
@@ -932,7 +925,7 @@
 
   SafeBuffer safe_buffer(kMagicVersionSize);
 
-  ProfileLoadSatus status = safe_buffer.FillFromFd(fd, "ReadProfileHeader", error);
+  ProfileLoadStatus status = safe_buffer.Fill(source, "ReadProfileHeader", error);
   if (status != kProfileLoadSuccess) {
     return status;
   }
@@ -972,7 +965,7 @@
   return true;
 }
 
-ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileLineHeader(
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::ReadProfileLineHeader(
     SafeBuffer& buffer,
     /*out*/ProfileLineHeader* line_header,
     /*out*/std::string* error) {
@@ -1003,7 +996,7 @@
   return kProfileLoadSuccess;
 }
 
-ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileLine(
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::ReadProfileLine(
       SafeBuffer& buffer,
       uint8_t number_of_dex_files,
       const ProfileLineHeader& line_header,
@@ -1046,7 +1039,7 @@
 bool ProfileCompilationInfo::Load(int fd, bool merge_classes) {
   std::string error;
 
-  ProfileLoadSatus status = LoadInternal(fd, &error, merge_classes);
+  ProfileLoadStatus status = LoadInternal(fd, &error, merge_classes);
 
   if (status == kProfileLoadSuccess) {
     return true;
@@ -1148,31 +1141,136 @@
   return true;
 }
 
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::OpenSource(
+    int32_t fd,
+    /*out*/ std::unique_ptr<ProfileSource>* source,
+    /*out*/ std::string* error) {
+  if (IsProfileFile(fd)) {
+    source->reset(ProfileSource::Create(fd));
+    return kProfileLoadSuccess;
+  } else {
+    std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, "profile", error));
+    if (zip_archive.get() == nullptr) {
+      *error = "Could not open the profile zip archive";
+      return kProfileLoadBadData;
+    }
+    std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(kDexMetadataProfileEntry, error));
+    if (zip_entry == nullptr) {
+      // Allow archives without the profile entry. In this case, create an empty profile.
+      // This gives more flexibility when re-using archives that may lack the entry
+      // (e.g. dex metadata files).
+      LOG(WARNING) << std::string("Could not find entry ") + kDexMetadataProfileEntry +
+            " in the zip archive. Creating an empty profile.";
+      source->reset(ProfileSource::Create(nullptr));
+      return kProfileLoadSuccess;
+    }
+    if (zip_entry->GetUncompressedLength() == 0) {
+      *error = "Empty profile entry in the zip archive.";
+      return kProfileLoadBadData;
+    }
+
+    std::unique_ptr<MemMap> map;
+    if (zip_entry->IsUncompressed()) {
+      // Map uncompressed files within zip as file-backed to avoid a dirty copy.
+      map.reset(zip_entry->MapDirectlyFromFile(kDexMetadataProfileEntry, error));
+      if (map == nullptr) {
+        LOG(WARNING) << "Can't mmap profile directly; "
+                     << "is your ZIP file corrupted? Falling back to extraction.";
+        // Try again with Extraction which still has a chance of recovery.
+      }
+    }
+
+    if (map == nullptr) {
+      // Default path for compressed ZIP entries, and fallback for stored ZIP entries.
+      // TODO(calin) pass along file names to assist with debugging.
+      map.reset(zip_entry->ExtractToMemMap("profile file", kDexMetadataProfileEntry, error));
+    }
+
+    if (map != nullptr) {
+      source->reset(ProfileSource::Create(std::move(map)));
+      return kProfileLoadSuccess;
+    } else {
+      return kProfileLoadBadData;
+    }
+  }
+}
+
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::ProfileSource::Read(
+    uint8_t* buffer,
+    size_t byte_count,
+    const std::string& debug_stage,
+    std::string* error) {
+  if (IsMemMap()) {
+    if (mem_map_cur_ + byte_count > mem_map_->Size()) {
+      return kProfileLoadBadData;
+    }
+    for (size_t i = 0; i < byte_count; i++) {
+      buffer[i] = *(mem_map_->Begin() + mem_map_cur_);
+      mem_map_cur_++;
+    }
+  } else {
+    while (byte_count > 0) {
+      int bytes_read = TEMP_FAILURE_RETRY(read(fd_, buffer, byte_count));
+      if (bytes_read == 0) {
+        *error += "Profile EOF reached prematurely for " + debug_stage;
+        return kProfileLoadBadData;
+      } else if (bytes_read < 0) {
+        *error += "Profile IO error for " + debug_stage + strerror(errno);
+        return kProfileLoadIOError;
+      }
+      byte_count -= bytes_read;
+      buffer += bytes_read;
+    }
+  }
+  return kProfileLoadSuccess;
+}
+
+bool ProfileCompilationInfo::ProfileSource::HasConsumedAllData() const {
+  return IsMemMap()
+      ? (mem_map_ == nullptr || mem_map_cur_ == mem_map_->Size())
+      : (testEOF(fd_) == 0);
+}
+
+bool ProfileCompilationInfo::ProfileSource::HasEmptyContent() const {
+  if (IsMemMap()) {
+    return mem_map_ == nullptr || mem_map_->Size() == 0;
+  } else {
+    struct stat stat_buffer;
+    if (fstat(fd_, &stat_buffer) != 0) {
+      return false;
+    }
+    return stat_buffer.st_size == 0;
+  }
+}
+
 // TODO(calin): fail fast if the dex checksums don't match.
-ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::LoadInternal(
-      int fd, std::string* error, bool merge_classes) {
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::LoadInternal(
+      int32_t fd, std::string* error, bool merge_classes) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   DCHECK_GE(fd, 0);
 
-  struct stat stat_buffer;
-  if (fstat(fd, &stat_buffer) != 0) {
-    return kProfileLoadIOError;
+  std::unique_ptr<ProfileSource> source;
+  ProfileLoadStatus status = OpenSource(fd, &source, error);
+  if (status != kProfileLoadSuccess) {
+    return status;
   }
+
   // We allow empty profile files.
   // Profiles may be created by ActivityManager or installd before we manage to
   // process them in the runtime or profman.
-  if (stat_buffer.st_size == 0) {
+  if (source->HasEmptyContent()) {
     return kProfileLoadSuccess;
   }
+
   // Read profile header: magic + version + number_of_dex_files.
   uint8_t number_of_dex_files;
   uint32_t uncompressed_data_size;
   uint32_t compressed_data_size;
-  ProfileLoadSatus status = ReadProfileHeader(fd,
-                                              &number_of_dex_files,
-                                              &uncompressed_data_size,
-                                              &compressed_data_size,
-                                              error);
+  status = ReadProfileHeader(*source,
+                             &number_of_dex_files,
+                             &uncompressed_data_size,
+                             &compressed_data_size,
+                             error);
 
   if (status != kProfileLoadSuccess) {
     return status;
@@ -1192,16 +1290,14 @@
   }
 
   std::unique_ptr<uint8_t[]> compressed_data(new uint8_t[compressed_data_size]);
-  bool bytes_read_success =
-      android::base::ReadFully(fd, compressed_data.get(), compressed_data_size);
-
-  if (testEOF(fd) != 0) {
-    *error += "Unexpected data in the profile file.";
-    return kProfileLoadBadData;
+  status = source->Read(compressed_data.get(), compressed_data_size, "ReadContent", error);
+  if (status != kProfileLoadSuccess) {
+    *error += "Unable to read compressed profile data";
+    return status;
   }
 
-  if (!bytes_read_success) {
-    *error += "Unable to read compressed profile data";
+  if (!source->HasConsumedAllData()) {
+    *error += "Unexpected data in the profile file.";
     return kProfileLoadBadData;
   }
 
@@ -1904,4 +2000,34 @@
   return ret;
 }
 
+bool ProfileCompilationInfo::IsProfileFile(int fd) {
+  // First check if it's an empty file as we allow empty profile files.
+  // Profiles may be created by ActivityManager or installd before we manage to
+  // process them in the runtime or profman.
+  struct stat stat_buffer;
+  if (fstat(fd, &stat_buffer) != 0) {
+    return false;
+  }
+
+  if (stat_buffer.st_size == 0) {
+    return true;
+  }
+
+  // The file is not empty. Check if it contains the profile magic.
+  size_t byte_count = sizeof(kProfileMagic);
+  uint8_t buffer[sizeof(kProfileMagic)];
+  if (!android::base::ReadFully(fd, buffer, byte_count)) {
+    return false;
+  }
+
+  // Reset the offset to prepare the file for reading.
+  off_t rc = TEMP_FAILURE_RETRY(lseek(fd, 0, SEEK_SET));
+  if (rc == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to reset the offset";
+    return false;
+  }
+
+  return memcmp(buffer, kProfileMagic, byte_count) == 0;
+}
+
 }  // namespace art
diff --git a/runtime/jit/profile_compilation_info.h b/runtime/jit/profile_compilation_info.h
index 7c30dee..29a4c11 100644
--- a/runtime/jit/profile_compilation_info.h
+++ b/runtime/jit/profile_compilation_info.h
@@ -28,6 +28,7 @@
 #include "dex/dex_file.h"
 #include "dex/dex_file_types.h"
 #include "method_reference.h"
+#include "mem_map.h"
 #include "safe_map.h"
 #include "type_reference.h"
 
@@ -71,6 +72,8 @@
   static const uint8_t kProfileMagic[];
   static const uint8_t kProfileVersion[];
 
+  static const char* kDexMetadataProfileEntry;
+
   // Data structures for encoding the offline representation of inline caches.
   // This is exposed as public in order to make it available to dex2oat compilations
   // (see compiler/optimizing/inliner.cc).
@@ -410,8 +413,11 @@
   // Return all of the class descriptors in the profile for a set of dex files.
   std::unordered_set<std::string> GetClassDescriptors(const std::vector<const DexFile*>& dex_files);
 
+  // Return true if the fd points to a profile file.
+  bool IsProfileFile(int fd);
+
  private:
-  enum ProfileLoadSatus {
+  enum ProfileLoadStatus {
     kProfileLoadWouldOverwiteData,
     kProfileLoadIOError,
     kProfileLoadVersionMismatch,
@@ -577,6 +583,58 @@
     uint32_t num_method_ids;
   };
 
+  /**
+   * Encapsulate the source of profile data for loading.
+   * The source can be either a plain file or a zip file.
+   * For zip files, the profile entry is extracted into
+   * a memory map.
+   */
+  class ProfileSource {
+   public:
+    /**
+     * Create a profile source for the given fd. Ownership of the fd
+     * remains with the caller; this class will not attempt to close it at any
+     * point.
+     */
+    static ProfileSource* Create(int32_t fd) {
+      DCHECK_GT(fd, -1);
+      return new ProfileSource(fd, /*map*/ nullptr);
+    }
+
+    /**
+     * Create a profile source backed by a memory map. The map can be null, in
+     * which case it will be treated as an empty source.
+     */
+    static ProfileSource* Create(std::unique_ptr<MemMap>&& mem_map) {
+      return new ProfileSource(/*fd*/ -1, std::move(mem_map));
+    }
+
+    /**
+     * Read bytes from this source.
+     * Reading will advance the current source position so subsequent
+     * invocations will resume from the last position.
+     */
+    ProfileLoadStatus Read(uint8_t* buffer,
+                           size_t byte_count,
+                           const std::string& debug_stage,
+                           std::string* error);
+
+    /** Return true if the source contains no data. */
+    bool HasEmptyContent() const;
+    /** Return true if all the information from this source has been read. */
+    bool HasConsumedAllData() const;
+
+   private:
+    ProfileSource(int32_t fd, std::unique_ptr<MemMap>&& mem_map)
+        : fd_(fd), mem_map_(std::move(mem_map)), mem_map_cur_(0) {}
+
+    bool IsMemMap() const { return fd_ == -1; }
+
+    int32_t fd_;  // The fd is not owned by this class.
+    std::unique_ptr<MemMap> mem_map_;
+    size_t mem_map_cur_;  // Current position in the map to read from.
+  };
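A minimal sketch, with `fd` and `mem_map` assumed valid, of the two construction paths OpenSource() chooses between; both are then consumed through the same Read()/HasConsumedAllData() interface:

  std::unique_ptr<ProfileSource> source;
  source.reset(ProfileSource::Create(fd));                  // plain .prof file
  source.reset(ProfileSource::Create(std::move(mem_map)));  // entry extracted from a zip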
+
   // A helper structure to make sure we don't read past our buffers in the loops.
   struct SafeBuffer {
    public:
@@ -586,13 +644,9 @@
     }
 
     // Reads the content of the descriptor at the current position.
-    ProfileLoadSatus FillFromFd(int fd,
-                                const std::string& source,
-                                /*out*/std::string* error);
-
-    ProfileLoadSatus FillFromBuffer(uint8_t* buffer_ptr,
-                                    const std::string& source,
-                                    /*out*/std::string* error);
+    ProfileLoadStatus Fill(ProfileSource& source,
+                           const std::string& debug_stage,
+                           /*out*/std::string* error);
 
     // Reads an uint value (high bits to low bits) and advances the current pointer
     // with the number of bits read.
@@ -620,21 +674,27 @@
     uint8_t* ptr_current_;
   };
 
+  ProfileLoadStatus OpenSource(int32_t fd,
+                               /*out*/ std::unique_ptr<ProfileSource>* source,
+                               /*out*/ std::string* error);
+
   // Entry point for profile loading functionality.
-  ProfileLoadSatus LoadInternal(int fd, std::string* error, bool merge_classes = true);
+  ProfileLoadStatus LoadInternal(int32_t fd,
+                                 std::string* error,
+                                 bool merge_classes = true);
 
   // Read the profile header from the given fd and store the number of profile
   // lines into number_of_dex_files.
-  ProfileLoadSatus ReadProfileHeader(int fd,
-                                     /*out*/uint8_t* number_of_dex_files,
-                                     /*out*/uint32_t* size_uncompressed_data,
-                                     /*out*/uint32_t* size_compressed_data,
-                                     /*out*/std::string* error);
+  ProfileLoadStatus ReadProfileHeader(ProfileSource& source,
+                                      /*out*/uint8_t* number_of_dex_files,
+                                      /*out*/uint32_t* size_uncompressed_data,
+                                      /*out*/uint32_t* size_compressed_data,
+                                      /*out*/std::string* error);
 
   // Read the header of a profile line from the given fd.
-  ProfileLoadSatus ReadProfileLineHeader(SafeBuffer& buffer,
-                                         /*out*/ProfileLineHeader* line_header,
-                                         /*out*/std::string* error);
+  ProfileLoadStatus ReadProfileLineHeader(SafeBuffer& buffer,
+                                          /*out*/ProfileLineHeader* line_header,
+                                          /*out*/std::string* error);
 
   // Read individual elements from the profile line header.
   bool ReadProfileLineHeaderElements(SafeBuffer& buffer,
@@ -643,12 +703,12 @@
                                      /*out*/std::string* error);
 
   // Read a single profile line from the given fd.
-  ProfileLoadSatus ReadProfileLine(SafeBuffer& buffer,
-                                   uint8_t number_of_dex_files,
-                                   const ProfileLineHeader& line_header,
-                                   const SafeMap<uint8_t, uint8_t>& dex_profile_index_remap,
-                                   bool merge_classes,
-                                   /*out*/std::string* error);
+  ProfileLoadStatus ReadProfileLine(SafeBuffer& buffer,
+                                    uint8_t number_of_dex_files,
+                                    const ProfileLineHeader& line_header,
+                                    const SafeMap<uint8_t, uint8_t>& dex_profile_index_remap,
+                                    bool merge_classes,
+                                    /*out*/std::string* error);
 
   // Read all the classes from the buffer into the profile `info_` structure.
   bool ReadClasses(SafeBuffer& buffer,
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
index 08042cc..6ce9bcb 100644
--- a/runtime/jit/profile_compilation_info_test.cc
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include <gtest/gtest.h>
+#include <stdio.h>
 
 #include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
@@ -29,6 +30,7 @@
 #include "mirror/class_loader.h"
 #include "scoped_thread_state_change-inl.h"
 #include "type_reference.h"
+#include "ziparchive/zip_writer.h"
 
 namespace art {
 
@@ -268,6 +270,50 @@
     }
   }
 
+  void TestProfileLoadFromZip(const char* zip_entry,
+                              size_t zip_flags,
+                              bool should_succeed,
+                              bool should_succeed_with_empty_profile = false) {
+    // Create a valid profile.
+    ScratchFile profile;
+    ProfileCompilationInfo saved_info;
+    for (uint16_t i = 0; i < 10; i++) {
+      ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+      ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+    }
+    ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+    ASSERT_EQ(0, profile.GetFile()->Flush());
+
+    // Prepare the profile content for zipping.
+    ASSERT_TRUE(profile.GetFile()->ResetOffset());
+    uint64_t data_size = profile.GetFile()->GetLength();
+    std::unique_ptr<uint8_t> data(new uint8_t[data_size]);
+    ASSERT_TRUE(profile.GetFile()->ReadFully(data.get(), data_size));
+
+    // Zip the profile content.
+    ScratchFile zip;
+    FILE* file = fopen(zip.GetFile()->GetPath().c_str(), "wb");
+    ZipWriter writer(file);
+    writer.StartEntry(zip_entry, zip_flags);
+    writer.WriteBytes(data.get(), data_size);
+    writer.FinishEntry();
+    writer.Finish();
+    fflush(file);
+    fclose(file);
+
+    // Verify loading from the zip archive.
+    ProfileCompilationInfo loaded_info;
+    ASSERT_TRUE(zip.GetFile()->ResetOffset());
+    ASSERT_EQ(should_succeed, loaded_info.Load(zip.GetFile()->GetPath(), false));
+    if (should_succeed) {
+      if (should_succeed_with_empty_profile) {
+        ASSERT_TRUE(loaded_info.IsEmpty());
+      } else {
+        ASSERT_TRUE(loaded_info.Equals(saved_info));
+      }
+    }
+  }
+
   // Cannot sizeof the actual arrays so hard code the values here.
   // They should not change anyway.
   static constexpr int kProfileMagicSize = 4;
@@ -934,4 +980,64 @@
   }
 }
 
+TEST_F(ProfileCompilationInfoTest, LoadFromZipCompress) {
+  TestProfileLoadFromZip("primary.prof",
+                         ZipWriter::kCompress | ZipWriter::kAlign32,
+                         /*should_succeed*/true);
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFromZipUnCompress) {
+  TestProfileLoadFromZip("primary.prof",
+                         ZipWriter::kAlign32,
+                         /*should_succeed*/true);
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFromZipUnAligned) {
+  TestProfileLoadFromZip("primary.prof",
+                         0,
+                         /*should_succeed*/true);
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFromZipFailBadZipEntry) {
+  TestProfileLoadFromZip("invalid.profile.entry",
+                         0,
+                         /*should_succeed*/true,
+                         /*should_succeed_with_empty_profile*/true);
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFromZipFailBadProfile) {
+  // Create a bad profile.
+  ScratchFile profile;
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileMagic, kProfileMagicSize));
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileVersion, kProfileVersionSize));
+  // Write that we have at least one line.
+  uint8_t line_number[] = { 0, 1 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(line_number, sizeof(line_number)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Prepare the profile content for zipping.
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  uint64_t data_size = profile.GetFile()->GetLength();
+  std::unique_ptr<uint8_t> data(new uint8_t[data_size]);
+  ASSERT_TRUE(profile.GetFile()->ReadFully(data.get(), data_size));
+
+  // Zip the profile content.
+  ScratchFile zip;
+  FILE* file = fopen(zip.GetFile()->GetPath().c_str(), "wb");
+  ZipWriter writer(file);
+  writer.StartEntry("primary.prof", ZipWriter::kCompress | ZipWriter::kAlign32);
+  writer.WriteBytes(data.get(), data_size);
+  writer.FinishEntry();
+  writer.Finish();
+  fflush(file);
+  fclose(file);
+
+  // Check that we failed to load.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(zip.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(zip)));
+}
+
 }  // namespace art
diff --git a/runtime/oat.h b/runtime/oat.h
index 36099b9..8f81010 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  // Last oat version changed reason: ClassStatus in high bits.
-  static constexpr uint8_t kOatVersion[] = { '1', '3', '7', '\0' };
+  // Last oat version changed reason: Math.pow() intrinsic.
+  static constexpr uint8_t kOatVersion[] = { '1', '3', '8', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/subtype_check.h b/runtime/subtype_check.h
index 54d2f00..03a6d9c 100644
--- a/runtime/subtype_check.h
+++ b/runtime/subtype_check.h
@@ -283,6 +283,17 @@
     return SubtypeCheckInfo::kUninitialized;
   }
 
+  // Retrieve the state of this class's SubtypeCheckInfo.
+  //
+  // Cost: O(Depth(Class)).
+  //
+  // Returns: The precise SubtypeCheckInfo::State.
+  static SubtypeCheckInfo::State GetState(ClassPtr klass)
+      REQUIRES(Locks::subtype_check_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetSubtypeCheckInfo(klass).GetState();
+  }
+
   // Retrieve the path to root bitstring as a plain uintN_t value that is amenable to
   // be used by a fast check "encoded_src & mask_target == encoded_target".
   //
@@ -305,8 +316,9 @@
   static BitString::StorageType GetEncodedPathToRootForTarget(ClassPtr klass)
       REQUIRES(Locks::subtype_check_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK_EQ(SubtypeCheckInfo::kAssigned, GetSubtypeCheckInfo(klass).GetState());
-    return GetSubtypeCheckInfo(klass).GetEncodedPathToRoot();
+    SubtypeCheckInfo sci = GetSubtypeCheckInfo(klass);
+    DCHECK_EQ(SubtypeCheckInfo::kAssigned, sci.GetState());
+    return sci.GetEncodedPathToRoot();
   }
 
   // Retrieve the path to root bitstring mask as a plain uintN_t value that is amenable to
@@ -318,8 +330,9 @@
   static BitString::StorageType GetEncodedPathToRootMask(ClassPtr klass)
       REQUIRES(Locks::subtype_check_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK_EQ(SubtypeCheckInfo::kAssigned, GetSubtypeCheckInfo(klass).GetState());
-    return GetSubtypeCheckInfo(klass).GetEncodedPathToRootMask();
+    SubtypeCheckInfo sci = GetSubtypeCheckInfo(klass);
+    DCHECK_EQ(SubtypeCheckInfo::kAssigned, sci.GetState());
+    return sci.GetEncodedPathToRootMask();
   }
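A worked sketch of the fast check quoted above, "encoded_src & mask_target == encoded_target"; the bit patterns are invented for illustration:

  // Suppose target B encodes its path to root as 0b0101 with mask 0b1111,
  // and subclass C of B appends one more level: encoded_src = 0b00110101.
  uint32_t encoded_target = 0b0101;
  uint32_t mask_target    = 0b1111;
  uint32_t encoded_src    = 0b00110101;
  bool c_is_subtype_of_b = (encoded_src & mask_target) == encoded_target;  // true
  // An unrelated encoding such as 0b0110 fails: (0b0110 & 0b1111) != 0b0101.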
 
   // Is the source class a subclass of the target?
diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc
index 118cffe..cab91df 100644
--- a/runtime/vdex_file.cc
+++ b/runtime/vdex_file.cc
@@ -19,6 +19,7 @@
 #include <sys/mman.h>  // For the PROT_* and MAP_* constants.
 
 #include <memory>
+#include <unordered_set>
 
 #include <android-base/logging.h>
 
@@ -265,6 +266,8 @@
     // RETURN_VOID_NO_BARRIER instructions to RETURN_VOID instructions.
     return;
   }
+  // Make sure not to unquicken the same code item multiple times.
+  std::unordered_set<const DexFile::CodeItem*> unquickened_code_item;
   for (uint32_t i = 0; i < target_dex_file.NumClassDefs(); ++i) {
     const DexFile::ClassDef& class_def = target_dex_file.GetClassDef(i);
     const uint8_t* class_data = target_dex_file.GetClassData(class_def);
@@ -274,6 +277,10 @@
            class_it.Next()) {
         if (class_it.IsAtMethod() && class_it.GetMethodCodeItem() != nullptr) {
           const DexFile::CodeItem* code_item = class_it.GetMethodCodeItem();
+          if (!unquickened_code_item.emplace(code_item).second) {
+            // Already unquickened this code item, do not do it again.
+            continue;
+          }
           ArrayRef<const uint8_t> quicken_data;
           if (!quickening_info.empty()) {
             const uint32_t quickening_offset = GetQuickeningInfoOffset(
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 416ada8..afb3224 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -1685,10 +1685,15 @@
 
   for (const DexInstructionPcPair& inst : code_item_accessor_) {
     const size_t dex_pc = inst.DexPc();
-    RegisterLine* reg_line = reg_table_.GetLine(dex_pc);
-    if (reg_line != nullptr) {
-      vios->Stream() << reg_line->Dump(this) << "\n";
+
+    // Might be asked to dump before the table is initialized.
+    if (reg_table_.IsInitialized()) {
+      RegisterLine* reg_line = reg_table_.GetLine(dex_pc);
+      if (reg_line != nullptr) {
+        vios->Stream() << reg_line->Dump(this) << "\n";
+      }
     }
+
     vios->Stream()
         << StringPrintf("0x%04zx", dex_pc) << ": " << GetInstructionFlags(dex_pc).ToString() << " ";
     const bool kDumpHexOfInstruction = false;
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index cadf4eb..26c598f 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -77,6 +77,10 @@
   void Init(RegisterTrackingMode mode, InstructionFlags* flags, uint32_t insns_size,
             uint16_t registers_size, MethodVerifier* verifier);
 
+  bool IsInitialized() const {
+    return !register_lines_.empty();
+  }
+
   RegisterLine* GetLine(size_t idx) const {
     return register_lines_[idx].get();
   }
diff --git a/test/004-NativeAllocations/src-art/Main.java b/test/004-NativeAllocations/src-art/Main.java
index 29f907d..6b1c48d 100644
--- a/test/004-NativeAllocations/src-art/Main.java
+++ b/test/004-NativeAllocations/src-art/Main.java
@@ -82,8 +82,8 @@
     // case of blocking registerNativeAllocation.
     private static void triggerBlockingRegisterNativeAllocation() throws Exception {
         long maxMem = Runtime.getRuntime().maxMemory();
-        int size = (int)(maxMem / 32);
-        int allocationCount = 256;
+        int size = (int)(maxMem / 5);
+        int allocationCount = 10;
 
         long total = 0;
         for (int i = 0; i < allocationCount; ++i) {
@@ -111,7 +111,7 @@
         synchronized (deadlockLock) {
             allocateDeadlockingFinalizer();
             while (!aboutToDeadlock) {
-                checkRegisterNativeAllocation();
+                Runtime.getRuntime().gc();
             }
 
             // Do more allocations now that the finalizer thread is deadlocked so that we force
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 3506649..4868355 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -1068,6 +1068,7 @@
   //
   /// CHECK-START: void Main.lengthAlias1(int[], int) BCE (after)
   /// CHECK-NOT:              BoundsCheck
+  /// CHECK-NOT:              Deoptimize
   public static void lengthAlias1(int[] a, int len) {
     if (len == a.length) {
       for (int i = 0; i < len; i++) {
@@ -1087,6 +1088,7 @@
   //
   /// CHECK-START: void Main.lengthAlias2(int[], int) BCE (after)
   /// CHECK-NOT:              BoundsCheck
+  /// CHECK-NOT:              Deoptimize
   public static void lengthAlias2(int[] a, int len) {
     if (len != a.length) {
       return;
@@ -1107,6 +1109,7 @@
   //
   /// CHECK-START: void Main.lengthAlias3(int[], int) BCE (after)
   /// CHECK-NOT:              BoundsCheck
+  /// CHECK-NOT:              Deoptimize
   public static void lengthAlias3(int[] a, int len) {
     if (a.length == len) {
       for (int i = 0; i < len; i++) {
@@ -1115,6 +1118,27 @@
     }
   }
 
+  /// CHECK-START: void Main.lengthAlias4(int[]) BCE (before)
+  /// CHECK-DAG: <<Arr:l\d+>> ParameterValue                loop:none
+  /// CHECK-DAG: <<Val:i\d+>> IntConstant 8                 loop:none
+  /// CHECK-DAG: <<Nul:l\d+>> NullCheck [<<Arr>>]           loop:none
+  /// CHECK-DAG: <<Len:i\d+>> ArrayLength [<<Nul>>]         loop:none
+  /// CHECK-DAG:              Equal [<<Len>>,<<Val>>]       loop:none
+  /// CHECK-DAG: <<Idx:i\d+>> Phi                           loop:<<Loop:B\d+>>
+  /// CHECK-DAG:              BoundsCheck [<<Idx>>,<<Len>>] loop:<<Loop>>
+  //
+  /// CHECK-START: void Main.lengthAlias4(int[]) BCE (after)
+  /// CHECK-NOT:              BoundsCheck
+  /// CHECK-NOT:              Deoptimize
+  public static void lengthAlias4(int[] a) {
+    if (8 != a.length) {
+      return;
+    }
+    for (int i = 0; i < 8; i++) {
+      a[i] = 4;
+    }
+  }
+
   static int[][] mA;
 
   /// CHECK-START: void Main.dynamicBCEAndIntrinsic(int) BCE (before)
@@ -1824,10 +1848,20 @@
         System.out.println("alias3 failed!");
       }
     }
-
-    lengthAlias1(array, /*mismatched value*/ 32);
+    lengthAlias4(array);
     for (int i = 0; i < 8; i++) {
-      if (array[i] != 3) {
+      if (array[i] != 4) {
+        System.out.println("alias4 failed!");
+      }
+    }
+
+    array = new int[10];
+    lengthAlias1(array, /*mismatched value*/ 8);
+    lengthAlias2(array, /*mismatched value*/ 8);
+    lengthAlias3(array, /*mismatched value*/ 8);
+    lengthAlias4(array);  // implicit mismatch
+    for (int i = 0; i < 10; i++) {
+      if (array[i] != 0) {
         System.out.println("mismatch failed!");
       }
     }
diff --git a/test/670-bitstring-type-check/build b/test/670-bitstring-type-check/build
new file mode 100644
index 0000000..38307f2
--- /dev/null
+++ b/test/670-bitstring-type-check/build
@@ -0,0 +1,216 @@
+#!/bin/bash
+#
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+# Write out the source file.
+
+mkdir src
+cat >src/Main.java <<EOF
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+EOF
+
+for i in {0..8192}; do echo "class Level1Class$i { }" >>src/Main.java; done
+for i in {0..1024}; do echo "class Level2Class$i extends Level1Class0 { }" >>src/Main.java; done
+
+cat >>src/Main.java <<EOF
+class Level3Class0 extends Level2Class0 { }
+class Level4Class0 extends Level3Class0 { }
+class Level5Class0 extends Level4Class0 { }
+class Level6Class0 extends Level5Class0 { }
+class Level7Class0 extends Level6Class0 { }
+class Level8Class0 extends Level7Class0 { }
+class Level9Class0 extends Level8Class0 { }
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    // 8193 classes at level 1 ensure an overflow if there are 13 or
+    // fewer bits for the level 1 character. 1025 classes at level 2 similarly guarantee
+    // an overflow if the number of bits for the level 2 character is 10 or fewer. To test
+    // type checks also for the depth overflow, we provide a hierarchy 9 levels deep.
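+    // (13 bits can encode only 2^13 = 8192 distinct values, so 8193 level-1
+    // classes cannot all receive 13-bit ids; likewise 2^10 = 1024 < 1025.)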
+
+    // Make sure the bitstrings are initialized.
+    for (int i = 0; i <= 8192; ++i) {
+      Class.forName("Level1Class" + i).newInstance();
+    }
+    for (int i = 0; i <= 1024; ++i) {
+      Class.forName("Level2Class" + i).newInstance();
+    }
+
+    // Note: Using a different class for tests so that verification of Main.main() does
+    // not try to resolve classes used by the tests. This guarantees uninitialized type
+    // check bitstrings when we enter Main.main() and start initializing them above.
+    Helper.testInstanceOf();
+    Helper.testCheckCast();
+  }
+}
+
+class Helper {
+  public static void testInstanceOf() throws Exception {
+    for (int i = 1; i <= 9; ++i) {
+      Object o = createInstance("Level" + i + "Class0");
+      assertTrue(o instanceof Level1Class0);
+      if (o instanceof Level2Class0) {
+        assertFalse(i < 2);
+      } else {
+        assertTrue(i < 2);
+      }
+      if (o instanceof Level3Class0) {
+        assertFalse(i < 3);
+      } else {
+        assertTrue(i < 3);
+      }
+      if (o instanceof Level4Class0) {
+        assertFalse(i < 4);
+      } else {
+        assertTrue(i < 4);
+      }
+      if (o instanceof Level5Class0) {
+        assertFalse(i < 5);
+      } else {
+        assertTrue(i < 5);
+      }
+      if (o instanceof Level6Class0) {
+        assertFalse(i < 6);
+      } else {
+        assertTrue(i < 6);
+      }
+      if (o instanceof Level7Class0) {
+        assertFalse(i < 7);
+      } else {
+        assertTrue(i < 7);
+      }
+      if (o instanceof Level8Class0) {
+        assertFalse(i < 8);
+      } else {
+        assertTrue(i < 8);
+      }
+      if (o instanceof Level9Class0) {
+        assertFalse(i < 9);
+      } else {
+        assertTrue(i < 9);
+      }
+    }
+
+    assertTrue(createInstance("Level1Class8192") instanceof Level1Class8192);
+    assertFalse(createInstance("Level1Class8192") instanceof Level1Class0);
+    assertTrue(createInstance("Level2Class1024") instanceof Level2Class1024);
+    assertTrue(createInstance("Level2Class1024") instanceof Level1Class0);
+    assertFalse(createInstance("Level2Class1024") instanceof Level2Class0);
+  }
+
+  public static void testCheckCast() throws Exception {
+    for (int i = 1; i <= 9; ++i) {
+      Object o = createInstance("Level" + i + "Class0");
+      Level1Class0 l1c0 = (Level1Class0) o;
+      try {
+        Level2Class0 l2c0 = (Level2Class0) o;
+        assertFalse(i < 2);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 2);
+      }
+      try {
+        Level3Class0 l3c0 = (Level3Class0) o;
+        assertFalse(i < 3);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 3);
+      }
+      try {
+        Level4Class0 l4c0 = (Level4Class0) o;
+        assertFalse(i < 4);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 4);
+      }
+      try {
+        Level5Class0 l5c0 = (Level5Class0) o;
+        assertFalse(i < 5);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 5);
+      }
+      try {
+        Level6Class0 l6c0 = (Level6Class0) o;
+        assertFalse(i < 6);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 6);
+      }
+      try {
+        Level7Class0 l7c0 = (Level7Class0) o;
+        assertFalse(i < 7);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 7);
+      }
+      try {
+        Level8Class0 l8c0 = (Level8Class0) o;
+        assertFalse(i < 8);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 8);
+      }
+      try {
+        Level9Class0 l9c0 = (Level9Class0) o;
+        assertFalse(i < 9);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 9);
+      }
+    }
+
+    Level1Class8192 l1c8192 = (Level1Class8192) createInstance("Level1Class8192");
+    try {
+      Level1Class0 l1c0 = (Level1Class0) createInstance("Level1Class8192");
+      throw new AssertionError("Unexpected");
+    } catch (ClassCastException expected) {}
+    Level2Class1024 l2c1024 = (Level2Class1024) createInstance("Level2Class1024");
+    Level1Class0 l1c0 = (Level1Class0) createInstance("Level2Class1024");
+    try {
+      Level2Class0 l2c0 = (Level2Class0) createInstance("Level2Class1024");
+      throw new AssertionError("Unexpected");
+    } catch (ClassCastException expected) {}
+  }
+
+  public static Object createInstance(String className) throws Exception {
+    return Class.forName(className).newInstance();
+  }
+
+  public static void assertTrue(boolean value) throws Exception {
+    if (!value) {
+      throw new AssertionError();
+    }
+  }
+
+  public static void assertFalse(boolean value) throws Exception {
+    if (value) {
+      throw new AssertionError();
+    }
+  }
+}
+EOF
+
+./default-build "$@"
diff --git a/test/670-bitstring-type-check/expected.txt b/test/670-bitstring-type-check/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/670-bitstring-type-check/expected.txt
diff --git a/test/670-bitstring-type-check/info.txt b/test/670-bitstring-type-check/info.txt
new file mode 100644
index 0000000..a34ba86
--- /dev/null
+++ b/test/670-bitstring-type-check/info.txt
@@ -0,0 +1 @@
+Tests for the bitstring type checks.
diff --git a/test/710-varhandle-creation/src-art/Main.java b/test/710-varhandle-creation/src-art/Main.java
index 6d542bb..a737b5b 100644
--- a/test/710-varhandle-creation/src-art/Main.java
+++ b/test/710-varhandle-creation/src-art/Main.java
@@ -18,7 +18,6 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-import dalvik.system.VMRuntime;
 import java.lang.invoke.MethodHandles;
 import java.lang.invoke.VarHandle;
 import java.lang.invoke.VarHandle.AccessMode;
@@ -129,9 +128,6 @@
     static final VarHandle vbbd;
     static final VarHandle vbbo;
 
-    // Some test results vary depending on 32-bit vs 64-bit.
-    static final boolean IS_64_BIT = VMRuntime.getRuntime().is64Bit();
-
     static {
         try {
             vz = MethodHandles.lookup().findVarHandle(Main.class, "z", boolean.class);
@@ -1728,14 +1724,14 @@
         checkNotNull(vbaj);
         checkVarType(vbaj, long.class);
         checkCoordinateTypes(vbaj, "[class [B, int]");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET, IS_64_BIT, "(byte[],int)long");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET, IS_64_BIT, "(byte[],int,long)void");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET_VOLATILE, IS_64_BIT, "(byte[],int)long");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET_VOLATILE, IS_64_BIT, "(byte[],int,long)void");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET_ACQUIRE, IS_64_BIT, "(byte[],int)long");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET_RELEASE, IS_64_BIT, "(byte[],int,long)void");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET_OPAQUE, IS_64_BIT, "(byte[],int)long");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET_OPAQUE, IS_64_BIT, "(byte[],int,long)void");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET, true, "(byte[],int)long");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET, true, "(byte[],int,long)void");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET_VOLATILE, true, "(byte[],int)long");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET_VOLATILE, true, "(byte[],int,long)void");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET_ACQUIRE, true, "(byte[],int)long");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET_RELEASE, true, "(byte[],int,long)void");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET_OPAQUE, true, "(byte[],int)long");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET_OPAQUE, true, "(byte[],int,long)void");
         checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.COMPARE_AND_SET, true, "(byte[],int,long,long)boolean");
         checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.COMPARE_AND_EXCHANGE, true, "(byte[],int,long,long)long");
         checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.COMPARE_AND_EXCHANGE_ACQUIRE, true, "(byte[],int,long,long)long");
@@ -1800,14 +1796,14 @@
         checkNotNull(vbad);
         checkVarType(vbad, double.class);
         checkCoordinateTypes(vbad, "[class [B, int]");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET, IS_64_BIT, "(byte[],int)double");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET, IS_64_BIT, "(byte[],int,double)void");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET_VOLATILE, IS_64_BIT, "(byte[],int)double");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET_VOLATILE, IS_64_BIT, "(byte[],int,double)void");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET_ACQUIRE, IS_64_BIT, "(byte[],int)double");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET_RELEASE, IS_64_BIT, "(byte[],int,double)void");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET_OPAQUE, IS_64_BIT, "(byte[],int)double");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET_OPAQUE, IS_64_BIT, "(byte[],int,double)void");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET, true, "(byte[],int)double");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET, true, "(byte[],int,double)void");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET_VOLATILE, true, "(byte[],int)double");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET_VOLATILE, true, "(byte[],int,double)void");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET_ACQUIRE, true, "(byte[],int)double");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET_RELEASE, true, "(byte[],int,double)void");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET_OPAQUE, true, "(byte[],int)double");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET_OPAQUE, true, "(byte[],int,double)void");
         checkVarHandleAccessMode(vbad, VarHandle.AccessMode.COMPARE_AND_SET, true, "(byte[],int,double,double)boolean");
         checkVarHandleAccessMode(vbad, VarHandle.AccessMode.COMPARE_AND_EXCHANGE, true, "(byte[],int,double,double)double");
         checkVarHandleAccessMode(vbad, VarHandle.AccessMode.COMPARE_AND_EXCHANGE_ACQUIRE, true, "(byte[],int,double,double)double");
@@ -1953,14 +1949,14 @@
         checkNotNull(vbbj);
         checkVarType(vbbj, long.class);
         checkCoordinateTypes(vbbj, "[class java.nio.ByteBuffer, int]");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET, IS_64_BIT, "(ByteBuffer,int)long");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET, IS_64_BIT, "(ByteBuffer,int,long)void");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET_VOLATILE, IS_64_BIT, "(ByteBuffer,int)long");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET_VOLATILE, IS_64_BIT, "(ByteBuffer,int,long)void");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET_ACQUIRE, IS_64_BIT, "(ByteBuffer,int)long");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET_RELEASE, IS_64_BIT, "(ByteBuffer,int,long)void");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET_OPAQUE, IS_64_BIT, "(ByteBuffer,int)long");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET_OPAQUE, IS_64_BIT, "(ByteBuffer,int,long)void");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET, true, "(ByteBuffer,int)long");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET, true, "(ByteBuffer,int,long)void");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET_VOLATILE, true, "(ByteBuffer,int)long");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET_VOLATILE, true, "(ByteBuffer,int,long)void");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET_ACQUIRE, true, "(ByteBuffer,int)long");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET_RELEASE, true, "(ByteBuffer,int,long)void");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET_OPAQUE, true, "(ByteBuffer,int)long");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET_OPAQUE, true, "(ByteBuffer,int,long)void");
         checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.COMPARE_AND_SET, true, "(ByteBuffer,int,long,long)boolean");
         checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.COMPARE_AND_EXCHANGE, true, "(ByteBuffer,int,long,long)long");
         checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.COMPARE_AND_EXCHANGE_ACQUIRE, true, "(ByteBuffer,int,long,long)long");
@@ -2025,14 +2021,14 @@
         checkNotNull(vbbd);
         checkVarType(vbbd, double.class);
         checkCoordinateTypes(vbbd, "[class java.nio.ByteBuffer, int]");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET, IS_64_BIT, "(ByteBuffer,int)double");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET, IS_64_BIT, "(ByteBuffer,int,double)void");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET_VOLATILE, IS_64_BIT, "(ByteBuffer,int)double");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET_VOLATILE, IS_64_BIT, "(ByteBuffer,int,double)void");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET_ACQUIRE, IS_64_BIT, "(ByteBuffer,int)double");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET_RELEASE, IS_64_BIT, "(ByteBuffer,int,double)void");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET_OPAQUE, IS_64_BIT, "(ByteBuffer,int)double");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET_OPAQUE, IS_64_BIT, "(ByteBuffer,int,double)void");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET, true, "(ByteBuffer,int)double");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET, true, "(ByteBuffer,int,double)void");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET_VOLATILE, true, "(ByteBuffer,int)double");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET_VOLATILE, true, "(ByteBuffer,int,double)void");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET_ACQUIRE, true, "(ByteBuffer,int)double");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET_RELEASE, true, "(ByteBuffer,int,double)void");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET_OPAQUE, true, "(ByteBuffer,int)double");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET_OPAQUE, true, "(ByteBuffer,int,double)void");
         checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.COMPARE_AND_SET, true, "(ByteBuffer,int,double,double)boolean");
         checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.COMPARE_AND_EXCHANGE, true, "(ByteBuffer,int,double,double)double");
         checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.COMPARE_AND_EXCHANGE_ACQUIRE, true, "(ByteBuffer,int,double,double)double");
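The hunks above drop the 32-bit/64-bit distinction: GET, SET, and their volatile/acquire/release/opaque variants on the long- and double-view handles are now expected to be supported regardless of the device's bitness, so a hard-coded true replaces IS_64_BIT. Whether a given mode is supported can also be queried directly; a small probe of the same expectations (a sketch assuming a Java 9+/current ART runtime, not part of this patch):

import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;
import java.nio.ByteOrder;

public class VarHandleProbe {
    public static void main(String[] args) {
        // Byte-array view handle for long, analogous to vbaj in the test above.
        VarHandle vbaj =
                MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.nativeOrder());
        VarHandle.AccessMode[] modes = {
            VarHandle.AccessMode.GET, VarHandle.AccessMode.SET,
            VarHandle.AccessMode.GET_VOLATILE, VarHandle.AccessMode.SET_VOLATILE,
        };
        for (VarHandle.AccessMode mode : modes) {
            System.out.println(mode + " supported: " + vbaj.isAccessModeSupported(mode));
        }
    }
}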
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index 22c5106..c2408b0 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -25,6 +25,7 @@
 #include "instrumentation.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
+#include "jit/profile_compilation_info.h"
 #include "jit/profiling_info.h"
 #include "mirror/class-inl.h"
 #include "nativehelper/ScopedUtfChars.h"