Merge "Add missing TEMP_FAILURE_RETRYs in dt_fd_forward"
diff --git a/benchmark/type-check/info.txt b/benchmark/type-check/info.txt
new file mode 100644
index 0000000..d14fb96
--- /dev/null
+++ b/benchmark/type-check/info.txt
@@ -0,0 +1 @@
+Benchmarks for repeating check-cast and instance-of instructions in a loop.
diff --git a/benchmark/type-check/src/TypeCheckBenchmark.java b/benchmark/type-check/src/TypeCheckBenchmark.java
new file mode 100644
index 0000000..96904d9
--- /dev/null
+++ b/benchmark/type-check/src/TypeCheckBenchmark.java
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TypeCheckBenchmark {
+    public void timeCheckCastLevel1ToLevel1(int count) {
+        Object[] arr = arr1;
+        for (int i = 0; i < count; ++i) {
+            Level1 l1 = (Level1) arr[i & 1023];
+        }
+    }
+
+    public void timeCheckCastLevel2ToLevel1(int count) {
+        Object[] arr = arr2;
+        for (int i = 0; i < count; ++i) {
+            Level1 l1 = (Level1) arr[i & 1023];
+        }
+    }
+
+    public void timeCheckCastLevel3ToLevel1(int count) {
+        Object[] arr = arr3;
+        for (int i = 0; i < count; ++i) {
+            Level1 l1 = (Level1) arr[i & 1023];
+        }
+    }
+
+    public void timeCheckCastLevel9ToLevel1(int count) {
+        Object[] arr = arr9;
+        for (int i = 0; i < count; ++i) {
+            Level1 l1 = (Level1) arr[i & 1023];
+        }
+    }
+
+    public void timeCheckCastLevel9ToLevel2(int count) {
+        Object[] arr = arr9;
+        for (int i = 0; i < count; ++i) {
+            Level2 l2 = (Level2) arr[i & 1023];
+        }
+    }
+
+    public void timeInstanceOfLevel1ToLevel1(int count) {
+        int sum = 0;
+        Object[] arr = arr1;
+        for (int i = 0; i < count; ++i) {
+            if (arr[i & 1023] instanceof Level1) {
+              ++sum;
+            }
+        }
+        result = sum;
+    }
+
+    public void timeInstanceOfLevel2ToLevel1(int count) {
+        int sum = 0;
+        Object[] arr = arr2;
+        for (int i = 0; i < count; ++i) {
+            if (arr[i & 1023] instanceof Level1) {
+              ++sum;
+            }
+        }
+        result = sum;
+    }
+
+    public void timeInstanceOfLevel3ToLevel1(int count) {
+        int sum = 0;
+        Object[] arr = arr3;
+        for (int i = 0; i < count; ++i) {
+            if (arr[i & 1023] instanceof Level1) {
+              ++sum;
+            }
+        }
+        result = sum;
+    }
+
+    public void timeInstanceOfLevel9ToLevel1(int count) {
+        int sum = 0;
+        Object[] arr = arr9;
+        for (int i = 0; i < count; ++i) {
+            if (arr[i & 1023] instanceof Level1) {
+              ++sum;
+            }
+        }
+        result = sum;
+    }
+
+    public void timeInstanceOfLevel9ToLevel2(int count) {
+        int sum = 0;
+        Object[] arr = arr9;
+        for (int i = 0; i < count; ++i) {
+            if (arr[i & 1023] instanceof Level2) {
+              ++sum;
+            }
+        }
+        result = sum;
+    }
+
+    public static Object[] createArray(int level) {
+        try {
+            Class<?>[] ls = {
+                    null,
+                    Level1.class,
+                    Level2.class,
+                    Level3.class,
+                    Level4.class,
+                    Level5.class,
+                    Level6.class,
+                    Level7.class,
+                    Level8.class,
+                    Level9.class,
+            };
+            Class<?> l = ls[level];
+            Object[] array = new Object[1024];
+            for (int i = 0; i < array.length; ++i) {
+                array[i] = l.newInstance();
+            }
+            return array;
+        } catch (Exception unexpected) {
+            throw new Error("Initialization failure!");
+        }
+    }
+    Object[] arr1 = createArray(1);
+    Object[] arr2 = createArray(2);
+    Object[] arr3 = createArray(3);
+    Object[] arr9 = createArray(9);
+    int result;
+}
+
+class Level1 { }
+class Level2 extends Level1 { }
+class Level3 extends Level2 { }
+class Level4 extends Level3 { }
+class Level5 extends Level4 { }
+class Level6 extends Level5 { }
+class Level7 extends Level6 { }
+class Level8 extends Level7 { }
+class Level9 extends Level8 { }
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 4f5df03..1f210e1 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -451,13 +451,27 @@
 
   ART_TEST_HOST_GTEST_DEPENDENCIES += $$(gtest_deps)
 
+.PHONY: $$(gtest_rule)
+ifeq (,$(SANITIZE_HOST))
+$$(gtest_rule): $$(gtest_exe) $$(gtest_deps)
+	$(hide) ($$(call ART_TEST_SKIP,$$@) && $$< && \
+		$$(call ART_TEST_PASSED,$$@)) || $$(call ART_TEST_FAILED,$$@)
+else
 # Note: envsetup currently exports ASAN_OPTIONS=detect_leaks=0 to suppress leak detection, as some
 #       build tools (e.g., ninja) intentionally leak. We want leak checks when we run our tests, so
 #       override ASAN_OPTIONS. b/37751350
-.PHONY: $$(gtest_rule)
+# Note 2: Under sanitization, also capture the output, and run it through the stack tool on failure
+# (with the x86-64 ABI, as this allows symbolization of both x86 and x86-64). We don't do this in
+# general as it loses all the color output, and we have our own symbolization step when not running
+# under ASAN.
 $$(gtest_rule): $$(gtest_exe) $$(gtest_deps)
-	$(hide) ($$(call ART_TEST_SKIP,$$@) && ASAN_OPTIONS=detect_leaks=1 $$< && \
-		$$(call ART_TEST_PASSED,$$@)) || $$(call ART_TEST_FAILED,$$@)
+	$(hide) ($$(call ART_TEST_SKIP,$$@) && set -o pipefail && \
+		ASAN_OPTIONS=detect_leaks=1 $$< 2>&1 | tee $$<.tmp.out >&2 && \
+		{ $$(call ART_TEST_PASSED,$$@) ; rm $$<.tmp.out ; }) || \
+		( grep -q AddressSanitizer $$<.tmp.out && \
+			{ echo "ABI: 'x86_64'" | cat - $$<.tmp.out | development/scripts/stack | tail -n 3000 ; } ; \
+		rm $$<.tmp.out ; $$(call ART_TEST_FAILED,$$@))
+endif
 
   ART_TEST_HOST_GTEST$$($(3)ART_PHONY_TEST_HOST_SUFFIX)_RULES += $$(gtest_rule)
   ART_TEST_HOST_GTEST_RULES += $$(gtest_rule)
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 05fdc97..8af29d4 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -23,7 +23,6 @@
 
 #include "common_runtime_test.h"
 #include "compiler.h"
-#include "jit/profile_compilation_info.h"
 #include "oat_file.h"
 
 namespace art {
@@ -34,6 +33,7 @@
 class CompilerDriver;
 class CompilerOptions;
 class CumulativeLogger;
+class ProfileCompilationInfo;
 class VerificationResults;
 
 template<class T> class Handle;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 6c5cc50..70cbb01 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -56,6 +56,7 @@
 #include "gc/space/space.h"
 #include "handle_scope-inl.h"
 #include "intrinsics_enum.h"
+#include "jit/profile_compilation_info.h"
 #include "jni_internal.h"
 #include "linker/linker_patch.h"
 #include "mirror/class-inl.h"
@@ -673,7 +674,8 @@
 // TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a
 //       stable order.
 
-static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache,
+static void ResolveConstStrings(ClassLinker* class_linker,
+                                Handle<mirror::DexCache> dex_cache,
                                 const DexFile& dex_file,
                                 const DexFile::CodeItem* code_item)
       REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -682,7 +684,6 @@
     return;
   }
 
-  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
   for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(dex_file, code_item)) {
     switch (inst->Opcode()) {
       case Instruction::CONST_STRING:
@@ -730,22 +731,105 @@
           dex_file->StringByTypeIdx(class_def.class_idx_));
       if (!compilation_enabled) {
         // Compilation is skipped, do not resolve const-string in code of this class.
-        // TODO: Make sure that inlining honors this.
+        // FIXME: Make sure that inlining honors this. b/26687569
         continue;
       }
 
       // Direct and virtual methods.
-      int64_t previous_method_idx = -1;
       while (it.HasNextMethod()) {
-        uint32_t method_idx = it.GetMemberIndex();
-        if (method_idx == previous_method_idx) {
-          // smali can create dex files with two encoded_methods sharing the same method_idx
-          // http://code.google.com/p/smali/issues/detail?id=119
-          it.Next();
-          continue;
+        ResolveConstStrings(class_linker, dex_cache, *dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      DCHECK(!it.HasNext());
+    }
+  }
+}
+
+// Initialize type check bit strings for check-cast and instance-of in the code. Done to have
+// deterministic allocation behavior. Right now this is single-threaded for simplicity.
+// TODO: Collect the relevant type indices in parallel, then process them sequentially in a
+//       stable order.
+
+static void InitializeTypeCheckBitstrings(CompilerDriver* driver,
+                                          ClassLinker* class_linker,
+                                          Handle<mirror::DexCache> dex_cache,
+                                          const DexFile& dex_file,
+                                          const DexFile::CodeItem* code_item)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (code_item == nullptr) {
+    // Abstract or native method.
+    return;
+  }
+
+  for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(dex_file, code_item)) {
+    switch (inst->Opcode()) {
+      case Instruction::CHECK_CAST:
+      case Instruction::INSTANCE_OF: {
+        dex::TypeIndex type_index(
+            (inst->Opcode() == Instruction::CHECK_CAST) ? inst->VRegB_21c() : inst->VRegC_22c());
+        const char* descriptor = dex_file.StringByTypeIdx(type_index);
+        // We currently do not use the bitstring type check for array or final (including
+        // primitive) classes. We may reconsider this in future if it's deemed to be beneficial.
+        // And we cannot use it for classes outside the boot image as we do not know the runtime
+        // value of their bitstring when compiling (it may not even get assigned at runtime).
+        if (descriptor[0] == 'L' && driver->IsImageClass(descriptor)) {
+          ObjPtr<mirror::Class> klass =
+              class_linker->LookupResolvedType(type_index,
+                                               dex_cache.Get(),
+                                               /* class_loader */ nullptr);
+          CHECK(klass != nullptr) << descriptor << " should have been previously resolved.";
+          // Now assign the bitstring if the class is not final. Keep this in sync with sharpening.
+          if (!klass->IsFinal()) {
+            MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+            SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass);
+          }
         }
-        previous_method_idx = method_idx;
-        ResolveConstStrings(dex_cache, *dex_file, it.GetMethodCodeItem());
+        break;
+      }
+
+      default:
+        break;
+    }
+  }
+}
+
+static void InitializeTypeCheckBitstrings(CompilerDriver* driver,
+                                          const std::vector<const DexFile*>& dex_files,
+                                          TimingLogger* timings) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr));
+
+  for (const DexFile* dex_file : dex_files) {
+    dex_cache.Assign(class_linker->FindDexCache(soa.Self(), *dex_file));
+    TimingLogger::ScopedTiming t("Initialize type check bitstrings", timings);
+
+    size_t class_def_count = dex_file->NumClassDefs();
+    for (size_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) {
+      const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
+
+      const uint8_t* class_data = dex_file->GetClassData(class_def);
+      if (class_data == nullptr) {
+        // empty class, probably a marker interface
+        continue;
+      }
+
+      ClassDataItemIterator it(*dex_file, class_data);
+      it.SkipAllFields();
+
+      bool compilation_enabled = driver->IsClassToCompile(
+          dex_file->StringByTypeIdx(class_def.class_idx_));
+      if (!compilation_enabled) {
+        // Compilation is skipped, do not look for type checks in code of this class.
+        // FIXME: Make sure that inlining honors this. b/26687569
+        continue;
+      }
+
+      // Direct and virtual methods.
+      while (it.HasNextMethod()) {
+        InitializeTypeCheckBitstrings(
+            driver, class_linker, dex_cache, *dex_file, it.GetMethodCodeItem());
         it.Next();
       }
       DCHECK(!it.HasNext());
@@ -847,6 +931,13 @@
 
   UpdateImageClasses(timings);
   VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString(false);
+
+  if (GetCompilerOptions().IsForceDeterminism() && GetCompilerOptions().IsBootImage()) {
+    // Initialize type check bit string used by check-cast and instanceof.
+    // Do this now to have a deterministic image.
+    // Note: This is done after UpdateImageClasses() as it relies on the image classes to be final.
+    InitializeTypeCheckBitstrings(this, dex_files, timings);
+  }
 }
 
 bool CompilerDriver::IsImageClass(const char* descriptor) const {
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 87a8a18..4b5916d 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -37,7 +37,6 @@
 #include "dex/dex_file_types.h"
 #include "dex/dex_to_dex_compiler.h"
 #include "driver/compiled_method_storage.h"
-#include "jit/profile_compilation_info.h"
 #include "method_reference.h"
 #include "os.h"
 #include "safe_map.h"
@@ -70,6 +69,7 @@
 class MemberOffset;
 template<class MirrorType> class ObjPtr;
 class ParallelCompilationManager;
+class ProfileCompilationInfo;
 class ScopedObjectAccess;
 template <class Allocator> class SrcMap;
 class TimingLogger;
@@ -77,6 +77,9 @@
 class VerificationResults;
 class VerifiedMethod;
 
+// Compile-time flag to enable/disable bitstring type checks.
+static constexpr bool kUseBitstringTypeCheck = true;
+
 enum EntryPointCallingConvention {
   // ABI of invocations to a method's interpreter entry point.
   kInterpreterAbi,
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 3c5a37f..2dafbf7 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -438,6 +438,8 @@
       case TypeCheckKind::kArrayCheck:
       case TypeCheckKind::kUnresolvedCheck:
         return false;
+      case TypeCheckKind::kBitstringCheck:
+        return true;
     }
     LOG(FATAL) << "Unreachable";
     UNREACHABLE();
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 13bbffa..b47a5cf 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2112,6 +2112,26 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
+    HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  if (mask_bits == 16u) {
+    // Load only the bitstring part of the status word.
+    __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
+    // Extract the bitstring bits.
+    __ Ubfx(temp, temp, 0, mask_bits);
+  }
+  // Compare the bitstring bits to `path_to_root`.
+  __ Cmp(temp, path_to_root);
+}
+
 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
   BarrierType type = BarrierAll;
 
@@ -3840,6 +3860,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -3848,7 +3870,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   // The "out" register is used as a temporary, so it overlaps with the inputs.
   // Note that TypeCheckSlowPathARM64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -3861,7 +3889,9 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
-  Register cls = InputRegisterAt(instruction, 1);
+  Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+      ? Register()
+      : InputRegisterAt(instruction, 1);
   Location out_loc = locations->Out();
   Register out = OutputRegister(instruction);
   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -4047,6 +4077,23 @@
       }
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out);
+      __ Cset(out, eq);
+      if (zero.IsLinked()) {
+        __ B(&done);
+      }
+      break;
+    }
   }
 
   if (zero.IsLinked()) {
@@ -4069,7 +4116,13 @@
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
@@ -4079,7 +4132,9 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
-  Register cls = InputRegisterAt(instruction, 1);
+  Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+      ? Register()
+      : InputRegisterAt(instruction, 1);
   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
   DCHECK_GE(num_temps, 1u);
   DCHECK_LE(num_temps, 3u);
@@ -4260,6 +4315,20 @@
       __ B(ne, &start_loop);
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp);
+      __ B(ne, type_check_slow_path->GetEntryLabel());
+      break;
+    }
   }
   __ Bind(&done);
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index f92c94f..cc369de 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -264,6 +264,8 @@
  private:
   void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
                                         vixl::aarch64::Register class_reg);
+  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                         vixl::aarch64::Register temp);
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* instr);
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 577fe00..504c647 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2490,8 +2490,12 @@
   }
 
   if (!skip_overflow_check) {
-    UseScratchRegisterScope temps(GetVIXLAssembler());
-    vixl32::Register temp = temps.Acquire();
+    // Using r4 instead of IP saves 2 bytes. Start by asserting that r4 is available here.
+    for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
+      DCHECK(!reg.Is(r4));
+    }
+    DCHECK(!kCoreCalleeSaves.Includes(r4));
+    vixl32::Register temp = r4;
     __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
     // The load must immediately precede RecordPcInfo.
     ExactAssemblyScope aas(GetVIXLAssembler(),
@@ -7191,6 +7195,67 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
+    HTypeCheckInstruction* check,
+    vixl32::Register temp,
+    vixl32::FlagsUpdate flags_update) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs
+  // the Z flag for BNE. This is indicated by the `flags_update` parameter.
+  if (mask_bits == 16u) {
+    // Load only the bitstring part of the status word.
+    __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
+    // Check if the bitstring bits are equal to `path_to_root`.
+    if (flags_update == SetFlags) {
+      __ Cmp(temp, path_to_root);
+    } else {
+      __ Sub(temp, temp, path_to_root);
+    }
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
+    if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
+      // Compare the bitstring bits using SUB.
+      __ Sub(temp, temp, path_to_root);
+      // Shift out bits that do not contribute to the comparison.
+      __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+    } else if (IsUint<16>(path_to_root)) {
+      if (temp.IsLow()) {
+        // Note: Optimized for size but contains one more dependent instruction than necessary.
+        //       MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the
+        //       macro assembler would use the high reg IP for the constant by default.
+        // Compare the bitstring bits using SUB.
+        __ Sub(temp, temp, path_to_root & 0x00ffu);  // 16-bit SUB (immediate) T2
+        __ Sub(temp, temp, path_to_root & 0xff00u);  // 32-bit SUB (immediate) T3
+        // Shift out bits that do not contribute to the comparison.
+        __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+      } else {
+        // Extract the bitstring bits.
+        __ Ubfx(temp, temp, 0, mask_bits);
+        // Check if the bitstring bits are equal to `path_to_root`.
+        if (flags_update == SetFlags) {
+          __ Cmp(temp, path_to_root);
+        } else {
+          __ Sub(temp, temp, path_to_root);
+        }
+      }
+    } else {
+      // Shift out bits that do not contribute to the comparison.
+      __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+      // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
+      if (flags_update == SetFlags) {
+        __ Cmp(temp, path_to_root << (32u - mask_bits));
+      } else {
+        __ Sub(temp, temp, path_to_root << (32u - mask_bits));
+      }
+    }
+  }
+}
+
 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
@@ -7382,6 +7447,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -7390,7 +7457,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   // The "out" register is used as a temporary, so it overlaps with the inputs.
   // Note that TypeCheckSlowPathARM uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -7405,7 +7478,9 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   vixl32::Register obj = InputRegisterAt(instruction, 0);
-  vixl32::Register cls = InputRegisterAt(instruction, 1);
+  vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+      ? vixl32::Register()
+      : InputRegisterAt(instruction, 1);
   Location out_loc = locations->Out();
   vixl32::Register out = OutputRegister(instruction);
   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -7645,6 +7720,26 @@
       __ B(slow_path->GetEntryLabel());
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
+      // If `out` is a low reg and we would have another low reg temp, we could
+      // optimize this as RSBS+ADC, see GenerateConditionWithZero().
+      //
+      // Also, in some cases when `out` is a low reg and we're loading a constant to IP
+      // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
+      // would be the same and we would have fewer direct data dependencies.
+      codegen_->GenerateConditionWithZero(kCondEQ, out, out);  // CLZ+LSR
+      break;
+    }
   }
 
   if (done.IsReferenced()) {
@@ -7662,7 +7757,13 @@
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
@@ -7671,7 +7772,9 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   vixl32::Register obj = InputRegisterAt(instruction, 0);
-  vixl32::Register cls = InputRegisterAt(instruction, 1);
+  vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+      ? vixl32::Register()
+      : InputRegisterAt(instruction, 1);
   Location temp_loc = locations->GetTemp(0);
   vixl32::Register temp = RegisterFrom(temp_loc);
   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -7856,6 +7959,20 @@
       __ B(ne, &start_loop, /* far_target */ false);
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
+      __ B(ne, type_check_slow_path->GetEntryLabel());
+      break;
+    }
   }
   if (done.IsReferenced()) {
     __ Bind(&done);
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 38570bb..bd815f4 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -322,6 +322,9 @@
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path,
                                         vixl32::Register class_reg);
+  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                         vixl::aarch32::Register temp,
+                                         vixl::aarch32::FlagsUpdate flags_update);
   void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
   void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
   void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5c8e46e..2ed0ab7 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1929,6 +1929,34 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                                                     Register temp) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  if (mask_bits == 16u) {
+    // Load only the bitstring part of the status word.
+    __ LoadFromOffset(
+        kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value());
+    // Compare the bitstring bits using XOR.
+    __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value());
+    // Compare the bitstring bits using XOR.
+    if (IsUint<16>(path_to_root)) {
+      __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+    } else {
+      __ LoadConst32(TMP, path_to_root);
+      __ Xor(temp, temp, TMP);
+    }
+    // Shift out bits that do not contribute to the comparison.
+    __ Sll(temp, temp, 32 - mask_bits);
+  }
+}
+
 void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {
   __ Sync(0);  // Only stype 0 is supported.
 }
@@ -3289,12 +3317,20 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
@@ -3303,7 +3339,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
-  Register cls = locations->InAt(1).AsRegister<Register>();
+  Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -3353,7 +3389,7 @@
                                         kWithoutReadBarrier);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ Bne(temp, cls, slow_path->GetEntryLabel());
+      __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel());
       break;
     }
 
@@ -3379,7 +3415,7 @@
       // exception.
       __ Beqz(temp, slow_path->GetEntryLabel());
       // Otherwise, compare the classes.
-      __ Bne(temp, cls, &loop);
+      __ Bne(temp, cls.AsRegister<Register>(), &loop);
       break;
     }
 
@@ -3394,7 +3430,7 @@
       // Walk over the class hierarchy to find a match.
       MipsLabel loop;
       __ Bind(&loop);
-      __ Beq(temp, cls, &done);
+      __ Beq(temp, cls.AsRegister<Register>(), &done);
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction,
                                        temp_loc,
@@ -3417,7 +3453,7 @@
                                         maybe_temp2_loc,
                                         kWithoutReadBarrier);
       // Do an exact check.
-      __ Beq(temp, cls, &done);
+      __ Beq(temp, cls.AsRegister<Register>(), &done);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
       GenerateReferenceLoadOneRegister(instruction,
@@ -3476,7 +3512,21 @@
       // Go to next interface.
       __ Addiu(TMP, TMP, -2);
       // Compare the classes and continue the loop if they do not match.
-      __ Bne(AT, cls, &loop);
+      __ Bne(AT, cls.AsRegister<Register>(), &loop);
+      break;
+    }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp);
+      __ Bnez(temp, slow_path->GetEntryLabel());
       break;
     }
   }
@@ -7207,6 +7257,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -7215,7 +7267,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   // The output does overlap inputs.
   // Note that TypeCheckSlowPathMIPS uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -7227,7 +7285,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
-  Register cls = locations->InAt(1).AsRegister<Register>();
+  Location cls = locations->InAt(1);
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -7257,7 +7315,7 @@
                                         maybe_temp_loc,
                                         kCompilerReadBarrierOption);
       // Classes must be equal for the instanceof to succeed.
-      __ Xor(out, out, cls);
+      __ Xor(out, out, cls.AsRegister<Register>());
       __ Sltiu(out, out, 1);
       break;
     }
@@ -7282,7 +7340,7 @@
                                        kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Beqz(out, &done);
-      __ Bne(out, cls, &loop);
+      __ Bne(out, cls.AsRegister<Register>(), &loop);
       __ LoadConst32(out, 1);
       break;
     }
@@ -7298,7 +7356,7 @@
       // Walk over the class hierarchy to find a match.
       MipsLabel loop, success;
       __ Bind(&loop);
-      __ Beq(out, cls, &success);
+      __ Beq(out, cls.AsRegister<Register>(), &success);
       // /* HeapReference<Class> */ out = out->super_class_
       GenerateReferenceLoadOneRegister(instruction,
                                        out_loc,
@@ -7323,7 +7381,7 @@
                                         kCompilerReadBarrierOption);
       // Do an exact check.
       MipsLabel success;
-      __ Beq(out, cls, &success);
+      __ Beq(out, cls.AsRegister<Register>(), &success);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
       GenerateReferenceLoadOneRegister(instruction,
@@ -7355,7 +7413,7 @@
       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS(
           instruction, /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
-      __ Bne(out, cls, slow_path->GetEntryLabel());
+      __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel());
       __ LoadConst32(out, 1);
       break;
     }
@@ -7387,6 +7445,20 @@
       __ B(slow_path->GetEntryLabel());
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out);
+      __ Sltiu(out, out, 1);
+      break;
+    }
   }
 
   __ Bind(&done);
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 32b3e42..ffeb3b0 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -237,6 +237,7 @@
  private:
   void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
+  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
   void HandleBinaryOp(HBinaryOperation* operation);
   void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index bcfe051..3ae8a30 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1775,6 +1775,34 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                                                       GpuRegister temp) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  if (mask_bits == 16u) {
+    // Load only the bitstring part of the status word.
+    __ LoadFromOffset(
+        kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value());
+    // Compare the bitstring bits using XOR.
+    __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value());
+    // Compare the bitstring bits using XOR.
+    if (IsUint<16>(path_to_root)) {
+      __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+    } else {
+      __ LoadConst32(TMP, path_to_root);
+      __ Xor(temp, temp, TMP);
+    }
+    // Shift out bits that do not contribute to the comparison.
+    __ Sll(temp, temp, 32 - mask_bits);
+  }
+}
+
 void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {
   __ Sync(0);  // only stype 0 is supported
 }
@@ -2844,12 +2872,20 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
@@ -2858,7 +2894,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
-  GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
+  Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   GpuRegister temp = temp_loc.AsRegister<GpuRegister>();
   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -2908,7 +2944,7 @@
                                         kWithoutReadBarrier);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ Bnec(temp, cls, slow_path->GetEntryLabel());
+      __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
       break;
     }
 
@@ -2934,7 +2970,7 @@
       // exception.
       __ Beqzc(temp, slow_path->GetEntryLabel());
       // Otherwise, compare the classes.
-      __ Bnec(temp, cls, &loop);
+      __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop);
       break;
     }
 
@@ -2949,7 +2985,7 @@
       // Walk over the class hierarchy to find a match.
       Mips64Label loop;
       __ Bind(&loop);
-      __ Beqc(temp, cls, &done);
+      __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done);
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction,
                                        temp_loc,
@@ -2972,7 +3008,7 @@
                                         maybe_temp2_loc,
                                         kWithoutReadBarrier);
       // Do an exact check.
-      __ Beqc(temp, cls, &done);
+      __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
       GenerateReferenceLoadOneRegister(instruction,
@@ -3031,7 +3067,21 @@
       __ Daddiu(temp, temp, 2 * kHeapReferenceSize);
       __ Addiu(TMP, TMP, -2);
       // Compare the classes and continue the loop if they do not match.
-      __ Bnec(AT, cls, &loop);
+      __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop);
+      break;
+    }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp);
+      __ Bnezc(temp, slow_path->GetEntryLabel());
       break;
     }
   }
@@ -5524,6 +5574,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -5532,7 +5584,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
   // The output does overlap inputs.
   // Note that TypeCheckSlowPathMIPS64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -5544,7 +5602,7 @@
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
-  GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
+  Location cls = locations->InAt(1);
   Location out_loc = locations->Out();
   GpuRegister out = out_loc.AsRegister<GpuRegister>();
   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -5574,7 +5632,7 @@
                                         maybe_temp_loc,
                                         kCompilerReadBarrierOption);
       // Classes must be equal for the instanceof to succeed.
-      __ Xor(out, out, cls);
+      __ Xor(out, out, cls.AsRegister<GpuRegister>());
       __ Sltiu(out, out, 1);
       break;
     }
@@ -5599,7 +5657,7 @@
                                        kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Beqzc(out, &done);
-      __ Bnec(out, cls, &loop);
+      __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop);
       __ LoadConst32(out, 1);
       break;
     }
@@ -5615,7 +5673,7 @@
       // Walk over the class hierarchy to find a match.
       Mips64Label loop, success;
       __ Bind(&loop);
-      __ Beqc(out, cls, &success);
+      __ Beqc(out, cls.AsRegister<GpuRegister>(), &success);
       // /* HeapReference<Class> */ out = out->super_class_
       GenerateReferenceLoadOneRegister(instruction,
                                        out_loc,
@@ -5640,7 +5698,7 @@
                                         kCompilerReadBarrierOption);
       // Do an exact check.
       Mips64Label success;
-      __ Beqc(out, cls, &success);
+      __ Beqc(out, cls.AsRegister<GpuRegister>(), &success);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
       GenerateReferenceLoadOneRegister(instruction,
@@ -5672,7 +5730,7 @@
       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64(
           instruction, /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
-      __ Bnec(out, cls, slow_path->GetEntryLabel());
+      __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
       __ LoadConst32(out, 1);
       break;
     }
@@ -5704,6 +5762,20 @@
       __ Bc(slow_path->GetEntryLabel());
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out);
+      __ Sltiu(out, out, 1);
+      break;
+    }
   }
 
   __ Bind(&done);
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index d479410..87d5a9c 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -233,6 +233,7 @@
 
  private:
   void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg);
+  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
   void HandleCondition(HCondition* instruction);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index cbe9e0a..e85f900 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6234,6 +6234,27 @@
   // No need for memory fence, thanks to the X86 memory model.
 }
 
+void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                                                    Register temp) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  if ((false) && mask_bits == 16u) {
+    // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552
+    // Compare the bitstring in memory.
+    __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
+    // Compare the bitstring bits using SUB.
+    __ subl(temp, Immediate(path_to_root));
+    // Shift out bits that do not contribute to the comparison.
+    __ shll(temp, Immediate(32u - mask_bits));
+  }
+}
+
 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
@@ -6426,6 +6447,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -6434,7 +6457,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::Any());
+  }
   // Note that TypeCheckSlowPathX86 uses this "out" register too.
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for some cases.
@@ -6655,6 +6684,21 @@
       }
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out);
+      __ j(kNotEqual, &zero);
+      __ movl(out, Immediate(1));
+      __ jmp(&done);
+      break;
+    }
   }
 
   if (zero.IsLinked()) {
@@ -6681,6 +6725,10 @@
     // Require a register for the interface check since there is a loop that compares the class to
     // a memory address.
     locations->SetInAt(1, Location::RequiresRegister());
+  } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
   } else {
     locations->SetInAt(1, Location::Any());
   }
@@ -6900,6 +6948,19 @@
       __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp);
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+      break;
+    }
   }
   __ Bind(&done);
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 0082853..2d14d4c 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -211,6 +211,7 @@
   // the suspend call.
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
+  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 510eec4..9f8b1bb 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5440,6 +5440,27 @@
   // No need for memory fence, thanks to the x86-64 memory model.
 }
 
+void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                                                       CpuRegister temp) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  if ((false) && mask_bits == 16u) {
+    // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552
+    // Compare the bitstring in memory.
+    __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
+    // Compare the bitstring bits using SUB.
+    __ subl(temp, Immediate(path_to_root));
+    // Shift out bits that do not contribute to the comparison.
+    __ shll(temp, Immediate(32u - mask_bits));
+  }
+}
+
 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
     HLoadClass::LoadKind desired_class_load_kind) {
   switch (desired_class_load_kind) {
@@ -5812,6 +5833,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -5820,7 +5843,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::Any());
+  }
   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for
@@ -6049,6 +6078,27 @@
       }
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out);
+      if (zero.IsLinked()) {
+        __ j(kNotEqual, &zero);
+        __ movl(out, Immediate(1));
+        __ jmp(&done);
+      } else {
+        __ setcc(kEqual, out);
+        // setcc only sets the low byte.
+        __ andl(out, Immediate(1));
+      }
+      break;
+    }
   }
 
   if (zero.IsLinked()) {
@@ -6075,6 +6125,10 @@
     // Require a register for the interface check since there is a loop that compares the class to
     // a memory address.
     locations->SetInAt(1, Location::RequiresRegister());
+  } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
   } else {
     locations->SetInAt(1, Location::Any());
   }
@@ -6261,7 +6315,7 @@
       break;
     }
 
-    case TypeCheckKind::kInterfaceCheck:
+    case TypeCheckKind::kInterfaceCheck: {
       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
       // We can not get false positives by doing this.
       // /* HeapReference<Class> */ temp = obj->klass_
@@ -6297,6 +6351,20 @@
       // If `cls` was poisoned above, unpoison it.
       __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
       break;
+    }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp);
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+      break;
+    }
   }
 
   if (done.IsLinked()) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index e86123e..97f8ec7 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -208,6 +208,7 @@
   // the suspend call.
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
+  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void GenerateRemFP(HRem* rem);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index c88baa8..fbcbe36 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -25,6 +25,11 @@
 #include "base/bit_vector-inl.h"
 #include "base/scoped_arena_allocator.h"
 #include "base/scoped_arena_containers.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "obj_ptr-inl.h"
+#include "scoped_thread_state_change-inl.h"
+#include "subtype_check.h"
 
 namespace art {
 
@@ -548,30 +553,85 @@
   }
 }
 
-void GraphChecker::VisitCheckCast(HCheckCast* check) {
-  VisitInstruction(check);
-  HInstruction* input = check->InputAt(1);
-  if (!input->IsLoadClass()) {
-    AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
+void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
+                                                size_t input_pos,
+                                                bool check_value,
+                                                uint32_t expected_value,
+                                                const char* name) {
+  if (!check->InputAt(input_pos)->IsIntConstant()) {
+    AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.",
                           check->DebugName(),
                           check->GetId(),
-                          input->DebugName(),
-                          input->GetId()));
+                          input_pos,
+                          name,
+                          check->InputAt(input_pos)->DebugName(),
+                          check->InputAt(input_pos)->GetId()));
+  } else if (check_value) {
+    uint32_t actual_value =
+        static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue());
+    if (actual_value != expected_value) {
+      AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.",
+                            check->DebugName(),
+                            check->GetId(),
+                            name,
+                            actual_value,
+                            expected_value));
+    }
   }
 }
 
-void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
-  VisitInstruction(instruction);
-  HInstruction* input = instruction->InputAt(1);
-  if (!input->IsLoadClass()) {
-    AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
-                          instruction->DebugName(),
-                          instruction->GetId(),
-                          input->DebugName(),
-                          input->GetId()));
+void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
+  VisitInstruction(check);
+  HInstruction* input = check->InputAt(1);
+  if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+    if (!input->IsNullConstant()) {
+      AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.",
+                            check->DebugName(),
+                            check->GetId(),
+                            input->DebugName(),
+                            input->GetId()));
+    }
+    bool check_values = false;
+    BitString::StorageType expected_path_to_root = 0u;
+    BitString::StorageType expected_mask = 0u;
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      ObjPtr<mirror::Class> klass = check->GetClass().Get();
+      MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+      SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass);
+      if (state == SubtypeCheckInfo::kAssigned) {
+        expected_path_to_root =
+            SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass);
+        expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass);
+        check_values = true;
+      } else {
+        AddError(StringPrintf("%s:%d (bitstring) references a class with unassigned bitstring.",
+                              check->DebugName(),
+                              check->GetId()));
+      }
+    }
+    CheckTypeCheckBitstringInput(
+        check, /* input_pos */ 2, check_values, expected_path_to_root, "path_to_root");
+    CheckTypeCheckBitstringInput(check, /* input_pos */ 3, check_values, expected_mask, "mask");
+  } else {
+    if (!input->IsLoadClass()) {
+      AddError(StringPrintf("%s:%d (classic) expects a HLoadClass as second input, not %s:%d.",
+                            check->DebugName(),
+                            check->GetId(),
+                            input->DebugName(),
+                            input->GetId()));
+    }
   }
 }
 
+void GraphChecker::VisitCheckCast(HCheckCast* check) {
+  HandleTypeCheckInstruction(check);
+}
+
+void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
+  HandleTypeCheckInstruction(instruction);
+}
+
 void GraphChecker::HandleLoop(HBasicBlock* loop_header) {
   int id = loop_header->GetBlockId();
   HLoopInformation* loop_information = loop_header->GetLoopInformation();
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 0f0b49d..dbedc40 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -71,6 +71,12 @@
   void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
   void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
 
+  void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
+                                    size_t input_pos,
+                                    bool check_value,
+                                    uint32_t expected_value,
+                                    const char* name);
+  void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction);
   void HandleLoop(HBasicBlock* loop_header);
   void HandleBooleanInput(HInstruction* instruction, size_t input_index);
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 12c6988..5519121 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -389,16 +389,23 @@
     StartAttributeStream("load_kind") << load_string->GetLoadKind();
   }
 
-  void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
-    StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind();
+  void HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
+    StartAttributeStream("check_kind") << check->GetTypeCheckKind();
     StartAttributeStream("must_do_null_check") << std::boolalpha
-        << check_cast->MustDoNullCheck() << std::noboolalpha;
+        << check->MustDoNullCheck() << std::noboolalpha;
+    if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+      StartAttributeStream("path_to_root") << std::hex
+          << "0x" << check->GetBitstringPathToRoot() << std::dec;
+      StartAttributeStream("mask") << std::hex << "0x" << check->GetBitstringMask() << std::dec;
+    }
+  }
+
+  void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
+    HandleTypeCheckInstruction(check_cast);
   }
 
   void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE {
-    StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind();
-    StartAttributeStream("must_do_null_check") << std::boolalpha
-        << instance_of->MustDoNullCheck() << std::noboolalpha;
+    HandleTypeCheckInstruction(instance_of);
   }
 
   void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
@@ -648,20 +655,32 @@
           << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha;
     }
 
+    // For the builder and the inliner, we want to add extra information on HInstructions
+    // that have reference types, and also HInstanceOf/HCheckCast.
     if ((IsPass(HGraphBuilder::kBuilderPassName)
         || IsPass(HInliner::kInlinerPassName))
-        && (instruction->GetType() == DataType::Type::kReference)) {
-      ReferenceTypeInfo info = instruction->IsLoadClass()
-        ? instruction->AsLoadClass()->GetLoadedClassRTI()
-        : instruction->GetReferenceTypeInfo();
+        && (instruction->GetType() == DataType::Type::kReference ||
+            instruction->IsInstanceOf() ||
+            instruction->IsCheckCast())) {
+      ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference)
+          ? instruction->IsLoadClass()
+              ? instruction->AsLoadClass()->GetLoadedClassRTI()
+              : instruction->GetReferenceTypeInfo()
+          : instruction->IsInstanceOf()
+              ? instruction->AsInstanceOf()->GetTargetClassRTI()
+              : instruction->AsCheckCast()->GetTargetClassRTI();
       ScopedObjectAccess soa(Thread::Current());
       if (info.IsValid()) {
         StartAttributeStream("klass")
             << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get());
-        StartAttributeStream("can_be_null")
-            << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+        if (instruction->GetType() == DataType::Type::kReference) {
+          StartAttributeStream("can_be_null")
+              << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+        }
         StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
-      } else if (instruction->IsLoadClass()) {
+      } else if (instruction->IsLoadClass() ||
+                 instruction->IsInstanceOf() ||
+                 instruction->IsCheckCast()) {
         StartAttributeStream("klass") << "unresolved";
       } else {
         // The NullConstant may be added to the graph during other passes that happen between
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 64a1ecc..0205c6a 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1811,29 +1811,6 @@
   }
 }
 
-static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  if (cls == nullptr) {
-    return TypeCheckKind::kUnresolvedCheck;
-  } else if (cls->IsInterface()) {
-    return TypeCheckKind::kInterfaceCheck;
-  } else if (cls->IsArrayClass()) {
-    if (cls->GetComponentType()->IsObjectClass()) {
-      return TypeCheckKind::kArrayObjectCheck;
-    } else if (cls->CannotBeAssignedFromOtherTypes()) {
-      return TypeCheckKind::kExactCheck;
-    } else {
-      return TypeCheckKind::kArrayCheck;
-    }
-  } else if (cls->IsFinal()) {
-    return TypeCheckKind::kExactCheck;
-  } else if (cls->IsAbstract()) {
-    return TypeCheckKind::kAbstractClassCheck;
-  } else {
-    return TypeCheckKind::kClassHierarchyCheck;
-  }
-}
-
 void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) {
   HLoadString* load_string =
       new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc);
@@ -1848,22 +1825,8 @@
 HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
-  Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
-  Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass(
-      soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_));
-
-  bool needs_access_check = true;
-  if (klass != nullptr) {
-    if (klass->IsPublic()) {
-      needs_access_check = false;
-    } else {
-      ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
-      if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) {
-        needs_access_check = false;
-      }
-    }
-  }
-
+  Handle<mirror::Class> klass = ResolveClass(soa, type_index);
+  bool needs_access_check = LoadClassNeedsAccessCheck(klass);
   return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
 }
 
@@ -1908,25 +1871,83 @@
   return load_class;
 }
 
+Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa,
+                                                        dex::TypeIndex type_index) {
+  Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
+  ObjPtr<mirror::Class> klass = compiler_driver_->ResolveClass(
+      soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_);
+  // TODO: Avoid creating excessive handles if the method references the same class repeatedly.
+  // (Use a map on the local_allocator_.)
+  return handles_->NewHandle(klass);
+}
+
+bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) {
+  if (klass == nullptr) {
+    return true;
+  } else if (klass->IsPublic()) {
+    return false;
+  } else {
+    ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
+    return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get());
+  }
+}
+
 void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
                                          uint8_t destination,
                                          uint8_t reference,
                                          dex::TypeIndex type_index,
                                          uint32_t dex_pc) {
   HInstruction* object = LoadLocal(reference, DataType::Type::kReference);
-  HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
 
   ScopedObjectAccess soa(Thread::Current());
-  TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass());
+  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+  Handle<mirror::Class> klass = ResolveClass(soa, type_index);
+  bool needs_access_check = LoadClassNeedsAccessCheck(klass);
+  TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind(
+      klass.Get(), code_generator_, compiler_driver_, needs_access_check);
+
+  HInstruction* class_or_null = nullptr;
+  HIntConstant* bitstring_path_to_root = nullptr;
+  HIntConstant* bitstring_mask = nullptr;
+  if (check_kind == TypeCheckKind::kBitstringCheck) {
+    // TODO: Allow using the bitstring check also if we need an access check.
+    DCHECK(!needs_access_check);
+    class_or_null = graph_->GetNullConstant(dex_pc);
+    MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+    uint32_t path_to_root =
+        SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get());
+    uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get());
+    bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc);
+    bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc);
+  } else {
+    class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
+  }
+  DCHECK(class_or_null != nullptr);
+
   if (instruction.Opcode() == Instruction::INSTANCE_OF) {
-    AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc));
+    AppendInstruction(new (allocator_) HInstanceOf(object,
+                                                   class_or_null,
+                                                   check_kind,
+                                                   klass,
+                                                   dex_pc,
+                                                   allocator_,
+                                                   bitstring_path_to_root,
+                                                   bitstring_mask));
     UpdateLocal(destination, current_block_->GetLastInstruction());
   } else {
     DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
     // We emit a CheckCast followed by a BoundType. CheckCast is a statement
     // which may throw. If it succeeds BoundType sets the new type of `object`
     // for all subsequent uses.
-    AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc));
+    AppendInstruction(
+        new (allocator_) HCheckCast(object,
+                                    class_or_null,
+                                    check_kind,
+                                    klass,
+                                    dex_pc,
+                                    allocator_,
+                                    bitstring_path_to_root,
+                                    bitstring_mask));
     AppendInstruction(new (allocator_) HBoundType(object, dex_pc));
     UpdateLocal(reference, current_block_->GetLastInstruction());
   }
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 4428c53..f788292 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -39,6 +39,7 @@
 class HBasicBlockBuilder;
 class Instruction;
 class OptimizingCompilerStats;
+class ScopedObjectAccess;
 class SsaBuilder;
 class VariableSizedHandleScope;
 
@@ -232,6 +233,12 @@
                              bool needs_access_check)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Returns the outer-most compiling method's class.
   ObjPtr<mirror::Class> GetOutermostCompilingClass() const;
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index a42a85d..2538fa3 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -576,7 +576,9 @@
 
 // Returns whether doing a type test between the class of `object` against `klass` has
 // a statically known outcome. The result of the test is stored in `outcome`.
-static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) {
+static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti,
+                                     HInstruction* object,
+                                     /*out*/bool* outcome) {
   DCHECK(!object->IsNullConstant()) << "Null constants should be special cased";
   ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo();
   ScopedObjectAccess soa(Thread::Current());
@@ -586,7 +588,6 @@
     return false;
   }
 
-  ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI();
   if (!class_rti.IsValid()) {
     // Happens when the loaded class is unresolved.
     return false;
@@ -611,8 +612,8 @@
 
 void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
   HInstruction* object = check_cast->InputAt(0);
-  HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
-  if (load_class->NeedsAccessCheck()) {
+  if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck &&
+      check_cast->GetTargetClass()->NeedsAccessCheck()) {
     // If we need to perform an access check we cannot remove the instruction.
     return;
   }
@@ -630,15 +631,18 @@
   // Note: The `outcome` is initialized to please valgrind - the compiler can reorder
   // the return value check with the `outcome` check, b/27651442 .
   bool outcome = false;
-  if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+  if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) {
     if (outcome) {
       check_cast->GetBlock()->RemoveInstruction(check_cast);
       MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast);
-      if (!load_class->HasUses()) {
-        // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
-        // However, here we know that it cannot because the checkcast was successfull, hence
-        // the class was already loaded.
-        load_class->GetBlock()->RemoveInstruction(load_class);
+      if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) {
+        HLoadClass* load_class = check_cast->GetTargetClass();
+        if (!load_class->HasUses()) {
+          // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+          // However, here we know that it cannot because the checkcast was successful, hence
+          // the class was already loaded.
+          load_class->GetBlock()->RemoveInstruction(load_class);
+        }
       }
     } else {
       // Don't do anything for exceptional cases for now. Ideally we should remove
@@ -649,8 +653,8 @@
 
 void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
   HInstruction* object = instruction->InputAt(0);
-  HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass();
-  if (load_class->NeedsAccessCheck()) {
+  if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck &&
+      instruction->GetTargetClass()->NeedsAccessCheck()) {
     // If we need to perform an access check we cannot remove the instruction.
     return;
   }
@@ -673,7 +677,7 @@
   // Note: The `outcome` is initialized to please valgrind - the compiler can reorder
   // the return value check with the `outcome` check, b/27651442 .
   bool outcome = false;
-  if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+  if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) {
     MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf);
     if (outcome && can_be_null) {
       // Type test will succeed, we just need a null test.
@@ -686,11 +690,14 @@
     }
     RecordSimplification();
     instruction->GetBlock()->RemoveInstruction(instruction);
-    if (outcome && !load_class->HasUses()) {
-      // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
-      // However, here we know that it cannot because the instanceof check was successfull, hence
-      // the class was already loaded.
-      load_class->GetBlock()->RemoveInstruction(load_class);
+    if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) {
+      HLoadClass* load_class = instruction->GetTargetClass();
+      if (!load_class->HasUses()) {
+        // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+        // However, here we know that it cannot because the instanceof check was successful, hence
+        // the class was already loaded.
+        load_class->GetBlock()->RemoveInstruction(load_class);
+      }
     }
   }
 }
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 91e475d..5587f87 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -3105,6 +3105,8 @@
       return os << "array_object_check";
     case TypeCheckKind::kArrayCheck:
       return os << "array_check";
+    case TypeCheckKind::kBitstringCheck:
+      return os << "bitstring_check";
     default:
       LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs);
       UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 66fca36..b0657d6 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -5951,8 +5951,7 @@
         special_input_(HUserRecord<HInstruction*>(current_method)),
         type_index_(type_index),
         dex_file_(dex_file),
-        klass_(klass),
-        loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
+        klass_(klass) {
     // Referrers class should not need access check. We never inline unverified
     // methods so we can't possibly end up in this situation.
     DCHECK(!is_referrers_class || !needs_access_check);
@@ -5962,6 +5961,7 @@
     SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
     SetPackedFlag<kFlagIsInBootImage>(false);
     SetPackedFlag<kFlagGenerateClInitCheck>(false);
+    SetPackedFlag<kFlagValidLoadedClassRTI>(false);
   }
 
   bool IsClonable() const OVERRIDE { return true; }
@@ -6010,13 +6010,18 @@
   }
 
   ReferenceTypeInfo GetLoadedClassRTI() {
-    return loaded_class_rti_;
+    if (GetPackedFlag<kFlagValidLoadedClassRTI>()) {
+      // Note: The is_exact flag from the return value should not be used.
+      return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true);
+    } else {
+      return ReferenceTypeInfo::CreateInvalid();
+    }
   }
 
-  void SetLoadedClassRTI(ReferenceTypeInfo rti) {
-    // Make sure we only set exact types (the loaded class should never be merged).
-    DCHECK(rti.IsExact());
-    loaded_class_rti_ = rti;
+  // Loaded class RTI is marked as valid by RTP if the klass_ is admissible.
+  void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(klass_ != nullptr);
+    SetPackedFlag<kFlagValidLoadedClassRTI>(true);
   }
 
   dex::TypeIndex GetTypeIndex() const { return type_index_; }
@@ -6069,7 +6074,8 @@
   static constexpr size_t kFieldLoadKind           = kFlagGenerateClInitCheck + 1;
   static constexpr size_t kFieldLoadKindSize =
       MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
-  static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize;
+  static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize;
+  static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1;
   static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields.");
   using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
 
@@ -6097,8 +6103,6 @@
   const DexFile& dex_file_;
 
   Handle<mirror::Class> klass_;
-
-  ReferenceTypeInfo loaded_class_rti_;
 };
 std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs);
 
@@ -6626,49 +6630,143 @@
   kInterfaceCheck,        // No optimization yet when checking against an interface.
   kArrayObjectCheck,      // Can just check if the array is not primitive.
   kArrayCheck,            // No optimization yet when checking against a generic array.
+  kBitstringCheck,        // Compare the type check bitstring.
   kLast = kArrayCheck
 };
 
 std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs);
 
-class HInstanceOf FINAL : public HExpression<2> {
+// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an
+// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.)
+class HTypeCheckInstruction : public HVariableInputSizeInstruction {
  public:
-  HInstanceOf(HInstruction* object,
-              HLoadClass* target_class,
-              TypeCheckKind check_kind,
-              uint32_t dex_pc)
-      : HExpression(DataType::Type::kBool,
-                    SideEffectsForArchRuntimeCalls(check_kind),
-                    dex_pc) {
+  HTypeCheckInstruction(HInstruction* object,
+                        HInstruction* target_class_or_null,
+                        TypeCheckKind check_kind,
+                        Handle<mirror::Class> klass,
+                        uint32_t dex_pc,
+                        ArenaAllocator* allocator,
+                        HIntConstant* bitstring_path_to_root,
+                        HIntConstant* bitstring_mask,
+                        SideEffects side_effects)
+      : HVariableInputSizeInstruction(
+          side_effects,
+          dex_pc,
+          allocator,
+          /* number_of_inputs */ check_kind == TypeCheckKind::kBitstringCheck ? 4u : 2u,
+          kArenaAllocTypeCheckInputs),
+        klass_(klass) {
     SetPackedField<TypeCheckKindField>(check_kind);
     SetPackedFlag<kFlagMustDoNullCheck>(true);
+    SetPackedFlag<kFlagValidTargetClassRTI>(false);
     SetRawInputAt(0, object);
-    SetRawInputAt(1, target_class);
+    SetRawInputAt(1, target_class_or_null);
+    DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr);
+    DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr);
+    if (check_kind == TypeCheckKind::kBitstringCheck) {
+      DCHECK(target_class_or_null->IsNullConstant());
+      SetRawInputAt(2, bitstring_path_to_root);
+      SetRawInputAt(3, bitstring_mask);
+    } else {
+      DCHECK(target_class_or_null->IsLoadClass());
+    }
   }
 
   HLoadClass* GetTargetClass() const {
+    DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
     HInstruction* load_class = InputAt(1);
     DCHECK(load_class->IsLoadClass());
     return load_class->AsLoadClass();
   }
 
+  uint32_t GetBitstringPathToRoot() const {
+    DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
+    HInstruction* path_to_root = InputAt(2);
+    DCHECK(path_to_root->IsIntConstant());
+    return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue());
+  }
+
+  uint32_t GetBitstringMask() const {
+    DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
+    HInstruction* mask = InputAt(3);
+    DCHECK(mask->IsIntConstant());
+    return static_cast<uint32_t>(mask->AsIntConstant()->GetValue());
+  }
+
   bool IsClonable() const OVERRIDE { return true; }
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
-    return true;
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName();
+    return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields();
   }
 
-  bool NeedsEnvironment() const OVERRIDE {
-    return CanCallRuntime(GetTypeCheckKind());
-  }
-
-  // Used only in code generation.
   bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
   void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
   TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
   bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
 
+  ReferenceTypeInfo GetTargetClassRTI() {
+    if (GetPackedFlag<kFlagValidTargetClassRTI>()) {
+      // Note: The is_exact flag from the return value should not be used.
+      return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true);
+    } else {
+      return ReferenceTypeInfo::CreateInvalid();
+    }
+  }
+
+  // Target class RTI is marked as valid by RTP if the klass_ is admissible.
+  void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(klass_ != nullptr);
+    SetPackedFlag<kFlagValidTargetClassRTI>(true);
+  }
+
+  Handle<mirror::Class> GetClass() const {
+    return klass_;
+  }
+
+ protected:
+  DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction);
+
+ private:
+  static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldTypeCheckKindSize =
+      MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
+  static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
+  static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1;
+  static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1;
+  static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
+
+  Handle<mirror::Class> klass_;
+};
+
+class HInstanceOf FINAL : public HTypeCheckInstruction {
+ public:
+  HInstanceOf(HInstruction* object,
+              HInstruction* target_class_or_null,
+              TypeCheckKind check_kind,
+              Handle<mirror::Class> klass,
+              uint32_t dex_pc,
+              ArenaAllocator* allocator,
+              HIntConstant* bitstring_path_to_root,
+              HIntConstant* bitstring_mask)
+      : HTypeCheckInstruction(object,
+                              target_class_or_null,
+                              check_kind,
+                              klass,
+                              dex_pc,
+                              allocator,
+                              bitstring_path_to_root,
+                              bitstring_mask,
+                              SideEffectsForArchRuntimeCalls(check_kind)) {}
+
+  DataType::Type GetType() const OVERRIDE { return DataType::Type::kBool; }
+
+  bool NeedsEnvironment() const OVERRIDE {
+    return CanCallRuntime(GetTypeCheckKind());
+  }
+
   static bool CanCallRuntime(TypeCheckKind check_kind) {
     // Mips currently does runtime calls for any other checks.
     return check_kind != TypeCheckKind::kExactCheck;
@@ -6682,15 +6780,6 @@
 
  protected:
   DEFAULT_COPY_CONSTRUCTOR(InstanceOf);
-
- private:
-  static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits;
-  static constexpr size_t kFieldTypeCheckKindSize =
-      MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
-  static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
-  static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1;
-  static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
-  using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
 };
 
 class HBoundType FINAL : public HExpression<1> {
@@ -6740,31 +6829,25 @@
   ReferenceTypeInfo upper_bound_;
 };
 
-class HCheckCast FINAL : public HTemplateInstruction<2> {
+class HCheckCast FINAL : public HTypeCheckInstruction {
  public:
   HCheckCast(HInstruction* object,
-             HLoadClass* target_class,
+             HInstruction* target_class_or_null,
              TypeCheckKind check_kind,
-             uint32_t dex_pc)
-      : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
-    SetPackedField<TypeCheckKindField>(check_kind);
-    SetPackedFlag<kFlagMustDoNullCheck>(true);
-    SetRawInputAt(0, object);
-    SetRawInputAt(1, target_class);
-  }
-
-  HLoadClass* GetTargetClass() const {
-    HInstruction* load_class = InputAt(1);
-    DCHECK(load_class->IsLoadClass());
-    return load_class->AsLoadClass();
-  }
-
-  bool IsClonable() const OVERRIDE { return true; }
-  bool CanBeMoved() const OVERRIDE { return true; }
-
-  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
-    return true;
-  }
+             Handle<mirror::Class> klass,
+             uint32_t dex_pc,
+             ArenaAllocator* allocator,
+             HIntConstant* bitstring_path_to_root,
+             HIntConstant* bitstring_mask)
+      : HTypeCheckInstruction(object,
+                              target_class_or_null,
+                              check_kind,
+                              klass,
+                              dex_pc,
+                              allocator,
+                              bitstring_path_to_root,
+                              bitstring_mask,
+                              SideEffects::CanTriggerGC()) {}
 
   bool NeedsEnvironment() const OVERRIDE {
     // Instruction may throw a CheckCastError.
@@ -6773,24 +6856,10 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
-  bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
-  void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
-  TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
-  bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
-
   DECLARE_INSTRUCTION(CheckCast);
 
  protected:
   DEFAULT_COPY_CONSTRUCTOR(CheckCast);
-
- private:
-  static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
-  static constexpr size_t kFieldTypeCheckKindSize =
-      MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
-  static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
-  static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1;
-  static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
-  using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
 };
 
 /**
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 0023265..a6a2f46 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -99,6 +99,7 @@
   kConstructorFenceRemovedLSE,
   kConstructorFenceRemovedPFRA,
   kConstructorFenceRemovedCFRE,
+  kBitstringTypeCheck,
   kJitOutOfMemoryForCommit,
   kLastStat
 };
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index f843c00..5973339 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -34,6 +34,20 @@
   }
 }
 
+void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) {
+  // Record only those bitstring type checks that make it to the codegen stage.
+  if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+    MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck);
+  }
+}
+
+void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) {
+  // Record only those bitstring type checks that make it to the codegen stage.
+  if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+    MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck);
+  }
+}
+
 void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) {
   check->ReplaceWith(check->InputAt(0));
 }
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 2c64f01..f6e4d3e 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -40,6 +40,8 @@
       "prepare_for_register_allocation";
 
  private:
+  void VisitCheckCast(HCheckCast* check_cast) OVERRIDE;
+  void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE;
   void VisitNullCheck(HNullCheck* check) OVERRIDE;
   void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE;
   void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 8bb124e..178d7fd 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -87,6 +87,7 @@
   void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE;
   void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
+  void VisitInstanceOf(HInstanceOf* load_class) OVERRIDE;
   void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE;
   void VisitLoadString(HLoadString* instr) OVERRIDE;
   void VisitLoadException(HLoadException* instr) OVERRIDE;
@@ -171,6 +172,12 @@
                 << "NullCheck " << instr->GetReferenceTypeInfo()
                 << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo();
           }
+        } else if (instr->IsInstanceOf()) {
+          HInstanceOf* iof = instr->AsInstanceOf();
+          DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact());
+        } else if (instr->IsCheckCast()) {
+          HCheckCast* check = instr->AsCheckCast();
+          DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact());
         }
       }
     }
@@ -499,8 +506,7 @@
     return;
   }
 
-  HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
-  ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+  ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI();
   if (!class_rti.IsValid()) {
    // We have loaded an unresolved class. Don't bother bounding the type.
     return;
@@ -644,15 +650,20 @@
 
 void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) {
   ScopedObjectAccess soa(Thread::Current());
-  Handle<mirror::Class> resolved_class = instr->GetClass();
-  if (IsAdmissible(resolved_class.Get())) {
-    instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(
-        resolved_class, /* is_exact */ true));
+  if (IsAdmissible(instr->GetClass().Get())) {
+    instr->SetValidLoadedClassRTI();
   }
   instr->SetReferenceTypeInfo(
       ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true));
 }
 
+void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) {
+  ScopedObjectAccess soa(Thread::Current());
+  if (IsAdmissible(instr->GetClass().Get())) {
+    instr->SetValidTargetClassRTI();
+  }
+}
+
 void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) {
   instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo());
 }
@@ -720,8 +731,6 @@
 }
 
 void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
-  HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
-  ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
   HBoundType* bound_type = check_cast->GetNext()->AsBoundType();
   if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) {
     // The next instruction is not an uninitialized BoundType. This must be
@@ -730,12 +739,14 @@
   }
   DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0));
 
-  if (class_rti.IsValid()) {
+  ScopedObjectAccess soa(Thread::Current());
+  Handle<mirror::Class> klass = check_cast->GetClass();
+  if (IsAdmissible(klass.Get())) {
     DCHECK(is_first_run_);
-    ScopedObjectAccess soa(Thread::Current());
+    check_cast->SetValidTargetClassRTI();
     // This is the first run of RTP and class is resolved.
-    bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes();
-    bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact),
+    bool is_exact = klass->CannotBeAssignedFromOtherTypes();
+    bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact),
                               /* CheckCast succeeds for nulls. */ true);
   } else {
     // This is the first run of RTP and class is unresolved. Remove the binding.
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 1e49411..dffef17 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -236,6 +236,75 @@
   return load_kind;
 }
 
+static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass,
+                                            CodeGenerator* codegen,
+                                            CompilerDriver* compiler_driver)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(!klass->IsProxyClass());
+  DCHECK(!klass->IsArrayClass());
+
+  if (Runtime::Current()->UseJitCompilation()) {
+    // If we're JITting, try to assign a type check bitstring (fall through).
+  } else if (codegen->GetCompilerOptions().IsBootImage()) {
+    const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex());
+    if (!compiler_driver->IsImageClass(descriptor)) {
+      return false;
+    }
+    // If the target is a boot image class, try to assign a type check bitstring (fall through).
+    // (If --force-determinism, this was already done; repeating is OK and yields the same result.)
+  } else {
+    // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring
+    // already assigned in the boot image.
+    return false;
+  }
+
+  // Try to assign a type check bitstring.
+  MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+  if ((false) &&  // FIXME: Inliner does not respect compiler_driver->IsClassToCompile()
+                  // and we're hitting an unassigned bitstring in dex2oat_image_test. b/26687569
+      kIsDebugBuild &&
+      codegen->GetCompilerOptions().IsBootImage() &&
+      codegen->GetCompilerOptions().IsForceDeterminism()) {
+    SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass);
+    CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed)
+        << klass->PrettyDescriptor() << "/" << old_state
+        << " in " << codegen->GetGraph()->PrettyMethod();
+  }
+  SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass);
+  return state == SubtypeCheckInfo::kAssigned;
+}
+
+TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass,
+                                                CodeGenerator* codegen,
+                                                CompilerDriver* compiler_driver,
+                                                bool needs_access_check) {
+  if (klass == nullptr) {
+    return TypeCheckKind::kUnresolvedCheck;
+  } else if (klass->IsInterface()) {
+    return TypeCheckKind::kInterfaceCheck;
+  } else if (klass->IsArrayClass()) {
+    if (klass->GetComponentType()->IsObjectClass()) {
+      return TypeCheckKind::kArrayObjectCheck;
+    } else if (klass->CannotBeAssignedFromOtherTypes()) {
+      return TypeCheckKind::kExactCheck;
+    } else {
+      return TypeCheckKind::kArrayCheck;
+    }
+  } else if (klass->IsFinal()) {  // TODO: Consider using bitstring for final classes.
+    return TypeCheckKind::kExactCheck;
+  } else if (kUseBitstringTypeCheck &&
+             !needs_access_check &&
+             CanUseTypeCheckBitstring(klass, codegen, compiler_driver)) {
+    // TODO: We should not need the `!needs_access_check` check but getting rid of that
+    // requires rewriting some optimizations in instruction simplifier.
+    return TypeCheckKind::kBitstringCheck;
+  } else if (klass->IsAbstract()) {
+    return TypeCheckKind::kAbstractClassCheck;
+  } else {
+    return TypeCheckKind::kClassHierarchyCheck;
+  }
+}
+
 void HSharpening::ProcessLoadString(
     HLoadString* load_string,
     CodeGenerator* codegen,
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 6df7d6d..fa3e948 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -44,12 +44,10 @@
 
   static constexpr const char* kSharpeningPassName = "sharpening";
 
-  // Used by the builder.
-  static void ProcessLoadString(HLoadString* load_string,
-                                CodeGenerator* codegen,
-                                CompilerDriver* compiler_driver,
-                                const DexCompilationUnit& dex_compilation_unit,
-                                VariableSizedHandleScope* handles);
+  // Used by Sharpening and InstructionSimplifier.
+  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+                                          CodeGenerator* codegen,
+                                          CompilerDriver* compiler_driver);
 
   // Used by the builder and the inliner.
   static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class,
@@ -58,10 +56,19 @@
                                                    const DexCompilationUnit& dex_compilation_unit)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Used by Sharpening and InstructionSimplifier.
-  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
-                                          CodeGenerator* codegen,
-                                          CompilerDriver* compiler_driver);
+  // Used by the builder.
+  static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass,
+                                            CodeGenerator* codegen,
+                                            CompilerDriver* compiler_driver,
+                                            bool needs_access_check)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Used by the builder.
+  static void ProcessLoadString(HLoadString* load_string,
+                                CodeGenerator* codegen,
+                                CompilerDriver* compiler_driver,
+                                const DexCompilationUnit& dex_compilation_unit,
+                                VariableSizedHandleScope* handles);
 
  private:
   CodeGenerator* codegen_;
diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc
index 66041bb..ccb3ad8 100644
--- a/dex2oat/linker/oat_writer.cc
+++ b/dex2oat/linker/oat_writer.cc
@@ -39,6 +39,7 @@
 #include "dex/dex_file_types.h"
 #include "dex/standard_dex_file.h"
 #include "dex/verification_results.h"
+#include "dex_container.h"
 #include "dexlayout.h"
 #include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
@@ -46,6 +47,7 @@
 #include "gc/space/space.h"
 #include "handle_scope-inl.h"
 #include "image_writer.h"
+#include "jit/profile_compilation_info.h"
 #include "linker/buffered_output_stream.h"
 #include "linker/file_output_stream.h"
 #include "linker/index_bss_mapping_encoder.h"
@@ -3495,18 +3497,21 @@
     return false;
   }
   Options options;
-  options.output_to_memmap_ = true;
   options.compact_dex_level_ = compact_dex_level_;
   options.update_checksum_ = true;
-  DexLayout dex_layout(options, profile_compilation_info_, nullptr);
-  dex_layout.ProcessDexFile(location.c_str(), dex_file.get(), 0);
-  std::unique_ptr<MemMap> mem_map(dex_layout.GetAndReleaseMemMap());
+  DexLayout dex_layout(options, profile_compilation_info_, /*file*/ nullptr, /*header*/ nullptr);
+  std::unique_ptr<DexContainer> out_data;
+  dex_layout.ProcessDexFile(location.c_str(), dex_file.get(), 0, &out_data);
   oat_dex_file->dex_sections_layout_ = dex_layout.GetSections();
   // Dex layout can affect the size of the dex file, so we update here what we have set
   // when adding the dex file as a source.
-  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(mem_map->Begin());
+  const UnalignedDexFileHeader* header =
+      AsUnalignedDexFileHeader(out_data->GetMainSection()->Begin());
   oat_dex_file->dex_file_size_ = header->file_size_;
-  if (!WriteDexFile(out, oat_dex_file, mem_map->Begin(), /* update_input_vdex */ false)) {
+  if (!WriteDexFile(out,
+                    oat_dex_file,
+                    out_data->GetMainSection()->Begin(),
+                    /* update_input_vdex */ false)) {
     return false;
   }
   CHECK_EQ(oat_dex_file->dex_file_location_checksum_, dex_file->GetLocationChecksum());
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index d5a8783..4e5fd72 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -32,6 +32,7 @@
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "jit/profile_compilation_info.h"
 #include "linker/buffered_output_stream.h"
 #include "linker/elf_writer.h"
 #include "linker/elf_writer_quick.h"
diff --git a/dexlayout/compact_dex_writer.cc b/dexlayout/compact_dex_writer.cc
index dd1eee7..2f601b6 100644
--- a/dexlayout/compact_dex_writer.cc
+++ b/dexlayout/compact_dex_writer.cc
@@ -24,20 +24,20 @@
 
 namespace art {
 
-CompactDexWriter::CompactDexWriter(dex_ir::Header* header,
-                                   MemMap* mem_map,
-                                   DexLayout* dex_layout,
-                                   CompactDexLevel compact_dex_level)
-    : DexWriter(header, mem_map, dex_layout, /*compute_offsets*/ true),
-      compact_dex_level_(compact_dex_level),
-      data_dedupe_(/*bucket_count*/ 32,
-                   HashedMemoryRange::HashEqual(mem_map->Begin()),
-                   HashedMemoryRange::HashEqual(mem_map->Begin())) {
-  CHECK(compact_dex_level_ != CompactDexLevel::kCompactDexLevelNone);
+CompactDexWriter::CompactDexWriter(DexLayout* dex_layout)
+    : DexWriter(dex_layout, /*compute_offsets*/ true) {
+  CHECK(GetCompactDexLevel() != CompactDexLevel::kCompactDexLevelNone);
 }
 
-uint32_t CompactDexWriter::WriteDebugInfoOffsetTable(uint32_t offset) {
-  const uint32_t start_offset = offset;
+CompactDexLevel CompactDexWriter::GetCompactDexLevel() const {
+  return dex_layout_->GetOptions().compact_dex_level_;
+}
+
+CompactDexWriter::Container::Container(bool dedupe_code_items)
+    : code_item_dedupe_(dedupe_code_items, &data_section_) {}
+
+uint32_t CompactDexWriter::WriteDebugInfoOffsetTable(Stream* stream) {
+  const uint32_t start_offset = stream->Tell();
   const dex_ir::Collections& collections = header_->GetCollections();
   // Debug offsets for method indexes. 0 means no debug info.
   std::vector<uint32_t> debug_info_offsets(collections.MethodIdsSize(), 0u);
@@ -79,15 +79,16 @@
                                         &debug_info_base_,
                                         &debug_info_offsets_table_offset_);
   // Align the table and write it out.
-  offset = RoundUp(offset, CompactDexDebugInfoOffsetTable::kAlignment);
-  debug_info_offsets_pos_ = offset;
-  offset += Write(data.data(), data.size(), offset);
+  stream->AlignTo(CompactDexDebugInfoOffsetTable::kAlignment);
+  debug_info_offsets_pos_ = stream->Tell();
+  stream->Write(data.data(), data.size());
 
   // Verify that the whole table decodes as expected and measure average performance.
   const bool kMeasureAndTestOutput = dex_layout_->GetOptions().verify_output_;
   if (kMeasureAndTestOutput && !debug_info_offsets.empty()) {
     uint64_t start_time = NanoTime();
-    CompactDexDebugInfoOffsetTable::Accessor accessor(mem_map_->Begin() + debug_info_offsets_pos_,
+    stream->Begin();
+    CompactDexDebugInfoOffsetTable::Accessor accessor(stream->Begin() + debug_info_offsets_pos_,
                                                       debug_info_base_,
                                                       debug_info_offsets_table_offset_);
 
@@ -99,19 +100,19 @@
               << (end_time - start_time) / debug_info_offsets.size();
   }
 
-  return offset - start_offset;
+  return stream->Tell() - start_offset;
 }
 
-uint32_t CompactDexWriter::WriteCodeItem(dex_ir::CodeItem* code_item,
-                                         uint32_t offset,
+uint32_t CompactDexWriter::WriteCodeItem(Stream* stream,
+                                         dex_ir::CodeItem* code_item,
                                          bool reserve_only) {
   DCHECK(code_item != nullptr);
   DCHECK(!reserve_only) << "Not supported because of deduping.";
-  const uint32_t start_offset = offset;
+  const uint32_t start_offset = stream->Tell();
 
   // Align to minimum requirements, additional alignment requirements are handled below after we
   // know the preheader size.
-  offset = RoundUp(offset, CompactDexFile::CodeItem::kAlignment);
+  stream->AlignTo(CompactDexFile::CodeItem::kAlignment);
 
   CompactDexFile::CodeItem disk_code_item;
 
@@ -127,7 +128,7 @@
   const size_t preheader_bytes = (preheader_end - preheader) * sizeof(preheader[0]);
 
   static constexpr size_t kPayloadInstructionRequiredAlignment = 4;
-  const uint32_t current_code_item_start = offset + preheader_bytes;
+  const uint32_t current_code_item_start = stream->Tell() + preheader_bytes;
   if (!IsAlignedParam(current_code_item_start, kPayloadInstructionRequiredAlignment)) {
     // If the preheader is going to make the code unaligned, consider adding 2 bytes of padding
     // before if required.
@@ -137,49 +138,60 @@
       if (opcode == Instruction::FILL_ARRAY_DATA ||
           opcode == Instruction::PACKED_SWITCH ||
           opcode == Instruction::SPARSE_SWITCH) {
-        offset += RoundUp(current_code_item_start, kPayloadInstructionRequiredAlignment) -
-            current_code_item_start;
+        stream->Skip(
+            RoundUp(current_code_item_start, kPayloadInstructionRequiredAlignment) -
+                current_code_item_start);
         break;
       }
     }
   }
 
-  const uint32_t data_start = offset;
+  const uint32_t data_start = stream->Tell();
 
   // Write preheader first.
-  offset += Write(reinterpret_cast<const uint8_t*>(preheader), preheader_bytes, offset);
+  stream->Write(reinterpret_cast<const uint8_t*>(preheader), preheader_bytes);
   // Registered offset is after the preheader.
-  ProcessOffset(&offset, code_item);
+  ProcessOffset(stream, code_item);
   // Avoid using sizeof so that we don't write the fake instruction array at the end of the code
   // item.
-  offset += Write(&disk_code_item,
-                  OFFSETOF_MEMBER(CompactDexFile::CodeItem, insns_),
-                  offset);
+  stream->Write(&disk_code_item, OFFSETOF_MEMBER(CompactDexFile::CodeItem, insns_));
   // Write the instructions.
-  offset += Write(code_item->Insns(), code_item->InsnsSize() * sizeof(uint16_t), offset);
+  stream->Write(code_item->Insns(), code_item->InsnsSize() * sizeof(uint16_t));
   // Write the post instruction data.
-  offset += WriteCodeItemPostInstructionData(code_item, offset, reserve_only);
+  WriteCodeItemPostInstructionData(stream, code_item, reserve_only);
 
-  if (dex_layout_->GetOptions().dedupe_code_items_ && compute_offsets_) {
-    // After having written, try to dedupe the whole code item (excluding padding).
-    uint32_t deduped_offset = DedupeData(data_start, offset, code_item->GetOffset());
-    if (deduped_offset != kDidNotDedupe) {
+  if (compute_offsets_) {
+    // After having written, maybe dedupe the whole code item (excluding padding).
+    const uint32_t deduped_offset = code_item_dedupe_->Dedupe(data_start,
+                                                              stream->Tell(),
+                                                              code_item->GetOffset());
+    if (deduped_offset != Deduper::kDidNotDedupe) {
       code_item->SetOffset(deduped_offset);
       // Undo the offset for all that we wrote since we deduped.
-      offset = start_offset;
+      stream->Seek(start_offset);
     }
   }
 
-  return offset - start_offset;
+  return stream->Tell() - start_offset;
 }
 
-uint32_t CompactDexWriter::DedupeData(uint32_t data_start,
-                                      uint32_t data_end,
-                                      uint32_t item_offset) {
+
+CompactDexWriter::Deduper::Deduper(bool enabled, DexContainer::Section* section)
+    : enabled_(enabled),
+      dedupe_map_(/*bucket_count*/ 32,
+                  HashedMemoryRange::HashEqual(section),
+                  HashedMemoryRange::HashEqual(section)) {}
+
+uint32_t CompactDexWriter::Deduper::Dedupe(uint32_t data_start,
+                                           uint32_t data_end,
+                                           uint32_t item_offset) {
+  if (!enabled_) {
+    return kDidNotDedupe;
+  }
   HashedMemoryRange range {data_start, data_end - data_start};
-  auto existing = data_dedupe_.emplace(range, item_offset);
+  auto existing = dedupe_map_.emplace(range, item_offset);
   if (!existing.second) {
-    // Failed to insert, item already existed in the map.
+    // Failed to insert means we deduped, return the existing item offset.
     return existing.first->second;
   }
   return kDidNotDedupe;
@@ -223,7 +235,7 @@
   });
 }
 
-void CompactDexWriter::WriteHeader() {
+void CompactDexWriter::WriteHeader(Stream* stream) {
   CompactDexFile::Header header;
   CompactDexFile::WriteMagic(&header.magic_[0]);
   CompactDexFile::WriteCurrentVersion(&header.magic_[0]);
@@ -263,78 +275,99 @@
   if (header_->SupportDefaultMethods()) {
     header.feature_flags_ |= static_cast<uint32_t>(CompactDexFile::FeatureFlags::kDefaultMethods);
   }
-  UNUSED(Write(reinterpret_cast<uint8_t*>(&header), sizeof(header), 0u));
+  stream->Seek(0);
+  stream->Overwrite(reinterpret_cast<uint8_t*>(&header), sizeof(header));
 }
 
 size_t CompactDexWriter::GetHeaderSize() const {
   return sizeof(CompactDexFile::Header);
 }
 
-void CompactDexWriter::WriteMemMap() {
+void CompactDexWriter::Write(DexContainer* output)  {
+  CHECK(output->IsCompactDexContainer());
+  Container* const container = down_cast<Container*>(output);
+  // For now, use the same stream for both data and metadata.
+  Stream stream(output->GetMainSection());
+  Stream* main_stream = &stream;
+  Stream* data_stream = &stream;
+  code_item_dedupe_ = &container->code_item_dedupe_;
+
   // Starting offset is right after the header.
-  uint32_t offset = GetHeaderSize();
+  main_stream->Seek(GetHeaderSize());
 
   dex_ir::Collections& collection = header_->GetCollections();
 
   // Based on: https://source.android.com/devices/tech/dalvik/dex-format
   // Since the offsets may not be calculated already, the writing must be done in the correct order.
-  const uint32_t string_ids_offset = offset;
-  offset += WriteStringIds(offset, /*reserve_only*/ true);
-  offset += WriteTypeIds(offset);
-  const uint32_t proto_ids_offset = offset;
-  offset += WriteProtoIds(offset, /*reserve_only*/ true);
-  offset += WriteFieldIds(offset);
-  offset += WriteMethodIds(offset);
-  const uint32_t class_defs_offset = offset;
-  offset += WriteClassDefs(offset, /*reserve_only*/ true);
-  const uint32_t call_site_ids_offset = offset;
-  offset += WriteCallSiteIds(offset, /*reserve_only*/ true);
-  offset += WriteMethodHandles(offset);
+  const uint32_t string_ids_offset = main_stream->Tell();
+  WriteStringIds(main_stream, /*reserve_only*/ true);
+  WriteTypeIds(main_stream);
+  const uint32_t proto_ids_offset = main_stream->Tell();
+  WriteProtoIds(main_stream, /*reserve_only*/ true);
+  WriteFieldIds(main_stream);
+  WriteMethodIds(main_stream);
+  const uint32_t class_defs_offset = main_stream->Tell();
+  WriteClassDefs(main_stream, /*reserve_only*/ true);
+  const uint32_t call_site_ids_offset = main_stream->Tell();
+  WriteCallSiteIds(main_stream, /*reserve_only*/ true);
+  WriteMethodHandles(main_stream);
 
   uint32_t data_offset_ = 0u;
   if (compute_offsets_) {
     // Data section.
-    offset = RoundUp(offset, kDataSectionAlignment);
-    data_offset_ = offset;
+    data_stream->AlignTo(kDataSectionAlignment);
+    data_offset_ = data_stream->Tell();
   }
 
   // Write code item first to minimize the space required for encoded methods.
   // For cdex, the code items don't depend on the debug info.
-  offset += WriteCodeItems(offset, /*reserve_only*/ false);
+  WriteCodeItems(data_stream, /*reserve_only*/ false);
 
   // Sort the debug infos by method index order, this reduces size by ~0.1% by reducing the size of
   // the debug info offset table.
   SortDebugInfosByMethodIndex();
-  offset += WriteDebugInfoItems(offset);
+  WriteDebugInfoItems(data_stream);
 
-  offset += WriteEncodedArrays(offset);
-  offset += WriteAnnotations(offset);
-  offset += WriteAnnotationSets(offset);
-  offset += WriteAnnotationSetRefs(offset);
-  offset += WriteAnnotationsDirectories(offset);
-  offset += WriteTypeLists(offset);
-  offset += WriteClassDatas(offset);
-  offset += WriteStringDatas(offset);
+  WriteEncodedArrays(data_stream);
+  WriteAnnotations(data_stream);
+  WriteAnnotationSets(data_stream);
+  WriteAnnotationSetRefs(data_stream);
+  WriteAnnotationsDirectories(data_stream);
+  WriteTypeLists(data_stream);
+  WriteClassDatas(data_stream);
+  WriteStringDatas(data_stream);
 
   // Write delayed id sections that depend on data sections.
-  WriteStringIds(string_ids_offset, /*reserve_only*/ false);
-  WriteProtoIds(proto_ids_offset, /*reserve_only*/ false);
-  WriteClassDefs(class_defs_offset, /*reserve_only*/ false);
-  WriteCallSiteIds(call_site_ids_offset, /*reserve_only*/ false);
+  {
+    Stream::ScopedSeek seek(main_stream, string_ids_offset);
+    WriteStringIds(main_stream, /*reserve_only*/ false);
+  }
+  {
+    Stream::ScopedSeek seek(main_stream, proto_ids_offset);
+    WriteProtoIds(main_stream, /*reserve_only*/ false);
+  }
+  {
+    Stream::ScopedSeek seek(main_stream, class_defs_offset);
+    WriteClassDefs(main_stream, /*reserve_only*/ false);
+  }
+  {
+    Stream::ScopedSeek seek(main_stream, call_site_ids_offset);
+    WriteCallSiteIds(main_stream, /*reserve_only*/ false);
+  }
 
   // Write the map list.
   if (compute_offsets_) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeMapList));
-    collection.SetMapListOffset(offset);
+    data_stream->AlignTo(SectionAlignment(DexFile::kDexTypeMapList));
+    collection.SetMapListOffset(data_stream->Tell());
   } else {
-    offset = collection.MapListOffset();
+    data_stream->Seek(collection.MapListOffset());
   }
-  offset += GenerateAndWriteMapItems(offset);
-  offset = RoundUp(offset, kDataSectionAlignment);
+  GenerateAndWriteMapItems(data_stream);
+  data_stream->AlignTo(kDataSectionAlignment);
 
   // Map items are included in the data section.
   if (compute_offsets_) {
-    header_->SetDataSize(offset - data_offset_);
+    header_->SetDataSize(data_stream->Tell() - data_offset_);
     if (header_->DataSize() != 0) {
       // Offset must be zero when the size is zero.
       header_->SetDataOffset(data_offset_);
@@ -348,25 +381,34 @@
   if (link_data.size() > 0) {
     CHECK_EQ(header_->LinkSize(), static_cast<uint32_t>(link_data.size()));
     if (compute_offsets_) {
-      header_->SetLinkOffset(offset);
+      header_->SetLinkOffset(data_stream->Tell());
+    } else {
+      data_stream->Seek(header_->LinkOffset());
     }
-    offset += Write(&link_data[0], link_data.size(), header_->LinkOffset());
+    data_stream->Write(&link_data[0], link_data.size());
   }
 
   // Write debug info offset table last to make dex file verifier happy.
-  offset += WriteDebugInfoOffsetTable(offset);
+  WriteDebugInfoOffsetTable(data_stream);
 
   // Write header last.
   if (compute_offsets_) {
-    header_->SetFileSize(offset);
+    header_->SetFileSize(main_stream->Tell());
   }
-  WriteHeader();
+  WriteHeader(main_stream);
 
   if (dex_layout_->GetOptions().update_checksum_) {
-    header_->SetChecksum(DexFile::CalculateChecksum(mem_map_->Begin(), offset));
+    header_->SetChecksum(DexFile::CalculateChecksum(main_stream->Begin(), header_->FileSize()));
     // Rewrite the header with the calculated checksum.
-    WriteHeader();
+    WriteHeader(main_stream);
   }
+  // Trim the map to make it the same size as the dex file.
+  output->GetMainSection()->Resize(header_->FileSize());
+}
+
+std::unique_ptr<DexContainer> CompactDexWriter::CreateDexContainer() const {
+  return std::unique_ptr<DexContainer>(
+      new CompactDexWriter::Container(dex_layout_->GetOptions().dedupe_code_items_));
 }
 
 }  // namespace art
diff --git a/dexlayout/compact_dex_writer.h b/dexlayout/compact_dex_writer.h
index cb53cae..626b85a 100644
--- a/dexlayout/compact_dex_writer.h
+++ b/dexlayout/compact_dex_writer.h
@@ -19,6 +19,7 @@
 #ifndef ART_DEXLAYOUT_COMPACT_DEX_WRITER_H_
 #define ART_DEXLAYOUT_COMPACT_DEX_WRITER_H_
 
+#include <memory>  // For unique_ptr
 #include <unordered_map>
 
 #include "dex_writer.h"
@@ -26,62 +27,108 @@
 
 namespace art {
 
-class HashedMemoryRange {
+// Compact dex writer for a single dex.
+class CompactDexWriter : public DexWriter {
  public:
-  uint32_t offset_;
-  uint32_t length_;
+  explicit CompactDexWriter(DexLayout* dex_layout);
 
-  class HashEqual {
+ protected:
+  class Deduper {
    public:
-    explicit HashEqual(const uint8_t* data) : data_(data) {}
+    static const uint32_t kDidNotDedupe = 0;
 
-    // Equal function.
-    bool operator()(const HashedMemoryRange& a, const HashedMemoryRange& b) const {
-      return a.length_ == b.length_ && std::equal(data_ + a.offset_,
-                                                  data_ + a.offset_ + a.length_,
-                                                  data_ + b.offset_);
+    // If not enabled, Dedupe will always return kDidNotDedupe.
+    explicit Deduper(bool enabled, DexContainer::Section* section);
+
+    // Deduplicate a blob of data that has been written to mem_map.
+    // Returns the offset of the deduplicated data or kDidNotDedupe if deduplication did not occur.
+    uint32_t Dedupe(uint32_t data_start, uint32_t data_end, uint32_t item_offset);
+
+   private:
+    class HashedMemoryRange {
+     public:
+      uint32_t offset_;
+      uint32_t length_;
+
+      class HashEqual {
+       public:
+        explicit HashEqual(DexContainer::Section* section) : section_(section) {}
+
+        // Equal function.
+        bool operator()(const HashedMemoryRange& a, const HashedMemoryRange& b) const {
+          if (a.length_ != b.length_) {
+            return false;
+          }
+          const uint8_t* data = Data();
+          return std::equal(data + a.offset_, data + a.offset_ + a.length_, data + b.offset_);
+        }
+
+        // Hash function.
+        size_t operator()(const HashedMemoryRange& range) const {
+          return HashBytes(Data() + range.offset_, range.length_);
+        }
+
+        ALWAYS_INLINE uint8_t* Data() const {
+          return section_->Begin();
+        }
+
+       private:
+        DexContainer::Section* const section_;
+      };
+    };
+
+    const bool enabled_;
+
+    // Dedupe map.
+    std::unordered_map<HashedMemoryRange,
+                       uint32_t,
+                       HashedMemoryRange::HashEqual,
+                       HashedMemoryRange::HashEqual> dedupe_map_;
+  };
+
+ public:
+  class Container : public DexContainer {
+   public:
+    Section* GetMainSection() OVERRIDE {
+      return &main_section_;
     }
 
-    // Hash function.
-    size_t operator()(const HashedMemoryRange& range) const {
-      return HashBytes(data_ + range.offset_, range.length_);
+    Section* GetDataSection() OVERRIDE {
+      return &data_section_;
+    }
+
+    bool IsCompactDexContainer() const OVERRIDE {
+      return true;
     }
 
    private:
-    const uint8_t* data_;
-  };
-};
+    explicit Container(bool dedupe_code_items);
 
-class CompactDexWriter : public DexWriter {
- public:
-  CompactDexWriter(dex_ir::Header* header,
-                   MemMap* mem_map,
-                   DexLayout* dex_layout,
-                   CompactDexLevel compact_dex_level);
+    VectorSection main_section_;
+    VectorSection data_section_;
+    Deduper code_item_dedupe_;
+
+    friend class CompactDexWriter;
+  };
 
  protected:
-  void WriteMemMap() OVERRIDE;
+  void Write(DexContainer* output) OVERRIDE;
 
-  void WriteHeader() OVERRIDE;
+  std::unique_ptr<DexContainer> CreateDexContainer() const OVERRIDE;
+
+  void WriteHeader(Stream* stream) OVERRIDE;
 
   size_t GetHeaderSize() const OVERRIDE;
 
-  uint32_t WriteDebugInfoOffsetTable(uint32_t offset);
+  uint32_t WriteDebugInfoOffsetTable(Stream* stream);
 
-  uint32_t WriteCodeItem(dex_ir::CodeItem* code_item, uint32_t offset, bool reserve_only) OVERRIDE;
+  uint32_t WriteCodeItem(Stream* stream, dex_ir::CodeItem* code_item, bool reserve_only) OVERRIDE;
 
   void SortDebugInfosByMethodIndex();
 
-  // Deduplicate a blob of data that has been written to mem_map. The backing storage is the actual
-  // mem_map contents to reduce RAM usage.
-  // Returns the offset of the deduplicated data or 0 if kDidNotDedupe did not occur.
-  uint32_t DedupeData(uint32_t data_start, uint32_t data_end, uint32_t item_offset);
+  CompactDexLevel GetCompactDexLevel() const;
 
  private:
-  const CompactDexLevel compact_dex_level_;
-
-  static const uint32_t kDidNotDedupe = 0;
-
   // Position in the compact dex file for the debug info table data starts.
   uint32_t debug_info_offsets_pos_ = 0u;
 
@@ -91,11 +138,8 @@
   // Base offset of where debug info starts in the dex file.
   uint32_t debug_info_base_ = 0u;
 
-  // Dedupe map.
-  std::unordered_map<HashedMemoryRange,
-                     uint32_t,
-                     HashedMemoryRange::HashEqual,
-                     HashedMemoryRange::HashEqual> data_dedupe_;
+  // State for where we are deduping.
+  Deduper* code_item_dedupe_ = nullptr;
 
   DISALLOW_COPY_AND_ASSIGN(CompactDexWriter);
 };
diff --git a/dexlayout/dex_container.h b/dexlayout/dex_container.h
new file mode 100644
index 0000000..7c426cb
--- /dev/null
+++ b/dexlayout/dex_container.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Header file of an in-memory representation of DEX files.
+ */
+
+#ifndef ART_DEXLAYOUT_DEX_CONTAINER_H_
+#define ART_DEXLAYOUT_DEX_CONTAINER_H_
+
+#include <vector>
+
+namespace art {
+
+// Dex container holds the artifacts produced by dexlayout and contains up to two sections: a main
+// section and a data section.
+// This container may also hold metadata used for multi dex deduplication in the future.
+class DexContainer {
+ public:
+  virtual ~DexContainer() {}
+
+  class Section {
+   public:
+    virtual ~Section() {}
+
+    // Returns the start of the memory region.
+    virtual uint8_t* Begin() = 0;
+
+    // Size in bytes.
+    virtual size_t Size() const = 0;
+
+    // Resize the backing storage.
+    virtual void Resize(size_t size) = 0;
+
+    // Returns the end of the memory region.
+    uint8_t* End() {
+      return Begin() + Size();
+    }
+  };
+
+  // Vector backed section.
+  class VectorSection : public Section {
+   public:
+    virtual ~VectorSection() {}
+
+    uint8_t* Begin() OVERRIDE {
+      return &data_[0];
+    }
+
+    size_t Size() const OVERRIDE {
+      return data_.size();
+    }
+
+    void Resize(size_t size) OVERRIDE {
+      data_.resize(size, 0u);
+    }
+
+   private:
+    std::vector<uint8_t> data_;
+  };
+
+  virtual Section* GetMainSection() = 0;
+  virtual Section* GetDataSection() = 0;
+  virtual bool IsCompactDexContainer() const = 0;
+};
+
+}  // namespace art
+
+#endif  // ART_DEXLAYOUT_DEX_CONTAINER_H_
diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h
index 3627717..1a84d23 100644
--- a/dexlayout/dex_ir.h
+++ b/dexlayout/dex_ir.h
@@ -486,11 +486,11 @@
   virtual ~Item() { }
 
   // Return the assigned offset.
-  uint32_t GetOffset() const {
+  uint32_t GetOffset() const WARN_UNUSED {
     CHECK(OffsetAssigned());
     return offset_;
   }
-  uint32_t GetSize() const { return size_; }
+  uint32_t GetSize() const WARN_UNUSED { return size_; }
   void SetOffset(uint32_t offset) { offset_ = offset; }
   void SetSize(uint32_t size) { size_ = size; }
   bool OffsetAssigned() const {
diff --git a/dexlayout/dex_writer.cc b/dexlayout/dex_writer.cc
index d26c948..eb038a0 100644
--- a/dexlayout/dex_writer.cc
+++ b/dexlayout/dex_writer.cc
@@ -30,7 +30,7 @@
 
 namespace art {
 
-size_t EncodeIntValue(int32_t value, uint8_t* buffer) {
+static size_t EncodeIntValue(int32_t value, uint8_t* buffer) {
   size_t length = 0;
   if (value >= 0) {
     while (value > 0x7f) {
@@ -47,7 +47,7 @@
   return length;
 }
 
-size_t EncodeUIntValue(uint32_t value, uint8_t* buffer) {
+static size_t EncodeUIntValue(uint32_t value, uint8_t* buffer) {
   size_t length = 0;
   do {
     buffer[length++] = static_cast<uint8_t>(value);
@@ -56,7 +56,7 @@
   return length;
 }
 
-size_t EncodeLongValue(int64_t value, uint8_t* buffer) {
+static size_t EncodeLongValue(int64_t value, uint8_t* buffer) {
   size_t length = 0;
   if (value >= 0) {
     while (value > 0x7f) {
@@ -78,7 +78,7 @@
   uint32_t i_;
 };
 
-size_t EncodeFloatValue(float value, uint8_t* buffer) {
+static size_t EncodeFloatValue(float value, uint8_t* buffer) {
   FloatUnion float_union;
   float_union.f_ = value;
   uint32_t int_value = float_union.i_;
@@ -95,7 +95,7 @@
   uint64_t l_;
 };
 
-size_t EncodeDoubleValue(double value, uint8_t* buffer) {
+static size_t EncodeDoubleValue(double value, uint8_t* buffer) {
   DoubleUnion double_union;
   double_union.d_ = value;
   uint64_t long_value = double_union.l_;
@@ -107,26 +107,13 @@
   return 7 - index;
 }
 
-size_t DexWriter::Write(const void* buffer, size_t length, size_t offset) {
-  DCHECK_LE(offset + length, mem_map_->Size());
-  memcpy(mem_map_->Begin() + offset, buffer, length);
-  return length;
-}
+DexWriter::DexWriter(DexLayout* dex_layout, bool compute_offsets)
+    : header_(dex_layout->GetHeader()),
+      dex_layout_(dex_layout),
+      compute_offsets_(compute_offsets) {}
 
-size_t DexWriter::WriteSleb128(uint32_t value, size_t offset) {
-  uint8_t buffer[8];
-  EncodeSignedLeb128(buffer, value);
-  return Write(buffer, SignedLeb128Size(value), offset);
-}
-
-size_t DexWriter::WriteUleb128(uint32_t value, size_t offset) {
-  uint8_t buffer[8];
-  EncodeUnsignedLeb128(buffer, value);
-  return Write(buffer, UnsignedLeb128Size(value), offset);
-}
-
-size_t DexWriter::WriteEncodedValue(dex_ir::EncodedValue* encoded_value, size_t offset) {
-  size_t original_offset = offset;
+size_t DexWriter::WriteEncodedValue(Stream* stream, dex_ir::EncodedValue* encoded_value) {
+  size_t original_offset = stream->Tell();
   size_t start = 0;
   size_t length;
   uint8_t buffer[8];
@@ -175,284 +162,285 @@
       length = EncodeUIntValue(encoded_value->GetMethodId()->GetIndex(), buffer);
       break;
     case DexFile::kDexAnnotationArray:
-      offset += WriteEncodedValueHeader(type, 0, offset);
-      offset += WriteEncodedArray(encoded_value->GetEncodedArray()->GetEncodedValues(), offset);
-      return offset - original_offset;
+      WriteEncodedValueHeader(stream, type, 0);
+      WriteEncodedArray(stream, encoded_value->GetEncodedArray()->GetEncodedValues());
+      return stream->Tell() - original_offset;
     case DexFile::kDexAnnotationAnnotation:
-      offset += WriteEncodedValueHeader(type, 0, offset);
-      offset += WriteEncodedAnnotation(encoded_value->GetEncodedAnnotation(), offset);
-      return offset - original_offset;
+      WriteEncodedValueHeader(stream, type, 0);
+      WriteEncodedAnnotation(stream, encoded_value->GetEncodedAnnotation());
+      return stream->Tell() - original_offset;
     case DexFile::kDexAnnotationNull:
-      return WriteEncodedValueHeader(type, 0, offset);
+      return WriteEncodedValueHeader(stream, type, 0);
     case DexFile::kDexAnnotationBoolean:
-      return WriteEncodedValueHeader(type, encoded_value->GetBoolean() ? 1 : 0, offset);
+      return WriteEncodedValueHeader(stream, type, encoded_value->GetBoolean() ? 1 : 0);
     default:
       return 0;
   }
-  offset += WriteEncodedValueHeader(type, length - 1, offset);
-  offset += Write(buffer + start, length, offset);
-  return offset - original_offset;
+  WriteEncodedValueHeader(stream, type, length - 1);
+  stream->Write(buffer + start, length);
+  return stream->Tell() - original_offset;
 }
 
-size_t DexWriter::WriteEncodedValueHeader(int8_t value_type, size_t value_arg, size_t offset) {
+size_t DexWriter::WriteEncodedValueHeader(Stream* stream, int8_t value_type, size_t value_arg) {
   uint8_t buffer[1] = { static_cast<uint8_t>((value_arg << 5) | value_type) };
-  return Write(buffer, sizeof(uint8_t), offset);
+  return stream->Write(buffer, sizeof(uint8_t));
 }
 
-size_t DexWriter::WriteEncodedArray(dex_ir::EncodedValueVector* values, size_t offset) {
-  size_t original_offset = offset;
-  offset += WriteUleb128(values->size(), offset);
+size_t DexWriter::WriteEncodedArray(Stream* stream, dex_ir::EncodedValueVector* values) {
+  size_t original_offset = stream->Tell();
+  stream->WriteUleb128(values->size());
   for (std::unique_ptr<dex_ir::EncodedValue>& value : *values) {
-    offset += WriteEncodedValue(value.get(), offset);
+    WriteEncodedValue(stream, value.get());
   }
-  return offset - original_offset;
+  return stream->Tell() - original_offset;
 }
 
-size_t DexWriter::WriteEncodedAnnotation(dex_ir::EncodedAnnotation* annotation, size_t offset) {
-  size_t original_offset = offset;
-  offset += WriteUleb128(annotation->GetType()->GetIndex(), offset);
-  offset += WriteUleb128(annotation->GetAnnotationElements()->size(), offset);
+size_t DexWriter::WriteEncodedAnnotation(Stream* stream, dex_ir::EncodedAnnotation* annotation) {
+  size_t original_offset = stream->Tell();
+  stream->WriteUleb128(annotation->GetType()->GetIndex());
+  stream->WriteUleb128(annotation->GetAnnotationElements()->size());
   for (std::unique_ptr<dex_ir::AnnotationElement>& annotation_element :
       *annotation->GetAnnotationElements()) {
-    offset += WriteUleb128(annotation_element->GetName()->GetIndex(), offset);
-    offset += WriteEncodedValue(annotation_element->GetValue(), offset);
+    stream->WriteUleb128(annotation_element->GetName()->GetIndex());
+    WriteEncodedValue(stream, annotation_element->GetValue());
   }
-  return offset - original_offset;
+  return stream->Tell() - original_offset;
 }
 
-size_t DexWriter::WriteEncodedFields(dex_ir::FieldItemVector* fields, size_t offset) {
-  size_t original_offset = offset;
+size_t DexWriter::WriteEncodedFields(Stream* stream, dex_ir::FieldItemVector* fields) {
+  size_t original_offset = stream->Tell();
   uint32_t prev_index = 0;
   for (std::unique_ptr<dex_ir::FieldItem>& field : *fields) {
     uint32_t index = field->GetFieldId()->GetIndex();
-    offset += WriteUleb128(index - prev_index, offset);
-    offset += WriteUleb128(field->GetAccessFlags(), offset);
+    stream->WriteUleb128(index - prev_index);
+    stream->WriteUleb128(field->GetAccessFlags());
     prev_index = index;
   }
-  return offset - original_offset;
+  return stream->Tell() - original_offset;
 }
 
-size_t DexWriter::WriteEncodedMethods(dex_ir::MethodItemVector* methods, size_t offset) {
-  size_t original_offset = offset;
+size_t DexWriter::WriteEncodedMethods(Stream* stream, dex_ir::MethodItemVector* methods) {
+  size_t original_offset = stream->Tell();
   uint32_t prev_index = 0;
   for (std::unique_ptr<dex_ir::MethodItem>& method : *methods) {
     uint32_t index = method->GetMethodId()->GetIndex();
     uint32_t code_off = method->GetCodeItem() == nullptr ? 0 : method->GetCodeItem()->GetOffset();
-    offset += WriteUleb128(index - prev_index, offset);
-    offset += WriteUleb128(method->GetAccessFlags(), offset);
-    offset += WriteUleb128(code_off, offset);
+    stream->WriteUleb128(index - prev_index);
+    stream->WriteUleb128(method->GetAccessFlags());
+    stream->WriteUleb128(code_off);
     prev_index = index;
   }
-  return offset - original_offset;
+  return stream->Tell() - original_offset;
 }
 
 // TODO: Refactor this to remove duplicated boiler plate. One way to do this is adding
 // function that takes a CollectionVector<T> and uses overloading.
-uint32_t DexWriter::WriteStringIds(uint32_t offset, bool reserve_only) {
-  const uint32_t start = offset;
+uint32_t DexWriter::WriteStringIds(Stream* stream, bool reserve_only) {
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::StringId>& string_id : header_->GetCollections().StringIds()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeStringIdItem));
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeStringIdItem));
     if (reserve_only) {
-      offset += string_id->GetSize();
+      stream->Skip(string_id->GetSize());
     } else {
       uint32_t string_data_off = string_id->DataItem()->GetOffset();
-      offset += Write(&string_data_off, string_id->GetSize(), offset);
+      stream->Write(&string_data_off, string_id->GetSize());
     }
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetStringIdsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteStringDatas(uint32_t offset) {
-  const uint32_t start = offset;
+uint32_t DexWriter::WriteStringDatas(Stream* stream) {
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::StringData>& string_data : header_->GetCollections().StringDatas()) {
-    ProcessOffset(&offset, string_data.get());
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeStringDataItem));
-    offset += WriteUleb128(CountModifiedUtf8Chars(string_data->Data()), offset);
+    ProcessOffset(stream, string_data.get());
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeStringDataItem));
+    stream->WriteUleb128(CountModifiedUtf8Chars(string_data->Data()));
+    stream->Write(string_data->Data(), strlen(string_data->Data()));
     // Skip null terminator (already zeroed out, no need to write).
-    offset += Write(string_data->Data(), strlen(string_data->Data()), offset) + 1u;
+    stream->Skip(1);
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetStringDatasOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteTypeIds(uint32_t offset) {
+uint32_t DexWriter::WriteTypeIds(Stream* stream) {
   uint32_t descriptor_idx[1];
-  const uint32_t start = offset;
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::TypeId>& type_id : header_->GetCollections().TypeIds()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeTypeIdItem));
-    ProcessOffset(&offset, type_id.get());
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeTypeIdItem));
+    ProcessOffset(stream, type_id.get());
     descriptor_idx[0] = type_id->GetStringId()->GetIndex();
-    offset += Write(descriptor_idx, type_id->GetSize(), offset);
+    stream->Write(descriptor_idx, type_id->GetSize());
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetTypeIdsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteTypeLists(uint32_t offset) {
+uint32_t DexWriter::WriteTypeLists(Stream* stream) {
   uint32_t size[1];
   uint16_t list[1];
-  const uint32_t start = offset;
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::TypeList>& type_list : header_->GetCollections().TypeLists()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeTypeList));
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeTypeList));
     size[0] = type_list->GetTypeList()->size();
-    ProcessOffset(&offset, type_list.get());
-    offset += Write(size, sizeof(uint32_t), offset);
+    ProcessOffset(stream, type_list.get());
+    stream->Write(size, sizeof(uint32_t));
     for (const dex_ir::TypeId* type_id : *type_list->GetTypeList()) {
       list[0] = type_id->GetIndex();
-      offset += Write(list, sizeof(uint16_t), offset);
+      stream->Write(list, sizeof(uint16_t));
     }
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetTypeListsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteProtoIds(uint32_t offset, bool reserve_only) {
+uint32_t DexWriter::WriteProtoIds(Stream* stream, bool reserve_only) {
   uint32_t buffer[3];
-  const uint32_t start = offset;
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::ProtoId>& proto_id : header_->GetCollections().ProtoIds()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeProtoIdItem));
-    ProcessOffset(&offset, proto_id.get());
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeProtoIdItem));
+    ProcessOffset(stream, proto_id.get());
     if (reserve_only) {
-      offset += proto_id->GetSize();
+      stream->Skip(proto_id->GetSize());
     } else {
       buffer[0] = proto_id->Shorty()->GetIndex();
       buffer[1] = proto_id->ReturnType()->GetIndex();
       buffer[2] = proto_id->Parameters() == nullptr ? 0 : proto_id->Parameters()->GetOffset();
-      offset += Write(buffer, proto_id->GetSize(), offset);
+      stream->Write(buffer, proto_id->GetSize());
     }
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetProtoIdsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteFieldIds(uint32_t offset) {
+uint32_t DexWriter::WriteFieldIds(Stream* stream) {
   uint16_t buffer[4];
-  const uint32_t start = offset;
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::FieldId>& field_id : header_->GetCollections().FieldIds()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeFieldIdItem));
-    ProcessOffset(&offset, field_id.get());
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeFieldIdItem));
+    ProcessOffset(stream, field_id.get());
     buffer[0] = field_id->Class()->GetIndex();
     buffer[1] = field_id->Type()->GetIndex();
     buffer[2] = field_id->Name()->GetIndex();
     buffer[3] = field_id->Name()->GetIndex() >> 16;
-    offset += Write(buffer, field_id->GetSize(), offset);
+    stream->Write(buffer, field_id->GetSize());
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetFieldIdsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteMethodIds(uint32_t offset) {
+uint32_t DexWriter::WriteMethodIds(Stream* stream) {
   uint16_t buffer[4];
-  const uint32_t start = offset;
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::MethodId>& method_id : header_->GetCollections().MethodIds()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeMethodIdItem));
-    ProcessOffset(&offset, method_id.get());
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeMethodIdItem));
+    ProcessOffset(stream, method_id.get());
     buffer[0] = method_id->Class()->GetIndex();
     buffer[1] = method_id->Proto()->GetIndex();
     buffer[2] = method_id->Name()->GetIndex();
     buffer[3] = method_id->Name()->GetIndex() >> 16;
-    offset += Write(buffer, method_id->GetSize(), offset);
+    stream->Write(buffer, method_id->GetSize());
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetMethodIdsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteEncodedArrays(uint32_t offset) {
-  const uint32_t start = offset;
+uint32_t DexWriter::WriteEncodedArrays(Stream* stream) {
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::EncodedArrayItem>& encoded_array :
       header_->GetCollections().EncodedArrayItems()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeEncodedArrayItem));
-    ProcessOffset(&offset, encoded_array.get());
-    offset += WriteEncodedArray(encoded_array->GetEncodedValues(), offset);
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeEncodedArrayItem));
+    ProcessOffset(stream, encoded_array.get());
+    WriteEncodedArray(stream, encoded_array->GetEncodedValues());
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetEncodedArrayItemsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteAnnotations(uint32_t offset) {
+uint32_t DexWriter::WriteAnnotations(Stream* stream) {
   uint8_t visibility[1];
-  const uint32_t start = offset;
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::AnnotationItem>& annotation :
       header_->GetCollections().AnnotationItems()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeAnnotationItem));
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeAnnotationItem));
     visibility[0] = annotation->GetVisibility();
-    ProcessOffset(&offset, annotation.get());
-    offset += Write(visibility, sizeof(uint8_t), offset);
-    offset += WriteEncodedAnnotation(annotation->GetAnnotation(), offset);
+    ProcessOffset(stream, annotation.get());
+    stream->Write(visibility, sizeof(uint8_t));
+    WriteEncodedAnnotation(stream, annotation->GetAnnotation());
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetAnnotationItemsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteAnnotationSets(uint32_t offset) {
+uint32_t DexWriter::WriteAnnotationSets(Stream* stream) {
   uint32_t size[1];
   uint32_t annotation_off[1];
-  const uint32_t start = offset;
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::AnnotationSetItem>& annotation_set :
       header_->GetCollections().AnnotationSetItems()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeAnnotationSetItem));
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeAnnotationSetItem));
     size[0] = annotation_set->GetItems()->size();
-    ProcessOffset(&offset, annotation_set.get());
-    offset += Write(size, sizeof(uint32_t), offset);
+    ProcessOffset(stream, annotation_set.get());
+    stream->Write(size, sizeof(uint32_t));
     for (dex_ir::AnnotationItem* annotation : *annotation_set->GetItems()) {
       annotation_off[0] = annotation->GetOffset();
-      offset += Write(annotation_off, sizeof(uint32_t), offset);
+      stream->Write(annotation_off, sizeof(uint32_t));
     }
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetAnnotationSetItemsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteAnnotationSetRefs(uint32_t offset) {
+uint32_t DexWriter::WriteAnnotationSetRefs(Stream* stream) {
   uint32_t size[1];
   uint32_t annotations_off[1];
-  const uint32_t start = offset;
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::AnnotationSetRefList>& annotation_set_ref :
       header_->GetCollections().AnnotationSetRefLists()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeAnnotationSetRefList));
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeAnnotationSetRefList));
     size[0] = annotation_set_ref->GetItems()->size();
-    ProcessOffset(&offset, annotation_set_ref.get());
-    offset += Write(size, sizeof(uint32_t), offset);
+    ProcessOffset(stream, annotation_set_ref.get());
+    stream->Write(size, sizeof(uint32_t));
     for (dex_ir::AnnotationSetItem* annotation_set : *annotation_set_ref->GetItems()) {
       annotations_off[0] = annotation_set == nullptr ? 0 : annotation_set->GetOffset();
-      offset += Write(annotations_off, sizeof(uint32_t), offset);
+      stream->Write(annotations_off, sizeof(uint32_t));
     }
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetAnnotationSetRefListsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteAnnotationsDirectories(uint32_t offset) {
+uint32_t DexWriter::WriteAnnotationsDirectories(Stream* stream) {
   uint32_t directory_buffer[4];
   uint32_t annotation_buffer[2];
-  const uint32_t start = offset;
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::AnnotationsDirectoryItem>& annotations_directory :
       header_->GetCollections().AnnotationsDirectoryItems()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeAnnotationsDirectoryItem));
-    ProcessOffset(&offset, annotations_directory.get());
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeAnnotationsDirectoryItem));
+    ProcessOffset(stream, annotations_directory.get());
     directory_buffer[0] = annotations_directory->GetClassAnnotation() == nullptr ? 0 :
         annotations_directory->GetClassAnnotation()->GetOffset();
     directory_buffer[1] = annotations_directory->GetFieldAnnotations() == nullptr ? 0 :
@@ -461,13 +449,13 @@
         annotations_directory->GetMethodAnnotations()->size();
     directory_buffer[3] = annotations_directory->GetParameterAnnotations() == nullptr ? 0 :
         annotations_directory->GetParameterAnnotations()->size();
-    offset += Write(directory_buffer, 4 * sizeof(uint32_t), offset);
+    stream->Write(directory_buffer, 4 * sizeof(uint32_t));
     if (annotations_directory->GetFieldAnnotations() != nullptr) {
       for (std::unique_ptr<dex_ir::FieldAnnotation>& field :
           *annotations_directory->GetFieldAnnotations()) {
         annotation_buffer[0] = field->GetFieldId()->GetIndex();
         annotation_buffer[1] = field->GetAnnotationSetItem()->GetOffset();
-        offset += Write(annotation_buffer, 2 * sizeof(uint32_t), offset);
+        stream->Write(annotation_buffer, 2 * sizeof(uint32_t));
       }
     }
     if (annotations_directory->GetMethodAnnotations() != nullptr) {
@@ -475,7 +463,7 @@
           *annotations_directory->GetMethodAnnotations()) {
         annotation_buffer[0] = method->GetMethodId()->GetIndex();
         annotation_buffer[1] = method->GetAnnotationSetItem()->GetOffset();
-        offset += Write(annotation_buffer, 2 * sizeof(uint32_t), offset);
+        stream->Write(annotation_buffer, 2 * sizeof(uint32_t));
       }
     }
     if (annotations_directory->GetParameterAnnotations() != nullptr) {
@@ -483,37 +471,36 @@
           *annotations_directory->GetParameterAnnotations()) {
         annotation_buffer[0] = parameter->GetMethodId()->GetIndex();
         annotation_buffer[1] = parameter->GetAnnotations()->GetOffset();
-        offset += Write(annotation_buffer, 2 * sizeof(uint32_t), offset);
+        stream->Write(annotation_buffer, 2 * sizeof(uint32_t));
       }
     }
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetAnnotationsDirectoryItemsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteDebugInfoItems(uint32_t offset) {
-  const uint32_t start = offset;
+uint32_t DexWriter::WriteDebugInfoItems(Stream* stream) {
+  const uint32_t start = stream->Tell();
   for (std::unique_ptr<dex_ir::DebugInfoItem>& debug_info :
       header_->GetCollections().DebugInfoItems()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeDebugInfoItem));
-    ProcessOffset(&offset, debug_info.get());
-    offset += Write(debug_info->GetDebugInfo(), debug_info->GetDebugInfoSize(), offset);
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeDebugInfoItem));
+    ProcessOffset(stream, debug_info.get());
+    stream->Write(debug_info->GetDebugInfo(), debug_info->GetDebugInfoSize());
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetDebugInfoItemsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteCodeItemPostInstructionData(dex_ir::CodeItem* code_item,
-                                                     uint32_t offset,
+uint32_t DexWriter::WriteCodeItemPostInstructionData(Stream* stream,
+                                                     dex_ir::CodeItem* code_item,
                                                      bool reserve_only) {
-  const uint32_t start_offset = offset;
+  const uint32_t start_offset = stream->Tell();
   if (code_item->TriesSize() != 0) {
-    // Align for the try items.
-    offset = RoundUp(offset, DexFile::TryItem::kAlignment);
+    stream->AlignTo(DexFile::TryItem::kAlignment);
     // Write try items.
     for (std::unique_ptr<const dex_ir::TryItem>& try_item : *code_item->Tries()) {
       DexFile::TryItem disk_try_item;
@@ -522,38 +509,37 @@
         disk_try_item.insn_count_ = try_item->InsnCount();
         disk_try_item.handler_off_ = try_item->GetHandlers()->GetListOffset();
       }
-      offset += Write(&disk_try_item, sizeof(disk_try_item), offset);
+      stream->Write(&disk_try_item, sizeof(disk_try_item));
     }
-    size_t max_offset = offset;
     // Leave offset pointing to the end of the try items.
-    UNUSED(WriteUleb128(code_item->Handlers()->size(), offset));
+    const size_t offset = stream->Tell();
+    size_t max_offset = offset + stream->WriteUleb128(code_item->Handlers()->size());
     for (std::unique_ptr<const dex_ir::CatchHandler>& handlers : *code_item->Handlers()) {
-      size_t list_offset = offset + handlers->GetListOffset();
+      stream->Seek(offset + handlers->GetListOffset());
       uint32_t size = handlers->HasCatchAll() ? (handlers->GetHandlers()->size() - 1) * -1 :
           handlers->GetHandlers()->size();
-      list_offset += WriteSleb128(size, list_offset);
+      stream->WriteSleb128(size);
       for (std::unique_ptr<const dex_ir::TypeAddrPair>& handler : *handlers->GetHandlers()) {
         if (handler->GetTypeId() != nullptr) {
-          list_offset += WriteUleb128(handler->GetTypeId()->GetIndex(), list_offset);
+          stream->WriteUleb128(handler->GetTypeId()->GetIndex());
         }
-        list_offset += WriteUleb128(handler->GetAddress(), list_offset);
+        stream->WriteUleb128(handler->GetAddress());
       }
       // TODO: Clean this up to write the handlers in address order.
-      max_offset = std::max(max_offset, list_offset);
+      max_offset = std::max(max_offset, stream->Tell());
     }
-    offset = max_offset;
+    stream->Seek(max_offset);
   }
-
-  return offset - start_offset;
+  return stream->Tell() - start_offset;
 }
 
-uint32_t DexWriter::WriteCodeItem(dex_ir::CodeItem* code_item,
-                                  uint32_t offset,
+uint32_t DexWriter::WriteCodeItem(Stream* stream,
+                                  dex_ir::CodeItem* code_item,
                                   bool reserve_only) {
   DCHECK(code_item != nullptr);
-  const uint32_t start_offset = offset;
-  offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeCodeItem));
-  ProcessOffset(&offset, code_item);
+  const uint32_t start_offset = stream->Tell();
+  stream->AlignTo(SectionAlignment(DexFile::kDexTypeCodeItem));
+  ProcessOffset(stream, code_item);
 
   StandardDexFile::CodeItem disk_code_item;
   if (!reserve_only) {
@@ -568,50 +554,50 @@
   }
   // Avoid using sizeof so that we don't write the fake instruction array at the end of the code
   // item.
-  offset += Write(&disk_code_item,
-                  OFFSETOF_MEMBER(StandardDexFile::CodeItem, insns_),
-                  offset);
+  stream->Write(&disk_code_item, OFFSETOF_MEMBER(StandardDexFile::CodeItem, insns_));
   // Write the instructions.
-  offset += Write(code_item->Insns(), code_item->InsnsSize() * sizeof(uint16_t), offset);
+  stream->Write(code_item->Insns(), code_item->InsnsSize() * sizeof(uint16_t));
   // Write the post instruction data.
-  offset += WriteCodeItemPostInstructionData(code_item, offset, reserve_only);
-  return offset - start_offset;
+  WriteCodeItemPostInstructionData(stream, code_item, reserve_only);
+  if (reserve_only) {
+    stream->Clear(start_offset, stream->Tell() - start_offset);
+  }
+  return stream->Tell() - start_offset;
 }
 
-uint32_t DexWriter::WriteCodeItems(uint32_t offset, bool reserve_only) {
+uint32_t DexWriter::WriteCodeItems(Stream* stream, bool reserve_only) {
   DexLayoutSection* code_section = nullptr;
   if (!reserve_only && dex_layout_ != nullptr) {
     code_section = &dex_layout_->GetSections().sections_[static_cast<size_t>(
         DexLayoutSections::SectionType::kSectionTypeCode)];
   }
-  uint32_t start = offset;
+  const uint32_t start = stream->Tell();
   for (auto& code_item : header_->GetCollections().CodeItems()) {
-    const size_t code_item_size = WriteCodeItem(code_item.get(), offset, reserve_only);
+    const size_t code_item_size = WriteCodeItem(stream, code_item.get(), reserve_only);
     // Only add the section hotness info once.
     if (!reserve_only && code_section != nullptr) {
       auto it = dex_layout_->LayoutHotnessInfo().code_item_layout_.find(code_item.get());
       if (it != dex_layout_->LayoutHotnessInfo().code_item_layout_.end()) {
         code_section->parts_[static_cast<size_t>(it->second)].CombineSection(
-            offset,
-            offset + code_item_size);
+            stream->Tell() - code_item_size,
+            stream->Tell());
       }
     }
-    offset += code_item_size;
   }
 
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetCodeItemsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteClassDefs(uint32_t offset, bool reserve_only) {
-  const uint32_t start = offset;
+uint32_t DexWriter::WriteClassDefs(Stream* stream, bool reserve_only) {
+  const uint32_t start = stream->Tell();
   uint32_t class_def_buffer[8];
   for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeClassDefItem));
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeClassDefItem));
     if (reserve_only) {
-      offset += class_def->GetSize();
+      stream->Skip(class_def->GetSize());
     } else {
       class_def_buffer[0] = class_def->ClassType()->GetIndex();
       class_def_buffer[1] = class_def->GetAccessFlags();
@@ -626,94 +612,94 @@
           class_def->GetClassData()->GetOffset();
       class_def_buffer[7] = class_def->StaticValues() == nullptr ? 0 :
           class_def->StaticValues()->GetOffset();
-      offset += Write(class_def_buffer, class_def->GetSize(), offset);
+      stream->Write(class_def_buffer, class_def->GetSize());
     }
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetClassDefsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteClassDatas(uint32_t offset) {
-  const uint32_t start = offset;
+uint32_t DexWriter::WriteClassDatas(Stream* stream) {
+  const uint32_t start = stream->Tell();
   for (const std::unique_ptr<dex_ir::ClassData>& class_data :
       header_->GetCollections().ClassDatas()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeClassDataItem));
-    ProcessOffset(&offset, class_data.get());
-    offset += WriteUleb128(class_data->StaticFields()->size(), offset);
-    offset += WriteUleb128(class_data->InstanceFields()->size(), offset);
-    offset += WriteUleb128(class_data->DirectMethods()->size(), offset);
-    offset += WriteUleb128(class_data->VirtualMethods()->size(), offset);
-    offset += WriteEncodedFields(class_data->StaticFields(), offset);
-    offset += WriteEncodedFields(class_data->InstanceFields(), offset);
-    offset += WriteEncodedMethods(class_data->DirectMethods(), offset);
-    offset += WriteEncodedMethods(class_data->VirtualMethods(), offset);
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeClassDataItem));
+    ProcessOffset(stream, class_data.get());
+    stream->WriteUleb128(class_data->StaticFields()->size());
+    stream->WriteUleb128(class_data->InstanceFields()->size());
+    stream->WriteUleb128(class_data->DirectMethods()->size());
+    stream->WriteUleb128(class_data->VirtualMethods()->size());
+    WriteEncodedFields(stream, class_data->StaticFields());
+    WriteEncodedFields(stream, class_data->InstanceFields());
+    WriteEncodedMethods(stream, class_data->DirectMethods());
+    WriteEncodedMethods(stream, class_data->VirtualMethods());
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetClassDatasOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteCallSiteIds(uint32_t offset, bool reserve_only) {
-  const uint32_t start = offset;
+uint32_t DexWriter::WriteCallSiteIds(Stream* stream, bool reserve_only) {
+  const uint32_t start = stream->Tell();
   uint32_t call_site_off[1];
   for (std::unique_ptr<dex_ir::CallSiteId>& call_site_id :
       header_->GetCollections().CallSiteIds()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeCallSiteIdItem));
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeCallSiteIdItem));
     if (reserve_only) {
-      offset += call_site_id->GetSize();
+      stream->Skip(call_site_id->GetSize());
     } else {
       call_site_off[0] = call_site_id->CallSiteItem()->GetOffset();
-      offset += Write(call_site_off, call_site_id->GetSize(), offset);
+      stream->Write(call_site_off, call_site_id->GetSize());
     }
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetCallSiteIdsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteMethodHandles(uint32_t offset) {
-  const uint32_t start = offset;
+uint32_t DexWriter::WriteMethodHandles(Stream* stream) {
+  const uint32_t start = stream->Tell();
   uint16_t method_handle_buff[4];
   for (std::unique_ptr<dex_ir::MethodHandleItem>& method_handle :
       header_->GetCollections().MethodHandleItems()) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeMethodHandleItem));
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeMethodHandleItem));
     method_handle_buff[0] = static_cast<uint16_t>(method_handle->GetMethodHandleType());
     method_handle_buff[1] = 0;  // unused.
     method_handle_buff[2] = method_handle->GetFieldOrMethodId()->GetIndex();
     method_handle_buff[3] = 0;  // unused.
-    offset += Write(method_handle_buff, method_handle->GetSize(), offset);
+    stream->Write(method_handle_buff, method_handle->GetSize());
   }
-  if (compute_offsets_ && start != offset) {
+  if (compute_offsets_ && start != stream->Tell()) {
     header_->GetCollections().SetMethodHandleItemsOffset(start);
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::WriteMapItems(uint32_t offset, MapItemQueue* queue) {
+uint32_t DexWriter::WriteMapItems(Stream* stream, MapItemQueue* queue) {
   // All the sections should already have been added.
   uint16_t uint16_buffer[2];
   uint32_t uint32_buffer[2];
   uint16_buffer[1] = 0;
   uint32_buffer[0] = queue->size();
-  const uint32_t start = offset;
-  offset += Write(uint32_buffer, sizeof(uint32_t), offset);
+  const uint32_t start = stream->Tell();
+  stream->Write(uint32_buffer, sizeof(uint32_t));
   while (!queue->empty()) {
     const MapItem& map_item = queue->top();
     uint16_buffer[0] = map_item.type_;
     uint32_buffer[0] = map_item.size_;
     uint32_buffer[1] = map_item.offset_;
-    offset += Write(uint16_buffer, 2 * sizeof(uint16_t), offset);
-    offset += Write(uint32_buffer, 2 * sizeof(uint32_t), offset);
+    stream->Write(uint16_buffer, 2 * sizeof(uint16_t));
+    stream->Write(uint32_buffer, 2 * sizeof(uint32_t));
     queue->pop();
   }
-  return offset - start;
+  return stream->Tell() - start;
 }
 
-uint32_t DexWriter::GenerateAndWriteMapItems(uint32_t offset) {
+uint32_t DexWriter::GenerateAndWriteMapItems(Stream* stream) {
   dex_ir::Collections& collection = header_->GetCollections();
   MapItemQueue queue;
 
@@ -777,10 +763,10 @@
                               collection.AnnotationsDirectoryItemsOffset()));
 
   // Write the map items.
-  return WriteMapItems(offset, &queue);
+  return WriteMapItems(stream, &queue);
 }
 
-void DexWriter::WriteHeader() {
+void DexWriter::WriteHeader(Stream* stream) {
   StandardDexFile::Header header;
   if (CompactDexFile::IsMagicValid(header_->Magic())) {
     StandardDexFile::WriteMagic(header.magic_);
@@ -818,78 +804,97 @@
 
   CHECK_EQ(sizeof(header), GetHeaderSize());
   static_assert(sizeof(header) == 0x70, "Size doesn't match dex spec");
-  UNUSED(Write(reinterpret_cast<uint8_t*>(&header), sizeof(header), 0u));
+  stream->Seek(0);
+  stream->Overwrite(reinterpret_cast<uint8_t*>(&header), sizeof(header));
 }
 
 size_t DexWriter::GetHeaderSize() const {
   return sizeof(StandardDexFile::Header);
 }
 
-void DexWriter::WriteMemMap() {
+void DexWriter::Write(DexContainer* output) {
+  Stream stream_storage(output->GetMainSection());
+  Stream* stream = &stream_storage;
+
   // Starting offset is right after the header.
-  uint32_t offset = GetHeaderSize();
+  stream->Seek(GetHeaderSize());
 
   dex_ir::Collections& collection = header_->GetCollections();
 
   // Based on: https://source.android.com/devices/tech/dalvik/dex-format
   // Since the offsets may not be calculated already, the writing must be done in the correct order.
-  const uint32_t string_ids_offset = offset;
-  offset += WriteStringIds(offset, /*reserve_only*/ true);
-  offset += WriteTypeIds(offset);
-  const uint32_t proto_ids_offset = offset;
-  offset += WriteProtoIds(offset, /*reserve_only*/ true);
-  offset += WriteFieldIds(offset);
-  offset += WriteMethodIds(offset);
-  const uint32_t class_defs_offset = offset;
-  offset += WriteClassDefs(offset, /*reserve_only*/ true);
-  const uint32_t call_site_ids_offset = offset;
-  offset += WriteCallSiteIds(offset, /*reserve_only*/ true);
-  offset += WriteMethodHandles(offset);
+  const uint32_t string_ids_offset = stream->Tell();
+  WriteStringIds(stream, /*reserve_only*/ true);
+  WriteTypeIds(stream);
+  const uint32_t proto_ids_offset = stream->Tell();
+  WriteProtoIds(stream, /*reserve_only*/ true);
+  WriteFieldIds(stream);
+  WriteMethodIds(stream);
+  const uint32_t class_defs_offset = stream->Tell();
+  WriteClassDefs(stream, /*reserve_only*/ true);
+  const uint32_t call_site_ids_offset = stream->Tell();
+  WriteCallSiteIds(stream, /*reserve_only*/ true);
+  WriteMethodHandles(stream);
 
   uint32_t data_offset_ = 0u;
   if (compute_offsets_) {
     // Data section.
-    offset = RoundUp(offset, kDataSectionAlignment);
-    data_offset_ = offset;
+    stream->AlignTo(kDataSectionAlignment);
+    data_offset_ = stream->Tell();
   }
 
   // Write code item first to minimize the space required for encoded methods.
   // Reserve code item space since we need the debug offsets to actually write them.
-  const uint32_t code_items_offset = offset;
-  offset += WriteCodeItems(offset, /*reserve_only*/ true);
+  const uint32_t code_items_offset = stream->Tell();
+  WriteCodeItems(stream, /*reserve_only*/ true);
   // Write debug info section.
-  offset += WriteDebugInfoItems(offset);
-  // Actually write code items since debug info offsets are calculated now.
-  WriteCodeItems(code_items_offset, /*reserve_only*/ false);
+  WriteDebugInfoItems(stream);
+  {
+    // Actually write code items since debug info offsets are calculated now.
+    Stream::ScopedSeek seek(stream, code_items_offset);
+    WriteCodeItems(stream, /*reserve_only*/ false);
+  }
 
-  offset += WriteEncodedArrays(offset);
-  offset += WriteAnnotations(offset);
-  offset += WriteAnnotationSets(offset);
-  offset += WriteAnnotationSetRefs(offset);
-  offset += WriteAnnotationsDirectories(offset);
-  offset += WriteTypeLists(offset);
-  offset += WriteClassDatas(offset);
-  offset += WriteStringDatas(offset);
+  WriteEncodedArrays(stream);
+  WriteAnnotations(stream);
+  WriteAnnotationSets(stream);
+  WriteAnnotationSetRefs(stream);
+  WriteAnnotationsDirectories(stream);
+  WriteTypeLists(stream);
+  WriteClassDatas(stream);
+  WriteStringDatas(stream);
 
   // Write delayed id sections that depend on data sections.
-  WriteStringIds(string_ids_offset, /*reserve_only*/ false);
-  WriteProtoIds(proto_ids_offset, /*reserve_only*/ false);
-  WriteClassDefs(class_defs_offset, /*reserve_only*/ false);
-  WriteCallSiteIds(call_site_ids_offset, /*reserve_only*/ false);
+  {
+    Stream::ScopedSeek seek(stream, string_ids_offset);
+    WriteStringIds(stream, /*reserve_only*/ false);
+  }
+  {
+    Stream::ScopedSeek seek(stream, proto_ids_offset);
+    WriteProtoIds(stream, /*reserve_only*/ false);
+  }
+  {
+    Stream::ScopedSeek seek(stream, class_defs_offset);
+    WriteClassDefs(stream, /*reserve_only*/ false);
+  }
+  {
+    Stream::ScopedSeek seek(stream, call_site_ids_offset);
+    WriteCallSiteIds(stream, /*reserve_only*/ false);
+  }
 
   // Write the map list.
   if (compute_offsets_) {
-    offset = RoundUp(offset, SectionAlignment(DexFile::kDexTypeMapList));
-    collection.SetMapListOffset(offset);
+    stream->AlignTo(SectionAlignment(DexFile::kDexTypeMapList));
+    collection.SetMapListOffset(stream->Tell());
   } else {
-    offset = collection.MapListOffset();
+    stream->Seek(collection.MapListOffset());
   }
-  offset += GenerateAndWriteMapItems(offset);
-  offset = RoundUp(offset, kDataSectionAlignment);
+  GenerateAndWriteMapItems(stream);
+  stream->AlignTo(kDataSectionAlignment);
 
   // Map items are included in the data section.
   if (compute_offsets_) {
-    header_->SetDataSize(offset - data_offset_);
+    header_->SetDataSize(stream->Tell() - data_offset_);
     if (header_->DataSize() != 0) {
       // Offset must be zero when the size is zero.
       header_->SetDataOffset(data_offset_);
@@ -903,37 +908,45 @@
   if (link_data.size() > 0) {
     CHECK_EQ(header_->LinkSize(), static_cast<uint32_t>(link_data.size()));
     if (compute_offsets_) {
-      header_->SetLinkOffset(offset);
+      header_->SetLinkOffset(stream->Tell());
+    } else {
+      stream->Seek(header_->LinkOffset());
     }
-    offset += Write(&link_data[0], link_data.size(), header_->LinkOffset());
+    stream->Write(&link_data[0], link_data.size());
   }
 
   // Write header last.
   if (compute_offsets_) {
-    header_->SetFileSize(offset);
+    header_->SetFileSize(stream->Tell());
   }
-  WriteHeader();
+  WriteHeader(stream);
 
   if (dex_layout_->GetOptions().update_checksum_) {
-    header_->SetChecksum(DexFile::CalculateChecksum(mem_map_->Begin(), offset));
+    header_->SetChecksum(DexFile::CalculateChecksum(stream->Begin(), header_->FileSize()));
     // Rewrite the header with the calculated checksum.
-    WriteHeader();
+    WriteHeader(stream);
   }
+
+  // Trim the map to make it sized as large as the dex file.
+  output->GetMainSection()->Resize(header_->FileSize());
 }
 
-void DexWriter::Output(dex_ir::Header* header,
-                       MemMap* mem_map,
-                       DexLayout* dex_layout,
-                       bool compute_offsets,
-                       CompactDexLevel compact_dex_level) {
+void DexWriter::Output(DexLayout* dex_layout,
+                       std::unique_ptr<DexContainer>* container,
+                       bool compute_offsets) {
   CHECK(dex_layout != nullptr);
   std::unique_ptr<DexWriter> writer;
-  if (compact_dex_level != CompactDexLevel::kCompactDexLevelNone) {
-    writer.reset(new CompactDexWriter(header, mem_map, dex_layout, compact_dex_level));
+  if (dex_layout->GetOptions().compact_dex_level_ != CompactDexLevel::kCompactDexLevelNone) {
+    CHECK(compute_offsets) << "Compact dex requires computing offsets";
+    writer.reset(new CompactDexWriter(dex_layout));
   } else {
-    writer.reset(new DexWriter(header, mem_map, dex_layout, compute_offsets));
+    writer.reset(new DexWriter(dex_layout, compute_offsets));
   }
-  writer->WriteMemMap();
+  DCHECK(container != nullptr);
+  if (*container == nullptr) {
+    *container = writer->CreateDexContainer();
+  }
+  writer->Write(container->get());
 }
 
 void MapItemQueue::AddIfNotEmpty(const MapItem& item) {
@@ -942,4 +955,17 @@
   }
 }
 
+void DexWriter::ProcessOffset(Stream* stream, dex_ir::Item* item) {
+  if (compute_offsets_) {
+    item->SetOffset(stream->Tell());
+  } else {
+    // Not computing offsets, just use the one in the item.
+    stream->Seek(item->GetOffset());
+  }
+}
+
+std::unique_ptr<DexContainer> DexWriter::CreateDexContainer() const {
+  return std::unique_ptr<DexContainer>(new DexWriter::Container);
+}
+
 }  // namespace art
diff --git a/dexlayout/dex_writer.h b/dexlayout/dex_writer.h
index 892ea74..e581a8b 100644
--- a/dexlayout/dex_writer.h
+++ b/dexlayout/dex_writer.h
@@ -20,9 +20,11 @@
 #define ART_DEXLAYOUT_DEX_WRITER_H_
 
 #include <functional>
+#include <memory>  // For unique_ptr
 
 #include "base/unix_file/fd_file.h"
 #include "dex/compact_dex_level.h"
+#include "dex_container.h"
 #include "dex/dex_file.h"
 #include "dex_ir.h"
 #include "mem_map.h"
@@ -39,7 +41,7 @@
   // Not using DexFile::MapItemType since compact dex and standard dex file may have different
   // sections.
   MapItem() = default;
-  MapItem(uint32_t type, uint32_t size, uint32_t offset)
+  MapItem(uint32_t type, uint32_t size, size_t offset)
       : type_(type), size_(size), offset_(offset) { }
 
   // Sort by decreasing order since the priority_queue puts largest elements first.
@@ -63,6 +65,114 @@
   static constexpr uint32_t kDataSectionAlignment = sizeof(uint32_t) * 2;
   static constexpr uint32_t kDexSectionWordAlignment = 4;
 
+  // Stream that writes into a dex container section. Do not have two streams pointing to the same
+  // backing storage as there may be invalidation of backing storage to resize the section.
+  // Random access stream (consider refactoring).
+  class Stream {
+   public:
+    explicit Stream(DexContainer::Section* section) : section_(section) {
+      SyncWithSection();
+    }
+
+    const uint8_t* Begin() const {
+      return data_;
+    }
+
+    // Functions are not virtual (yet) for speed.
+    size_t Tell() const {
+      return position_;
+    }
+
+    void Seek(size_t position) {
+      position_ = position;
+    }
+
+    // Does not allow overwriting for bug prevention purposes.
+    ALWAYS_INLINE size_t Write(const void* buffer, size_t length) {
+      EnsureStorage(length);
+      for (size_t i = 0; i < length; ++i) {
+        DCHECK_EQ(data_[position_ + i], 0u);
+      }
+      memcpy(&data_[position_], buffer, length);
+      position_ += length;
+      return length;
+    }
+
+    ALWAYS_INLINE size_t Overwrite(const void* buffer, size_t length) {
+      EnsureStorage(length);
+      memcpy(&data_[position_], buffer, length);
+      position_ += length;
+      return length;
+    }
+
+    ALWAYS_INLINE size_t Clear(size_t position, size_t length) {
+      EnsureStorage(length);
+      memset(&data_[position], 0, length);
+      return length;
+    }
+
+    ALWAYS_INLINE size_t WriteSleb128(int32_t value) {
+      EnsureStorage(8);
+      uint8_t* ptr = &data_[position_];
+      const size_t len = EncodeSignedLeb128(ptr, value) - ptr;
+      position_ += len;
+      return len;
+    }
+
+    ALWAYS_INLINE size_t WriteUleb128(uint32_t value) {
+      EnsureStorage(8);
+      uint8_t* ptr = &data_[position_];
+      const size_t len = EncodeUnsignedLeb128(ptr, value) - ptr;
+      position_ += len;
+      return len;
+    }
+
+    ALWAYS_INLINE void AlignTo(const size_t alignment) {
+      position_ = RoundUp(position_, alignment);
+    }
+
+    ALWAYS_INLINE void Skip(const size_t count) {
+      position_ += count;
+    }
+
+    class ScopedSeek {
+     public:
+      ScopedSeek(Stream* stream, uint32_t offset) : stream_(stream), offset_(stream->Tell()) {
+        stream->Seek(offset);
+      }
+
+      ~ScopedSeek() {
+        stream_->Seek(offset_);
+      }
+
+     private:
+      Stream* const stream_;
+      const uint32_t offset_;
+    };
+
+   private:
+    ALWAYS_INLINE void EnsureStorage(size_t length) {
+      size_t end = position_ + length;
+      while (UNLIKELY(end > data_size_)) {
+        section_->Resize(data_size_ * 3 / 2 + 1);
+        SyncWithSection();
+      }
+    }
+
+    void SyncWithSection() {
+      data_ = section_->Begin();
+      data_size_ = section_->Size();
+    }
+
+    // Current position of the stream.
+    size_t position_ = 0u;
+    DexContainer::Section* const section_ = nullptr;
+    // Cached Begin() from the container to provide faster accesses.
+    uint8_t* data_ = nullptr;
+    // Cached Size from the container to provide faster accesses.
+    size_t data_size_ = 0u;
+  };
+
   static inline constexpr uint32_t SectionAlignment(DexFile::MapItemType type) {
     switch (type) {
       case DexFile::kDexTypeClassDataItem:
@@ -78,83 +188,85 @@
     }
   }
 
-  DexWriter(dex_ir::Header* header,
-            MemMap* mem_map,
-            DexLayout* dex_layout,
-            bool compute_offsets)
-      : header_(header),
-        mem_map_(mem_map),
-        dex_layout_(dex_layout),
-        compute_offsets_(compute_offsets) {}
+  class Container : public DexContainer {
+   public:
+    Section* GetMainSection() OVERRIDE {
+      return &main_section_;
+    }
 
-  static void Output(dex_ir::Header* header,
-                     MemMap* mem_map,
-                     DexLayout* dex_layout,
-                     bool compute_offsets,
-                     CompactDexLevel compact_dex_level);
+    Section* GetDataSection() OVERRIDE {
+      return &data_section_;
+    }
+
+    bool IsCompactDexContainer() const OVERRIDE {
+      return false;
+    }
+
+   private:
+    VectorSection main_section_;
+    VectorSection data_section_;
+
+    friend class CompactDexWriter;
+  };
+
+  DexWriter(DexLayout* dex_layout, bool compute_offsets);
+
+  static void Output(DexLayout* dex_layout,
+                     std::unique_ptr<DexContainer>* container,
+                     bool compute_offsets);
 
   virtual ~DexWriter() {}
 
  protected:
-  virtual void WriteMemMap();
+  virtual void Write(DexContainer* output);
+  virtual std::unique_ptr<DexContainer> CreateDexContainer() const;
 
-  size_t Write(const void* buffer, size_t length, size_t offset) WARN_UNUSED;
-  size_t WriteSleb128(uint32_t value, size_t offset) WARN_UNUSED;
-  size_t WriteUleb128(uint32_t value, size_t offset) WARN_UNUSED;
-  size_t WriteEncodedValue(dex_ir::EncodedValue* encoded_value, size_t offset) WARN_UNUSED;
-  size_t WriteEncodedValueHeader(int8_t value_type, size_t value_arg, size_t offset) WARN_UNUSED;
-  size_t WriteEncodedArray(dex_ir::EncodedValueVector* values, size_t offset) WARN_UNUSED;
-  size_t WriteEncodedAnnotation(dex_ir::EncodedAnnotation* annotation, size_t offset) WARN_UNUSED;
-  size_t WriteEncodedFields(dex_ir::FieldItemVector* fields, size_t offset) WARN_UNUSED;
-  size_t WriteEncodedMethods(dex_ir::MethodItemVector* methods, size_t offset) WARN_UNUSED;
+  size_t WriteEncodedValue(Stream* stream, dex_ir::EncodedValue* encoded_value);
+  size_t WriteEncodedValueHeader(Stream* stream, int8_t value_type, size_t value_arg);
+  size_t WriteEncodedArray(Stream* stream, dex_ir::EncodedValueVector* values);
+  size_t WriteEncodedAnnotation(Stream* stream, dex_ir::EncodedAnnotation* annotation);
+  size_t WriteEncodedFields(Stream* stream, dex_ir::FieldItemVector* fields);
+  size_t WriteEncodedMethods(Stream* stream, dex_ir::MethodItemVector* methods);
 
   // Header and id section
-  virtual void WriteHeader();
+  virtual void WriteHeader(Stream* stream);
   virtual size_t GetHeaderSize() const;
   // reserve_only means don't write, only reserve space. This is required since the string data
   // offsets must be assigned.
-  uint32_t WriteStringIds(uint32_t offset, bool reserve_only);
-  uint32_t WriteTypeIds(uint32_t offset);
-  uint32_t WriteProtoIds(uint32_t offset, bool reserve_only);
-  uint32_t WriteFieldIds(uint32_t offset);
-  uint32_t WriteMethodIds(uint32_t offset);
-  uint32_t WriteClassDefs(uint32_t offset, bool reserve_only);
-  uint32_t WriteCallSiteIds(uint32_t offset, bool reserve_only);
+  uint32_t WriteStringIds(Stream* stream, bool reserve_only);
+  uint32_t WriteTypeIds(Stream* stream);
+  uint32_t WriteProtoIds(Stream* stream, bool reserve_only);
+  uint32_t WriteFieldIds(Stream* stream);
+  uint32_t WriteMethodIds(Stream* stream);
+  uint32_t WriteClassDefs(Stream* stream, bool reserve_only);
+  uint32_t WriteCallSiteIds(Stream* stream, bool reserve_only);
 
-  uint32_t WriteEncodedArrays(uint32_t offset);
-  uint32_t WriteAnnotations(uint32_t offset);
-  uint32_t WriteAnnotationSets(uint32_t offset);
-  uint32_t WriteAnnotationSetRefs(uint32_t offset);
-  uint32_t WriteAnnotationsDirectories(uint32_t offset);
+  uint32_t WriteEncodedArrays(Stream* stream);
+  uint32_t WriteAnnotations(Stream* stream);
+  uint32_t WriteAnnotationSets(Stream* stream);
+  uint32_t WriteAnnotationSetRefs(Stream* stream);
+  uint32_t WriteAnnotationsDirectories(Stream* stream);
 
   // Data section.
-  uint32_t WriteDebugInfoItems(uint32_t offset);
-  uint32_t WriteCodeItems(uint32_t offset, bool reserve_only);
-  uint32_t WriteTypeLists(uint32_t offset);
-  uint32_t WriteStringDatas(uint32_t offset);
-  uint32_t WriteClassDatas(uint32_t offset);
-  uint32_t WriteMethodHandles(uint32_t offset);
-  uint32_t WriteMapItems(uint32_t offset, MapItemQueue* queue);
-  uint32_t GenerateAndWriteMapItems(uint32_t offset);
+  uint32_t WriteDebugInfoItems(Stream* stream);
+  uint32_t WriteCodeItems(Stream* stream, bool reserve_only);
+  uint32_t WriteTypeLists(Stream* stream);
+  uint32_t WriteStringDatas(Stream* stream);
+  uint32_t WriteClassDatas(Stream* stream);
+  uint32_t WriteMethodHandles(Stream* stream);
+  uint32_t WriteMapItems(Stream* stream, MapItemQueue* queue);
+  uint32_t GenerateAndWriteMapItems(Stream* stream);
 
-  virtual uint32_t WriteCodeItemPostInstructionData(dex_ir::CodeItem* item,
-                                                    uint32_t offset,
+  virtual uint32_t WriteCodeItemPostInstructionData(Stream* stream,
+                                                    dex_ir::CodeItem* item,
                                                     bool reserve_only);
-  virtual uint32_t WriteCodeItem(dex_ir::CodeItem* item, uint32_t offset, bool reserve_only);
+  virtual uint32_t WriteCodeItem(Stream* stream, dex_ir::CodeItem* item, bool reserve_only);
 
   // Process an offset, if compute_offset is set, write into the dex ir item, otherwise read the
   // existing offset and use that for writing.
-  void ProcessOffset(uint32_t* const offset, dex_ir::Item* item) {
-    if (compute_offsets_) {
-      item->SetOffset(*offset);
-    } else {
-      // Not computing offsets, just use the one in the item.
-      *offset = item->GetOffset();
-    }
-  }
+  void ProcessOffset(Stream* stream, dex_ir::Item* item);
 
   dex_ir::Header* const header_;
-  MemMap* const mem_map_;
   DexLayout* const dex_layout_;
   bool compute_offsets_;
 
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index 3d3b121..d33a0bd 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -1813,21 +1813,14 @@
   LayoutCodeItems(dex_file);
 }
 
-void DexLayout::OutputDexFile(const DexFile* dex_file, bool compute_offsets) {
-  const std::string& dex_file_location = dex_file->GetLocation();
+void DexLayout::OutputDexFile(const DexFile* input_dex_file,
+                              bool compute_offsets,
+                              std::unique_ptr<DexContainer>* dex_container) {
+  const std::string& dex_file_location = input_dex_file->GetLocation();
   std::string error_msg;
   std::unique_ptr<File> new_file;
-  // Since we allow dex growth, we need to size the map larger than the original input to be safe.
-  // Reserve an extra 10% to add some buffer room. Note that this is probably more than
-  // necessary.
-  static constexpr size_t kReserveFraction = 10;
-  // Add an extra constant amount since the compact dex header and extra tables may cause more
-  // expansion than fits in the reserve fraction for small dex files.
-  // TODO: Move to using a resizable buffer like a vector.
-  static constexpr size_t kExtraReserve = 128 * KB;
-  const size_t max_size = header_->FileSize() + kExtraReserve +
-      header_->FileSize() / kReserveFraction;
-  if (!options_.output_to_memmap_) {
+  // If options_.output_dex_directory_ is non null, we are outputting to a file.
+  if (options_.output_dex_directory_ != nullptr) {
     std::string output_location(options_.output_dex_directory_);
     size_t last_slash = dex_file_location.rfind('/');
     std::string dex_file_directory = dex_file_location.substr(0, last_slash + 1);
@@ -1843,32 +1836,18 @@
       LOG(ERROR) << "Could not create dex writer output file: " << output_location;
       return;
     }
-    if (ftruncate(new_file->Fd(), max_size) != 0) {
-      LOG(ERROR) << "Could not grow dex writer output file: " << output_location;;
+  }
+  DexWriter::Output(this, dex_container, compute_offsets);
+  DexContainer* const container = dex_container->get();
+  DexContainer::Section* const main_section = container->GetMainSection();
+  DexContainer::Section* const data_section = container->GetDataSection();
+  CHECK_EQ(data_section->Size(), 0u) << "Unsupported";
+  if (new_file != nullptr) {
+    if (!new_file->WriteFully(main_section->Begin(), main_section->Size())) {
+      LOG(ERROR) << "Failed to write dex file to " << dex_file_location;
       new_file->Erase();
       return;
     }
-    mem_map_.reset(MemMap::MapFile(max_size, PROT_READ | PROT_WRITE, MAP_SHARED,
-        new_file->Fd(), 0, /*low_4gb*/ false, output_location.c_str(), &error_msg));
-  } else {
-    mem_map_.reset(MemMap::MapAnonymous("layout dex", nullptr, max_size,
-        PROT_READ | PROT_WRITE, /* low_4gb */ false, /* reuse */ false, &error_msg));
-  }
-  if (mem_map_ == nullptr) {
-    LOG(ERROR) << "Could not create mem map for dex writer output: " << error_msg;
-    if (new_file != nullptr) {
-      new_file->Erase();
-    }
-    return;
-  }
-  DexWriter::Output(header_, mem_map_.get(), this, compute_offsets, options_.compact_dex_level_);
-  if (new_file != nullptr) {
-    // Since we make the memmap larger than needed, shrink the file back down to not leave extra
-    // padding.
-    int res = new_file->SetLength(header_->FileSize());
-    if (res != 0) {
-      LOG(ERROR) << "Truncating file resulted in " << res;
-    }
     UNUSED(new_file->FlushCloseOrErase());
   }
 }
@@ -1878,8 +1857,11 @@
  */
 void DexLayout::ProcessDexFile(const char* file_name,
                                const DexFile* dex_file,
-                               size_t dex_file_index) {
-  const bool output = options_.output_dex_directory_ != nullptr || options_.output_to_memmap_;
+                               size_t dex_file_index,
+                               std::unique_ptr<DexContainer>* dex_container) {
+  const bool has_output_container = dex_container != nullptr;
+  const bool output = options_.output_dex_directory_ != nullptr || has_output_container;
+
   // Try to avoid eagerly assigning offsets to find bugs since GetOffset will abort if the offset
   // is unassigned.
   bool eagerly_assign_offsets = false;
@@ -1918,22 +1900,29 @@
     if (do_layout) {
       LayoutOutputFile(dex_file);
     }
+    // The output needs a dex container, use a temporary one.
+    std::unique_ptr<DexContainer> temp_container;
+    if (dex_container == nullptr) {
+      dex_container = &temp_container;
+    }
     // If we didn't set the offsets eagerly, we definitely need to compute them here.
-    OutputDexFile(dex_file, do_layout || !eagerly_assign_offsets);
+    OutputDexFile(dex_file, do_layout || !eagerly_assign_offsets, dex_container);
 
     // Clear header before verifying to reduce peak RAM usage.
     const size_t file_size = header_->FileSize();
     header.reset();
 
     // Verify the output dex file's structure, only enabled by default for debug builds.
-    if (options_.verify_output_) {
+    if (options_.verify_output_ && has_output_container) {
       std::string error_msg;
       std::string location = "memory mapped file for " + std::string(file_name);
       // Dex file verifier cannot handle compact dex.
       bool verify = options_.compact_dex_level_ == CompactDexLevel::kCompactDexLevelNone;
       const ArtDexFileLoader dex_file_loader;
+      DexContainer::Section* section = (*dex_container)->GetMainSection();
+      DCHECK_EQ(file_size, section->Size());
       std::unique_ptr<const DexFile> output_dex_file(
-          dex_file_loader.Open(mem_map_->Begin(),
+          dex_file_loader.Open(section->Begin(),
                                file_size,
                                location,
                                /* checksum */ 0,
@@ -1988,7 +1977,8 @@
     fprintf(out_file_, "Checksum verified\n");
   } else {
     for (size_t i = 0; i < dex_files.size(); i++) {
-      ProcessDexFile(file_name, dex_files[i].get(), i);
+      // Pass in a null container to avoid output by default.
+      ProcessDexFile(file_name, dex_files[i].get(), i, /*dex_container*/ nullptr);
     }
   }
   return 0;
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index cb0eabc..00d24db 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -28,6 +28,7 @@
 #include <unordered_map>
 
 #include "dex/compact_dex_level.h"
+#include "dex_container.h"
 #include "dex/dex_file_layout.h"
 #include "dex_ir.h"
 #include "mem_map.h"
@@ -55,7 +56,7 @@
   bool disassemble_ = false;
   bool exports_only_ = false;
   bool ignore_bad_checksum_ = false;
-  bool output_to_memmap_ = false;
+  bool output_to_container_ = false;
   bool show_annotations_ = false;
   bool show_file_headers_ = false;
   bool show_section_headers_ = false;
@@ -82,6 +83,18 @@
 
 class DexLayout {
  public:
+  class VectorOutputContainer {
+   public:
+    // Begin is not necessarily aligned (for now).
+    uint8_t* Begin() {
+      return &data_[0];
+    }
+
+   private:
+    std::vector<uint8_t> data_;
+  };
+
+
   // Setting this to false disables class def layout entirely, which is stronger than strictly
   // necessary to ensure the partial order w.r.t. class derivation. TODO: Re-enable (b/68317550).
   static constexpr bool kChangeClassDefOrder = false;
@@ -89,18 +102,21 @@
   DexLayout(Options& options,
             ProfileCompilationInfo* info,
             FILE* out_file,
-            dex_ir::Header*
-            header = nullptr)
-      : options_(options), info_(info), out_file_(out_file), header_(header) { }
+            dex_ir::Header* header)
+      : options_(options),
+        info_(info),
+        out_file_(out_file),
+        header_(header) { }
 
   int ProcessFile(const char* file_name);
-  void ProcessDexFile(const char* file_name, const DexFile* dex_file, size_t dex_file_index);
+  void ProcessDexFile(const char* file_name,
+                      const DexFile* dex_file,
+                      size_t dex_file_index,
+                      std::unique_ptr<DexContainer>* dex_container);
 
   dex_ir::Header* GetHeader() const { return header_; }
   void SetHeader(dex_ir::Header* header) { header_ = header; }
 
-  MemMap* GetAndReleaseMemMap() { return mem_map_.release(); }
-
   DexLayoutSections& GetSections() {
     return dex_sections_;
   }
@@ -150,7 +166,9 @@
   // Creates a new layout for the dex file based on profile info.
   // Currently reorders ClassDefs, ClassDataItems, and CodeItems.
   void LayoutOutputFile(const DexFile* dex_file);
-  void OutputDexFile(const DexFile* dex_file, bool compute_offsets);
+  void OutputDexFile(const DexFile* input_dex_file,
+                     bool compute_offsets,
+                     std::unique_ptr<DexContainer>* dex_container);
 
   void DumpCFG(const DexFile* dex_file, int idx);
   void DumpCFG(const DexFile* dex_file, uint32_t dex_method_idx, const DexFile::CodeItem* code);
@@ -159,7 +177,6 @@
   ProfileCompilationInfo* info_;
   FILE* out_file_;
   dex_ir::Header* header_;
-  std::unique_ptr<MemMap> mem_map_;
   DexLayoutSections dex_sections_;
   // Layout hotness information is only calculated when dexlayout is enabled.
   DexLayoutHotnessInfo layout_hotness_info_;
diff --git a/dexlayout/dexlayout_main.cc b/dexlayout/dexlayout_main.cc
index 83fb99a..ece0f93 100644
--- a/dexlayout/dexlayout_main.cc
+++ b/dexlayout/dexlayout_main.cc
@@ -80,7 +80,7 @@
 
   // Parse all arguments.
   while (1) {
-    const int ic = getopt(argc, argv, "abcdefghil:mo:p:stvw:x:");
+    const int ic = getopt(argc, argv, "abcdefghil:o:p:stvw:x:");
     if (ic < 0) {
       break;  // done
     }
@@ -119,9 +119,6 @@
           want_usage = true;
         }
         break;
-      case 'm':  // output dex files to a memmap
-        options.output_to_memmap_ = true;
-        break;
       case 'o':  // output file
         options.output_file_name_ = optarg;
         break;
@@ -197,7 +194,7 @@
   }
 
   // Create DexLayout instance.
-  DexLayout dex_layout(options, profile_info.get(), out_file);
+  DexLayout dex_layout(options, profile_info.get(), out_file, /*header*/ nullptr);
 
   // Process all files supplied on command line.
   int result = 0;
diff --git a/openjdkjvmti/fixed_up_dex_file.cc b/openjdkjvmti/fixed_up_dex_file.cc
index dcc834a..323137a 100644
--- a/openjdkjvmti/fixed_up_dex_file.cc
+++ b/openjdkjvmti/fixed_up_dex_file.cc
@@ -35,6 +35,7 @@
 #include "dex/dex_file_loader.h"
 
 // Runtime includes.
+#include "dex_container.h"
 #include "dex/compact_dex_level.h"
 #include "dex_to_dex_decompiler.h"
 #include "dexlayout.h"
@@ -92,18 +93,21 @@
   if (original.IsCompactDexFile()) {
     // Since we are supposed to return a standard dex, convert back using dexlayout.
     art::Options options;
-    options.output_to_memmap_ = true;
     options.compact_dex_level_ = art::CompactDexLevel::kCompactDexLevelNone;
     options.update_checksum_ = true;
-    art::DexLayout dex_layout(options, nullptr, nullptr);
-    dex_layout.ProcessDexFile(new_dex_file->GetLocation().c_str(), new_dex_file.get(), 0);
-    std::unique_ptr<art::MemMap> mem_map(dex_layout.GetAndReleaseMemMap());
-
-    const uint32_t dex_file_size =
-        reinterpret_cast<const art::DexFile::Header*>(mem_map->Begin())->file_size_;
+    art::DexLayout dex_layout(options,
+                              /*info*/ nullptr,
+                              /*out_file*/ nullptr,
+                              /*header*/ nullptr);
+    std::unique_ptr<art::DexContainer> dex_container;
+    dex_layout.ProcessDexFile(new_dex_file->GetLocation().c_str(),
+                              new_dex_file.get(),
+                              0,
+                              &dex_container);
+    art::DexContainer::Section* main_section = dex_container->GetMainSection();
     // Overwrite the dex file stored in data with the new result.
     data.clear();
-    data.insert(data.end(), mem_map->Begin(), mem_map->Begin() + dex_file_size);
+    data.insert(data.end(), main_section->Begin(), main_section->End());
     new_dex_file = dex_file_loader.Open(
         data.data(),
         data.size(),
diff --git a/profman/boot_image_profile.cc b/profman/boot_image_profile.cc
index a750105..3d003a7 100644
--- a/profman/boot_image_profile.cc
+++ b/profman/boot_image_profile.cc
@@ -19,6 +19,7 @@
 
 #include "boot_image_profile.h"
 #include "dex/dex_file-inl.h"
+#include "jit/profile_compilation_info.h"
 #include "method_reference.h"
 #include "type_reference.h"
 
diff --git a/profman/boot_image_profile.h b/profman/boot_image_profile.h
index eb43b7c..99e5a75 100644
--- a/profman/boot_image_profile.h
+++ b/profman/boot_image_profile.h
@@ -22,10 +22,11 @@
 #include <vector>
 
 #include "dex/dex_file.h"
-#include "jit/profile_compilation_info.h"
 
 namespace art {
 
+class ProfileCompilationInfo;
+
 struct BootImageOptions {
  public:
   // Threshold for classes that may be dirty or clean. The threshold specifies how
diff --git a/runtime/Android.bp b/runtime/Android.bp
index ac2c625..aba2b0e 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -714,6 +714,7 @@
     ],
     shared_libs: [
         "libbacktrace",
+        "libziparchive",
     ],
     header_libs: [
         "art_cmdlineparser_headers", // For parsed_options_test.
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 737d2a8..1671a24 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -794,6 +794,9 @@
     .extern artInstanceOfFromCode
     .extern artThrowClassCastExceptionForObject
 ENTRY art_quick_check_instance_of
+    // Type check using the bit string passes null as the target class. In that case just throw.
+    cbz r1, .Lthrow_class_cast_exception_for_bitstring_check
+
     push {r0-r2, lr}                    @ save arguments, padding (r2) and link register
     .cfi_adjust_cfa_offset 16
     .cfi_rel_offset r0, 0
@@ -812,6 +815,7 @@
     .cfi_restore r2
     .cfi_restore lr
 
+.Lthrow_class_cast_exception_for_bitstring_check:
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
     bl  artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index b0e7b0a..0614118 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1333,6 +1333,9 @@
     .extern artInstanceOfFromCode
     .extern artThrowClassCastExceptionForObject
 ENTRY art_quick_check_instance_of
+    // Type check using the bit string passes null as the target class. In that case just throw.
+    cbz x1, .Lthrow_class_cast_exception_for_bitstring_check
+
     // Store arguments and link register
     // Stack needs to be 16B aligned on calls.
     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
@@ -1358,6 +1361,7 @@
     // Restore
     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
 
+.Lthrow_class_cast_exception_for_bitstring_check:
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
     bl artThrowClassCastExceptionForObject     // (Object*, Class*, Thread*)
diff --git a/runtime/arch/mips/entrypoints_direct_mips.h b/runtime/arch/mips/entrypoints_direct_mips.h
index 1020781..3a6625f 100644
--- a/runtime/arch/mips/entrypoints_direct_mips.h
+++ b/runtime/arch/mips/entrypoints_direct_mips.h
@@ -54,6 +54,7 @@
       entrypoint == kQuickAsin ||
       entrypoint == kQuickAtan ||
       entrypoint == kQuickAtan2 ||
+      entrypoint == kQuickPow ||
       entrypoint == kQuickCbrt ||
       entrypoint == kQuickCosh ||
       entrypoint == kQuickExp ||
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index b2f7e10..d8fe480 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1423,6 +1423,10 @@
     .extern artInstanceOfFromCode
     .extern artThrowClassCastExceptionForObject
 ENTRY art_quick_check_instance_of
+    // Type check using the bit string passes null as the target class. In that case just throw.
+    beqz   $a1, .Lthrow_class_cast_exception_for_bitstring_check
+    nop
+
     addiu  $sp, $sp, -32
     .cfi_adjust_cfa_offset 32
     sw     $gp, 16($sp)
@@ -1441,12 +1445,15 @@
     jalr   $zero, $ra
     addiu  $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
+
 .Lthrow_class_cast_exception:
     lw     $t9, 8($sp)
     lw     $a1, 4($sp)
     lw     $a0, 0($sp)
     addiu  $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
+
+.Lthrow_class_cast_exception_for_bitstring_check:
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     la   $t9, artThrowClassCastExceptionForObject
     jalr $zero, $t9                 # artThrowClassCastException (Object*, Class*, Thread*)
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 63f4f6c..a5edc1f 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1364,6 +1364,9 @@
     .extern artInstanceOfFromCode
     .extern artThrowClassCastExceptionForObject
 ENTRY art_quick_check_instance_of
+    // Type check using the bit string passes null as the target class. In that case just throw.
+    beqzc  $a1, .Lthrow_class_cast_exception_for_bitstring_check
+
     daddiu $sp, $sp, -32
     .cfi_adjust_cfa_offset 32
     sd     $ra, 24($sp)
@@ -1379,12 +1382,15 @@
     jalr   $zero, $ra
     daddiu $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
+
 .Lthrow_class_cast_exception:
     ld     $t9, 16($sp)
     ld     $a1, 8($sp)
     ld     $a0, 0($sp)
     daddiu $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
+
+.Lthrow_class_cast_exception_for_bitstring_check:
     SETUP_GP
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
     dla  $t9, artThrowClassCastExceptionForObject
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 5a28120..d64e2fd 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1431,6 +1431,10 @@
 END_FUNCTION art_quick_instance_of
 
 DEFINE_FUNCTION art_quick_check_instance_of
+    // Type check using the bit string passes null as the target class. In that case just throw.
+    testl %ecx, %ecx
+    jz .Lthrow_class_cast_exception_for_bitstring_check
+
     PUSH eax                              // alignment padding
     PUSH ecx                              // pass arg2 - checked class
     PUSH eax                              // pass arg1 - obj
@@ -1448,6 +1452,7 @@
     addl LITERAL(4), %esp
     CFI_ADJUST_CFA_OFFSET(-4)
 
+.Lthrow_class_cast_exception_for_bitstring_check:
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
     // Outgoing argument set up
     PUSH eax                              // alignment padding
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 781ade9..81ad780 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1402,6 +1402,10 @@
 END_FUNCTION art_quick_unlock_object_no_inline
 
 DEFINE_FUNCTION art_quick_check_instance_of
+    // Type check using the bit string passes null as the target class. In that case just throw.
+    testl %esi, %esi
+    jz .Lthrow_class_cast_exception_for_bitstring_check
+
     // We could check the super classes here but that is usually already checked in the caller.
     PUSH rdi                          // Save args for exc
     PUSH rsi
@@ -1425,6 +1429,7 @@
     POP rsi                           // Pop arguments
     POP rdi
 
+.Lthrow_class_cast_exception_for_bitstring_check:
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
     call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index e87f631..0fcf394 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -56,6 +56,7 @@
   "CtorFenceIns ",
   "InvokeInputs ",
   "PhiInputs    ",
+  "TypeCheckIns ",
   "LoopInfo     ",
   "LIBackEdges  ",
   "TryCatchInf  ",
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index beaba67..5f3fc02a 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -62,6 +62,7 @@
   kArenaAllocConstructorFenceInputs,
   kArenaAllocInvokeInputs,
   kArenaAllocPhiInputs,
+  kArenaAllocTypeCheckInputs,
   kArenaAllocLoopInfo,
   kArenaAllocLoopInfoBackEdges,
   kArenaAllocTryCatchInfo,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index af45a69..e7ee9f2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -4482,6 +4482,14 @@
 
   Runtime::Current()->GetRuntimeCallbacks()->ClassPrepare(temp_klass, klass);
 
+  // SubtypeCheckInfo::Initialized must happen-before any new-instance for that type.
+  // See also ClassLinker::EnsureInitialized().
+  {
+    MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+    SubtypeCheck<ObjPtr<mirror::Class>>::EnsureInitialized(klass.Get());
+    // TODO: Avoid taking subtype_check_lock_ if SubtypeCheck for j.l.r.Proxy is already assigned.
+  }
+
   {
     // Lock on klass is released. Lock new class object.
     ObjectLock<mirror::Class> initialization_lock(self, klass);
diff --git a/runtime/dex/dex_file_verifier.cc b/runtime/dex/dex_file_verifier.cc
index 7265aad..5800bb1 100644
--- a/runtime/dex/dex_file_verifier.cc
+++ b/runtime/dex/dex_file_verifier.cc
@@ -453,6 +453,7 @@
 
   uint32_t count = map->size_;
   uint32_t last_offset = 0;
+  uint32_t last_type = 0;
   uint32_t data_item_count = 0;
   uint32_t data_items_left = header_->data_size_;
   uint32_t used_bits = 0;
@@ -465,7 +466,11 @@
   // Check the items listed in the map.
   for (uint32_t i = 0; i < count; i++) {
     if (UNLIKELY(last_offset >= item->offset_ && i != 0)) {
-      ErrorStringPrintf("Out of order map item: %x then %x", last_offset, item->offset_);
+      ErrorStringPrintf("Out of order map item: %x then %x for type %x last type was %x",
+                        last_offset,
+                        item->offset_,
+                        static_cast<uint32_t>(item->type_),
+                        last_type);
       return false;
     }
     if (UNLIKELY(item->offset_ >= header_->file_size_)) {
@@ -501,6 +506,7 @@
 
     used_bits |= bit;
     last_offset = item->offset_;
+    last_type = item->type_;
     item++;
   }
 
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index 565b4ed..4b26bee 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -15,8 +15,11 @@
  */
 
 #include "callee_save_frame.h"
+#include "dex/code_item_accessors-inl.h"
+#include "dex/dex_instruction-inl.h"
 #include "common_throws.h"
 #include "mirror/object-inl.h"
+#include "nth_caller_visitor.h"
 #include "thread.h"
 #include "well_known_classes.h"
 
@@ -111,6 +114,21 @@
                                                      Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
+  if (dest_type == nullptr) {
+    // Find the target class for check cast using the bitstring check (dest_type == null).
+    NthCallerVisitor visitor(self, 0u);
+    visitor.WalkStack();
+    DCHECK(visitor.caller != nullptr);
+    uint32_t dex_pc = visitor.GetDexPc();
+    CodeItemDataAccessor accessor(visitor.caller);
+    const Instruction& check_cast = accessor.InstructionAt(dex_pc);
+    DCHECK_EQ(check_cast.Opcode(), Instruction::CHECK_CAST);
+    dex::TypeIndex type_index(check_cast.VRegB_21c());
+    ClassLinker* linker = Runtime::Current()->GetClassLinker();
+    dest_type = linker->LookupResolvedType(type_index, visitor.caller).Ptr();
+    CHECK(dest_type != nullptr) << "Target class should have been previously resolved: "
+        << visitor.caller->GetDexFile()->PrettyType(type_index);
+  }
   DCHECK(!dest_type->IsAssignableFrom(src_type));
   ThrowClassCastException(dest_type, src_type);
   self->QuickDeliverException();
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 791c338..6d27cfe 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -23,7 +23,6 @@
 #include "base/timing_logger.h"
 #include "jit/profile_saver_options.h"
 #include "obj_ptr.h"
-#include "profile_compilation_info.h"
 #include "thread_pool.h"
 
 namespace art {
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index 74bf237..33fa0d6 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -47,6 +47,7 @@
 #include "os.h"
 #include "safe_map.h"
 #include "utils.h"
+#include "zip_archive.h"
 
 namespace art {
 
@@ -56,6 +57,10 @@
 // before corresponding method_encodings and class_ids.
 const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '1', '0', '\0' };
 
+// The name of the profile entry in the dex metadata file.
+// DO NOT CHANGE THIS! (it's similar to classes.dex in the apk files).
+const char* ProfileCompilationInfo::kDexMetadataProfileEntry = "primary.prof";
+
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
 // Debug flag to ignore checksums when testing if a method or a class is present in the profile.
@@ -194,7 +199,7 @@
 
   int fd = profile_file->Fd();
 
-  ProfileLoadSatus status = LoadInternal(fd, &error);
+  ProfileLoadStatus status = LoadInternal(fd, &error);
   if (status == kProfileLoadSuccess) {
     return true;
   }
@@ -225,7 +230,7 @@
 
   int fd = profile_file->Fd();
 
-  ProfileLoadSatus status = LoadInternal(fd, &error);
+  ProfileLoadStatus status = LoadInternal(fd, &error);
   if (status == kProfileLoadSuccess) {
     return true;
   }
@@ -883,25 +888,13 @@
   return false;
 }
 
-ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::SafeBuffer::FillFromFd(
-      int fd,
-      const std::string& source,
-      /*out*/std::string* error) {
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::SafeBuffer::Fill(
+      ProfileSource& source,
+      const std::string& debug_stage,
+      /*out*/ std::string* error) {
   size_t byte_count = (ptr_end_ - ptr_current_) * sizeof(*ptr_current_);
   uint8_t* buffer = ptr_current_;
-  while (byte_count > 0) {
-    int bytes_read = TEMP_FAILURE_RETRY(read(fd, buffer, byte_count));
-    if (bytes_read == 0) {
-      *error += "Profile EOF reached prematurely for " + source;
-      return kProfileLoadBadData;
-    } else if (bytes_read < 0) {
-      *error += "Profile IO error for " + source + strerror(errno);
-      return kProfileLoadIOError;
-    }
-    byte_count -= bytes_read;
-    buffer += bytes_read;
-  }
-  return kProfileLoadSuccess;
+  return source.Read(buffer, byte_count, debug_stage, error);
 }
 
 size_t ProfileCompilationInfo::SafeBuffer::CountUnreadBytes() {
@@ -916,8 +909,8 @@
   ptr_current_ += data_size;
 }
 
-ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileHeader(
-      int fd,
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::ReadProfileHeader(
+      ProfileSource& source,
       /*out*/uint8_t* number_of_dex_files,
       /*out*/uint32_t* uncompressed_data_size,
       /*out*/uint32_t* compressed_data_size,
@@ -932,7 +925,7 @@
 
   SafeBuffer safe_buffer(kMagicVersionSize);
 
-  ProfileLoadSatus status = safe_buffer.FillFromFd(fd, "ReadProfileHeader", error);
+  ProfileLoadStatus status = safe_buffer.Fill(source, "ReadProfileHeader", error);
   if (status != kProfileLoadSuccess) {
     return status;
   }
@@ -972,7 +965,7 @@
   return true;
 }
 
-ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileLineHeader(
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::ReadProfileLineHeader(
     SafeBuffer& buffer,
     /*out*/ProfileLineHeader* line_header,
     /*out*/std::string* error) {
@@ -1003,7 +996,7 @@
   return kProfileLoadSuccess;
 }
 
-ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileLine(
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::ReadProfileLine(
       SafeBuffer& buffer,
       uint8_t number_of_dex_files,
       const ProfileLineHeader& line_header,
@@ -1046,7 +1039,7 @@
 bool ProfileCompilationInfo::Load(int fd, bool merge_classes) {
   std::string error;
 
-  ProfileLoadSatus status = LoadInternal(fd, &error, merge_classes);
+  ProfileLoadStatus status = LoadInternal(fd, &error, merge_classes);
 
   if (status == kProfileLoadSuccess) {
     return true;
@@ -1148,31 +1141,136 @@
   return true;
 }
 
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::OpenSource(
+    int32_t fd,
+    /*out*/ std::unique_ptr<ProfileSource>* source,
+    /*out*/ std::string* error) {
+  if (IsProfileFile(fd)) {
+    source->reset(ProfileSource::Create(fd));
+    return kProfileLoadSuccess;
+  } else {
+    std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, "profile", error));
+    if (zip_archive.get() == nullptr) {
+      *error = "Could not open the profile zip archive";
+      return kProfileLoadBadData;
+    }
+    std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(kDexMetadataProfileEntry, error));
+    if (zip_entry == nullptr) {
+      // Allow archives without the profile entry. In this case, create an empty profile.
+      // This gives more flexibility when re-using archives that may miss the entry.
+      // (e.g. dex metadata files)
+      LOG(WARNING) << std::string("Could not find entry ") + kDexMetadataProfileEntry +
+            " in the zip archive. Creating an empty profile.";
+      source->reset(ProfileSource::Create(nullptr));
+      return kProfileLoadSuccess;
+    }
+    if (zip_entry->GetUncompressedLength() == 0) {
+      *error = "Empty profile entry in the zip archive.";
+      return kProfileLoadBadData;
+    }
+
+    std::unique_ptr<MemMap> map;
+    if (zip_entry->IsUncompressed()) {
+      // Map uncompressed files within zip as file-backed to avoid a dirty copy.
+      map.reset(zip_entry->MapDirectlyFromFile(kDexMetadataProfileEntry, error));
+      if (map == nullptr) {
+        LOG(WARNING) << "Can't mmap profile directly; "
+                     << "is your ZIP file corrupted? Falling back to extraction.";
+        // Try again with Extraction which still has a chance of recovery.
+      }
+    }
+
+    if (map == nullptr) {
+      // Default path for compressed ZIP entries, and fallback for stored ZIP entries.
+      // TODO(calin) pass along file names to assist with debugging.
+      map.reset(zip_entry->ExtractToMemMap("profile file", kDexMetadataProfileEntry, error));
+    }
+
+    if (map != nullptr) {
+      source->reset(ProfileSource::Create(std::move(map)));
+      return kProfileLoadSuccess;
+    } else {
+      return kProfileLoadBadData;
+    }
+  }
+}
+
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::ProfileSource::Read(
+    uint8_t* buffer,
+    size_t byte_count,
+    const std::string& debug_stage,
+    std::string* error) {
+  if (IsMemMap()) {
+    if (mem_map_cur_ + byte_count > mem_map_->Size()) {
+      return kProfileLoadBadData;
+    }
+    for (size_t i = 0; i < byte_count; i++) {
+      buffer[i] = *(mem_map_->Begin() + mem_map_cur_);
+      mem_map_cur_++;
+    }
+  } else {
+    while (byte_count > 0) {
+      int bytes_read = TEMP_FAILURE_RETRY(read(fd_, buffer, byte_count));
+      if (bytes_read == 0) {
+        *error += "Profile EOF reached prematurely for " + debug_stage;
+        return kProfileLoadBadData;
+      } else if (bytes_read < 0) {
+        *error += "Profile IO error for " + debug_stage + strerror(errno);
+        return kProfileLoadIOError;
+      }
+      byte_count -= bytes_read;
+      buffer += bytes_read;
+    }
+  }
+  return kProfileLoadSuccess;
+}
+
+bool ProfileCompilationInfo::ProfileSource::HasConsumedAllData() const {
+  return IsMemMap()
+      ? (mem_map_ == nullptr || mem_map_cur_ == mem_map_->Size())
+      : (testEOF(fd_) == 0);
+}
+
+bool ProfileCompilationInfo::ProfileSource::HasEmptyContent() const {
+  if (IsMemMap()) {
+    return mem_map_ == nullptr || mem_map_->Size() == 0;
+  } else {
+    struct stat stat_buffer;
+    if (fstat(fd_, &stat_buffer) != 0) {
+      return false;
+    }
+    return stat_buffer.st_size == 0;
+  }
+}
+
 // TODO(calin): fail fast if the dex checksums don't match.
-ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::LoadInternal(
-      int fd, std::string* error, bool merge_classes) {
+ProfileCompilationInfo::ProfileLoadStatus ProfileCompilationInfo::LoadInternal(
+      int32_t fd, std::string* error, bool merge_classes) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   DCHECK_GE(fd, 0);
 
-  struct stat stat_buffer;
-  if (fstat(fd, &stat_buffer) != 0) {
-    return kProfileLoadIOError;
+  std::unique_ptr<ProfileSource> source;
+  ProfileLoadStatus status = OpenSource(fd, &source, error);
+  if (status != kProfileLoadSuccess) {
+    return status;
   }
+
   // We allow empty profile files.
   // Profiles may be created by ActivityManager or installd before we manage to
   // process them in the runtime or profman.
-  if (stat_buffer.st_size == 0) {
+  if (source->HasEmptyContent()) {
     return kProfileLoadSuccess;
   }
+
   // Read profile header: magic + version + number_of_dex_files.
   uint8_t number_of_dex_files;
   uint32_t uncompressed_data_size;
   uint32_t compressed_data_size;
-  ProfileLoadSatus status = ReadProfileHeader(fd,
-                                              &number_of_dex_files,
-                                              &uncompressed_data_size,
-                                              &compressed_data_size,
-                                              error);
+  status = ReadProfileHeader(*source,
+                             &number_of_dex_files,
+                             &uncompressed_data_size,
+                             &compressed_data_size,
+                             error);
 
   if (status != kProfileLoadSuccess) {
     return status;
@@ -1192,16 +1290,14 @@
   }
 
   std::unique_ptr<uint8_t[]> compressed_data(new uint8_t[compressed_data_size]);
-  bool bytes_read_success =
-      android::base::ReadFully(fd, compressed_data.get(), compressed_data_size);
-
-  if (testEOF(fd) != 0) {
-    *error += "Unexpected data in the profile file.";
-    return kProfileLoadBadData;
+  status = source->Read(compressed_data.get(), compressed_data_size, "ReadContent", error);
+  if (status != kProfileLoadSuccess) {
+    *error += "Unable to read compressed profile data";
+    return status;
   }
 
-  if (!bytes_read_success) {
-    *error += "Unable to read compressed profile data";
+  if (!source->HasConsumedAllData()) {
+    *error += "Unexpected data in the profile file.";
     return kProfileLoadBadData;
   }
 
@@ -1904,4 +2000,34 @@
   return ret;
 }
 
+bool ProfileCompilationInfo::IsProfileFile(int fd) {
+  // First check if it's an empty file as we allow empty profile files.
+  // Profiles may be created by ActivityManager or installd before we manage to
+  // process them in the runtime or profman.
+  struct stat stat_buffer;
+  if (fstat(fd, &stat_buffer) != 0) {
+    return false;
+  }
+
+  if (stat_buffer.st_size == 0) {
+    return true;
+  }
+
+  // The file is not empty. Check if it contains the profile magic.
+  size_t byte_count = sizeof(kProfileMagic);
+  uint8_t buffer[sizeof(kProfileMagic)];
+  if (!android::base::ReadFully(fd, buffer, byte_count)) {
+    return false;
+  }
+
+  // Reset the offset to prepare the file for reading.
+  off_t rc =  TEMP_FAILURE_RETRY(lseek(fd, 0, SEEK_SET));
+  if (rc == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to reset the offset";
+    return false;
+  }
+
+  return memcmp(buffer, kProfileMagic, byte_count) == 0;
+}
+
 }  // namespace art
diff --git a/runtime/jit/profile_compilation_info.h b/runtime/jit/profile_compilation_info.h
index 7c30dee..29a4c11 100644
--- a/runtime/jit/profile_compilation_info.h
+++ b/runtime/jit/profile_compilation_info.h
@@ -28,6 +28,7 @@
 #include "dex/dex_file.h"
 #include "dex/dex_file_types.h"
 #include "method_reference.h"
+#include "mem_map.h"
 #include "safe_map.h"
 #include "type_reference.h"
 
@@ -71,6 +72,8 @@
   static const uint8_t kProfileMagic[];
   static const uint8_t kProfileVersion[];
 
+  static const char* kDexMetadataProfileEntry;
+
   // Data structures for encoding the offline representation of inline caches.
   // This is exposed as public in order to make it available to dex2oat compilations
   // (see compiler/optimizing/inliner.cc).
@@ -410,8 +413,11 @@
   // Return all of the class descriptors in the profile for a set of dex files.
   std::unordered_set<std::string> GetClassDescriptors(const std::vector<const DexFile*>& dex_files);
 
+  // Return true if the fd points to a profile file.
+  bool IsProfileFile(int fd);
+
  private:
-  enum ProfileLoadSatus {
+  enum ProfileLoadStatus {
     kProfileLoadWouldOverwiteData,
     kProfileLoadIOError,
     kProfileLoadVersionMismatch,
@@ -577,6 +583,58 @@
     uint32_t num_method_ids;
   };
 
+  /**
+   * Encapsulate the source of profile data for loading.
+   * The source can be either a plain file or a zip file.
+   * For zip files, the profile entry will be extracted to
+   * the memory map.
+   */
+  class ProfileSource {
+   public:
+    /**
+     * Create a profile source for the given fd. The ownership of the fd
+     * remains to the caller; as this class will not attempt to close it at any
+     * point.
+     */
+    static ProfileSource* Create(int32_t fd) {
+      DCHECK_GT(fd, -1);
+      return new ProfileSource(fd, /*map*/ nullptr);
+    }
+
+    /**
+     * Create a profile source backed by a memory map. The map can be null in
+     * which case it will be treated as an empty source.
+     */
+    static ProfileSource* Create(std::unique_ptr<MemMap>&& mem_map) {
+      return new ProfileSource(/*fd*/ -1, std::move(mem_map));
+    }
+
+    /**
+     * Read bytes from this source.
+     * Reading will advance the current source position so subsequent
+     * invocations will read from the last position.
+     */
+    ProfileLoadStatus Read(uint8_t* buffer,
+                           size_t byte_count,
+                           const std::string& debug_stage,
+                           std::string* error);
+
+    /** Return true if the source has 0 data. */
+    bool HasEmptyContent() const;
+    /** Return true if all the information from this source has been read. */
+    bool HasConsumedAllData() const;
+
+   private:
+    ProfileSource(int32_t fd, std::unique_ptr<MemMap>&& mem_map)
+        : fd_(fd), mem_map_(std::move(mem_map)), mem_map_cur_(0) {}
+
+    bool IsMemMap() const { return fd_ == -1; }
+
+    int32_t fd_;  // The fd is not owned by this class.
+    std::unique_ptr<MemMap> mem_map_;
+    size_t mem_map_cur_;  // Current position in the map to read from.
+  };
+
   // A helper structure to make sure we don't read past our buffers in the loops.
   struct SafeBuffer {
    public:
@@ -586,13 +644,9 @@
     }
 
     // Reads the content of the descriptor at the current position.
-    ProfileLoadSatus FillFromFd(int fd,
-                                const std::string& source,
-                                /*out*/std::string* error);
-
-    ProfileLoadSatus FillFromBuffer(uint8_t* buffer_ptr,
-                                    const std::string& source,
-                                    /*out*/std::string* error);
+    ProfileLoadStatus Fill(ProfileSource& source,
+                           const std::string& debug_stage,
+                           /*out*/std::string* error);
 
     // Reads an uint value (high bits to low bits) and advances the current pointer
     // with the number of bits read.
@@ -620,21 +674,27 @@
     uint8_t* ptr_current_;
   };
 
+  ProfileLoadStatus OpenSource(int32_t fd,
+                               /*out*/ std::unique_ptr<ProfileSource>* source,
+                               /*out*/ std::string* error);
+
   // Entry point for profile loding functionality.
-  ProfileLoadSatus LoadInternal(int fd, std::string* error, bool merge_classes = true);
+  ProfileLoadStatus LoadInternal(int32_t fd,
+                                 std::string* error,
+                                 bool merge_classes = true);
 
   // Read the profile header from the given fd and store the number of profile
   // lines into number_of_dex_files.
-  ProfileLoadSatus ReadProfileHeader(int fd,
-                                     /*out*/uint8_t* number_of_dex_files,
-                                     /*out*/uint32_t* size_uncompressed_data,
-                                     /*out*/uint32_t* size_compressed_data,
-                                     /*out*/std::string* error);
+  ProfileLoadStatus ReadProfileHeader(ProfileSource& source,
+                                      /*out*/uint8_t* number_of_dex_files,
+                                      /*out*/uint32_t* size_uncompressed_data,
+                                      /*out*/uint32_t* size_compressed_data,
+                                      /*out*/std::string* error);
 
   // Read the header of a profile line from the given fd.
-  ProfileLoadSatus ReadProfileLineHeader(SafeBuffer& buffer,
-                                         /*out*/ProfileLineHeader* line_header,
-                                         /*out*/std::string* error);
+  ProfileLoadStatus ReadProfileLineHeader(SafeBuffer& buffer,
+                                          /*out*/ProfileLineHeader* line_header,
+                                          /*out*/std::string* error);
 
   // Read individual elements from the profile line header.
   bool ReadProfileLineHeaderElements(SafeBuffer& buffer,
@@ -643,12 +703,12 @@
                                      /*out*/std::string* error);
 
   // Read a single profile line from the given fd.
-  ProfileLoadSatus ReadProfileLine(SafeBuffer& buffer,
-                                   uint8_t number_of_dex_files,
-                                   const ProfileLineHeader& line_header,
-                                   const SafeMap<uint8_t, uint8_t>& dex_profile_index_remap,
-                                   bool merge_classes,
-                                   /*out*/std::string* error);
+  ProfileLoadStatus ReadProfileLine(SafeBuffer& buffer,
+                                    uint8_t number_of_dex_files,
+                                    const ProfileLineHeader& line_header,
+                                    const SafeMap<uint8_t, uint8_t>& dex_profile_index_remap,
+                                    bool merge_classes,
+                                    /*out*/std::string* error);
 
   // Read all the classes from the buffer into the profile `info_` structure.
   bool ReadClasses(SafeBuffer& buffer,
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
index 08042cc..6ce9bcb 100644
--- a/runtime/jit/profile_compilation_info_test.cc
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include <gtest/gtest.h>
+#include <stdio.h>
 
 #include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
@@ -29,6 +30,7 @@
 #include "mirror/class_loader.h"
 #include "scoped_thread_state_change-inl.h"
 #include "type_reference.h"
+#include "ziparchive/zip_writer.h"
 
 namespace art {
 
@@ -268,6 +270,50 @@
     }
   }
 
+  void TestProfileLoadFromZip(const char* zip_entry,
+                              size_t zip_flags,
+                              bool should_succeed,
+                              bool should_succeed_with_empty_profile = false) {
+    // Create a valid profile.
+    ScratchFile profile;
+    ProfileCompilationInfo saved_info;
+    for (uint16_t i = 0; i < 10; i++) {
+      ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+      ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+    }
+    ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+    ASSERT_EQ(0, profile.GetFile()->Flush());
+
+    // Prepare the profile content for zipping (array form so delete[] is used).
+    ASSERT_TRUE(profile.GetFile()->ResetOffset());
+    uint64_t data_size = profile.GetFile()->GetLength();
+    std::unique_ptr<uint8_t[]> data(new uint8_t[data_size]);
+    ASSERT_TRUE(profile.GetFile()->ReadFully(data.get(), data_size));
+
+    // Zip the profile content.
+    ScratchFile zip;
+    FILE* file = fopen(zip.GetFile()->GetPath().c_str(), "wb");
+    ZipWriter writer(file);
+    writer.StartEntry(zip_entry, zip_flags);
+    writer.WriteBytes(data.get(), data_size);
+    writer.FinishEntry();
+    writer.Finish();
+    fflush(file);
+    fclose(file);
+
+    // Verify loading from the zip archive.
+    ProfileCompilationInfo loaded_info;
+    ASSERT_TRUE(zip.GetFile()->ResetOffset());
+    ASSERT_EQ(should_succeed, loaded_info.Load(zip.GetFile()->GetPath(), false));
+    if (should_succeed) {
+      if (should_succeed_with_empty_profile) {
+        ASSERT_TRUE(loaded_info.IsEmpty());
+      } else {
+        ASSERT_TRUE(loaded_info.Equals(saved_info));
+      }
+    }
+  }
+
   // Cannot sizeof the actual arrays so hard code the values here.
   // They should not change anyway.
   static constexpr int kProfileMagicSize = 4;
@@ -934,4 +980,64 @@
   }
 }
 
+TEST_F(ProfileCompilationInfoTest, LoadFromZipCompress) {
+  TestProfileLoadFromZip("primary.prof",
+                         ZipWriter::kCompress | ZipWriter::kAlign32,
+                         /*should_succeed*/true);
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFromZipUnCompress) {
+  TestProfileLoadFromZip("primary.prof",
+                         ZipWriter::kAlign32,
+                         /*should_succeed*/true);
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFromZipUnAligned) {
+  TestProfileLoadFromZip("primary.prof",
+                         0,
+                         /*should_succeed*/true);
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFromZipFailBadZipEntry) {
+  TestProfileLoadFromZip("invalid.profile.entry",
+                         0,
+                         /*should_succeed*/true,
+                         /*should_succeed_with_empty_profile*/true);
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFromZipFailBadProfile) {
+  // Create a bad profile.
+  ScratchFile profile;
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileMagic, kProfileMagicSize));
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileVersion, kProfileVersionSize));
+  // Write that we have at least one line.
+  uint8_t line_number[] = { 0, 1 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(line_number, sizeof(line_number)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Prepare the profile content for zipping (array form so delete[] is used).
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  uint64_t data_size = profile.GetFile()->GetLength();
+  std::unique_ptr<uint8_t[]> data(new uint8_t[data_size]);
+  ASSERT_TRUE(profile.GetFile()->ReadFully(data.get(), data_size));
+
+  // Zip the profile content.
+  ScratchFile zip;
+  FILE* file = fopen(zip.GetFile()->GetPath().c_str(), "wb");
+  ZipWriter writer(file);
+  writer.StartEntry("primary.prof", ZipWriter::kCompress | ZipWriter::kAlign32);
+  writer.WriteBytes(data.get(), data_size);
+  writer.FinishEntry();
+  writer.Finish();
+  fflush(file);
+  fclose(file);
+
+  // Check that we failed to load.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(zip.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(zip)));
+}
+
 }  // namespace art
diff --git a/runtime/subtype_check.h b/runtime/subtype_check.h
index 54d2f00..03a6d9c 100644
--- a/runtime/subtype_check.h
+++ b/runtime/subtype_check.h
@@ -283,6 +283,17 @@
     return SubtypeCheckInfo::kUninitialized;
   }
 
+  // Retrieve the state of this class's SubtypeCheckInfo.
+  //
+  // Cost: O(Depth(Class)).
+  //
+  // Returns: The precise SubtypeCheckInfo::State.
+  static SubtypeCheckInfo::State GetState(ClassPtr klass)
+      REQUIRES(Locks::subtype_check_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetSubtypeCheckInfo(klass).GetState();
+  }
+
   // Retrieve the path to root bitstring as a plain uintN_t value that is amenable to
   // be used by a fast check "encoded_src & mask_target == encoded_target".
   //
@@ -305,8 +316,9 @@
   static BitString::StorageType GetEncodedPathToRootForTarget(ClassPtr klass)
       REQUIRES(Locks::subtype_check_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK_EQ(SubtypeCheckInfo::kAssigned, GetSubtypeCheckInfo(klass).GetState());
-    return GetSubtypeCheckInfo(klass).GetEncodedPathToRoot();
+    SubtypeCheckInfo sci = GetSubtypeCheckInfo(klass);
+    DCHECK_EQ(SubtypeCheckInfo::kAssigned, sci.GetState());
+    return sci.GetEncodedPathToRoot();
   }
 
   // Retrieve the path to root bitstring mask as a plain uintN_t value that is amenable to
@@ -318,8 +330,9 @@
   static BitString::StorageType GetEncodedPathToRootMask(ClassPtr klass)
       REQUIRES(Locks::subtype_check_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK_EQ(SubtypeCheckInfo::kAssigned, GetSubtypeCheckInfo(klass).GetState());
-    return GetSubtypeCheckInfo(klass).GetEncodedPathToRootMask();
+    SubtypeCheckInfo sci = GetSubtypeCheckInfo(klass);
+    DCHECK_EQ(SubtypeCheckInfo::kAssigned, sci.GetState());
+    return sci.GetEncodedPathToRootMask();
   }
 
   // Is the source class a subclass of the target?
diff --git a/test/670-bitstring-type-check/build b/test/670-bitstring-type-check/build
new file mode 100644
index 0000000..38307f2
--- /dev/null
+++ b/test/670-bitstring-type-check/build
@@ -0,0 +1,216 @@
+#!/bin/bash
+#
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+# Write out the source file.
+
+mkdir src
+cat >src/Main.java <<EOF
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+EOF
+
+for i in {0..8192}; do echo "class Level1Class$i { }" >>src/Main.java; done
+for i in {0..1024}; do echo "class Level2Class$i extends Level1Class0 { }" >>src/Main.java; done
+
+cat >>src/Main.java <<EOF
+class Level3Class0 extends Level2Class0 { }
+class Level4Class0 extends Level3Class0 { }
+class Level5Class0 extends Level4Class0 { }
+class Level6Class0 extends Level5Class0 { }
+class Level7Class0 extends Level6Class0 { }
+class Level8Class0 extends Level7Class0 { }
+class Level9Class0 extends Level8Class0 { }
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    // 8193 classes at level 1 make sure we shall have an overflow if there are 13 or
+    // less bits for the level 1 character. 1025 classes at level 2 similarly guarantees
+    // an overflow if the number of bits for level 2 character is 10 or less. To test
+    // type checks also for the depth overflow, we provide a hierarchy 9 levels deep.
+
+    // Make sure the bitstrings are initialized.
+    for (int i = 0; i <= 8192; ++i) {
+      Class.forName("Level1Class" + i).newInstance();
+    }
+    for (int i = 0; i <= 1024; ++i) {
+      Class.forName("Level2Class" + i).newInstance();
+    }
+
+    // Note: Using a different class for tests so that verification of Main.main() does
+    // not try to resolve classes used by the tests. This guarantees uninitialized type
+    // check bitstrings when we enter Main.main() and start initializing them above.
+    Helper.testInstanceOf();
+    Helper.testCheckCast();
+  }
+}
+
+class Helper {
+  public static void testInstanceOf() throws Exception {
+    for (int i = 1; i <= 9; ++i) {
+      Object o = createInstance("Level" + i + "Class0");
+      assertTrue(o instanceof Level1Class0);
+      if (o instanceof Level2Class0) {
+        assertFalse(i < 2);
+      } else {
+        assertTrue(i < 2);
+      }
+      if (o instanceof Level3Class0) {
+        assertFalse(i < 3);
+      } else {
+        assertTrue(i < 3);
+      }
+      if (o instanceof Level4Class0) {
+        assertFalse(i < 4);
+      } else {
+        assertTrue(i < 4);
+      }
+      if (o instanceof Level5Class0) {
+        assertFalse(i < 5);
+      } else {
+        assertTrue(i < 5);
+      }
+      if (o instanceof Level6Class0) {
+        assertFalse(i < 6);
+      } else {
+        assertTrue(i < 6);
+      }
+      if (o instanceof Level7Class0) {
+        assertFalse(i < 7);
+      } else {
+        assertTrue(i < 7);
+      }
+      if (o instanceof Level8Class0) {
+        assertFalse(i < 8);
+      } else {
+        assertTrue(i < 8);
+      }
+      if (o instanceof Level9Class0) {
+        assertFalse(i < 9);
+      } else {
+        assertTrue(i < 9);
+      }
+    }
+
+    assertTrue(createInstance("Level1Class8192") instanceof Level1Class8192);
+    assertFalse(createInstance("Level1Class8192") instanceof Level1Class0);
+    assertTrue(createInstance("Level2Class1024") instanceof Level2Class1024);
+    assertTrue(createInstance("Level2Class1024") instanceof Level1Class0);
+    assertFalse(createInstance("Level2Class1024") instanceof Level2Class0);
+  }
+
+  public static void testCheckCast() throws Exception {
+    for (int i = 1; i <= 9; ++i) {
+      Object o = createInstance("Level" + i + "Class0");
+      Level1Class0 l1c0 = (Level1Class0) o;
+      try {
+        Level2Class0 l2c0 = (Level2Class0) o;
+        assertFalse(i < 2);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 2);
+      }
+      try {
+        Level3Class0 l3c0 = (Level3Class0) o;
+        assertFalse(i < 3);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 3);
+      }
+      try {
+        Level4Class0 l4c0 = (Level4Class0) o;
+        assertFalse(i < 4);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 4);
+      }
+      try {
+        Level5Class0 l5c0 = (Level5Class0) o;
+        assertFalse(i < 5);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 5);
+      }
+      try {
+        Level6Class0 l6c0 = (Level6Class0) o;
+        assertFalse(i < 6);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 6);
+      }
+      try {
+        Level7Class0 l7c0 = (Level7Class0) o;
+        assertFalse(i < 7);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 7);
+      }
+      try {
+        Level8Class0 l8c0 = (Level8Class0) o;
+        assertFalse(i < 8);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 8);
+      }
+      try {
+        Level9Class0 l9c0 = (Level9Class0) o;
+        assertFalse(i < 9);
+      } catch (ClassCastException cce) {
+        assertTrue(i < 9);
+      }
+    }
+
+    Level1Class8192 l1c8192 = (Level1Class8192) createInstance("Level1Class8192");
+    try {
+      Level1Class0 l1c0 = (Level1Class0) createInstance("Level1Class8192");
+      throw new AssertionError("Unexpected");
+    } catch (ClassCastException expected) {}
+    Level2Class1024 l2c1024 = (Level2Class1024) createInstance("Level2Class1024");
+    Level1Class0 l1c0 = (Level1Class0) createInstance("Level2Class1024");
+    try {
+      Level2Class0 l2c0 = (Level2Class0) createInstance("Level2Class1024");
+      throw new AssertionError("Unexpected");
+    } catch (ClassCastException expected) {}
+  }
+
+  public static Object createInstance(String className) throws Exception {
+    return Class.forName(className).newInstance();
+  }
+
+  public static void assertTrue(boolean value) throws Exception {
+    if (!value) {
+      throw new AssertionError();
+    }
+  }
+
+  public static void assertFalse(boolean value) throws Exception {
+    if (value) {
+      throw new AssertionError();
+    }
+  }
+}
+EOF
+
+./default-build "$@"
diff --git a/test/670-bitstring-type-check/expected.txt b/test/670-bitstring-type-check/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/670-bitstring-type-check/expected.txt
diff --git a/test/670-bitstring-type-check/info.txt b/test/670-bitstring-type-check/info.txt
new file mode 100644
index 0000000..a34ba86
--- /dev/null
+++ b/test/670-bitstring-type-check/info.txt
@@ -0,0 +1 @@
+Tests for the bitstring type checks.
diff --git a/test/710-varhandle-creation/src-art/Main.java b/test/710-varhandle-creation/src-art/Main.java
index 6d542bb..a737b5b 100644
--- a/test/710-varhandle-creation/src-art/Main.java
+++ b/test/710-varhandle-creation/src-art/Main.java
@@ -18,7 +18,6 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-import dalvik.system.VMRuntime;
 import java.lang.invoke.MethodHandles;
 import java.lang.invoke.VarHandle;
 import java.lang.invoke.VarHandle.AccessMode;
@@ -129,9 +128,6 @@
     static final VarHandle vbbd;
     static final VarHandle vbbo;
 
-    // Some test results vary depending on 32-bit vs 64-bit.
-    static final boolean IS_64_BIT = VMRuntime.getRuntime().is64Bit();
-
     static {
         try {
             vz = MethodHandles.lookup().findVarHandle(Main.class, "z", boolean.class);
@@ -1728,14 +1724,14 @@
         checkNotNull(vbaj);
         checkVarType(vbaj, long.class);
         checkCoordinateTypes(vbaj, "[class [B, int]");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET, IS_64_BIT, "(byte[],int)long");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET, IS_64_BIT, "(byte[],int,long)void");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET_VOLATILE, IS_64_BIT, "(byte[],int)long");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET_VOLATILE, IS_64_BIT, "(byte[],int,long)void");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET_ACQUIRE, IS_64_BIT, "(byte[],int)long");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET_RELEASE, IS_64_BIT, "(byte[],int,long)void");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET_OPAQUE, IS_64_BIT, "(byte[],int)long");
-        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET_OPAQUE, IS_64_BIT, "(byte[],int,long)void");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET, true, "(byte[],int)long");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET, true, "(byte[],int,long)void");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET_VOLATILE, true, "(byte[],int)long");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET_VOLATILE, true, "(byte[],int,long)void");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET_ACQUIRE, true, "(byte[],int)long");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET_RELEASE, true, "(byte[],int,long)void");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.GET_OPAQUE, true, "(byte[],int)long");
+        checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.SET_OPAQUE, true, "(byte[],int,long)void");
         checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.COMPARE_AND_SET, true, "(byte[],int,long,long)boolean");
         checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.COMPARE_AND_EXCHANGE, true, "(byte[],int,long,long)long");
         checkVarHandleAccessMode(vbaj, VarHandle.AccessMode.COMPARE_AND_EXCHANGE_ACQUIRE, true, "(byte[],int,long,long)long");
@@ -1800,14 +1796,14 @@
         checkNotNull(vbad);
         checkVarType(vbad, double.class);
         checkCoordinateTypes(vbad, "[class [B, int]");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET, IS_64_BIT, "(byte[],int)double");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET, IS_64_BIT, "(byte[],int,double)void");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET_VOLATILE, IS_64_BIT, "(byte[],int)double");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET_VOLATILE, IS_64_BIT, "(byte[],int,double)void");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET_ACQUIRE, IS_64_BIT, "(byte[],int)double");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET_RELEASE, IS_64_BIT, "(byte[],int,double)void");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET_OPAQUE, IS_64_BIT, "(byte[],int)double");
-        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET_OPAQUE, IS_64_BIT, "(byte[],int,double)void");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET, true, "(byte[],int)double");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET, true, "(byte[],int,double)void");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET_VOLATILE, true, "(byte[],int)double");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET_VOLATILE, true, "(byte[],int,double)void");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET_ACQUIRE, true, "(byte[],int)double");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET_RELEASE, true, "(byte[],int,double)void");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.GET_OPAQUE, true, "(byte[],int)double");
+        checkVarHandleAccessMode(vbad, VarHandle.AccessMode.SET_OPAQUE, true, "(byte[],int,double)void");
         checkVarHandleAccessMode(vbad, VarHandle.AccessMode.COMPARE_AND_SET, true, "(byte[],int,double,double)boolean");
         checkVarHandleAccessMode(vbad, VarHandle.AccessMode.COMPARE_AND_EXCHANGE, true, "(byte[],int,double,double)double");
         checkVarHandleAccessMode(vbad, VarHandle.AccessMode.COMPARE_AND_EXCHANGE_ACQUIRE, true, "(byte[],int,double,double)double");
@@ -1953,14 +1949,14 @@
         checkNotNull(vbbj);
         checkVarType(vbbj, long.class);
         checkCoordinateTypes(vbbj, "[class java.nio.ByteBuffer, int]");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET, IS_64_BIT, "(ByteBuffer,int)long");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET, IS_64_BIT, "(ByteBuffer,int,long)void");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET_VOLATILE, IS_64_BIT, "(ByteBuffer,int)long");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET_VOLATILE, IS_64_BIT, "(ByteBuffer,int,long)void");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET_ACQUIRE, IS_64_BIT, "(ByteBuffer,int)long");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET_RELEASE, IS_64_BIT, "(ByteBuffer,int,long)void");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET_OPAQUE, IS_64_BIT, "(ByteBuffer,int)long");
-        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET_OPAQUE, IS_64_BIT, "(ByteBuffer,int,long)void");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET, true, "(ByteBuffer,int)long");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET, true, "(ByteBuffer,int,long)void");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET_VOLATILE, true, "(ByteBuffer,int)long");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET_VOLATILE, true, "(ByteBuffer,int,long)void");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET_ACQUIRE, true, "(ByteBuffer,int)long");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET_RELEASE, true, "(ByteBuffer,int,long)void");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.GET_OPAQUE, true, "(ByteBuffer,int)long");
+        checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.SET_OPAQUE, true, "(ByteBuffer,int,long)void");
         checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.COMPARE_AND_SET, true, "(ByteBuffer,int,long,long)boolean");
         checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.COMPARE_AND_EXCHANGE, true, "(ByteBuffer,int,long,long)long");
         checkVarHandleAccessMode(vbbj, VarHandle.AccessMode.COMPARE_AND_EXCHANGE_ACQUIRE, true, "(ByteBuffer,int,long,long)long");
@@ -2025,14 +2021,14 @@
         checkNotNull(vbbd);
         checkVarType(vbbd, double.class);
         checkCoordinateTypes(vbbd, "[class java.nio.ByteBuffer, int]");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET, IS_64_BIT, "(ByteBuffer,int)double");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET, IS_64_BIT, "(ByteBuffer,int,double)void");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET_VOLATILE, IS_64_BIT, "(ByteBuffer,int)double");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET_VOLATILE, IS_64_BIT, "(ByteBuffer,int,double)void");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET_ACQUIRE, IS_64_BIT, "(ByteBuffer,int)double");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET_RELEASE, IS_64_BIT, "(ByteBuffer,int,double)void");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET_OPAQUE, IS_64_BIT, "(ByteBuffer,int)double");
-        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET_OPAQUE, IS_64_BIT, "(ByteBuffer,int,double)void");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET, true, "(ByteBuffer,int)double");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET, true, "(ByteBuffer,int,double)void");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET_VOLATILE, true, "(ByteBuffer,int)double");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET_VOLATILE, true, "(ByteBuffer,int,double)void");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET_ACQUIRE, true, "(ByteBuffer,int)double");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET_RELEASE, true, "(ByteBuffer,int,double)void");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.GET_OPAQUE, true, "(ByteBuffer,int)double");
+        checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.SET_OPAQUE, true, "(ByteBuffer,int,double)void");
         checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.COMPARE_AND_SET, true, "(ByteBuffer,int,double,double)boolean");
         checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.COMPARE_AND_EXCHANGE, true, "(ByteBuffer,int,double,double)double");
         checkVarHandleAccessMode(vbbd, VarHandle.AccessMode.COMPARE_AND_EXCHANGE_ACQUIRE, true, "(ByteBuffer,int,double,double)double");
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index 22c5106..c2408b0 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -25,6 +25,7 @@
 #include "instrumentation.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
+#include "jit/profile_compilation_info.h"
 #include "jit/profiling_info.h"
 #include "mirror/class-inl.h"
 #include "nativehelper/ScopedUtfChars.h"