diff options
 compiler/compiled_method.cc          |   6
 compiler/compiled_method.h           |   6
 compiler/dex/dex_to_dex_compiler.cc  | 464
 compiler/dex/dex_to_dex_compiler.h   | 159
 compiler/driver/compiler_driver.cc   | 133
 compiler/driver/compiler_driver.h    |  34
 runtime/base/mutex.h                 |   1
 runtime/vdex_file.cc                 |   7
 8 files changed, 534 insertions(+), 276 deletions(-)
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index e41371855d..0f69dbab94 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -159,4 +159,10 @@ CompiledMethod::~CompiledMethod() { storage->ReleaseMethodInfo(method_info_); } +void CompiledMethod::ReleaseVMapTable() { + CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage(); + storage->ReleaseVMapTable(vmap_table_); + vmap_table_ = nullptr; +} + } // namespace art diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index acdce260e5..4e8f3efe5a 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -168,6 +168,10 @@ class CompiledMethod FINAL : public CompiledCode { ArrayRef<const linker::LinkerPatch> GetPatches() const; + // The compiler sometimes unquickens shared code items. In that case, we need to clear the vmap + // table to avoid writing the quicken info to the vdex file. + void ReleaseVMapTable(); + private: static constexpr size_t kIsIntrinsicLsb = kNumberOfCompiledCodePackedBits; static constexpr size_t kIsIntrinsicSize = 1u; @@ -186,7 +190,7 @@ class CompiledMethod FINAL : public CompiledCode { // For quick code, method specific information that is not very dedupe friendly (method indices). const LengthPrefixedArray<uint8_t>* const method_info_; // For quick code, holds code infos which contain stack maps, inline information, and etc. - const LengthPrefixedArray<uint8_t>* const vmap_table_; + const LengthPrefixedArray<uint8_t>* vmap_table_; // For quick code, a FDE entry for the debug_frame section. const LengthPrefixedArray<uint8_t>* const cfi_info_; // For quick code, linker patches needed by the method. 
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index 308e75d9c1..28c7fe2c34 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -28,6 +28,7 @@ #include "compiled_method.h" #include "dex/dex_file-inl.h" #include "dex/dex_instruction-inl.h" +#include "dex_to_dex_decompiler.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "mirror/dex_cache.h" @@ -44,81 +45,106 @@ const bool kEnableQuickening = true; // Control check-cast elision. const bool kEnableCheckCastEllision = true; -struct QuickenedInfo { - QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {} +DexToDexCompiler::DexToDexCompiler(CompilerDriver* driver) + : driver_(driver), + lock_("Quicken lock", kDexToDexCompilerLock) { + DCHECK(driver != nullptr); +} - uint32_t dex_pc; - uint16_t dex_member_index; -}; +void DexToDexCompiler::ClearState() { + MutexLock lock(Thread::Current(), lock_); + active_dex_file_ = nullptr; + active_bit_vector_ = nullptr; + seen_code_items_.clear(); + should_quicken_.clear(); + shared_code_items_.clear(); + blacklisted_code_items_.clear(); + shared_code_item_quicken_info_.clear(); +} -class DexCompiler { - public: - DexCompiler(art::CompilerDriver& compiler, - const DexCompilationUnit& unit, - DexToDexCompilationLevel dex_to_dex_compilation_level) - : driver_(compiler), - unit_(unit), - dex_to_dex_compilation_level_(dex_to_dex_compilation_level) {} +size_t DexToDexCompiler::NumUniqueCodeItems(Thread* self) const { + MutexLock lock(self, lock_); + return seen_code_items_.size(); +} - ~DexCompiler() {} +BitVector* DexToDexCompiler::GetOrAddBitVectorForDex(const DexFile* dex_file) { + if (active_dex_file_ != dex_file) { + active_dex_file_ = dex_file; + auto inserted = should_quicken_.emplace(dex_file, + BitVector(dex_file->NumMethodIds(), + /*expandable*/ false, + Allocator::GetMallocAllocator())); + active_bit_vector_ = 
&inserted.first->second; + } + return active_bit_vector_; +} - void Compile(); +void DexToDexCompiler::MarkForCompilation(Thread* self, + const MethodReference& method_ref, + const DexFile::CodeItem* code_item) { + MutexLock lock(self, lock_); + BitVector* const bitmap = GetOrAddBitVectorForDex(method_ref.dex_file); + DCHECK(bitmap != nullptr); + DCHECK(!bitmap->IsBitSet(method_ref.index)); + bitmap->SetBit(method_ref.index); + // Detect the shared code items. + if (!seen_code_items_.insert(code_item).second) { + shared_code_items_.insert(code_item); + } +} - const std::vector<QuickenedInfo>& GetQuickenedInfo() const { - return quickened_info_; +DexToDexCompiler::CompilationState::CompilationState(DexToDexCompiler* compiler, + const DexCompilationUnit& unit, + const CompilationLevel compilation_level, + const std::vector<uint8_t>* quicken_data) + : compiler_(compiler), + driver_(*compiler->GetDriver()), + unit_(unit), + compilation_level_(compilation_level), + already_quickened_(quicken_data != nullptr), + existing_quicken_info_(already_quickened_ + ? ArrayRef<const uint8_t>(*quicken_data) : ArrayRef<const uint8_t>()) {} + +uint16_t DexToDexCompiler::CompilationState::NextIndex() { + DCHECK(already_quickened_); + if (kIsDebugBuild && quicken_index_ >= existing_quicken_info_.NumIndices()) { + for (const DexInstructionPcPair& pair : unit_.GetCodeItemAccessor()) { + LOG(ERROR) << pair->DumpString(nullptr); + } + LOG(FATAL) << "Mismatched number of quicken slots."; } + const uint16_t ret = existing_quicken_info_.GetData(quicken_index_); + quicken_index_++; + return ret; +} - private: - const DexFile& GetDexFile() const { - return *unit_.GetDexFile(); +uint16_t DexToDexCompiler::CompilationState::GetIndexForInstruction(const Instruction* inst, + uint32_t index) { + if (UNLIKELY(already_quickened_)) { + return inst->IsQuickened() ? 
NextIndex() : index; } + DCHECK(!inst->IsQuickened()); + return index; +} + +bool DexToDexCompiler::ShouldCompileMethod(const MethodReference& ref) { + // TODO: It's probably safe to avoid the lock here if the active_dex_file_ matches since we only + // only call ShouldCompileMethod on one dex at a time. + MutexLock lock(Thread::Current(), lock_); + return GetOrAddBitVectorForDex(ref.dex_file)->IsBitSet(ref.index); +} - // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where - // a barrier is required. - void CompileReturnVoid(Instruction* inst, uint32_t dex_pc); - - // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In - // this case, returns the second NOP instruction pointer. Otherwise, returns - // the given "inst". - Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc); - - // Compiles a field access into a quick field access. - // The field index is replaced by an offset within an Object where we can read - // from / write to this field. Therefore, this does not involve any resolution - // at runtime. - // Since the field index is encoded with 16 bits, we can replace it only if the - // field offset can be encoded with 16 bits too. - void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_put); - - // Compiles a virtual method invocation into a quick virtual method invocation. - // The method index is replaced by the vtable index where the corresponding - // Executable can be found. Therefore, this does not involve any resolution - // at runtime. - // Since the method index is encoded with 16 bits, we can replace it only if the - // vtable index can be encoded with 16 bits too. 
- void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_range); - - CompilerDriver& driver_; - const DexCompilationUnit& unit_; - const DexToDexCompilationLevel dex_to_dex_compilation_level_; - - // Filled by the compiler when quickening, in order to encode that information - // in the .oat file. The runtime will use that information to get to the original - // opcodes. - std::vector<QuickenedInfo> quickened_info_; - - DISALLOW_COPY_AND_ASSIGN(DexCompiler); -}; - -void DexCompiler::Compile() { - DCHECK_EQ(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kOptimize); - IterationRange<DexInstructionIterator> instructions(unit_.GetCodeItemAccessor().begin(), - unit_.GetCodeItemAccessor().end()); +std::vector<uint8_t> DexToDexCompiler::CompilationState::Compile() { + DCHECK_EQ(compilation_level_, CompilationLevel::kOptimize); + const CodeItemDataAccessor& instructions = unit_.GetCodeItemAccessor(); for (DexInstructionIterator it = instructions.begin(); it != instructions.end(); ++it) { const uint32_t dex_pc = it.DexPc(); Instruction* inst = const_cast<Instruction*>(&it.Inst()); + + if (!already_quickened_) { + DCHECK(!inst->IsQuickened()); + } + switch (inst->Opcode()) { case Instruction::RETURN_VOID: CompileReturnVoid(inst, dex_pc); @@ -134,84 +160,147 @@ void DexCompiler::Compile() { break; case Instruction::IGET: + case Instruction::IGET_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_QUICK, false); break; case Instruction::IGET_WIDE: + case Instruction::IGET_WIDE_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_WIDE_QUICK, false); break; case Instruction::IGET_OBJECT: + case Instruction::IGET_OBJECT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_OBJECT_QUICK, false); break; case Instruction::IGET_BOOLEAN: + case Instruction::IGET_BOOLEAN_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BOOLEAN_QUICK, false); break; case 
Instruction::IGET_BYTE: + case Instruction::IGET_BYTE_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BYTE_QUICK, false); break; case Instruction::IGET_CHAR: + case Instruction::IGET_CHAR_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_CHAR_QUICK, false); break; case Instruction::IGET_SHORT: + case Instruction::IGET_SHORT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_SHORT_QUICK, false); break; case Instruction::IPUT: + case Instruction::IPUT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_QUICK, true); break; case Instruction::IPUT_BOOLEAN: + case Instruction::IPUT_BOOLEAN_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BOOLEAN_QUICK, true); break; case Instruction::IPUT_BYTE: + case Instruction::IPUT_BYTE_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BYTE_QUICK, true); break; case Instruction::IPUT_CHAR: + case Instruction::IPUT_CHAR_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_CHAR_QUICK, true); break; case Instruction::IPUT_SHORT: + case Instruction::IPUT_SHORT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_SHORT_QUICK, true); break; case Instruction::IPUT_WIDE: + case Instruction::IPUT_WIDE_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_WIDE_QUICK, true); break; case Instruction::IPUT_OBJECT: + case Instruction::IPUT_OBJECT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_OBJECT_QUICK, true); break; case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_VIRTUAL_QUICK: CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_QUICK, false); break; case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_RANGE_QUICK, true); break; case Instruction::NOP: - // We need to differentiate between check cast inserted NOP and normal NOP, put an invalid - // index in the map for 
normal nops. This should be rare in real code. - quickened_info_.push_back(QuickenedInfo(dex_pc, DexFile::kDexNoIndex16)); + if (already_quickened_) { + const uint16_t reference_index = NextIndex(); + quickened_info_.push_back(QuickenedInfo(dex_pc, reference_index)); + if (reference_index == DexFile::kDexNoIndex16) { + // This means it was a normal nop and not a check-cast. + break; + } + const uint16_t type_index = NextIndex(); + if (driver_.IsSafeCast(&unit_, dex_pc)) { + quickened_info_.push_back(QuickenedInfo(dex_pc, type_index)); + } + ++it; + } else { + // We need to differentiate between check cast inserted NOP and normal NOP, put an invalid + // index in the map for normal nops. This should be rare in real code. + quickened_info_.push_back(QuickenedInfo(dex_pc, DexFile::kDexNoIndex16)); + } break; default: - DCHECK(!inst->IsQuickened()); // Nothing to do. break; } } + + if (already_quickened_) { + DCHECK_EQ(quicken_index_, existing_quicken_info_.NumIndices()); + } + + if (GetQuickenedInfo().empty()) { + // No need to create a CompiledMethod if there are no quickened opcodes. + return std::vector<uint8_t>(); + } + + std::vector<uint8_t> quicken_data; + if (kIsDebugBuild) { + // Double check that the counts line up with the size of the quicken info. + size_t quicken_count = 0; + for (const DexInstructionPcPair& pair : instructions) { + if (QuickenInfoTable::NeedsIndexForInstruction(&pair.Inst())) { + ++quicken_count; + } + } + CHECK_EQ(quicken_count, GetQuickenedInfo().size()); + } + + QuickenInfoTable::Builder builder(&quicken_data, GetQuickenedInfo().size()); + // Length is encoded by the constructor. + for (const CompilationState::QuickenedInfo& info : GetQuickenedInfo()) { + // Dex pc is not serialized, only used for checking the instructions. Since we access the + // array based on the index of the quickened instruction, the indexes must line up perfectly. + // The reader side uses the NeedsIndexForInstruction function too. 
+ const Instruction& inst = instructions.InstructionAt(info.dex_pc); + CHECK(QuickenInfoTable::NeedsIndexForInstruction(&inst)) << inst.Opcode(); + builder.AddIndex(info.dex_member_index); + } + DCHECK(!quicken_data.empty()); + return quicken_data; } -void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { +void DexToDexCompiler::CompilationState::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { DCHECK_EQ(inst->Opcode(), Instruction::RETURN_VOID); if (unit_.IsConstructor()) { // Are we compiling a non clinit constructor which needs a barrier ? @@ -229,7 +318,8 @@ void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { inst->SetOpcode(Instruction::RETURN_VOID_NO_BARRIER); } -Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) { +Instruction* DexToDexCompiler::CompilationState::CompileCheckCast(Instruction* inst, + uint32_t dex_pc) { if (!kEnableCheckCastEllision) { return inst; } @@ -246,27 +336,30 @@ Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) { << " by replacing it with 2 NOPs at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegA_21c())); - quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegB_21c())); - // We are modifying 4 consecutive bytes. - inst->SetOpcode(Instruction::NOP); - inst->SetVRegA_10x(0u); // keep compliant with verifier. - // Get to next instruction which is the second half of check-cast and replace - // it by a NOP. - inst = const_cast<Instruction*>(inst->Next()); - inst->SetOpcode(Instruction::NOP); - inst->SetVRegA_10x(0u); // keep compliant with verifier. + if (!already_quickened_) { + quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegA_21c())); + quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegB_21c())); + + // We are modifying 4 consecutive bytes. 
+ inst->SetOpcode(Instruction::NOP); + inst->SetVRegA_10x(0u); // keep compliant with verifier. + // Get to next instruction which is the second half of check-cast and replace + // it by a NOP. + inst = const_cast<Instruction*>(inst->Next()); + inst->SetOpcode(Instruction::NOP); + inst->SetVRegA_10x(0u); // keep compliant with verifier. + } return inst; } -void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, - uint32_t dex_pc, - Instruction::Code new_opcode, - bool is_put) { +void DexToDexCompiler::CompilationState::CompileInstanceFieldAccess(Instruction* inst, + uint32_t dex_pc, + Instruction::Code new_opcode, + bool is_put) { if (!kEnableQuickening) { return; } - uint32_t field_idx = inst->VRegC_22c(); + uint32_t field_idx = GetIndexForInstruction(inst, inst->VRegC_22c()); MemberOffset field_offset(0u); bool is_volatile; bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, is_put, @@ -278,20 +371,29 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, << " by field offset " << field_offset.Int32Value() << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - // We are modifying 4 consecutive bytes. - inst->SetOpcode(new_opcode); - // Replace field index by field offset. - inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value())); + if (!already_quickened_) { + // We are modifying 4 consecutive bytes. + inst->SetOpcode(new_opcode); + // Replace field index by field offset. 
+ inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value())); + } quickened_info_.push_back(QuickenedInfo(dex_pc, field_idx)); } } -void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_range) { +const DexFile& DexToDexCompiler::CompilationState::GetDexFile() const { + return *unit_.GetDexFile(); +} + +void DexToDexCompiler::CompilationState::CompileInvokeVirtual(Instruction* inst, + uint32_t dex_pc, + Instruction::Code new_opcode, + bool is_range) { if (!kEnableQuickening) { return; } - uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c(); + uint32_t method_idx = GetIndexForInstruction(inst, + is_range ? inst->VRegB_3rc() : inst->VRegB_35c()); ScopedObjectAccess soa(Thread::Current()); ClassLinker* class_linker = unit_.GetClassLinker(); @@ -318,19 +420,20 @@ void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, << " by vtable index " << vtable_idx << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - // We are modifying 4 consecutive bytes. - inst->SetOpcode(new_opcode); - // Replace method index by vtable index. - if (is_range) { - inst->SetVRegB_3rc(static_cast<uint16_t>(vtable_idx)); - } else { - inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx)); + if (!already_quickened_) { + // We are modifying 4 consecutive bytes. + inst->SetOpcode(new_opcode); + // Replace method index by vtable index. 
+ if (is_range) { + inst->SetVRegB_3rc(static_cast<uint16_t>(vtable_idx)); + } else { + inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx)); + } } quickened_info_.push_back(QuickenedInfo(dex_pc, method_idx)); } -CompiledMethod* ArtCompileDEX( - CompilerDriver* driver, +CompiledMethod* DexToDexCompiler::CompileMethod( const DexFile::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type ATTRIBUTE_UNUSED, @@ -338,69 +441,122 @@ CompiledMethod* ArtCompileDEX( uint32_t method_idx, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, - DexToDexCompilationLevel dex_to_dex_compilation_level) { - DCHECK(driver != nullptr); - if (dex_to_dex_compilation_level != DexToDexCompilationLevel::kDontDexToDexCompile) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<1> hs(soa.Self()); - ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - art::DexCompilationUnit unit( - class_loader, - class_linker, - dex_file, - code_item, - class_def_idx, - method_idx, - access_flags, - driver->GetVerifiedMethod(&dex_file, method_idx), - hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file))); - art::optimizer::DexCompiler dex_compiler(*driver, unit, dex_to_dex_compilation_level); - dex_compiler.Compile(); - if (dex_compiler.GetQuickenedInfo().empty()) { - // No need to create a CompiledMethod if there are no quickened opcodes. 
+ CompilationLevel compilation_level) { + if (compilation_level == CompilationLevel::kDontDexToDexCompile) { + return nullptr; + } + + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + art::DexCompilationUnit unit( + class_loader, + class_linker, + dex_file, + code_item, + class_def_idx, + method_idx, + access_flags, + driver_->GetVerifiedMethod(&dex_file, method_idx), + hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file))); + + std::vector<uint8_t> quicken_data; + // If the code item is shared with multiple different method ids, make sure that we quicken only + // once and verify that all the dequicken maps match. + if (UNLIKELY(shared_code_items_.find(code_item) != shared_code_items_.end())) { + // For shared code items, use a lock to prevent races. + MutexLock mu(soa.Self(), lock_); + // Blacklisted means there was a quickening conflict previously, bail early. + if (blacklisted_code_items_.find(code_item) != blacklisted_code_items_.end()) { return nullptr; } + auto existing = shared_code_item_quicken_info_.find(code_item); + const bool already_quickened = existing != shared_code_item_quicken_info_.end(); + { + CompilationState state(this, + unit, + compilation_level, + already_quickened ? &existing->second.quicken_data_ : nullptr); + quicken_data = state.Compile(); + } - // Create a `CompiledMethod`, with the quickened information in the vmap table. - if (kIsDebugBuild) { - // Double check that the counts line up with the size of the quicken info. - size_t quicken_count = 0; - for (const DexInstructionPcPair& pair : unit.GetCodeItemAccessor()) { - if (QuickenInfoTable::NeedsIndexForInstruction(&pair.Inst())) { - ++quicken_count; + // Already quickened, check that the data matches what was previously seen. 
+ MethodReference method_ref(&dex_file, method_idx); + if (already_quickened) { + QuickenState* const existing_data = &existing->second; + if (existing_data->quicken_data_ != quicken_data) { + VLOG(compiler) << "Quicken data mismatch, dequickening method " + << dex_file.PrettyMethod(method_idx); + // Unquicken using the existing quicken data. + optimizer::ArtDecompileDEX(dex_file, + *code_item, + ArrayRef<const uint8_t>(existing_data->quicken_data_), + /* decompile_return_instruction*/ false); + // Go clear the vmaps for all the methods that were already quickened to avoid writing them + // out during oat writing. + for (const MethodReference& ref : existing_data->methods_) { + CompiledMethod* method = driver_->GetCompiledMethod(ref); + DCHECK(method != nullptr); + method->ReleaseVMapTable(); } + // Blacklist the method to never attempt to quicken it in the future. + blacklisted_code_items_.insert(code_item); + shared_code_item_quicken_info_.erase(existing); + return nullptr; } - CHECK_EQ(quicken_count, dex_compiler.GetQuickenedInfo().size()); + existing_data->methods_.push_back(method_ref); + } else { + QuickenState new_state; + new_state.methods_.push_back(method_ref); + new_state.quicken_data_ = quicken_data; + bool inserted = shared_code_item_quicken_info_.emplace(code_item, new_state).second; + CHECK(inserted) << "Failed to insert " << dex_file.PrettyMethod(method_idx); } - std::vector<uint8_t> quicken_data; - QuickenInfoTable::Builder builder(&quicken_data, dex_compiler.GetQuickenedInfo().size()); - // Length is encoded by the constructor. - for (QuickenedInfo info : dex_compiler.GetQuickenedInfo()) { - // Dex pc is not serialized, only used for checking the instructions. Since we access the - // array based on the index of the quickened instruction, the indexes must line up perfectly. - // The reader side uses the NeedsIndexForInstruction function too. 
- const Instruction& inst = unit.GetCodeItemAccessor().InstructionAt(info.dex_pc); - CHECK(QuickenInfoTable::NeedsIndexForInstruction(&inst)) << inst.Opcode(); - builder.AddIndex(info.dex_member_index); + + // Easy sanity check is to check that the existing stuff matches by re-quickening using the + // newly produced quicken data. + // Note that this needs to be behind the lock for this case since we may unquicken in another + // thread. + if (kIsDebugBuild) { + CompilationState state2(this, unit, compilation_level, &quicken_data); + std::vector<uint8_t> new_data = state2.Compile(); + CHECK(new_data == quicken_data) << "Mismatch producing new quicken data"; } - InstructionSet instruction_set = driver->GetInstructionSet(); - if (instruction_set == InstructionSet::kThumb2) { - // Don't use the thumb2 instruction set to avoid the one off code delta. - instruction_set = InstructionSet::kArm; + } else { + CompilationState state(this, unit, compilation_level, /*quicken_data*/ nullptr); + quicken_data = state.Compile(); + + // Easy sanity check is to check that the existing stuff matches by re-quickening using the + // newly produced quicken data. + if (kIsDebugBuild) { + CompilationState state2(this, unit, compilation_level, &quicken_data); + std::vector<uint8_t> new_data = state2.Compile(); + CHECK(new_data == quicken_data) << "Mismatch producing new quicken data"; } - return CompiledMethod::SwapAllocCompiledMethod( - driver, - instruction_set, - ArrayRef<const uint8_t>(), // no code - 0, - 0, - 0, - ArrayRef<const uint8_t>(), // method_info - ArrayRef<const uint8_t>(quicken_data), // vmap_table - ArrayRef<const uint8_t>(), // cfi data - ArrayRef<const linker::LinkerPatch>()); } - return nullptr; + + if (quicken_data.empty()) { + return nullptr; + } + + // Create a `CompiledMethod`, with the quickened information in the vmap table. 
+ InstructionSet instruction_set = driver_->GetInstructionSet(); + if (instruction_set == InstructionSet::kThumb2) { + // Don't use the thumb2 instruction set to avoid the one off code delta. + instruction_set = InstructionSet::kArm; + } + CompiledMethod* ret = CompiledMethod::SwapAllocCompiledMethod( + driver_, + instruction_set, + ArrayRef<const uint8_t>(), // no code + 0, + 0, + 0, + ArrayRef<const uint8_t>(), // method_info + ArrayRef<const uint8_t>(quicken_data), // vmap_table + ArrayRef<const uint8_t>(), // cfi data + ArrayRef<const linker::LinkerPatch>()); + return ret; } } // namespace optimizer diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h index 80b94d2dc3..abd048167c 100644 --- a/compiler/dex/dex_to_dex_compiler.h +++ b/compiler/dex/dex_to_dex_compiler.h @@ -17,14 +17,22 @@ #ifndef ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ #define ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ +#include <set> +#include <unordered_map> +#include <unordered_set> + +#include "base/bit_vector.h" #include "dex/dex_file.h" #include "handle.h" #include "invoke_type.h" +#include "method_reference.h" +#include "quicken_info.h" namespace art { class CompiledMethod; class CompilerDriver; +class DexCompilationUnit; namespace mirror { class ClassLoader; @@ -32,21 +40,144 @@ class ClassLoader; namespace optimizer { -enum class DexToDexCompilationLevel { - kDontDexToDexCompile, // Only meaning wrt image time interpretation. - kOptimize // Perform peep-hole optimizations. +class DexToDexCompiler { + public: + enum class CompilationLevel { + kDontDexToDexCompile, // Only meaning wrt image time interpretation. + kOptimize // Perform peep-hole optimizations. 
+ }; + + explicit DexToDexCompiler(CompilerDriver* driver); + + CompiledMethod* CompileMethod(const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + const CompilationLevel compilation_level) WARN_UNUSED; + + void MarkForCompilation(Thread* self, + const MethodReference& method_ref, + const DexFile::CodeItem* code_item); + + void ClearState(); + + CompilerDriver* GetDriver() { + return driver_; + } + + bool ShouldCompileMethod(const MethodReference& ref); + + size_t NumUniqueCodeItems(Thread* self) const; + + private: + // Holds the state for compiling a single method. + struct CompilationState { + struct QuickenedInfo { + QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {} + + uint32_t dex_pc; + uint16_t dex_member_index; + }; + + CompilationState(DexToDexCompiler* compiler, + const DexCompilationUnit& unit, + const CompilationLevel compilation_level, + const std::vector<uint8_t>* quicken_data); + + const std::vector<QuickenedInfo>& GetQuickenedInfo() const { + return quickened_info_; + } + + // Returns the quickening info, or an empty array if it was not quickened. + // If already_quickened is true, then don't change anything but still return what the quicken + // data would have been. + std::vector<uint8_t> Compile(); + + const DexFile& GetDexFile() const; + + // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where + // a barrier is required. + void CompileReturnVoid(Instruction* inst, uint32_t dex_pc); + + // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In + // this case, returns the second NOP instruction pointer. Otherwise, returns + // the given "inst". + Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc); + + // Compiles a field access into a quick field access. 
+ // The field index is replaced by an offset within an Object where we can read + // from / write to this field. Therefore, this does not involve any resolution + // at runtime. + // Since the field index is encoded with 16 bits, we can replace it only if the + // field offset can be encoded with 16 bits too. + void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc, + Instruction::Code new_opcode, bool is_put); + + // Compiles a virtual method invocation into a quick virtual method invocation. + // The method index is replaced by the vtable index where the corresponding + // executable can be found. Therefore, this does not involve any resolution + // at runtime. + // Since the method index is encoded with 16 bits, we can replace it only if the + // vtable index can be encoded with 16 bits too. + void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, + Instruction::Code new_opcode, bool is_range); + + // Return the next index. + uint16_t NextIndex(); + + // Returns the dequickened index if an instruction is quickened, otherwise return index. + uint16_t GetIndexForInstruction(const Instruction* inst, uint32_t index); + + DexToDexCompiler* const compiler_; + CompilerDriver& driver_; + const DexCompilationUnit& unit_; + const CompilationLevel compilation_level_; + + // Filled by the compiler when quickening, in order to encode that information + // in the .oat file. The runtime will use that information to get to the original + // opcodes. + std::vector<QuickenedInfo> quickened_info_; + + // If the code item was already quickened previously. 
+ const bool already_quickened_; + const QuickenInfoTable existing_quicken_info_; + uint32_t quicken_index_ = 0u; + + DISALLOW_COPY_AND_ASSIGN(CompilationState); + }; + + struct QuickenState { + std::vector<MethodReference> methods_; + std::vector<uint8_t> quicken_data_; + }; + + BitVector* GetOrAddBitVectorForDex(const DexFile* dex_file) REQUIRES(lock_); + + CompilerDriver* const driver_; + + // State for adding methods (should this be in its own class?). + const DexFile* active_dex_file_ = nullptr; + BitVector* active_bit_vector_ = nullptr; + + // Lock that guards duplicate code items and the bitmap. + mutable Mutex lock_; + // Record what method references are going to get quickened. + std::unordered_map<const DexFile*, BitVector> should_quicken_; + // Record what code items are already seen to detect when multiple methods have the same code + // item. + std::unordered_set<const DexFile::CodeItem*> seen_code_items_ GUARDED_BY(lock_); + // Guarded by lock_ during writing, accessed without a lock during quickening. + // This is safe because no thread is adding to the shared code items during the quickening phase. 
+ std::unordered_set<const DexFile::CodeItem*> shared_code_items_; + std::unordered_set<const DexFile::CodeItem*> blacklisted_code_items_ GUARDED_BY(lock_); + std::unordered_map<const DexFile::CodeItem*, QuickenState> shared_code_item_quicken_info_ + GUARDED_BY(lock_); }; -std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs); - -CompiledMethod* ArtCompileDEX(CompilerDriver* driver, - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - DexToDexCompilationLevel dex_to_dex_compilation_level); + +std::ostream& operator<<(std::ostream& os, const DexToDexCompiler::CompilationLevel& rhs); } // namespace optimizer diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 869865956c..6c5cc50269 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -255,24 +255,6 @@ class CompilerDriver::AOTCompilationStats { DISALLOW_COPY_AND_ASSIGN(AOTCompilationStats); }; -class CompilerDriver::DexFileMethodSet { - public: - explicit DexFileMethodSet(const DexFile& dex_file) - : dex_file_(dex_file), - method_indexes_(dex_file.NumMethodIds(), false, Allocator::GetMallocAllocator()) { - } - DexFileMethodSet(DexFileMethodSet&& other) = default; - - const DexFile& GetDexFile() const { return dex_file_; } - - BitVector& GetMethodIndexes() { return method_indexes_; } - const BitVector& GetMethodIndexes() const { return method_indexes_; } - - private: - const DexFile& dex_file_; - BitVector method_indexes_; -}; - CompilerDriver::CompilerDriver( const CompilerOptions* compiler_options, VerificationResults* verification_results, @@ -306,9 +288,8 @@ CompilerDriver::CompilerDriver( compiled_method_storage_(swap_fd), profile_compilation_info_(profile_compilation_info), max_arena_alloc_(0), - dex_to_dex_references_lock_("dex-to-dex references lock"), 
- dex_to_dex_references_(), - current_dex_to_dex_methods_(nullptr) { + compiling_dex_to_dex_(false), + dex_to_dex_compiler_(this) { DCHECK(compiler_options_ != nullptr); compiler_->Init(); @@ -398,7 +379,7 @@ void CompilerDriver::CompileAll(jobject class_loader, FreeThreadPools(); } -static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( +static optimizer::DexToDexCompiler::CompilationLevel GetDexToDexCompilationLevel( Thread* self, const CompilerDriver& driver, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, const DexFile::ClassDef& class_def) REQUIRES_SHARED(Locks::mutator_lock_) { @@ -410,7 +391,7 @@ static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( if (klass == nullptr) { CHECK(self->IsExceptionPending()); self->ClearException(); - return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; + return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; } // DexToDex at the kOptimize level may introduce quickened opcodes, which replace symbolic // references with actual offsets. We cannot re-verify such instructions. @@ -418,22 +399,23 @@ static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( // We store the verification information in the class status in the oat file, which the linker // can validate (checksums) and use to skip load-time verification. It is thus safe to // optimize when a class has been fully verified before. - optimizer::DexToDexCompilationLevel max_level = optimizer::DexToDexCompilationLevel::kOptimize; + optimizer::DexToDexCompiler::CompilationLevel max_level = + optimizer::DexToDexCompiler::CompilationLevel::kOptimize; if (driver.GetCompilerOptions().GetDebuggable()) { // We are debuggable so definitions of classes might be changed. We don't want to do any // optimizations that could break that. 
- max_level = optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; + max_level = optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; } if (klass->IsVerified()) { // Class is verified so we can enable DEX-to-DEX compilation for performance. return max_level; } else { // Class verification has failed: do not run DEX-to-DEX optimizations. - return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; + return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; } } -static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( +static optimizer::DexToDexCompiler::CompilationLevel GetDexToDexCompilationLevel( Thread* self, const CompilerDriver& driver, jobject jclass_loader, @@ -470,7 +452,7 @@ static void CompileMethod(Thread* self, uint32_t method_idx, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, - optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, bool compilation_enabled, Handle<mirror::DexCache> dex_cache) { DCHECK(driver != nullptr); @@ -478,18 +460,18 @@ static void CompileMethod(Thread* self, uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0; MethodReference method_ref(&dex_file, method_idx); - if (driver->GetCurrentDexToDexMethods() != nullptr) { + if (driver->GetCompilingDexToDex()) { + optimizer::DexToDexCompiler* const compiler = &driver->GetDexToDexCompiler(); // This is the second pass when we dex-to-dex compile previously marked methods. // TODO: Refactor the compilation to avoid having to distinguish the two passes // here. That should be done on a higher level. 
http://b/29089975
-    if (driver->GetCurrentDexToDexMethods()->IsBitSet(method_idx)) {
+    if (compiler->ShouldCompileMethod(method_ref)) {
       VerificationResults* results = driver->GetVerificationResults();
       DCHECK(results != nullptr);
       const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref);
       // Do not optimize if a VerifiedMethod is missing. SafeCast elision,
       // for example, relies on it.
-      compiled_method = optimizer::ArtCompileDEX(
-          driver,
+      compiled_method = compiler->CompileMethod(
           code_item,
           access_flags,
           invoke_type,
@@ -499,7 +481,7 @@ static void CompileMethod(Thread* self,
           dex_file,
           (verified_method != nullptr)
               ? dex_to_dex_compilation_level
-              : optimizer::DexToDexCompilationLevel::kDontDexToDexCompile);
+              : optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile);
     }
   } else if ((access_flags & kAccNative) != 0) {
     // Are we extracting only and have support for generic JNI down calls?
@@ -524,7 +506,7 @@ static void CompileMethod(Thread* self,
     bool compile = compilation_enabled &&
         // Basic checks, e.g., not <clinit>.
         results->IsCandidateForCompilation(method_ref, access_flags) &&
-        // Did not fail to create VerifiedMethod metadata.
+        // Did not fail to create VerifiedMethod metadata.
         verified_method != nullptr &&
         // Do not have failures that should punt to the interpreter.
         !verified_method->HasRuntimeThrow() &&
@@ -546,10 +528,12 @@ static void CompileMethod(Thread* self,
           dex_cache);
     }
     if (compiled_method == nullptr &&
-        dex_to_dex_compilation_level != optimizer::DexToDexCompilationLevel::kDontDexToDexCompile) {
+        dex_to_dex_compilation_level !=
+            optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile) {
       DCHECK(!Runtime::Current()->UseJitCompilation());
+      DCHECK(!driver->GetCompilingDexToDex());
       // TODO: add a command-line option to disable DEX-to-DEX compilation ?
- driver->MarkForDexToDexCompilation(self, method_ref); + driver->GetDexToDexCompiler().MarkForCompilation(self, method_ref, code_item); } } if (kTimeCompileMethod) { @@ -616,14 +600,14 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t PreCompile(jclass_loader, dex_files, timings); // Can we run DEX-to-DEX compiler on this class ? - optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level = + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level = GetDexToDexCompilationLevel(self, *this, jclass_loader, *dex_file, dex_file->GetClassDef(class_def_idx)); - DCHECK(current_dex_to_dex_methods_ == nullptr); + DCHECK(!compiling_dex_to_dex_); CompileMethod(self, this, code_item, @@ -637,19 +621,10 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t true, dex_cache); - ArrayRef<DexFileMethodSet> dex_to_dex_references; - { - // From this point on, we shall not modify dex_to_dex_references_, so - // just grab a reference to it that we use without holding the mutex. 
- MutexLock lock(Thread::Current(), dex_to_dex_references_lock_); - dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_); - } - if (!dex_to_dex_references.empty()) { - DCHECK_EQ(dex_to_dex_references.size(), 1u); - DCHECK(&dex_to_dex_references[0].GetDexFile() == dex_file); - current_dex_to_dex_methods_ = &dex_to_dex_references.front().GetMethodIndexes(); - DCHECK(current_dex_to_dex_methods_->IsBitSet(method_idx)); - DCHECK_EQ(current_dex_to_dex_methods_->NumSetBits(), 1u); + const size_t num_methods = dex_to_dex_compiler_.NumUniqueCodeItems(self); + if (num_methods != 0) { + DCHECK_EQ(num_methods, 1u); + compiling_dex_to_dex_ = true; CompileMethod(self, this, code_item, @@ -662,7 +637,8 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t dex_to_dex_compilation_level, true, dex_cache); - current_dex_to_dex_methods_ = nullptr; + compiling_dex_to_dex_ = false; + dex_to_dex_compiler_.ClearState(); } FreeThreadPools(); @@ -1280,17 +1256,6 @@ bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) { return IsImageClass(descriptor); } -void CompilerDriver::MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref) { - MutexLock lock(self, dex_to_dex_references_lock_); - // Since we're compiling one dex file at a time, we need to look for the - // current dex file entry only at the end of dex_to_dex_references_. 
- if (dex_to_dex_references_.empty() || - &dex_to_dex_references_.back().GetDexFile() != method_ref.dex_file) { - dex_to_dex_references_.emplace_back(*method_ref.dex_file); - } - dex_to_dex_references_.back().GetMethodIndexes().SetBit(method_ref.index); -} - bool CompilerDriver::CanAccessTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class, ObjPtr<mirror::Class> resolved_class) { if (resolved_class == nullptr) { @@ -2612,14 +2577,8 @@ void CompilerDriver::Compile(jobject class_loader, : profile_compilation_info_->DumpInfo(&dex_files)); } - current_dex_to_dex_methods_ = nullptr; - Thread* const self = Thread::Current(); - { - // Clear in case we aren't the first call to Compile. - MutexLock mu(self, dex_to_dex_references_lock_); - dex_to_dex_references_.clear(); - } - + dex_to_dex_compiler_.ClearState(); + compiling_dex_to_dex_ = false; for (const DexFile* dex_file : dex_files) { CHECK(dex_file != nullptr); CompileDexFile(class_loader, @@ -2634,23 +2593,21 @@ void CompilerDriver::Compile(jobject class_loader, Runtime::Current()->ReclaimArenaPoolMemory(); } - ArrayRef<DexFileMethodSet> dex_to_dex_references; - { - // From this point on, we shall not modify dex_to_dex_references_, so - // just grab a reference to it that we use without holding the mutex. - MutexLock lock(self, dex_to_dex_references_lock_); - dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_); - } - for (const auto& method_set : dex_to_dex_references) { - current_dex_to_dex_methods_ = &method_set.GetMethodIndexes(); - CompileDexFile(class_loader, - method_set.GetDexFile(), - dex_files, - parallel_thread_pool_.get(), - parallel_thread_count_, - timings); + if (dex_to_dex_compiler_.NumUniqueCodeItems(Thread::Current()) > 0u) { + compiling_dex_to_dex_ = true; + // TODO: Not visit all of the dex files, its probably rare that only one would have quickened + // methods though. 
+ for (const DexFile* dex_file : dex_files) { + CompileDexFile(class_loader, + *dex_file, + dex_files, + parallel_thread_pool_.get(), + parallel_thread_count_, + timings); + } + dex_to_dex_compiler_.ClearState(); + compiling_dex_to_dex_ = false; } - current_dex_to_dex_methods_ = nullptr; VLOG(compiler) << "Compile: " << GetMemoryUsageString(false); } @@ -2701,7 +2658,7 @@ class CompileClassVisitor : public CompilationVisitor { CompilerDriver* const driver = manager_->GetCompiler(); // Can we run DEX-to-DEX compiler on this class ? - optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level = + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level = GetDexToDexCompilationLevel(soa.Self(), *driver, jclass_loader, dex_file, class_def); ClassDataItemIterator it(dex_file, class_data); diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index ef16212fb7..87a8a186c1 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -35,6 +35,7 @@ #include "compiler.h" #include "dex/dex_file.h" #include "dex/dex_file_types.h" +#include "dex/dex_to_dex_compiler.h" #include "driver/compiled_method_storage.h" #include "jit/profile_compilation_info.h" #include "method_reference.h" @@ -120,12 +121,11 @@ class CompilerDriver { void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files, TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_, !dex_to_dex_references_lock_); + REQUIRES(!Locks::mutator_lock_); // Compile a single Method. 
void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings) - REQUIRES_SHARED(Locks::mutator_lock_) - REQUIRES(!dex_to_dex_references_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); VerificationResults* GetVerificationResults() const; @@ -362,13 +362,6 @@ class CompilerDriver { return true; } - void MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref) - REQUIRES(!dex_to_dex_references_lock_); - - const BitVector* GetCurrentDexToDexMethods() const { - return current_dex_to_dex_methods_; - } - const ProfileCompilationInfo* GetProfileCompilationInfo() const { return profile_compilation_info_; } @@ -381,6 +374,14 @@ class CompilerDriver { || android::base::EndsWith(boot_image_filename, "core-optimizing.art"); } + bool GetCompilingDexToDex() const { + return compiling_dex_to_dex_; + } + + optimizer::DexToDexCompiler& GetDexToDexCompiler() { + return dex_to_dex_compiler_; + } + private: void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files, @@ -447,7 +448,7 @@ class CompilerDriver { void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) REQUIRES(!dex_to_dex_references_lock_); + TimingLogger* timings); void CompileDexFile(jobject class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, @@ -539,14 +540,9 @@ class CompilerDriver { size_t max_arena_alloc_; - // Data for delaying dex-to-dex compilation. - Mutex dex_to_dex_references_lock_; - // In the first phase, dex_to_dex_references_ collects methods for dex-to-dex compilation. - class DexFileMethodSet; - std::vector<DexFileMethodSet> dex_to_dex_references_ GUARDED_BY(dex_to_dex_references_lock_); - // In the second phase, current_dex_to_dex_methods_ points to the BitVector with method - // indexes for dex-to-dex compilation in the current dex file. - const BitVector* current_dex_to_dex_methods_; + // Compiler for dex to dex (quickening). 
+ bool compiling_dex_to_dex_; + optimizer::DexToDexCompiler dex_to_dex_compiler_; friend class CompileClassVisitor; friend class DexToDexDecompilerTest; diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 7077298ca9..d541b79a98 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -101,6 +101,7 @@ enum LockLevel { kAllocatedThreadIdsLock, kMonitorPoolLock, kClassLinkerClassesLock, // TODO rename. + kDexToDexCompilerLock, kJitCodeCacheLock, kCHALock, kSubtypeCheckLock, diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc index 118cffeda6..cab91dfe76 100644 --- a/runtime/vdex_file.cc +++ b/runtime/vdex_file.cc @@ -19,6 +19,7 @@ #include <sys/mman.h> // For the PROT_* and MAP_* constants. #include <memory> +#include <unordered_set> #include <android-base/logging.h> @@ -265,6 +266,8 @@ void VdexFile::UnquickenDexFile(const DexFile& target_dex_file, // RETURN_VOID_NO_BARRIER instructions to RETURN_VOID instructions. return; } + // Make sure to not unquicken the same code item multiple times. + std::unordered_set<const DexFile::CodeItem*> unquickened_code_item; for (uint32_t i = 0; i < target_dex_file.NumClassDefs(); ++i) { const DexFile::ClassDef& class_def = target_dex_file.GetClassDef(i); const uint8_t* class_data = target_dex_file.GetClassData(class_def); @@ -274,6 +277,10 @@ void VdexFile::UnquickenDexFile(const DexFile& target_dex_file, class_it.Next()) { if (class_it.IsAtMethod() && class_it.GetMethodCodeItem() != nullptr) { const DexFile::CodeItem* code_item = class_it.GetMethodCodeItem(); + if (!unquickened_code_item.emplace(code_item).second) { + // Already unquickened this code item, do not do it again. + continue; + } ArrayRef<const uint8_t> quicken_data; if (!quickening_info.empty()) { const uint32_t quickening_offset = GetQuickeningInfoOffset( |