Fix quickening logic
Fix various bugs in the quickening logic related to shared code
items for different methods.
Fixed the case where two methods quicken differently on the same code
item by checking that the quicken info is the same for all methods
that quicken the same code item. This is accomplished by requickening
and reverifying the contents of the quicken info.
Fixed the case where the dex to dex compiler would abort from a
DCHECK that there were no already-quickened instructions.
Feature is tested by enabling deduping (aog/594315).
Test: test-art-host
Bug: 63756964
Change-Id: I52c2b89518f4e808594b450a5fcc373ab5a5863b
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index e413718..0f69dba 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -159,4 +159,10 @@
storage->ReleaseMethodInfo(method_info_);
}
+void CompiledMethod::ReleaseVMapTable() {
+ CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage();
+ storage->ReleaseVMapTable(vmap_table_);
+ vmap_table_ = nullptr;
+}
+
} // namespace art
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index acdce26..4e8f3ef 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -168,6 +168,10 @@
ArrayRef<const linker::LinkerPatch> GetPatches() const;
+ // The compiler sometimes unquickens shared code items. In that case, we need to clear the vmap
+ // table to avoid writing the quicken info to the vdex file.
+ void ReleaseVMapTable();
+
private:
static constexpr size_t kIsIntrinsicLsb = kNumberOfCompiledCodePackedBits;
static constexpr size_t kIsIntrinsicSize = 1u;
@@ -186,7 +190,7 @@
// For quick code, method specific information that is not very dedupe friendly (method indices).
const LengthPrefixedArray<uint8_t>* const method_info_;
// For quick code, holds code infos which contain stack maps, inline information, and etc.
- const LengthPrefixedArray<uint8_t>* const vmap_table_;
+ const LengthPrefixedArray<uint8_t>* vmap_table_;
// For quick code, a FDE entry for the debug_frame section.
const LengthPrefixedArray<uint8_t>* const cfi_info_;
// For quick code, linker patches needed by the method.
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 308e75d..28c7fe2 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -28,6 +28,7 @@
#include "compiled_method.h"
#include "dex/dex_file-inl.h"
#include "dex/dex_instruction-inl.h"
+#include "dex_to_dex_decompiler.h"
#include "driver/compiler_driver.h"
#include "driver/dex_compilation_unit.h"
#include "mirror/dex_cache.h"
@@ -44,81 +45,106 @@
// Control check-cast elision.
const bool kEnableCheckCastEllision = true;
-struct QuickenedInfo {
- QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {}
+DexToDexCompiler::DexToDexCompiler(CompilerDriver* driver)
+ : driver_(driver),
+ lock_("Quicken lock", kDexToDexCompilerLock) {
+ DCHECK(driver != nullptr);
+}
- uint32_t dex_pc;
- uint16_t dex_member_index;
-};
+void DexToDexCompiler::ClearState() {
+ MutexLock lock(Thread::Current(), lock_);
+ active_dex_file_ = nullptr;
+ active_bit_vector_ = nullptr;
+ seen_code_items_.clear();
+ should_quicken_.clear();
+ shared_code_items_.clear();
+ blacklisted_code_items_.clear();
+ shared_code_item_quicken_info_.clear();
+}
-class DexCompiler {
- public:
- DexCompiler(art::CompilerDriver& compiler,
- const DexCompilationUnit& unit,
- DexToDexCompilationLevel dex_to_dex_compilation_level)
- : driver_(compiler),
+size_t DexToDexCompiler::NumUniqueCodeItems(Thread* self) const {
+ MutexLock lock(self, lock_);
+ return seen_code_items_.size();
+}
+
+BitVector* DexToDexCompiler::GetOrAddBitVectorForDex(const DexFile* dex_file) {
+ if (active_dex_file_ != dex_file) {
+ active_dex_file_ = dex_file;
+ auto inserted = should_quicken_.emplace(dex_file,
+ BitVector(dex_file->NumMethodIds(),
+ /*expandable*/ false,
+ Allocator::GetMallocAllocator()));
+ active_bit_vector_ = &inserted.first->second;
+ }
+ return active_bit_vector_;
+}
+
+void DexToDexCompiler::MarkForCompilation(Thread* self,
+ const MethodReference& method_ref,
+ const DexFile::CodeItem* code_item) {
+ MutexLock lock(self, lock_);
+ BitVector* const bitmap = GetOrAddBitVectorForDex(method_ref.dex_file);
+ DCHECK(bitmap != nullptr);
+ DCHECK(!bitmap->IsBitSet(method_ref.index));
+ bitmap->SetBit(method_ref.index);
+ // Detect the shared code items.
+ if (!seen_code_items_.insert(code_item).second) {
+ shared_code_items_.insert(code_item);
+ }
+}
+
+DexToDexCompiler::CompilationState::CompilationState(DexToDexCompiler* compiler,
+ const DexCompilationUnit& unit,
+ const CompilationLevel compilation_level,
+ const std::vector<uint8_t>* quicken_data)
+ : compiler_(compiler),
+ driver_(*compiler->GetDriver()),
unit_(unit),
- dex_to_dex_compilation_level_(dex_to_dex_compilation_level) {}
+ compilation_level_(compilation_level),
+ already_quickened_(quicken_data != nullptr),
+ existing_quicken_info_(already_quickened_
+ ? ArrayRef<const uint8_t>(*quicken_data) : ArrayRef<const uint8_t>()) {}
- ~DexCompiler() {}
-
- void Compile();
-
- const std::vector<QuickenedInfo>& GetQuickenedInfo() const {
- return quickened_info_;
+uint16_t DexToDexCompiler::CompilationState::NextIndex() {
+ DCHECK(already_quickened_);
+ if (kIsDebugBuild && quicken_index_ >= existing_quicken_info_.NumIndices()) {
+ for (const DexInstructionPcPair& pair : unit_.GetCodeItemAccessor()) {
+ LOG(ERROR) << pair->DumpString(nullptr);
+ }
+ LOG(FATAL) << "Mismatched number of quicken slots.";
}
+ const uint16_t ret = existing_quicken_info_.GetData(quicken_index_);
+ quicken_index_++;
+ return ret;
+}
- private:
- const DexFile& GetDexFile() const {
- return *unit_.GetDexFile();
+uint16_t DexToDexCompiler::CompilationState::GetIndexForInstruction(const Instruction* inst,
+ uint32_t index) {
+ if (UNLIKELY(already_quickened_)) {
+ return inst->IsQuickened() ? NextIndex() : index;
}
+ DCHECK(!inst->IsQuickened());
+ return index;
+}
- // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where
- // a barrier is required.
- void CompileReturnVoid(Instruction* inst, uint32_t dex_pc);
+bool DexToDexCompiler::ShouldCompileMethod(const MethodReference& ref) {
+ // TODO: It's probably safe to avoid the lock here if the active_dex_file_ matches, since we
+ // only call ShouldCompileMethod on one dex at a time.
+ MutexLock lock(Thread::Current(), lock_);
+ return GetOrAddBitVectorForDex(ref.dex_file)->IsBitSet(ref.index);
+}
- // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In
- // this case, returns the second NOP instruction pointer. Otherwise, returns
- // the given "inst".
- Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc);
-
- // Compiles a field access into a quick field access.
- // The field index is replaced by an offset within an Object where we can read
- // from / write to this field. Therefore, this does not involve any resolution
- // at runtime.
- // Since the field index is encoded with 16 bits, we can replace it only if the
- // field offset can be encoded with 16 bits too.
- void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc,
- Instruction::Code new_opcode, bool is_put);
-
- // Compiles a virtual method invocation into a quick virtual method invocation.
- // The method index is replaced by the vtable index where the corresponding
- // Executable can be found. Therefore, this does not involve any resolution
- // at runtime.
- // Since the method index is encoded with 16 bits, we can replace it only if the
- // vtable index can be encoded with 16 bits too.
- void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc,
- Instruction::Code new_opcode, bool is_range);
-
- CompilerDriver& driver_;
- const DexCompilationUnit& unit_;
- const DexToDexCompilationLevel dex_to_dex_compilation_level_;
-
- // Filled by the compiler when quickening, in order to encode that information
- // in the .oat file. The runtime will use that information to get to the original
- // opcodes.
- std::vector<QuickenedInfo> quickened_info_;
-
- DISALLOW_COPY_AND_ASSIGN(DexCompiler);
-};
-
-void DexCompiler::Compile() {
- DCHECK_EQ(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kOptimize);
- IterationRange<DexInstructionIterator> instructions(unit_.GetCodeItemAccessor().begin(),
- unit_.GetCodeItemAccessor().end());
+std::vector<uint8_t> DexToDexCompiler::CompilationState::Compile() {
+ DCHECK_EQ(compilation_level_, CompilationLevel::kOptimize);
+ const CodeItemDataAccessor& instructions = unit_.GetCodeItemAccessor();
for (DexInstructionIterator it = instructions.begin(); it != instructions.end(); ++it) {
const uint32_t dex_pc = it.DexPc();
Instruction* inst = const_cast<Instruction*>(&it.Inst());
+
+ if (!already_quickened_) {
+ DCHECK(!inst->IsQuickened());
+ }
+
switch (inst->Opcode()) {
case Instruction::RETURN_VOID:
CompileReturnVoid(inst, dex_pc);
@@ -134,84 +160,147 @@
break;
case Instruction::IGET:
+ case Instruction::IGET_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_QUICK, false);
break;
case Instruction::IGET_WIDE:
+ case Instruction::IGET_WIDE_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_WIDE_QUICK, false);
break;
case Instruction::IGET_OBJECT:
+ case Instruction::IGET_OBJECT_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_OBJECT_QUICK, false);
break;
case Instruction::IGET_BOOLEAN:
+ case Instruction::IGET_BOOLEAN_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BOOLEAN_QUICK, false);
break;
case Instruction::IGET_BYTE:
+ case Instruction::IGET_BYTE_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BYTE_QUICK, false);
break;
case Instruction::IGET_CHAR:
+ case Instruction::IGET_CHAR_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_CHAR_QUICK, false);
break;
case Instruction::IGET_SHORT:
+ case Instruction::IGET_SHORT_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_SHORT_QUICK, false);
break;
case Instruction::IPUT:
+ case Instruction::IPUT_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_QUICK, true);
break;
case Instruction::IPUT_BOOLEAN:
+ case Instruction::IPUT_BOOLEAN_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BOOLEAN_QUICK, true);
break;
case Instruction::IPUT_BYTE:
+ case Instruction::IPUT_BYTE_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BYTE_QUICK, true);
break;
case Instruction::IPUT_CHAR:
+ case Instruction::IPUT_CHAR_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_CHAR_QUICK, true);
break;
case Instruction::IPUT_SHORT:
+ case Instruction::IPUT_SHORT_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_SHORT_QUICK, true);
break;
case Instruction::IPUT_WIDE:
+ case Instruction::IPUT_WIDE_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_WIDE_QUICK, true);
break;
case Instruction::IPUT_OBJECT:
+ case Instruction::IPUT_OBJECT_QUICK:
CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_OBJECT_QUICK, true);
break;
case Instruction::INVOKE_VIRTUAL:
+ case Instruction::INVOKE_VIRTUAL_QUICK:
CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_QUICK, false);
break;
case Instruction::INVOKE_VIRTUAL_RANGE:
+ case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_RANGE_QUICK, true);
break;
case Instruction::NOP:
- // We need to differentiate between check cast inserted NOP and normal NOP, put an invalid
- // index in the map for normal nops. This should be rare in real code.
- quickened_info_.push_back(QuickenedInfo(dex_pc, DexFile::kDexNoIndex16));
+ if (already_quickened_) {
+ const uint16_t reference_index = NextIndex();
+ quickened_info_.push_back(QuickenedInfo(dex_pc, reference_index));
+ if (reference_index == DexFile::kDexNoIndex16) {
+ // This means it was a normal nop and not a check-cast.
+ break;
+ }
+ const uint16_t type_index = NextIndex();
+ if (driver_.IsSafeCast(&unit_, dex_pc)) {
+ quickened_info_.push_back(QuickenedInfo(dex_pc, type_index));
+ }
+ ++it;
+ } else {
+ // We need to differentiate between check cast inserted NOP and normal NOP, put an invalid
+ // index in the map for normal nops. This should be rare in real code.
+ quickened_info_.push_back(QuickenedInfo(dex_pc, DexFile::kDexNoIndex16));
+ }
break;
default:
- DCHECK(!inst->IsQuickened());
// Nothing to do.
break;
}
}
+
+ if (already_quickened_) {
+ DCHECK_EQ(quicken_index_, existing_quicken_info_.NumIndices());
+ }
+
+ if (GetQuickenedInfo().empty()) {
+ // No need to create a CompiledMethod if there are no quickened opcodes.
+ return std::vector<uint8_t>();
+ }
+
+ std::vector<uint8_t> quicken_data;
+ if (kIsDebugBuild) {
+ // Double check that the counts line up with the size of the quicken info.
+ size_t quicken_count = 0;
+ for (const DexInstructionPcPair& pair : instructions) {
+ if (QuickenInfoTable::NeedsIndexForInstruction(&pair.Inst())) {
+ ++quicken_count;
+ }
+ }
+ CHECK_EQ(quicken_count, GetQuickenedInfo().size());
+ }
+
+ QuickenInfoTable::Builder builder(&quicken_data, GetQuickenedInfo().size());
+ // Length is encoded by the constructor.
+ for (const CompilationState::QuickenedInfo& info : GetQuickenedInfo()) {
+ // Dex pc is not serialized, only used for checking the instructions. Since we access the
+ // array based on the index of the quickened instruction, the indexes must line up perfectly.
+ // The reader side uses the NeedsIndexForInstruction function too.
+ const Instruction& inst = instructions.InstructionAt(info.dex_pc);
+ CHECK(QuickenInfoTable::NeedsIndexForInstruction(&inst)) << inst.Opcode();
+ builder.AddIndex(info.dex_member_index);
+ }
+ DCHECK(!quicken_data.empty());
+ return quicken_data;
}
-void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) {
+void DexToDexCompiler::CompilationState::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) {
DCHECK_EQ(inst->Opcode(), Instruction::RETURN_VOID);
if (unit_.IsConstructor()) {
// Are we compiling a non clinit constructor which needs a barrier ?
@@ -229,7 +318,8 @@
inst->SetOpcode(Instruction::RETURN_VOID_NO_BARRIER);
}
-Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) {
+Instruction* DexToDexCompiler::CompilationState::CompileCheckCast(Instruction* inst,
+ uint32_t dex_pc) {
if (!kEnableCheckCastEllision) {
return inst;
}
@@ -246,27 +336,30 @@
<< " by replacing it with 2 NOPs at dex pc "
<< StringPrintf("0x%x", dex_pc) << " in method "
<< GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true);
- quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegA_21c()));
- quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegB_21c()));
- // We are modifying 4 consecutive bytes.
- inst->SetOpcode(Instruction::NOP);
- inst->SetVRegA_10x(0u); // keep compliant with verifier.
- // Get to next instruction which is the second half of check-cast and replace
- // it by a NOP.
- inst = const_cast<Instruction*>(inst->Next());
- inst->SetOpcode(Instruction::NOP);
- inst->SetVRegA_10x(0u); // keep compliant with verifier.
+ if (!already_quickened_) {
+ quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegA_21c()));
+ quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegB_21c()));
+
+ // We are modifying 4 consecutive bytes.
+ inst->SetOpcode(Instruction::NOP);
+ inst->SetVRegA_10x(0u); // keep compliant with verifier.
+ // Get to next instruction which is the second half of check-cast and replace
+ // it by a NOP.
+ inst = const_cast<Instruction*>(inst->Next());
+ inst->SetOpcode(Instruction::NOP);
+ inst->SetVRegA_10x(0u); // keep compliant with verifier.
+ }
return inst;
}
-void DexCompiler::CompileInstanceFieldAccess(Instruction* inst,
- uint32_t dex_pc,
- Instruction::Code new_opcode,
- bool is_put) {
+void DexToDexCompiler::CompilationState::CompileInstanceFieldAccess(Instruction* inst,
+ uint32_t dex_pc,
+ Instruction::Code new_opcode,
+ bool is_put) {
if (!kEnableQuickening) {
return;
}
- uint32_t field_idx = inst->VRegC_22c();
+ uint32_t field_idx = GetIndexForInstruction(inst, inst->VRegC_22c());
MemberOffset field_offset(0u);
bool is_volatile;
bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, is_put,
@@ -278,20 +371,29 @@
<< " by field offset " << field_offset.Int32Value()
<< " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method "
<< GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true);
- // We are modifying 4 consecutive bytes.
- inst->SetOpcode(new_opcode);
- // Replace field index by field offset.
- inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value()));
+ if (!already_quickened_) {
+ // We are modifying 4 consecutive bytes.
+ inst->SetOpcode(new_opcode);
+ // Replace field index by field offset.
+ inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value()));
+ }
quickened_info_.push_back(QuickenedInfo(dex_pc, field_idx));
}
}
-void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc,
- Instruction::Code new_opcode, bool is_range) {
+const DexFile& DexToDexCompiler::CompilationState::GetDexFile() const {
+ return *unit_.GetDexFile();
+}
+
+void DexToDexCompiler::CompilationState::CompileInvokeVirtual(Instruction* inst,
+ uint32_t dex_pc,
+ Instruction::Code new_opcode,
+ bool is_range) {
if (!kEnableQuickening) {
return;
}
- uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
+ uint32_t method_idx = GetIndexForInstruction(inst,
+ is_range ? inst->VRegB_3rc() : inst->VRegB_35c());
ScopedObjectAccess soa(Thread::Current());
ClassLinker* class_linker = unit_.GetClassLinker();
@@ -318,19 +420,20 @@
<< " by vtable index " << vtable_idx
<< " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method "
<< GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true);
- // We are modifying 4 consecutive bytes.
- inst->SetOpcode(new_opcode);
- // Replace method index by vtable index.
- if (is_range) {
- inst->SetVRegB_3rc(static_cast<uint16_t>(vtable_idx));
- } else {
- inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx));
+ if (!already_quickened_) {
+ // We are modifying 4 consecutive bytes.
+ inst->SetOpcode(new_opcode);
+ // Replace method index by vtable index.
+ if (is_range) {
+ inst->SetVRegB_3rc(static_cast<uint16_t>(vtable_idx));
+ } else {
+ inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx));
+ }
}
quickened_info_.push_back(QuickenedInfo(dex_pc, method_idx));
}
-CompiledMethod* ArtCompileDEX(
- CompilerDriver* driver,
+CompiledMethod* DexToDexCompiler::CompileMethod(
const DexFile::CodeItem* code_item,
uint32_t access_flags,
InvokeType invoke_type ATTRIBUTE_UNUSED,
@@ -338,69 +441,122 @@
uint32_t method_idx,
Handle<mirror::ClassLoader> class_loader,
const DexFile& dex_file,
- DexToDexCompilationLevel dex_to_dex_compilation_level) {
- DCHECK(driver != nullptr);
- if (dex_to_dex_compilation_level != DexToDexCompilationLevel::kDontDexToDexCompile) {
- ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<1> hs(soa.Self());
- ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
- art::DexCompilationUnit unit(
- class_loader,
- class_linker,
- dex_file,
- code_item,
- class_def_idx,
- method_idx,
- access_flags,
- driver->GetVerifiedMethod(&dex_file, method_idx),
- hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)));
- art::optimizer::DexCompiler dex_compiler(*driver, unit, dex_to_dex_compilation_level);
- dex_compiler.Compile();
- if (dex_compiler.GetQuickenedInfo().empty()) {
- // No need to create a CompiledMethod if there are no quickened opcodes.
+ CompilationLevel compilation_level) {
+ if (compilation_level == CompilationLevel::kDontDexToDexCompile) {
+ return nullptr;
+ }
+
+ ScopedObjectAccess soa(Thread::Current());
+ StackHandleScope<1> hs(soa.Self());
+ ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+ art::DexCompilationUnit unit(
+ class_loader,
+ class_linker,
+ dex_file,
+ code_item,
+ class_def_idx,
+ method_idx,
+ access_flags,
+ driver_->GetVerifiedMethod(&dex_file, method_idx),
+ hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)));
+
+ std::vector<uint8_t> quicken_data;
+ // If the code item is shared with multiple different method ids, make sure that we quicken only
+ // once and verify that all the dequicken maps match.
+ if (UNLIKELY(shared_code_items_.find(code_item) != shared_code_items_.end())) {
+ // For shared code items, use a lock to prevent races.
+ MutexLock mu(soa.Self(), lock_);
+ // Blacklisted means there was a quickening conflict previously, bail early.
+ if (blacklisted_code_items_.find(code_item) != blacklisted_code_items_.end()) {
return nullptr;
}
+ auto existing = shared_code_item_quicken_info_.find(code_item);
+ const bool already_quickened = existing != shared_code_item_quicken_info_.end();
+ {
+ CompilationState state(this,
+ unit,
+ compilation_level,
+ already_quickened ? &existing->second.quicken_data_ : nullptr);
+ quicken_data = state.Compile();
+ }
- // Create a `CompiledMethod`, with the quickened information in the vmap table.
- if (kIsDebugBuild) {
- // Double check that the counts line up with the size of the quicken info.
- size_t quicken_count = 0;
- for (const DexInstructionPcPair& pair : unit.GetCodeItemAccessor()) {
- if (QuickenInfoTable::NeedsIndexForInstruction(&pair.Inst())) {
- ++quicken_count;
+ // Already quickened, check that the data matches what was previously seen.
+ MethodReference method_ref(&dex_file, method_idx);
+ if (already_quickened) {
+ QuickenState* const existing_data = &existing->second;
+ if (existing_data->quicken_data_ != quicken_data) {
+ VLOG(compiler) << "Quicken data mismatch, dequickening method "
+ << dex_file.PrettyMethod(method_idx);
+ // Unquicken using the existing quicken data.
+ optimizer::ArtDecompileDEX(dex_file,
+ *code_item,
+ ArrayRef<const uint8_t>(existing_data->quicken_data_),
+ /* decompile_return_instruction*/ false);
+ // Go clear the vmaps for all the methods that were already quickened to avoid writing them
+ // out during oat writing.
+ for (const MethodReference& ref : existing_data->methods_) {
+ CompiledMethod* method = driver_->GetCompiledMethod(ref);
+ DCHECK(method != nullptr);
+ method->ReleaseVMapTable();
}
+ // Blacklist the method to never attempt to quicken it in the future.
+ blacklisted_code_items_.insert(code_item);
+ shared_code_item_quicken_info_.erase(existing);
+ return nullptr;
}
- CHECK_EQ(quicken_count, dex_compiler.GetQuickenedInfo().size());
+ existing_data->methods_.push_back(method_ref);
+ } else {
+ QuickenState new_state;
+ new_state.methods_.push_back(method_ref);
+ new_state.quicken_data_ = quicken_data;
+ bool inserted = shared_code_item_quicken_info_.emplace(code_item, new_state).second;
+ CHECK(inserted) << "Failed to insert " << dex_file.PrettyMethod(method_idx);
}
- std::vector<uint8_t> quicken_data;
- QuickenInfoTable::Builder builder(&quicken_data, dex_compiler.GetQuickenedInfo().size());
- // Length is encoded by the constructor.
- for (QuickenedInfo info : dex_compiler.GetQuickenedInfo()) {
- // Dex pc is not serialized, only used for checking the instructions. Since we access the
- // array based on the index of the quickened instruction, the indexes must line up perfectly.
- // The reader side uses the NeedsIndexForInstruction function too.
- const Instruction& inst = unit.GetCodeItemAccessor().InstructionAt(info.dex_pc);
- CHECK(QuickenInfoTable::NeedsIndexForInstruction(&inst)) << inst.Opcode();
- builder.AddIndex(info.dex_member_index);
+
+ // As a sanity check, verify that the existing quicken data matches by re-quickening using
+ // the newly produced quicken data.
+ // Note that this needs to be behind the lock for this case since we may unquicken in another
+ // thread.
+ if (kIsDebugBuild) {
+ CompilationState state2(this, unit, compilation_level, &quicken_data);
+ std::vector<uint8_t> new_data = state2.Compile();
+ CHECK(new_data == quicken_data) << "Mismatch producing new quicken data";
}
- InstructionSet instruction_set = driver->GetInstructionSet();
- if (instruction_set == InstructionSet::kThumb2) {
- // Don't use the thumb2 instruction set to avoid the one off code delta.
- instruction_set = InstructionSet::kArm;
+ } else {
+ CompilationState state(this, unit, compilation_level, /*quicken_data*/ nullptr);
+ quicken_data = state.Compile();
+
+ // As a sanity check, verify that the existing quicken data matches by re-quickening using
+ // the newly produced quicken data.
+ if (kIsDebugBuild) {
+ CompilationState state2(this, unit, compilation_level, &quicken_data);
+ std::vector<uint8_t> new_data = state2.Compile();
+ CHECK(new_data == quicken_data) << "Mismatch producing new quicken data";
}
- return CompiledMethod::SwapAllocCompiledMethod(
- driver,
- instruction_set,
- ArrayRef<const uint8_t>(), // no code
- 0,
- 0,
- 0,
- ArrayRef<const uint8_t>(), // method_info
- ArrayRef<const uint8_t>(quicken_data), // vmap_table
- ArrayRef<const uint8_t>(), // cfi data
- ArrayRef<const linker::LinkerPatch>());
}
- return nullptr;
+
+ if (quicken_data.empty()) {
+ return nullptr;
+ }
+
+ // Create a `CompiledMethod`, with the quickened information in the vmap table.
+ InstructionSet instruction_set = driver_->GetInstructionSet();
+ if (instruction_set == InstructionSet::kThumb2) {
+ // Don't use the thumb2 instruction set to avoid the one off code delta.
+ instruction_set = InstructionSet::kArm;
+ }
+ CompiledMethod* ret = CompiledMethod::SwapAllocCompiledMethod(
+ driver_,
+ instruction_set,
+ ArrayRef<const uint8_t>(), // no code
+ 0,
+ 0,
+ 0,
+ ArrayRef<const uint8_t>(), // method_info
+ ArrayRef<const uint8_t>(quicken_data), // vmap_table
+ ArrayRef<const uint8_t>(), // cfi data
+ ArrayRef<const linker::LinkerPatch>());
+ return ret;
}
} // namespace optimizer
diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h
index 80b94d2..abd0481 100644
--- a/compiler/dex/dex_to_dex_compiler.h
+++ b/compiler/dex/dex_to_dex_compiler.h
@@ -17,14 +17,22 @@
#ifndef ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_
#define ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "base/bit_vector.h"
#include "dex/dex_file.h"
#include "handle.h"
#include "invoke_type.h"
+#include "method_reference.h"
+#include "quicken_info.h"
namespace art {
class CompiledMethod;
class CompilerDriver;
+class DexCompilationUnit;
namespace mirror {
class ClassLoader;
@@ -32,21 +40,144 @@
namespace optimizer {
-enum class DexToDexCompilationLevel {
- kDontDexToDexCompile, // Only meaning wrt image time interpretation.
- kOptimize // Perform peep-hole optimizations.
-};
-std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs);
+class DexToDexCompiler {
+ public:
+ enum class CompilationLevel {
+ kDontDexToDexCompile, // Only meaning wrt image time interpretation.
+ kOptimize // Perform peep-hole optimizations.
+ };
-CompiledMethod* ArtCompileDEX(CompilerDriver* driver,
- const DexFile::CodeItem* code_item,
- uint32_t access_flags,
- InvokeType invoke_type,
- uint16_t class_def_idx,
- uint32_t method_idx,
- Handle<mirror::ClassLoader> class_loader,
- const DexFile& dex_file,
- DexToDexCompilationLevel dex_to_dex_compilation_level);
+ explicit DexToDexCompiler(CompilerDriver* driver);
+
+ CompiledMethod* CompileMethod(const DexFile::CodeItem* code_item,
+ uint32_t access_flags,
+ InvokeType invoke_type,
+ uint16_t class_def_idx,
+ uint32_t method_idx,
+ Handle<mirror::ClassLoader> class_loader,
+ const DexFile& dex_file,
+ const CompilationLevel compilation_level) WARN_UNUSED;
+
+ void MarkForCompilation(Thread* self,
+ const MethodReference& method_ref,
+ const DexFile::CodeItem* code_item);
+
+ void ClearState();
+
+ CompilerDriver* GetDriver() {
+ return driver_;
+ }
+
+ bool ShouldCompileMethod(const MethodReference& ref);
+
+ size_t NumUniqueCodeItems(Thread* self) const;
+
+ private:
+ // Holds the state for compiling a single method.
+ struct CompilationState {
+ struct QuickenedInfo {
+ QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {}
+
+ uint32_t dex_pc;
+ uint16_t dex_member_index;
+ };
+
+ CompilationState(DexToDexCompiler* compiler,
+ const DexCompilationUnit& unit,
+ const CompilationLevel compilation_level,
+ const std::vector<uint8_t>* quicken_data);
+
+ const std::vector<QuickenedInfo>& GetQuickenedInfo() const {
+ return quickened_info_;
+ }
+
+ // Returns the quickening info, or an empty array if it was not quickened.
+ // If already_quickened is true, then don't change anything but still return what the quicken
+ // data would have been.
+ std::vector<uint8_t> Compile();
+
+ const DexFile& GetDexFile() const;
+
+ // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where
+ // a barrier is required.
+ void CompileReturnVoid(Instruction* inst, uint32_t dex_pc);
+
+ // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In
+ // this case, returns the second NOP instruction pointer. Otherwise, returns
+ // the given "inst".
+ Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc);
+
+ // Compiles a field access into a quick field access.
+ // The field index is replaced by an offset within an Object where we can read
+ // from / write to this field. Therefore, this does not involve any resolution
+ // at runtime.
+ // Since the field index is encoded with 16 bits, we can replace it only if the
+ // field offset can be encoded with 16 bits too.
+ void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc,
+ Instruction::Code new_opcode, bool is_put);
+
+ // Compiles a virtual method invocation into a quick virtual method invocation.
+ // The method index is replaced by the vtable index where the corresponding
+ // executable can be found. Therefore, this does not involve any resolution
+ // at runtime.
+ // Since the method index is encoded with 16 bits, we can replace it only if the
+ // vtable index can be encoded with 16 bits too.
+ void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc,
+ Instruction::Code new_opcode, bool is_range);
+
+ // Return the next index.
+ uint16_t NextIndex();
+
+ // Returns the dequickened index if an instruction is quickened, otherwise return index.
+ uint16_t GetIndexForInstruction(const Instruction* inst, uint32_t index);
+
+ DexToDexCompiler* const compiler_;
+ CompilerDriver& driver_;
+ const DexCompilationUnit& unit_;
+ const CompilationLevel compilation_level_;
+
+ // Filled by the compiler when quickening, in order to encode that information
+ // in the .oat file. The runtime will use that information to get to the original
+ // opcodes.
+ std::vector<QuickenedInfo> quickened_info_;
+
+ // If the code item was already quickened previously.
+ const bool already_quickened_;
+ const QuickenInfoTable existing_quicken_info_;
+ uint32_t quicken_index_ = 0u;
+
+ DISALLOW_COPY_AND_ASSIGN(CompilationState);
+ };
+
+ struct QuickenState {
+ std::vector<MethodReference> methods_;
+ std::vector<uint8_t> quicken_data_;
+ };
+
+ BitVector* GetOrAddBitVectorForDex(const DexFile* dex_file) REQUIRES(lock_);
+
+ CompilerDriver* const driver_;
+
+ // State for adding methods (should this be in its own class?).
+ const DexFile* active_dex_file_ = nullptr;
+ BitVector* active_bit_vector_ = nullptr;
+
+ // Lock that guards duplicate code items and the bitmap.
+ mutable Mutex lock_;
+ // Record what method references are going to get quickened.
+ std::unordered_map<const DexFile*, BitVector> should_quicken_;
+ // Record what code items are already seen to detect when multiple methods have the same code
+ // item.
+ std::unordered_set<const DexFile::CodeItem*> seen_code_items_ GUARDED_BY(lock_);
+ // Guarded by lock_ during writing, accessed without a lock during quickening.
+ // This is safe because no thread is adding to the shared code items during the quickening phase.
+ std::unordered_set<const DexFile::CodeItem*> shared_code_items_;
+ std::unordered_set<const DexFile::CodeItem*> blacklisted_code_items_ GUARDED_BY(lock_);
+ std::unordered_map<const DexFile::CodeItem*, QuickenState> shared_code_item_quicken_info_
+ GUARDED_BY(lock_);
+};
+
+std::ostream& operator<<(std::ostream& os, const DexToDexCompiler::CompilationLevel& rhs);
} // namespace optimizer
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 8698659..6c5cc50 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -255,24 +255,6 @@
DISALLOW_COPY_AND_ASSIGN(AOTCompilationStats);
};
-class CompilerDriver::DexFileMethodSet {
- public:
- explicit DexFileMethodSet(const DexFile& dex_file)
- : dex_file_(dex_file),
- method_indexes_(dex_file.NumMethodIds(), false, Allocator::GetMallocAllocator()) {
- }
- DexFileMethodSet(DexFileMethodSet&& other) = default;
-
- const DexFile& GetDexFile() const { return dex_file_; }
-
- BitVector& GetMethodIndexes() { return method_indexes_; }
- const BitVector& GetMethodIndexes() const { return method_indexes_; }
-
- private:
- const DexFile& dex_file_;
- BitVector method_indexes_;
-};
-
CompilerDriver::CompilerDriver(
const CompilerOptions* compiler_options,
VerificationResults* verification_results,
@@ -306,9 +288,8 @@
compiled_method_storage_(swap_fd),
profile_compilation_info_(profile_compilation_info),
max_arena_alloc_(0),
- dex_to_dex_references_lock_("dex-to-dex references lock"),
- dex_to_dex_references_(),
- current_dex_to_dex_methods_(nullptr) {
+ compiling_dex_to_dex_(false),
+ dex_to_dex_compiler_(this) {
DCHECK(compiler_options_ != nullptr);
compiler_->Init();
@@ -398,7 +379,7 @@
FreeThreadPools();
}
-static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
+static optimizer::DexToDexCompiler::CompilationLevel GetDexToDexCompilationLevel(
Thread* self, const CompilerDriver& driver, Handle<mirror::ClassLoader> class_loader,
const DexFile& dex_file, const DexFile::ClassDef& class_def)
REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -410,7 +391,7 @@
if (klass == nullptr) {
CHECK(self->IsExceptionPending());
self->ClearException();
- return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
+ return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile;
}
// DexToDex at the kOptimize level may introduce quickened opcodes, which replace symbolic
// references with actual offsets. We cannot re-verify such instructions.
@@ -418,22 +399,23 @@
// We store the verification information in the class status in the oat file, which the linker
// can validate (checksums) and use to skip load-time verification. It is thus safe to
// optimize when a class has been fully verified before.
- optimizer::DexToDexCompilationLevel max_level = optimizer::DexToDexCompilationLevel::kOptimize;
+ optimizer::DexToDexCompiler::CompilationLevel max_level =
+ optimizer::DexToDexCompiler::CompilationLevel::kOptimize;
if (driver.GetCompilerOptions().GetDebuggable()) {
// We are debuggable so definitions of classes might be changed. We don't want to do any
// optimizations that could break that.
- max_level = optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
+ max_level = optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile;
}
if (klass->IsVerified()) {
// Class is verified so we can enable DEX-to-DEX compilation for performance.
return max_level;
} else {
// Class verification has failed: do not run DEX-to-DEX optimizations.
- return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
+ return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile;
}
}
-static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
+static optimizer::DexToDexCompiler::CompilationLevel GetDexToDexCompilationLevel(
Thread* self,
const CompilerDriver& driver,
jobject jclass_loader,
@@ -470,7 +452,7 @@
uint32_t method_idx,
Handle<mirror::ClassLoader> class_loader,
const DexFile& dex_file,
- optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level,
+ optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level,
bool compilation_enabled,
Handle<mirror::DexCache> dex_cache) {
DCHECK(driver != nullptr);
@@ -478,18 +460,18 @@
uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0;
MethodReference method_ref(&dex_file, method_idx);
- if (driver->GetCurrentDexToDexMethods() != nullptr) {
+ if (driver->GetCompilingDexToDex()) {
+ optimizer::DexToDexCompiler* const compiler = &driver->GetDexToDexCompiler();
// This is the second pass when we dex-to-dex compile previously marked methods.
// TODO: Refactor the compilation to avoid having to distinguish the two passes
// here. That should be done on a higher level. http://b/29089975
- if (driver->GetCurrentDexToDexMethods()->IsBitSet(method_idx)) {
+ if (compiler->ShouldCompileMethod(method_ref)) {
VerificationResults* results = driver->GetVerificationResults();
DCHECK(results != nullptr);
const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref);
// Do not optimize if a VerifiedMethod is missing. SafeCast elision,
// for example, relies on it.
- compiled_method = optimizer::ArtCompileDEX(
- driver,
+ compiled_method = compiler->CompileMethod(
code_item,
access_flags,
invoke_type,
@@ -499,7 +481,7 @@
dex_file,
(verified_method != nullptr)
? dex_to_dex_compilation_level
- : optimizer::DexToDexCompilationLevel::kDontDexToDexCompile);
+ : optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile);
}
} else if ((access_flags & kAccNative) != 0) {
// Are we extracting only and have support for generic JNI down calls?
@@ -524,7 +506,7 @@
bool compile = compilation_enabled &&
// Basic checks, e.g., not <clinit>.
results->IsCandidateForCompilation(method_ref, access_flags) &&
- // Did not fail to create VerifiedMethod metadata.
+ // Did not fail to create VerifiedMethod metadata.
verified_method != nullptr &&
// Do not have failures that should punt to the interpreter.
!verified_method->HasRuntimeThrow() &&
@@ -546,10 +528,12 @@
dex_cache);
}
if (compiled_method == nullptr &&
- dex_to_dex_compilation_level != optimizer::DexToDexCompilationLevel::kDontDexToDexCompile) {
+ dex_to_dex_compilation_level !=
+ optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile) {
DCHECK(!Runtime::Current()->UseJitCompilation());
+ DCHECK(!driver->GetCompilingDexToDex());
// TODO: add a command-line option to disable DEX-to-DEX compilation ?
- driver->MarkForDexToDexCompilation(self, method_ref);
+ driver->GetDexToDexCompiler().MarkForCompilation(self, method_ref, code_item);
}
}
if (kTimeCompileMethod) {
@@ -616,14 +600,14 @@
PreCompile(jclass_loader, dex_files, timings);
// Can we run DEX-to-DEX compiler on this class ?
- optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
+ optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level =
GetDexToDexCompilationLevel(self,
*this,
jclass_loader,
*dex_file,
dex_file->GetClassDef(class_def_idx));
- DCHECK(current_dex_to_dex_methods_ == nullptr);
+ DCHECK(!compiling_dex_to_dex_);
CompileMethod(self,
this,
code_item,
@@ -637,19 +621,10 @@
true,
dex_cache);
- ArrayRef<DexFileMethodSet> dex_to_dex_references;
- {
- // From this point on, we shall not modify dex_to_dex_references_, so
- // just grab a reference to it that we use without holding the mutex.
- MutexLock lock(Thread::Current(), dex_to_dex_references_lock_);
- dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_);
- }
- if (!dex_to_dex_references.empty()) {
- DCHECK_EQ(dex_to_dex_references.size(), 1u);
- DCHECK(&dex_to_dex_references[0].GetDexFile() == dex_file);
- current_dex_to_dex_methods_ = &dex_to_dex_references.front().GetMethodIndexes();
- DCHECK(current_dex_to_dex_methods_->IsBitSet(method_idx));
- DCHECK_EQ(current_dex_to_dex_methods_->NumSetBits(), 1u);
+ const size_t num_methods = dex_to_dex_compiler_.NumUniqueCodeItems(self);
+ if (num_methods != 0) {
+ DCHECK_EQ(num_methods, 1u);
+ compiling_dex_to_dex_ = true;
CompileMethod(self,
this,
code_item,
@@ -662,7 +637,8 @@
dex_to_dex_compilation_level,
true,
dex_cache);
- current_dex_to_dex_methods_ = nullptr;
+ compiling_dex_to_dex_ = false;
+ dex_to_dex_compiler_.ClearState();
}
FreeThreadPools();
@@ -1280,17 +1256,6 @@
return IsImageClass(descriptor);
}
-void CompilerDriver::MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref) {
- MutexLock lock(self, dex_to_dex_references_lock_);
- // Since we're compiling one dex file at a time, we need to look for the
- // current dex file entry only at the end of dex_to_dex_references_.
- if (dex_to_dex_references_.empty() ||
- &dex_to_dex_references_.back().GetDexFile() != method_ref.dex_file) {
- dex_to_dex_references_.emplace_back(*method_ref.dex_file);
- }
- dex_to_dex_references_.back().GetMethodIndexes().SetBit(method_ref.index);
-}
-
bool CompilerDriver::CanAccessTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class,
ObjPtr<mirror::Class> resolved_class) {
if (resolved_class == nullptr) {
@@ -2612,14 +2577,8 @@
: profile_compilation_info_->DumpInfo(&dex_files));
}
- current_dex_to_dex_methods_ = nullptr;
- Thread* const self = Thread::Current();
- {
- // Clear in case we aren't the first call to Compile.
- MutexLock mu(self, dex_to_dex_references_lock_);
- dex_to_dex_references_.clear();
- }
-
+ dex_to_dex_compiler_.ClearState();
+ compiling_dex_to_dex_ = false;
for (const DexFile* dex_file : dex_files) {
CHECK(dex_file != nullptr);
CompileDexFile(class_loader,
@@ -2634,23 +2593,21 @@
Runtime::Current()->ReclaimArenaPoolMemory();
}
- ArrayRef<DexFileMethodSet> dex_to_dex_references;
- {
- // From this point on, we shall not modify dex_to_dex_references_, so
- // just grab a reference to it that we use without holding the mutex.
- MutexLock lock(self, dex_to_dex_references_lock_);
- dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_);
+ if (dex_to_dex_compiler_.NumUniqueCodeItems(Thread::Current()) > 0u) {
+ compiling_dex_to_dex_ = true;
+ // TODO: Avoid visiting all of the dex files; it's probably rare that only one of them
+ // would have quickened methods though.
+ for (const DexFile* dex_file : dex_files) {
+ CompileDexFile(class_loader,
+ *dex_file,
+ dex_files,
+ parallel_thread_pool_.get(),
+ parallel_thread_count_,
+ timings);
+ }
+ dex_to_dex_compiler_.ClearState();
+ compiling_dex_to_dex_ = false;
}
- for (const auto& method_set : dex_to_dex_references) {
- current_dex_to_dex_methods_ = &method_set.GetMethodIndexes();
- CompileDexFile(class_loader,
- method_set.GetDexFile(),
- dex_files,
- parallel_thread_pool_.get(),
- parallel_thread_count_,
- timings);
- }
- current_dex_to_dex_methods_ = nullptr;
VLOG(compiler) << "Compile: " << GetMemoryUsageString(false);
}
@@ -2701,7 +2658,7 @@
CompilerDriver* const driver = manager_->GetCompiler();
// Can we run DEX-to-DEX compiler on this class ?
- optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
+ optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level =
GetDexToDexCompilationLevel(soa.Self(), *driver, jclass_loader, dex_file, class_def);
ClassDataItemIterator it(dex_file, class_data);
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index ef16212..87a8a18 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -35,6 +35,7 @@
#include "compiler.h"
#include "dex/dex_file.h"
#include "dex/dex_file_types.h"
+#include "dex/dex_to_dex_compiler.h"
#include "driver/compiled_method_storage.h"
#include "jit/profile_compilation_info.h"
#include "method_reference.h"
@@ -120,12 +121,11 @@
void CompileAll(jobject class_loader,
const std::vector<const DexFile*>& dex_files,
TimingLogger* timings)
- REQUIRES(!Locks::mutator_lock_, !dex_to_dex_references_lock_);
+ REQUIRES(!Locks::mutator_lock_);
// Compile a single Method.
void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings)
- REQUIRES_SHARED(Locks::mutator_lock_)
- REQUIRES(!dex_to_dex_references_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
VerificationResults* GetVerificationResults() const;
@@ -362,13 +362,6 @@
return true;
}
- void MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref)
- REQUIRES(!dex_to_dex_references_lock_);
-
- const BitVector* GetCurrentDexToDexMethods() const {
- return current_dex_to_dex_methods_;
- }
-
const ProfileCompilationInfo* GetProfileCompilationInfo() const {
return profile_compilation_info_;
}
@@ -381,6 +374,14 @@
|| android::base::EndsWith(boot_image_filename, "core-optimizing.art");
}
+ bool GetCompilingDexToDex() const {
+ return compiling_dex_to_dex_;
+ }
+
+ optimizer::DexToDexCompiler& GetDexToDexCompiler() {
+ return dex_to_dex_compiler_;
+ }
+
private:
void PreCompile(jobject class_loader,
const std::vector<const DexFile*>& dex_files,
@@ -447,7 +448,7 @@
void Compile(jobject class_loader,
const std::vector<const DexFile*>& dex_files,
- TimingLogger* timings) REQUIRES(!dex_to_dex_references_lock_);
+ TimingLogger* timings);
void CompileDexFile(jobject class_loader,
const DexFile& dex_file,
const std::vector<const DexFile*>& dex_files,
@@ -539,14 +540,9 @@
size_t max_arena_alloc_;
- // Data for delaying dex-to-dex compilation.
- Mutex dex_to_dex_references_lock_;
- // In the first phase, dex_to_dex_references_ collects methods for dex-to-dex compilation.
- class DexFileMethodSet;
- std::vector<DexFileMethodSet> dex_to_dex_references_ GUARDED_BY(dex_to_dex_references_lock_);
- // In the second phase, current_dex_to_dex_methods_ points to the BitVector with method
- // indexes for dex-to-dex compilation in the current dex file.
- const BitVector* current_dex_to_dex_methods_;
+ // Compiler for dex to dex (quickening).
+ bool compiling_dex_to_dex_;
+ optimizer::DexToDexCompiler dex_to_dex_compiler_;
friend class CompileClassVisitor;
friend class DexToDexDecompilerTest;
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 7077298..d541b79 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -101,6 +101,7 @@
kAllocatedThreadIdsLock,
kMonitorPoolLock,
kClassLinkerClassesLock, // TODO rename.
+ kDexToDexCompilerLock,
kJitCodeCacheLock,
kCHALock,
kSubtypeCheckLock,
diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc
index 118cffe..cab91df 100644
--- a/runtime/vdex_file.cc
+++ b/runtime/vdex_file.cc
@@ -19,6 +19,7 @@
#include <sys/mman.h> // For the PROT_* and MAP_* constants.
#include <memory>
+#include <unordered_set>
#include <android-base/logging.h>
@@ -265,6 +266,8 @@
// RETURN_VOID_NO_BARRIER instructions to RETURN_VOID instructions.
return;
}
+ // Make sure to not unquicken the same code item multiple times.
+ std::unordered_set<const DexFile::CodeItem*> unquickened_code_item;
for (uint32_t i = 0; i < target_dex_file.NumClassDefs(); ++i) {
const DexFile::ClassDef& class_def = target_dex_file.GetClassDef(i);
const uint8_t* class_data = target_dex_file.GetClassData(class_def);
@@ -274,6 +277,10 @@
class_it.Next()) {
if (class_it.IsAtMethod() && class_it.GetMethodCodeItem() != nullptr) {
const DexFile::CodeItem* code_item = class_it.GetMethodCodeItem();
+ if (!unquickened_code_item.emplace(code_item).second) {
+ // Already unquickened this code item, do not do it again.
+ continue;
+ }
ArrayRef<const uint8_t> quicken_data;
if (!quickening_info.empty()) {
const uint32_t quickening_offset = GetQuickeningInfoOffset(