35 files changed, 1279 insertions, 585 deletions
diff --git a/Android.mk b/Android.mk
index 0a90a0bb24..361ceecc2f 100644
--- a/Android.mk
+++ b/Android.mk
@@ -447,6 +447,19 @@ LOCAL_REQUIRED_MODULES := libopenjdkd
 include $(BUILD_PHONY_PACKAGE)
 endif
 
+# Create dummy hidden API lists which are normally generated by the framework
+# but which we do not have in the master-art manifest.
+# We need to execute this now to ensure Makefile rules depending on these files can
+# be constructed.
+define build-art-hiddenapi
+$(shell if [ ! -d frameworks/base ]; then \
+  mkdir -p ${TARGET_OUT_COMMON_INTERMEDIATES}/PACKAGING; \
+	touch ${TARGET_OUT_COMMON_INTERMEDIATES}/PACKAGING/hiddenapi-{blacklist,dark-greylist,light-greylist}.txt; \
+  fi;)
+endef
+
+$(eval $(call build-art-hiddenapi))
+
 ########################################################################
 # "m build-art" for quick minimal build
 .PHONY: build-art
@@ -460,6 +473,7 @@ build-art-target: $(TARGET_OUT_EXECUTABLES)/art $(ART_TARGET_DEPENDENCIES) $(TAR
 
 ########################################################################
 # Phony target for only building what go/lem requires for pushing ART on /data.
+
 .PHONY: build-art-target-golem
 # Also include libartbenchmark, we always include it when running golem.
 # libstdc++ is needed when building for ART_TARGET_LINUX.
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 0f69dbab94..e41371855d 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -159,10 +159,4 @@ CompiledMethod::~CompiledMethod() {
   storage->ReleaseMethodInfo(method_info_);
 }
 
-void CompiledMethod::ReleaseVMapTable() {
-  CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage();
-  storage->ReleaseVMapTable(vmap_table_);
-  vmap_table_ = nullptr;
-}
-
 }  // namespace art
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 4e8f3efe5a..acdce260e5 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -168,10 +168,6 @@ class CompiledMethod FINAL : public CompiledCode {
 
   ArrayRef<const linker::LinkerPatch> GetPatches() const;
 
-  // The compiler sometimes unquickens shared code items. In that case, we need to clear the vmap
-  // table to avoid writing the quicken info to the vdex file.
-  void ReleaseVMapTable();
-
  private:
   static constexpr size_t kIsIntrinsicLsb = kNumberOfCompiledCodePackedBits;
   static constexpr size_t kIsIntrinsicSize = 1u;
@@ -190,7 +186,7 @@ class CompiledMethod FINAL : public CompiledCode {
   // For quick code, method specific information that is not very dedupe friendly (method indices).
   const LengthPrefixedArray<uint8_t>* const method_info_;
   // For quick code, holds code infos which contain stack maps, inline information, and etc.
-  const LengthPrefixedArray<uint8_t>* vmap_table_;
+  const LengthPrefixedArray<uint8_t>* const vmap_table_;
   // For quick code, a FDE entry for the debug_frame section.
   const LengthPrefixedArray<uint8_t>* const cfi_info_;
   // For quick code, linker patches needed by the method.
diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h
index 9c9e8b35b8..4b19547d28 100644
--- a/compiler/debug/elf_symtab_writer.h
+++ b/compiler/debug/elf_symtab_writer.h
@@ -101,7 +101,7 @@ static void WriteDebugSymbols(linker::ElfBuilder<ElfTypes>* builder,
       }
     }
   }
-  // Add symbols for interpreted methods (with address range of the method's bytecode).
+  // Add symbols for dex files.
   if (!debug_info.dex_files.empty() && builder->GetDex()->Exists()) {
     auto dex = builder->GetDex();
     for (auto it : debug_info.dex_files) {
@@ -109,33 +109,6 @@ static void WriteDebugSymbols(linker::ElfBuilder<ElfTypes>* builder,
       const DexFile* dex_file = it.second;
       typename ElfTypes::Word dex_name = strtab->Write(kDexFileSymbolName);
       symtab->Add(dex_name, dex, dex_address, dex_file->Size(), STB_GLOBAL, STT_FUNC);
-      if (mini_debug_info) {
-        continue;  // Don't add interpreter method names to mini-debug-info for now.
-      }
-      for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
-        const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
-        const uint8_t* class_data = dex_file->GetClassData(class_def);
-        if (class_data == nullptr) {
-          continue;
-        }
-        for (ClassDataItemIterator item(*dex_file, class_data); item.HasNext(); item.Next()) {
-          if (!item.IsAtMethod()) {
-            continue;
-          }
-          const DexFile::CodeItem* code_item = item.GetMethodCodeItem();
-          if (code_item == nullptr) {
-            continue;
-          }
-          CodeItemInstructionAccessor code(*dex_file, code_item);
-          DCHECK(code.HasCodeItem());
-          std::string name = dex_file->PrettyMethod(item.GetMemberIndex(), !mini_debug_info);
-          size_t name_offset = strtab->Write(name);
-          uint64_t offset = reinterpret_cast<const uint8_t*>(code.Insns()) - dex_file->Begin();
-          uint64_t address = dex_address + offset;
-          size_t size = code.InsnsSizeInCodeUnits() * sizeof(uint16_t);
-          symtab->Add(name_offset, dex, address, size, STB_GLOBAL, STT_FUNC);
-        }
-      }
     }
   }
   strtab->End();
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 28c7fe2c34..9f0aaa4e10 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -45,6 +45,85 @@ const bool kEnableQuickening = true;
 // Control check-cast elision.
 const bool kEnableCheckCastEllision = true;
 
+// Holds the state for compiling a single method.
+struct DexToDexCompiler::CompilationState {
+  struct QuickenedInfo {
+    QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {}
+
+    uint32_t dex_pc;
+    uint16_t dex_member_index;
+  };
+
+  CompilationState(DexToDexCompiler* compiler,
+                   const DexCompilationUnit& unit,
+                   const CompilationLevel compilation_level,
+                   const std::vector<uint8_t>* quicken_data);
+
+  const std::vector<QuickenedInfo>& GetQuickenedInfo() const {
+    return quickened_info_;
+  }
+
+  // Returns the quickening info, or an empty array if it was not quickened.
+  // If already_quickened is true, then don't change anything but still return what the quicken
+  // data would have been.
+  std::vector<uint8_t> Compile();
+
+  const DexFile& GetDexFile() const;
+
+  // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where
+  // a barrier is required.
+  void CompileReturnVoid(Instruction* inst, uint32_t dex_pc);
+
+  // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In
+  // this case, returns the second NOP instruction pointer. Otherwise, returns
+  // the given "inst".
+  Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc);
+
+  // Compiles a field access into a quick field access.
+  // The field index is replaced by an offset within an Object where we can read
+  // from / write to this field. Therefore, this does not involve any resolution
+  // at runtime.
+  // Since the field index is encoded with 16 bits, we can replace it only if the
+  // field offset can be encoded with 16 bits too.
+  void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc,
+                                  Instruction::Code new_opcode, bool is_put);
+
+  // Compiles a virtual method invocation into a quick virtual method invocation.
+  // The method index is replaced by the vtable index where the corresponding
+  // executable can be found. Therefore, this does not involve any resolution
+  // at runtime.
+  // Since the method index is encoded with 16 bits, we can replace it only if the
+  // vtable index can be encoded with 16 bits too.
+  void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc,
+                            Instruction::Code new_opcode, bool is_range);
+
+  // Return the next index.
+  uint16_t NextIndex();
+
+  // Returns the dequickened index if an instruction is quickened, otherwise return index.
+  uint16_t GetIndexForInstruction(const Instruction* inst, uint32_t index);
+
+  DexToDexCompiler* const compiler_;
+  CompilerDriver& driver_;
+  const DexCompilationUnit& unit_;
+  const CompilationLevel compilation_level_;
+
+  // Filled by the compiler when quickening, in order to encode that information
+  // in the .oat file. The runtime will use that information to get to the original
+  // opcodes.
+  std::vector<QuickenedInfo> quickened_info_;
+
+  // True if we optimized a return void to a return void no barrier.
+  bool optimized_return_void_ = false;
+
+  // If the code item was already quickened previously.
+  const bool already_quickened_;
+  const QuickenInfoTable existing_quicken_info_;
+  uint32_t quicken_index_ = 0u;
+
+  DISALLOW_COPY_AND_ASSIGN(CompilationState);
+};
+
 DexToDexCompiler::DexToDexCompiler(CompilerDriver* driver)
     : driver_(driver),
       lock_("Quicken lock", kDexToDexCompilerLock) {
@@ -55,16 +134,13 @@ void DexToDexCompiler::ClearState() {
   MutexLock lock(Thread::Current(), lock_);
   active_dex_file_ = nullptr;
   active_bit_vector_ = nullptr;
-  seen_code_items_.clear();
   should_quicken_.clear();
-  shared_code_items_.clear();
-  blacklisted_code_items_.clear();
   shared_code_item_quicken_info_.clear();
 }
 
-size_t DexToDexCompiler::NumUniqueCodeItems(Thread* self) const {
+size_t DexToDexCompiler::NumCodeItemsToQuicken(Thread* self) const {
   MutexLock lock(self, lock_);
-  return seen_code_items_.size();
+  return num_code_items_;
 }
 
 BitVector* DexToDexCompiler::GetOrAddBitVectorForDex(const DexFile* dex_file) {
@@ -80,17 +156,13 @@ BitVector* DexToDexCompiler::GetOrAddBitVectorForDex(const DexFile* dex_file) {
 }
 
 void DexToDexCompiler::MarkForCompilation(Thread* self,
-                                          const MethodReference& method_ref,
-                                          const DexFile::CodeItem* code_item) {
+                                          const MethodReference& method_ref) {
   MutexLock lock(self, lock_);
   BitVector* const bitmap = GetOrAddBitVectorForDex(method_ref.dex_file);
   DCHECK(bitmap != nullptr);
   DCHECK(!bitmap->IsBitSet(method_ref.index));
   bitmap->SetBit(method_ref.index);
-  // Detect the shared code items.
-  if (!seen_code_items_.insert(code_item).second) {
-    shared_code_items_.insert(code_item);
-  }
+  ++num_code_items_;
 }
 
 DexToDexCompiler::CompilationState::CompilationState(DexToDexCompiler* compiler,
@@ -316,6 +388,7 @@ void DexToDexCompiler::CompilationState::CompileReturnVoid(Instruction* inst, ui
                  << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method "
                  << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true);
   inst->SetOpcode(Instruction::RETURN_VOID_NO_BARRIER);
+  optimized_return_void_ = true;
 }
 
 Instruction* DexToDexCompiler::CompilationState::CompileCheckCast(Instruction* inst,
@@ -464,51 +537,48 @@ CompiledMethod* DexToDexCompiler::CompileMethod(
   // If the code item is shared with multiple different method ids, make sure that we quicken only
   // once and verify that all the dequicken maps match.
   if (UNLIKELY(shared_code_items_.find(code_item) != shared_code_items_.end())) {
-    // For shared code items, use a lock to prevent races.
-    MutexLock mu(soa.Self(), lock_);
-    // Blacklisted means there was a quickening conflict previously, bail early.
-    if (blacklisted_code_items_.find(code_item) != blacklisted_code_items_.end()) {
+    // Avoid quickening the shared code items for now because the existing conflict detection logic
+    // does not currently handle cases where the code item is quickened in one place but
+    // compiled in another.
+    static constexpr bool kAvoidQuickeningSharedCodeItems = true;
+    if (kAvoidQuickeningSharedCodeItems) {
       return nullptr;
     }
+    // For shared code items, use a lock to prevent races.
+    MutexLock mu(soa.Self(), lock_);
     auto existing = shared_code_item_quicken_info_.find(code_item);
-    const bool already_quickened = existing != shared_code_item_quicken_info_.end();
+    QuickenState* existing_data = nullptr;
+    std::vector<uint8_t>* existing_quicken_data = nullptr;
+    if (existing != shared_code_item_quicken_info_.end()) {
+      existing_data = &existing->second;
+      if (existing_data->conflict_) {
+        return nullptr;
+      }
+      existing_quicken_data = &existing_data->quicken_data_;
+    }
+    bool optimized_return_void;
     {
-      CompilationState state(this,
-                             unit,
-                             compilation_level,
-                             already_quickened ? &existing->second.quicken_data_ : nullptr);
+      CompilationState state(this, unit, compilation_level, existing_quicken_data);
       quicken_data = state.Compile();
+      optimized_return_void = state.optimized_return_void_;
     }
 
     // Already quickened, check that the data matches what was previously seen.
     MethodReference method_ref(&dex_file, method_idx);
-    if (already_quickened) {
-      QuickenState* const existing_data = &existing->second;
-      if (existing_data->quicken_data_ != quicken_data) {
-        VLOG(compiler) << "Quicken data mismatch, dequickening method "
+    if (existing_data != nullptr) {
+      if (*existing_quicken_data != quicken_data ||
+          existing_data->optimized_return_void_ != optimized_return_void) {
+        VLOG(compiler) << "Quicken data mismatch, for method "
                        << dex_file.PrettyMethod(method_idx);
-        // Unquicken using the existing quicken data.
-        optimizer::ArtDecompileDEX(dex_file,
-                                   *code_item,
-                                   ArrayRef<const uint8_t>(existing_data->quicken_data_),
-                                   /* decompile_return_instruction*/ false);
-        // Go clear the vmaps for all the methods that were already quickened to avoid writing them
-        // out during oat writing.
-        for (const MethodReference& ref : existing_data->methods_) {
-          CompiledMethod* method = driver_->GetCompiledMethod(ref);
-          DCHECK(method != nullptr);
-          method->ReleaseVMapTable();
-        }
-        // Blacklist the method to never attempt to quicken it in the future.
-        blacklisted_code_items_.insert(code_item);
-        shared_code_item_quicken_info_.erase(existing);
-        return nullptr;
+        // Mark the method as a conflict to never attempt to quicken it in the future.
+        existing_data->conflict_ = true;
       }
       existing_data->methods_.push_back(method_ref);
     } else {
       QuickenState new_state;
       new_state.methods_.push_back(method_ref);
       new_state.quicken_data_ = quicken_data;
+      new_state.optimized_return_void_ = optimized_return_void;
       bool inserted = shared_code_item_quicken_info_.emplace(code_item, new_state).second;
       CHECK(inserted) << "Failed to insert " << dex_file.PrettyMethod(method_idx);
     }
@@ -556,9 +626,65 @@ CompiledMethod* DexToDexCompiler::CompileMethod(
       ArrayRef<const uint8_t>(quicken_data),       // vmap_table
       ArrayRef<const uint8_t>(),                   // cfi data
       ArrayRef<const linker::LinkerPatch>());
+  DCHECK(ret != nullptr);
   return ret;
 }
 
+void DexToDexCompiler::SetDexFiles(const std::vector<const DexFile*>& dex_files) {
+  // Record what code items are already seen to detect when multiple methods have the same code
+  // item.
+  std::unordered_set<const DexFile::CodeItem*> seen_code_items;
+  for (const DexFile* dex_file : dex_files) {
+    for (size_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+      const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+      const uint8_t* class_data = dex_file->GetClassData(class_def);
+      if (class_data == nullptr) {
+        continue;
+      }
+      ClassDataItemIterator it(*dex_file, class_data);
+      it.SkipAllFields();
+      for (; it.HasNextMethod(); it.Next()) {
+        const DexFile::CodeItem* code_item = it.GetMethodCodeItem();
+        // Detect the shared code items.
+        if (!seen_code_items.insert(code_item).second) {
+          shared_code_items_.insert(code_item);
+        }
+      }
+    }
+  }
+  VLOG(compiler) << "Shared code items " << shared_code_items_.size();
+}
+
+void DexToDexCompiler::UnquickenConflictingMethods() {
+  MutexLock mu(Thread::Current(), lock_);
+  size_t unquicken_count = 0;
+  for (const auto& pair : shared_code_item_quicken_info_) {
+    const DexFile::CodeItem* code_item = pair.first;
+    const QuickenState& state = pair.second;
+    CHECK_GE(state.methods_.size(), 1u);
+    if (state.conflict_) {
+      // Unquicken using the existing quicken data.
+      // TODO: Do we really need to pass a dex file in?
+      optimizer::ArtDecompileDEX(*state.methods_[0].dex_file,
+                                 *code_item,
+                                 ArrayRef<const uint8_t>(state.quicken_data_),
+                                 /* decompile_return_instruction*/ true);
+      ++unquicken_count;
+      // Go clear the vmaps for all the methods that were already quickened to avoid writing them
+      // out during oat writing.
+      for (const MethodReference& ref : state.methods_) {
+        CompiledMethod* method = driver_->RemoveCompiledMethod(ref);
+        if (method != nullptr) {
+          // There is up to one compiled method for each method ref. Releasing it leaves the
+          // deduped data intact, this means its safe to do even when other threads might be
+          // compiling.
+          CompiledMethod::ReleaseSwapAllocatedCompiledMethod(driver_, method);
+        }
+      }
+    }
+  }
+}
+
 }  // namespace optimizer
 
 }  // namespace art
diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h
index 2105a9ded4..7df09f140c 100644
--- a/compiler/dex/dex_to_dex_compiler.h
+++ b/compiler/dex/dex_to_dex_compiler.h
@@ -59,99 +59,35 @@ class DexToDexCompiler {
                                 const CompilationLevel compilation_level) WARN_UNUSED;
 
   void MarkForCompilation(Thread* self,
-                          const MethodReference& method_ref,
-                          const DexFile::CodeItem* code_item);
+                          const MethodReference& method_ref);
 
   void ClearState();
 
+  // Unquicken all methods that have conflicting quicken info. This is not done during the
+  // quickening process to avoid race conditions.
+  void UnquickenConflictingMethods();
+
   CompilerDriver* GetDriver() {
     return driver_;
   }
 
   bool ShouldCompileMethod(const MethodReference& ref);
 
-  size_t NumUniqueCodeItems(Thread* self) const;
+  // Return the number of code items to quicken.
+  size_t NumCodeItemsToQuicken(Thread* self) const;
+
+  void SetDexFiles(const std::vector<const DexFile*>& dex_files);
 
  private:
   // Holds the state for compiling a single method.
-  struct CompilationState {
-    struct QuickenedInfo {
-      QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {}
-
-      uint32_t dex_pc;
-      uint16_t dex_member_index;
-    };
-
-    CompilationState(DexToDexCompiler* compiler,
-                     const DexCompilationUnit& unit,
-                     const CompilationLevel compilation_level,
-                     const std::vector<uint8_t>* quicken_data);
-
-    const std::vector<QuickenedInfo>& GetQuickenedInfo() const {
-      return quickened_info_;
-    }
-
-    // Returns the quickening info, or an empty array if it was not quickened.
-    // If already_quickened is true, then don't change anything but still return what the quicken
-    // data would have been.
-    std::vector<uint8_t> Compile();
-
-    const DexFile& GetDexFile() const;
-
-    // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where
-    // a barrier is required.
-    void CompileReturnVoid(Instruction* inst, uint32_t dex_pc);
-
-    // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In
-    // this case, returns the second NOP instruction pointer. Otherwise, returns
-    // the given "inst".
-    Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc);
-
-    // Compiles a field access into a quick field access.
-    // The field index is replaced by an offset within an Object where we can read
-    // from / write to this field. Therefore, this does not involve any resolution
-    // at runtime.
-    // Since the field index is encoded with 16 bits, we can replace it only if the
-    // field offset can be encoded with 16 bits too.
-    void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc,
-                                    Instruction::Code new_opcode, bool is_put);
-
-    // Compiles a virtual method invocation into a quick virtual method invocation.
-    // The method index is replaced by the vtable index where the corresponding
-    // executable can be found. Therefore, this does not involve any resolution
-    // at runtime.
-    // Since the method index is encoded with 16 bits, we can replace it only if the
-    // vtable index can be encoded with 16 bits too.
-    void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc,
-                              Instruction::Code new_opcode, bool is_range);
-
-    // Return the next index.
-    uint16_t NextIndex();
-
-    // Returns the dequickened index if an instruction is quickened, otherwise return index.
-    uint16_t GetIndexForInstruction(const Instruction* inst, uint32_t index);
-
-    DexToDexCompiler* const compiler_;
-    CompilerDriver& driver_;
-    const DexCompilationUnit& unit_;
-    const CompilationLevel compilation_level_;
-
-    // Filled by the compiler when quickening, in order to encode that information
-    // in the .oat file. The runtime will use that information to get to the original
-    // opcodes.
-    std::vector<QuickenedInfo> quickened_info_;
-
-    // If the code item was already quickened previously.
-    const bool already_quickened_;
-    const QuickenInfoTable existing_quicken_info_;
-    uint32_t quicken_index_ = 0u;
-
-    DISALLOW_COPY_AND_ASSIGN(CompilationState);
-  };
+  struct CompilationState;
 
+  // Quicken state for a code item, may be referenced by multiple methods.
   struct QuickenState {
     std::vector<MethodReference> methods_;
     std::vector<uint8_t> quicken_data_;
+    bool optimized_return_void_ = false;
+    bool conflict_ = false;
   };
 
   BitVector* GetOrAddBitVectorForDex(const DexFile* dex_file) REQUIRES(lock_);
@@ -166,15 +102,14 @@ class DexToDexCompiler {
   mutable Mutex lock_;
   // Record what method references are going to get quickened.
   std::unordered_map<const DexFile*, BitVector> should_quicken_;
-  // Record what code items are already seen to detect when multiple methods have the same code
-  // item.
-  std::unordered_set<const DexFile::CodeItem*> seen_code_items_ GUARDED_BY(lock_);
   // Guarded by lock_ during writing, accessed without a lock during quickening.
   // This is safe because no thread is adding to the shared code items during the quickening phase.
   std::unordered_set<const DexFile::CodeItem*> shared_code_items_;
-  std::unordered_set<const DexFile::CodeItem*> blacklisted_code_items_ GUARDED_BY(lock_);
+  // Blacklisted code items are unquickened in UnquickenConflictingMethods.
   std::unordered_map<const DexFile::CodeItem*, QuickenState> shared_code_item_quicken_info_
       GUARDED_BY(lock_);
+  // Number of added code items.
+  size_t num_code_items_ GUARDED_BY(lock_) = 0u;
 };
 
 std::ostream& operator<<(std::ostream& os, const DexToDexCompiler::CompilationLevel& rhs);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index c617f54f55..fb428b8d9a 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -289,7 +289,6 @@ CompilerDriver::CompilerDriver(
       compiled_method_storage_(swap_fd),
       profile_compilation_info_(profile_compilation_info),
       max_arena_alloc_(0),
-      compiling_dex_to_dex_(false),
       dex_to_dex_compiler_(this) {
   DCHECK(compiler_options_ != nullptr);
 
@@ -450,99 +449,39 @@ static bool InstructionSetHasGenericJniStub(InstructionSet isa) {
   }
 }
 
-static void CompileMethod(Thread* self,
-                          CompilerDriver* driver,
-                          const DexFile::CodeItem* code_item,
-                          uint32_t access_flags,
-                          InvokeType invoke_type,
-                          uint16_t class_def_idx,
-                          uint32_t method_idx,
-                          Handle<mirror::ClassLoader> class_loader,
-                          const DexFile& dex_file,
-                          optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level,
-                          bool compilation_enabled,
-                          Handle<mirror::DexCache> dex_cache) {
+template <typename CompileFn>
+static void CompileMethodHarness(
+    Thread* self,
+    CompilerDriver* driver,
+    const DexFile::CodeItem* code_item,
+    uint32_t access_flags,
+    InvokeType invoke_type,
+    uint16_t class_def_idx,
+    uint32_t method_idx,
+    Handle<mirror::ClassLoader> class_loader,
+    const DexFile& dex_file,
+    optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level,
+    bool compilation_enabled,
+    Handle<mirror::DexCache> dex_cache,
+    CompileFn compile_fn) {
   DCHECK(driver != nullptr);
-  CompiledMethod* compiled_method = nullptr;
+  CompiledMethod* compiled_method;
   uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0;
   MethodReference method_ref(&dex_file, method_idx);
 
-  if (driver->GetCompilingDexToDex()) {
-    optimizer::DexToDexCompiler* const compiler = &driver->GetDexToDexCompiler();
-    // This is the second pass when we dex-to-dex compile previously marked methods.
-    // TODO: Refactor the compilation to avoid having to distinguish the two passes
-    // here. That should be done on a higher level. http://b/29089975
-    if (compiler->ShouldCompileMethod(method_ref)) {
-      VerificationResults* results = driver->GetVerificationResults();
-      DCHECK(results != nullptr);
-      const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref);
-      // Do not optimize if a VerifiedMethod is missing. SafeCast elision,
-      // for example, relies on it.
-      compiled_method = compiler->CompileMethod(
-          code_item,
-          access_flags,
-          invoke_type,
-          class_def_idx,
-          method_idx,
-          class_loader,
-          dex_file,
-          (verified_method != nullptr)
-              ? dex_to_dex_compilation_level
-              : optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile);
-    }
-  } else if ((access_flags & kAccNative) != 0) {
-    // Are we extracting only and have support for generic JNI down calls?
-    if (!driver->GetCompilerOptions().IsJniCompilationEnabled() &&
-        InstructionSetHasGenericJniStub(driver->GetInstructionSet())) {
-      // Leaving this empty will trigger the generic JNI version
-    } else {
-      // Query any JNI optimization annotations such as @FastNative or @CriticalNative.
-      access_flags |= annotations::GetNativeMethodAnnotationAccessFlags(
-          dex_file, dex_file.GetClassDef(class_def_idx), method_idx);
+  compiled_method = compile_fn(self,
+                               driver,
+                               code_item,
+                               access_flags,
+                               invoke_type,
+                               class_def_idx,
+                               method_idx,
+                               class_loader,
+                               dex_file,
+                               dex_to_dex_compilation_level,
+                               compilation_enabled,
+                               dex_cache);
 
-      compiled_method = driver->GetCompiler()->JniCompile(
-          access_flags, method_idx, dex_file, dex_cache);
-      CHECK(compiled_method != nullptr);
-    }
-  } else if ((access_flags & kAccAbstract) != 0) {
-    // Abstract methods don't have code.
-  } else {
-    VerificationResults* results = driver->GetVerificationResults();
-    DCHECK(results != nullptr);
-    const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref);
-    bool compile = compilation_enabled &&
-        // Basic checks, e.g., not <clinit>.
-        results->IsCandidateForCompilation(method_ref, access_flags) &&
-        // Did not fail to create VerifiedMethod metadcata.
-        verified_method != nullptr &&
-        // Do not have failures that should punt to the interpreter.
-        !verified_method->HasRuntimeThrow() &&
-        (verified_method->GetEncounteredVerificationFailures() &
-            (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 &&
-        // Is eligable for compilation by methods-to-compile filter.
-        driver->IsMethodToCompile(method_ref) &&
-        driver->ShouldCompileBasedOnProfile(method_ref);
-
-    if (compile) {
-      // NOTE: if compiler declines to compile this method, it will return null.
-      compiled_method = driver->GetCompiler()->Compile(code_item,
-                                                       access_flags,
-                                                       invoke_type,
-                                                       class_def_idx,
-                                                       method_idx,
-                                                       class_loader,
-                                                       dex_file,
-                                                       dex_cache);
-    }
-    if (compiled_method == nullptr &&
-        dex_to_dex_compilation_level !=
-            optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile) {
-      DCHECK(!Runtime::Current()->UseJitCompilation());
-      DCHECK(!driver->GetCompilingDexToDex());
-      // TODO: add a command-line option to disable DEX-to-DEX compilation ?
-      driver->GetDexToDexCompiler().MarkForCompilation(self, method_ref, code_item);
-    }
-  }
   if (kTimeCompileMethod) {
     uint64_t duration_ns = NanoTime() - start_ns;
     if (duration_ns > MsToNs(driver->GetCompiler()->GetMaximumCompilationTimeBeforeWarning())) {
@@ -573,6 +512,170 @@ static void CompileMethod(Thread* self,
   }
 }
 
+static void CompileMethodDex2Dex(
+    Thread* self,
+    CompilerDriver* driver,
+    const DexFile::CodeItem* code_item,
+    uint32_t access_flags,
+    InvokeType invoke_type,
+    uint16_t class_def_idx,
+    uint32_t method_idx,
+    Handle<mirror::ClassLoader> class_loader,
+    const DexFile& dex_file,
+    optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level,
+    bool compilation_enabled,
+    Handle<mirror::DexCache> dex_cache) {
+  auto dex_2_dex_fn = [](Thread* self ATTRIBUTE_UNUSED,
+      CompilerDriver* driver,
+      const DexFile::CodeItem* code_item,
+      uint32_t access_flags,
+      InvokeType invoke_type,
+      uint16_t class_def_idx,
+      uint32_t method_idx,
+      Handle<mirror::ClassLoader> class_loader,
+      const DexFile& dex_file,
+      optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level,
+      bool compilation_enabled ATTRIBUTE_UNUSED,
+      Handle<mirror::DexCache> dex_cache ATTRIBUTE_UNUSED) -> CompiledMethod* {
+    DCHECK(driver != nullptr);
+    MethodReference method_ref(&dex_file, method_idx);
+
+    optimizer::DexToDexCompiler* const compiler = &driver->GetDexToDexCompiler();
+
+    if (compiler->ShouldCompileMethod(method_ref)) {
+      VerificationResults* results = driver->GetVerificationResults();
+      DCHECK(results != nullptr);
+      const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref);
+      // Do not optimize if a VerifiedMethod is missing. SafeCast elision,
+      // for example, relies on it.
+      return compiler->CompileMethod(
+          code_item,
+          access_flags,
+          invoke_type,
+          class_def_idx,
+          method_idx,
+          class_loader,
+          dex_file,
+          (verified_method != nullptr)
+          ? dex_to_dex_compilation_level
+              : optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile);
+    }
+    return nullptr;
+  };
+  CompileMethodHarness(self,
+                       driver,
+                       code_item,
+                       access_flags,
+                       invoke_type,
+                       class_def_idx,
+                       method_idx,
+                       class_loader,
+                       dex_file,
+                       dex_to_dex_compilation_level,
+                       compilation_enabled,
+                       dex_cache,
+                       dex_2_dex_fn);
+}
+
+static void CompileMethodQuick(
+    Thread* self,
+    CompilerDriver* driver,
+    const DexFile::CodeItem* code_item,
+    uint32_t access_flags,
+    InvokeType invoke_type,
+    uint16_t class_def_idx,
+    uint32_t method_idx,
+    Handle<mirror::ClassLoader> class_loader,
+    const DexFile& dex_file,
+    optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level,
+    bool compilation_enabled,
+    Handle<mirror::DexCache> dex_cache) {
+  auto quick_fn = [](
+      Thread* self,
+      CompilerDriver* driver,
+      const DexFile::CodeItem* code_item,
+      uint32_t access_flags,
+      InvokeType invoke_type,
+      uint16_t class_def_idx,
+      uint32_t method_idx,
+      Handle<mirror::ClassLoader> class_loader,
+      const DexFile& dex_file,
+      optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level,
+      bool compilation_enabled,
+      Handle<mirror::DexCache> dex_cache) {
+    DCHECK(driver != nullptr);
+    CompiledMethod* compiled_method = nullptr;
+    MethodReference method_ref(&dex_file, method_idx);
+
+    if ((access_flags & kAccNative) != 0) {
+      // Are we extracting only and have support for generic JNI down calls?
+      if (!driver->GetCompilerOptions().IsJniCompilationEnabled() &&
+          InstructionSetHasGenericJniStub(driver->GetInstructionSet())) {
+        // Leaving this empty will trigger the generic JNI version
+      } else {
+        // Query any JNI optimization annotations such as @FastNative or @CriticalNative.
+        access_flags |= annotations::GetNativeMethodAnnotationAccessFlags(
+            dex_file, dex_file.GetClassDef(class_def_idx), method_idx);
+
+        compiled_method = driver->GetCompiler()->JniCompile(
+            access_flags, method_idx, dex_file, dex_cache);
+        CHECK(compiled_method != nullptr);
+      }
+    } else if ((access_flags & kAccAbstract) != 0) {
+      // Abstract methods don't have code.
+    } else {
+      VerificationResults* results = driver->GetVerificationResults();
+      DCHECK(results != nullptr);
+      const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref);
+      bool compile = compilation_enabled &&
+          // Basic checks, e.g., not <clinit>.
+          results->IsCandidateForCompilation(method_ref, access_flags) &&
+          // Did not fail to create VerifiedMethod metadata.
+          verified_method != nullptr &&
+          // Do not have failures that should punt to the interpreter.
+          !verified_method->HasRuntimeThrow() &&
+          (verified_method->GetEncounteredVerificationFailures() &
+              (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 &&
+              // Is eligable for compilation by methods-to-compile filter.
+              driver->IsMethodToCompile(method_ref) &&
+              driver->ShouldCompileBasedOnProfile(method_ref);
+
+      if (compile) {
+        // NOTE: if compiler declines to compile this method, it will return null.
+        compiled_method = driver->GetCompiler()->Compile(code_item,
+                                                         access_flags,
+                                                         invoke_type,
+                                                         class_def_idx,
+                                                         method_idx,
+                                                         class_loader,
+                                                         dex_file,
+                                                         dex_cache);
+      }
+      if (compiled_method == nullptr &&
+          dex_to_dex_compilation_level !=
+              optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile) {
+        DCHECK(!Runtime::Current()->UseJitCompilation());
+        // TODO: add a command-line option to disable DEX-to-DEX compilation ?
+        driver->GetDexToDexCompiler().MarkForCompilation(self, method_ref);
+      }
+    }
+    return compiled_method;
+  };
+  CompileMethodHarness(self,
+                       driver,
+                       code_item,
+                       access_flags,
+                       invoke_type,
+                       class_def_idx,
+                       method_idx,
+                       class_loader,
+                       dex_file,
+                       dex_to_dex_compilation_level,
+                       compilation_enabled,
+                       dex_cache,
+                       quick_fn);
+}
+
 void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
   jobject jclass_loader;
@@ -614,37 +717,34 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t
                                   *dex_file,
                                   dex_file->GetClassDef(class_def_idx));
 
-  DCHECK(!compiling_dex_to_dex_);
-  CompileMethod(self,
-                this,
-                code_item,
-                access_flags,
-                invoke_type,
-                class_def_idx,
-                method_idx,
-                class_loader,
-                *dex_file,
-                dex_to_dex_compilation_level,
-                true,
-                dex_cache);
-
-  const size_t num_methods = dex_to_dex_compiler_.NumUniqueCodeItems(self);
+  CompileMethodQuick(self,
+                     this,
+                     code_item,
+                     access_flags,
+                     invoke_type,
+                     class_def_idx,
+                     method_idx,
+                     class_loader,
+                     *dex_file,
+                     dex_to_dex_compilation_level,
+                     true,
+                     dex_cache);
+
+  const size_t num_methods = dex_to_dex_compiler_.NumCodeItemsToQuicken(self);
   if (num_methods != 0) {
     DCHECK_EQ(num_methods, 1u);
-    compiling_dex_to_dex_ = true;
-    CompileMethod(self,
-                  this,
-                  code_item,
-                  access_flags,
-                  invoke_type,
-                  class_def_idx,
-                  method_idx,
-                  class_loader,
-                  *dex_file,
-                  dex_to_dex_compilation_level,
-                  true,
-                  dex_cache);
-    compiling_dex_to_dex_ = false;
+    CompileMethodDex2Dex(self,
+                         this,
+                         code_item,
+                         access_flags,
+                         invoke_type,
+                         class_def_idx,
+                         method_idx,
+                         class_loader,
+                         *dex_file,
+                         dex_to_dex_compilation_level,
+                         true,
+                         dex_cache);
     dex_to_dex_compiler_.ClearState();
   }
 
@@ -1444,13 +1544,19 @@ class ParallelCompilationManager {
 
   void ForAll(size_t begin, size_t end, CompilationVisitor* visitor, size_t work_units)
       REQUIRES(!*Locks::mutator_lock_) {
+    ForAllLambda(begin, end, [visitor](size_t index) { visitor->Visit(index); }, work_units);
+  }
+
+  template <typename Fn>
+  void ForAllLambda(size_t begin, size_t end, Fn fn, size_t work_units)
+      REQUIRES(!*Locks::mutator_lock_) {
     Thread* self = Thread::Current();
     self->AssertNoPendingException();
     CHECK_GT(work_units, 0U);
 
     index_.StoreRelaxed(begin);
     for (size_t i = 0; i < work_units; ++i) {
-      thread_pool_->AddTask(self, new ForAllClosure(this, end, visitor));
+      thread_pool_->AddTask(self, new ForAllClosureLambda<Fn>(this, end, fn));
     }
     thread_pool_->StartWorkers(self);
 
@@ -1470,32 +1576,33 @@ class ParallelCompilationManager {
   }
 
  private:
-  class ForAllClosure : public Task {
+  template <typename Fn>
+  class ForAllClosureLambda : public Task {
    public:
-    ForAllClosure(ParallelCompilationManager* manager, size_t end, CompilationVisitor* visitor)
+    ForAllClosureLambda(ParallelCompilationManager* manager, size_t end, Fn fn)
         : manager_(manager),
           end_(end),
-          visitor_(visitor) {}
+          fn_(fn) {}
 
-    virtual void Run(Thread* self) {
+    void Run(Thread* self) OVERRIDE {
       while (true) {
         const size_t index = manager_->NextIndex();
         if (UNLIKELY(index >= end_)) {
           break;
         }
-        visitor_->Visit(index);
+        fn_(index);
         self->AssertNoPendingException();
       }
     }
 
-    virtual void Finalize() {
+    void Finalize() OVERRIDE {
       delete this;
     }
 
    private:
     ParallelCompilationManager* const manager_;
     const size_t end_;
-    CompilationVisitor* const visitor_;
+    Fn fn_;
   };
 
   AtomicInteger index_;
@@ -2574,64 +2681,33 @@ void CompilerDriver::InitializeClasses(jobject class_loader,
   }
 }
 
-void CompilerDriver::Compile(jobject class_loader,
-                             const std::vector<const DexFile*>& dex_files,
-                             TimingLogger* timings) {
-  if (kDebugProfileGuidedCompilation) {
-    LOG(INFO) << "[ProfileGuidedCompilation] " <<
-        ((profile_compilation_info_ == nullptr)
-            ? "null"
-            : profile_compilation_info_->DumpInfo(&dex_files));
-  }
-
-  dex_to_dex_compiler_.ClearState();
-  compiling_dex_to_dex_ = false;
-  for (const DexFile* dex_file : dex_files) {
-    CHECK(dex_file != nullptr);
-    CompileDexFile(class_loader,
-                   *dex_file,
-                   dex_files,
-                   parallel_thread_pool_.get(),
-                   parallel_thread_count_,
-                   timings);
-    const ArenaPool* const arena_pool = Runtime::Current()->GetArenaPool();
-    const size_t arena_alloc = arena_pool->GetBytesAllocated();
-    max_arena_alloc_ = std::max(arena_alloc, max_arena_alloc_);
-    Runtime::Current()->ReclaimArenaPoolMemory();
-  }
-
-  if (dex_to_dex_compiler_.NumUniqueCodeItems(Thread::Current()) > 0u) {
-    compiling_dex_to_dex_ = true;
-    // TODO: Not visit all of the dex files, its probably rare that only one would have quickened
-    // methods though.
-    for (const DexFile* dex_file : dex_files) {
-      CompileDexFile(class_loader,
-                     *dex_file,
-                     dex_files,
-                     parallel_thread_pool_.get(),
-                     parallel_thread_count_,
-                     timings);
-    }
-    dex_to_dex_compiler_.ClearState();
-    compiling_dex_to_dex_ = false;
-  }
-
-  VLOG(compiler) << "Compile: " << GetMemoryUsageString(false);
-}
-
-class CompileClassVisitor : public CompilationVisitor {
- public:
-  explicit CompileClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
+template <typename CompileFn>
+static void CompileDexFile(CompilerDriver* driver,
+                           jobject class_loader,
+                           const DexFile& dex_file,
+                           const std::vector<const DexFile*>& dex_files,
+                           ThreadPool* thread_pool,
+                           size_t thread_count,
+                           TimingLogger* timings,
+                           const char* timing_name,
+                           CompileFn compile_fn) {
+  TimingLogger::ScopedTiming t(timing_name, timings);
+  ParallelCompilationManager context(Runtime::Current()->GetClassLinker(),
+                                     class_loader,
+                                     driver,
+                                     &dex_file,
+                                     dex_files,
+                                     thread_pool);
 
-  virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
+  auto compile = [&context, &compile_fn](size_t class_def_index) {
     ScopedTrace trace(__FUNCTION__);
-    const DexFile& dex_file = *manager_->GetDexFile();
+    const DexFile& dex_file = *context.GetDexFile();
     const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
-    ClassLinker* class_linker = manager_->GetClassLinker();
-    jobject jclass_loader = manager_->GetClassLoader();
+    ClassLinker* class_linker = context.GetClassLinker();
+    jobject jclass_loader = context.GetClassLoader();
     ClassReference ref(&dex_file, class_def_index);
     // Skip compiling classes with generic verifier failures since they will still fail at runtime
-    if (manager_->GetCompiler()->verification_results_->IsClassRejected(ref)) {
+    if (context.GetCompiler()->GetVerificationResults()->IsClassRejected(ref)) {
       return;
     }
     // Use a scoped object access to perform to the quick SkipClass check.
@@ -2662,7 +2738,7 @@ class CompileClassVisitor : public CompilationVisitor {
     // Go to native so that we don't block GC during compilation.
     ScopedThreadSuspension sts(soa.Self(), kNative);
 
-    CompilerDriver* const driver = manager_->GetCompiler();
+    CompilerDriver* const driver = context.GetCompiler();
 
     // Can we run DEX-to-DEX compiler on this class ?
     optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level =
@@ -2685,38 +2761,71 @@ class CompileClassVisitor : public CompilationVisitor {
         continue;
       }
       previous_method_idx = method_idx;
-      CompileMethod(soa.Self(),
-                    driver,
-                    it.GetMethodCodeItem(),
-                    it.GetMethodAccessFlags(),
-                    it.GetMethodInvokeType(class_def),
-                    class_def_index,
-                    method_idx,
-                    class_loader,
-                    dex_file,
-                    dex_to_dex_compilation_level,
-                    compilation_enabled,
-                    dex_cache);
+      compile_fn(soa.Self(),
+                 driver,
+                 it.GetMethodCodeItem(),
+                 it.GetMethodAccessFlags(),
+                 it.GetMethodInvokeType(class_def),
+                 class_def_index,
+                 method_idx,
+                 class_loader,
+                 dex_file,
+                 dex_to_dex_compilation_level,
+                 compilation_enabled,
+                 dex_cache);
       it.Next();
     }
     DCHECK(!it.HasNext());
+  };
+  context.ForAllLambda(0, dex_file.NumClassDefs(), compile, thread_count);
+}
+
+void CompilerDriver::Compile(jobject class_loader,
+                             const std::vector<const DexFile*>& dex_files,
+                             TimingLogger* timings) {
+  if (kDebugProfileGuidedCompilation) {
+    LOG(INFO) << "[ProfileGuidedCompilation] " <<
+        ((profile_compilation_info_ == nullptr)
+            ? "null"
+            : profile_compilation_info_->DumpInfo(&dex_files));
   }
 
- private:
-  const ParallelCompilationManager* const manager_;
-};
+  dex_to_dex_compiler_.ClearState();
+  for (const DexFile* dex_file : dex_files) {
+    CHECK(dex_file != nullptr);
+    CompileDexFile(this,
+                   class_loader,
+                   *dex_file,
+                   dex_files,
+                   parallel_thread_pool_.get(),
+                   parallel_thread_count_,
+                   timings,
+                   "Compile Dex File Quick",
+                   CompileMethodQuick);
+    const ArenaPool* const arena_pool = Runtime::Current()->GetArenaPool();
+    const size_t arena_alloc = arena_pool->GetBytesAllocated();
+    max_arena_alloc_ = std::max(arena_alloc, max_arena_alloc_);
+    Runtime::Current()->ReclaimArenaPoolMemory();
+  }
 
-void CompilerDriver::CompileDexFile(jobject class_loader,
-                                    const DexFile& dex_file,
-                                    const std::vector<const DexFile*>& dex_files,
-                                    ThreadPool* thread_pool,
-                                    size_t thread_count,
-                                    TimingLogger* timings) {
-  TimingLogger::ScopedTiming t("Compile Dex File", timings);
-  ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
-                                     &dex_file, dex_files, thread_pool);
-  CompileClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
+  if (dex_to_dex_compiler_.NumCodeItemsToQuicken(Thread::Current()) > 0u) {
+    // TODO: Not visit all of the dex files, its probably rare that only one would have quickened
+    // methods though.
+    for (const DexFile* dex_file : dex_files) {
+      CompileDexFile(this,
+                     class_loader,
+                     *dex_file,
+                     dex_files,
+                     parallel_thread_pool_.get(),
+                     parallel_thread_count_,
+                     timings,
+                     "Compile Dex File Dex2Dex",
+                     CompileMethodDex2Dex);
+    }
+    dex_to_dex_compiler_.ClearState();
+  }
+
+  VLOG(compiler) << "Compile: " << GetMemoryUsageString(false);
 }
 
 void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref,
@@ -2731,6 +2840,12 @@ void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref,
   DCHECK(GetCompiledMethod(method_ref) != nullptr) << method_ref.PrettyMethod();
 }
 
+CompiledMethod* CompilerDriver::RemoveCompiledMethod(const MethodReference& method_ref) {
+  CompiledMethod* ret = nullptr;
+  CHECK(compiled_methods_.Remove(method_ref, &ret));
+  return ret;
+}
+
 bool CompilerDriver::GetCompiledClass(const ClassReference& ref, ClassStatus* status) const {
   DCHECK(status != nullptr);
   // The table doesn't know if something wasn't inserted. For this case it will return
@@ -2904,6 +3019,7 @@ void CompilerDriver::FreeThreadPools() {
 void CompilerDriver::SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files) {
   dex_files_for_oat_file_ = dex_files;
   compiled_classes_.AddDexFiles(dex_files);
+  dex_to_dex_compiler_.SetDexFiles(dex_files);
 }
 
 void CompilerDriver::SetClasspathDexFiles(const std::vector<const DexFile*>& dex_files) {
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index b51e0debbb..2b524a347d 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -165,6 +165,7 @@ class CompilerDriver {
   void AddCompiledMethod(const MethodReference& method_ref,
                          CompiledMethod* const compiled_method,
                          size_t non_relative_linker_patch_count);
+  CompiledMethod* RemoveCompiledMethod(const MethodReference& method_ref);
 
   void SetRequiresConstructorBarrier(Thread* self,
                                      const DexFile* dex_file,
@@ -374,10 +375,6 @@ class CompilerDriver {
         || android::base::EndsWith(boot_image_filename, "core-optimizing.art");
   }
 
-  bool GetCompilingDexToDex() const {
-    return compiling_dex_to_dex_;
-  }
-
   optimizer::DexToDexCompiler& GetDexToDexCompiler() {
     return dex_to_dex_compiler_;
   }
@@ -449,13 +446,6 @@ class CompilerDriver {
   void Compile(jobject class_loader,
                const std::vector<const DexFile*>& dex_files,
                TimingLogger* timings);
-  void CompileDexFile(jobject class_loader,
-                      const DexFile& dex_file,
-                      const std::vector<const DexFile*>& dex_files,
-                      ThreadPool* thread_pool,
-                      size_t thread_count,
-                      TimingLogger* timings)
-      REQUIRES(!Locks::mutator_lock_);
 
   bool MayInlineInternal(const DexFile* inlined_from, const DexFile* inlined_into) const;
 
@@ -541,7 +531,6 @@ class CompilerDriver {
   size_t max_arena_alloc_;
 
   // Compiler for dex to dex (quickening).
-  bool compiling_dex_to_dex_;
   optimizer::DexToDexCompiler dex_to_dex_compiler_;
 
   friend class CompileClassVisitor;
diff --git a/compiler/linker/elf_builder.h b/compiler/linker/elf_builder.h
index 1c875189c5..3145497091 100644
--- a/compiler/linker/elf_builder.h
+++ b/compiler/linker/elf_builder.h
@@ -18,7 +18,6 @@
 #define ART_COMPILER_LINKER_ELF_BUILDER_H_
 
 #include <vector>
-#include <unordered_map>
 
 #include "arch/instruction_set.h"
 #include "arch/mips/instruction_set_features_mips.h"
@@ -310,24 +309,27 @@ class ElfBuilder FINAL {
                   /* info */ 0,
                   align,
                   /* entsize */ 0),
-          current_offset_(0) {
+          current_offset_(0),
+          last_offset_(0) {
     }
 
     Elf_Word Write(const std::string& name) {
       if (current_offset_ == 0) {
         DCHECK(name.empty());
+      } else if (name == last_name_) {
+        return last_offset_;  // Very simple string de-duplication.
       }
-      auto res = written_names_.emplace(name, current_offset_);
-      if (res.second) {  // Inserted.
-        this->WriteFully(name.c_str(), name.length() + 1);
-        current_offset_ += name.length() + 1;
-      }
-      return res.first->second;  // Offset.
+      last_name_ = name;
+      last_offset_ = current_offset_;
+      this->WriteFully(name.c_str(), name.length() + 1);
+      current_offset_ += name.length() + 1;
+      return last_offset_;
     }
 
    private:
     Elf_Word current_offset_;
-    std::unordered_map<std::string, Elf_Word> written_names_;  // Dedup strings.
+    std::string last_name_;
+    Elf_Word last_offset_;
   };
 
   // Writer of .dynsym and .symtab sections.
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index cca1055ac8..9fa0f72e80 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -146,6 +146,65 @@ static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstructi
   }
 }
 
+static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* throws) {
+  // Test for an if as last statement.
+  if (!block->EndsWithIf()) {
+    return false;
+  }
+  HIf* ifs = block->GetLastInstruction()->AsIf();
+  // Find either:
+  //   if obj == null
+  //     throws
+  //   else
+  //     not_throws
+  // or:
+  //   if obj != null
+  //     not_throws
+  //   else
+  //     throws
+  HInstruction* cond = ifs->InputAt(0);
+  HBasicBlock* not_throws = nullptr;
+  if (throws == ifs->IfTrueSuccessor() && cond->IsEqual()) {
+    not_throws = ifs->IfFalseSuccessor();
+  } else if (throws == ifs->IfFalseSuccessor() && cond->IsNotEqual()) {
+    not_throws = ifs->IfTrueSuccessor();
+  } else {
+    return false;
+  }
+  DCHECK(cond->IsEqual() || cond->IsNotEqual());
+  HInstruction* obj = cond->InputAt(1);
+  if (obj->IsNullConstant()) {
+    obj = cond->InputAt(0);
+  } else if (!cond->InputAt(0)->IsNullConstant()) {
+    return false;
+  }
+  // Scan all uses of obj and find null check under control dependence.
+  HBoundType* bound = nullptr;
+  const HUseList<HInstruction*>& uses = obj->GetUses();
+  for (auto it = uses.begin(), end = uses.end(); it != end;) {
+    HInstruction* user = it->GetUser();
+    ++it;  // increment before possibly replacing
+    if (user->IsNullCheck()) {
+      HBasicBlock* user_block = user->GetBlock();
+      if (user_block != block &&
+          user_block != throws &&
+          block->Dominates(user_block)) {
+        if (bound == nullptr) {
+          ReferenceTypeInfo ti = obj->GetReferenceTypeInfo();
+          bound = new (obj->GetBlock()->GetGraph()->GetAllocator()) HBoundType(obj);
+          bound->SetUpperBound(ti, /*can_be_null*/ false);
+          bound->SetReferenceTypeInfo(ti);
+          bound->SetCanBeNull(false);
+          not_throws->InsertInstructionBefore(bound, not_throws->GetFirstInstruction());
+        }
+        user->ReplaceWith(bound);
+        user_block->RemoveInstruction(user);
+      }
+    }
+  }
+  return bound != nullptr;
+}
+
 // Simplify the pattern:
 //
 //           B1
@@ -203,6 +262,11 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() {
         block->ReplaceSuccessor(succ, exit);
         rerun_dominance_and_loop_analysis = true;
         MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke);
+        // Perform a quick follow up optimization on object != null control dependences
+        // that is much cheaper to perform now than in a later phase.
+        if (RemoveNonNullControlDependences(pred, block)) {
+          MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck);
+        }
       }
     }
   }
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 88326d321b..8b4eae1780 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -25,6 +25,51 @@
 
 #include <iostream>
 
+/**
+ * The general algorithm of load-store elimination (LSE).
+ * Load-store analysis in the previous pass collects a list of heap locations
+ * and does alias analysis of those heap locations.
+ * LSE keeps track of a list of heap values corresponding to the heap
+ * locations. It visits basic blocks in reverse post order and for
+ * each basic block, visits instructions sequentially, and processes
+ * instructions as follows:
+ * - If the instruction is a load, and the heap location for that load has a
+ *   valid heap value, the load can be eliminated. In order to maintain the
+ *   validity of all heap locations during the optimization phase, the real
+ *   elimination is delayed till the end of LSE.
+ * - If the instruction is a store, it updates the heap value for the heap
+ *   location of the store with the store instruction. The real heap value
+ *   can be fetched from the store instruction. Heap values are invalidated
+ *   for heap locations that may alias with the store instruction's heap
+ *   location. The store instruction can be eliminated unless the value stored
+ *   is later needed e.g. by a load from the same/aliased heap location or
+ *   the heap location persists at method return/deoptimization.
+ *   The store instruction is also needed if it's not used to track the heap
+ *   value anymore, e.g. when it fails to merge with the heap values from other
+ *   predecessors.
+ * - A store that stores the same value as the heap value is eliminated.
+ * - The list of heap values are merged at basic block entry from the basic
+ *   block's predecessors. The algorithm is single-pass, so loop side-effects is
+ *   used as best effort to decide if a heap location is stored inside the loop.
+ * - A special type of objects called singletons are instantiated in the method
+ *   and have a single name, i.e. no aliases. Singletons have exclusive heap
+ *   locations since they have no aliases. Singletons are helpful in narrowing
+ *   down the life span of a heap location such that they do not always
+ *   need to participate in merging heap values. Allocation of a singleton
+ *   can be eliminated if that singleton is not used and does not persist
+ *   at method return/deoptimization.
+ * - For newly instantiated instances, their heap values are initialized to
+ *   language defined default values.
+ * - Some instructions such as invokes are treated as loading and invalidating
+ *   all the heap values, depending on the instruction's side effects.
+ * - Finalizable objects are considered as persisting at method
+ *   return/deoptimization.
+ * - Currently this LSE algorithm doesn't handle SIMD graph, e.g. with VecLoad
+ *   and VecStore instructions.
+ * - Currently this LSE algorithm doesn't handle graph with try-catch, due to
+ *   the special block merging structure.
+ */
+
 namespace art {
 
 // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
@@ -59,8 +104,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
         removed_loads_(allocator_.Adapter(kArenaAllocLSE)),
         substitute_instructions_for_loads_(allocator_.Adapter(kArenaAllocLSE)),
         possibly_removed_stores_(allocator_.Adapter(kArenaAllocLSE)),
-        singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
-        singleton_new_arrays_(allocator_.Adapter(kArenaAllocLSE)) {
+        singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) {
   }
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -88,19 +132,26 @@ class LSEVisitor : public HGraphDelegateVisitor {
     return type_conversion;
   }
 
-  // Find an instruction's substitute if it should be removed.
+  // Find an instruction's substitute if it's a removed load.
   // Return the same instruction if it should not be removed.
   HInstruction* FindSubstitute(HInstruction* instruction) {
+    if (!IsLoad(instruction)) {
+      return instruction;
+    }
     size_t size = removed_loads_.size();
     for (size_t i = 0; i < size; i++) {
       if (removed_loads_[i] == instruction) {
-        return substitute_instructions_for_loads_[i];
+        HInstruction* substitute = substitute_instructions_for_loads_[i];
+        // The substitute list is a flat hierarchy.
+        DCHECK_EQ(FindSubstitute(substitute), substitute);
+        return substitute;
       }
     }
     return instruction;
   }
 
   void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
+    DCHECK(IsLoad(load));
     DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
         "Unexpected heap_value that has a substitute " << heap_value->DebugName();
     removed_loads_.push_back(load);
@@ -207,28 +258,59 @@ class LSEVisitor : public HGraphDelegateVisitor {
         new_instance->GetBlock()->RemoveInstruction(new_instance);
       }
     }
-    for (HInstruction* new_array : singleton_new_arrays_) {
-      size_t removed = HConstructorFence::RemoveConstructorFences(new_array);
-      MaybeRecordStat(stats_,
-                      MethodCompilationStat::kConstructorFenceRemovedLSE,
-                      removed);
+  }
 
-      if (!new_array->HasNonEnvironmentUses()) {
-        new_array->RemoveEnvironmentUsers();
-        new_array->GetBlock()->RemoveInstruction(new_array);
-      }
+ private:
+  static bool IsLoad(HInstruction* instruction) {
+    if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) {
+      return false;
     }
+    // Unresolved load is not treated as a load.
+    return instruction->IsInstanceFieldGet() ||
+        instruction->IsStaticFieldGet() ||
+        instruction->IsArrayGet();
   }
 
- private:
-  // If heap_values[index] is an instance field store, need to keep the store.
-  // This is necessary if a heap value is killed due to merging, or loop side
-  // effects (which is essentially merging also), since a load later from the
-  // location won't be eliminated.
+  static bool IsStore(HInstruction* instruction) {
+    if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) {
+      return false;
+    }
+    // Unresolved store is not treated as a store.
+    return instruction->IsInstanceFieldSet() ||
+        instruction->IsArraySet() ||
+        instruction->IsStaticFieldSet();
+  }
+
+  // Returns the real heap value by finding its substitute or by "peeling"
+  // a store instruction.
+  HInstruction* GetRealHeapValue(HInstruction* heap_value) {
+    if (IsLoad(heap_value)) {
+      return FindSubstitute(heap_value);
+    }
+    if (!IsStore(heap_value)) {
+      return heap_value;
+    }
+
+    // We keep track of store instructions as the heap values which might be
+    // eliminated if the stores are later found not necessary. The real stored
+    // value needs to be fetched from the store instruction.
+    if (heap_value->IsInstanceFieldSet()) {
+      heap_value = heap_value->AsInstanceFieldSet()->GetValue();
+    } else if (heap_value->IsStaticFieldSet()) {
+      heap_value = heap_value->AsStaticFieldSet()->GetValue();
+    } else {
+      DCHECK(heap_value->IsArraySet());
+      heap_value = heap_value->AsArraySet()->GetValue();
+    }
+    // heap_value may already be a removed load.
+    return FindSubstitute(heap_value);
+  }
+
+  // If heap_value is a store, need to keep the store.
+  // This is necessary if a heap value is killed or replaced by another value,
+  // so that the store is no longer used to track heap value.
   void KeepIfIsStore(HInstruction* heap_value) {
-    if (heap_value == kDefaultHeapValue ||
-        heap_value == kUnknownHeapValue ||
-        !(heap_value->IsInstanceFieldSet() || heap_value->IsArraySet())) {
+    if (!IsStore(heap_value)) {
       return;
     }
     auto idx = std::find(possibly_removed_stores_.begin(),
@@ -239,26 +321,41 @@ class LSEVisitor : public HGraphDelegateVisitor {
     }
   }
 
+  // If a heap location X may alias with heap location at `loc_index`
+  // and heap_values of that heap location X holds a store, keep that store.
+  // It's needed for a dependent load that's not eliminated since any store
+  // that may put value into the load's heap location needs to be kept.
+  void KeepStoresIfAliasedToLocation(ScopedArenaVector<HInstruction*>& heap_values,
+                                     size_t loc_index) {
+    for (size_t i = 0; i < heap_values.size(); i++) {
+      if ((i == loc_index) || heap_location_collector_.MayAlias(i, loc_index)) {
+        KeepIfIsStore(heap_values[i]);
+      }
+    }
+  }
+
   void HandleLoopSideEffects(HBasicBlock* block) {
     DCHECK(block->IsLoopHeader());
     int block_id = block->GetBlockId();
     ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+    HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+    ScopedArenaVector<HInstruction*>& pre_header_heap_values =
+        heap_values_for_[pre_header->GetBlockId()];
 
-    // Don't eliminate loads in irreducible loops. This is safe for singletons, because
-    // they are always used by the non-eliminated loop-phi.
+    // Don't eliminate loads in irreducible loops.
+    // Also keep the stores before the loop.
     if (block->GetLoopInformation()->IsIrreducible()) {
       if (kIsDebugBuild) {
         for (size_t i = 0; i < heap_values.size(); i++) {
           DCHECK_EQ(heap_values[i], kUnknownHeapValue);
         }
       }
+      for (size_t i = 0; i < heap_values.size(); i++) {
+        KeepIfIsStore(pre_header_heap_values[i]);
+      }
       return;
     }
 
-    HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
-    ScopedArenaVector<HInstruction*>& pre_header_heap_values =
-        heap_values_for_[pre_header->GetBlockId()];
-
     // Inherit the values from pre-header.
     for (size_t i = 0; i < heap_values.size(); i++) {
       heap_values[i] = pre_header_heap_values[i];
@@ -270,18 +367,17 @@ class LSEVisitor : public HGraphDelegateVisitor {
       for (size_t i = 0; i < heap_values.size(); i++) {
         HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
         ReferenceInfo* ref_info = location->GetReferenceInfo();
-        if (ref_info->IsSingletonAndRemovable() &&
-            !location->IsValueKilledByLoopSideEffects()) {
-          // A removable singleton's field that's not stored into inside a loop is
+        if (ref_info->IsSingleton() && !location->IsValueKilledByLoopSideEffects()) {
+          // A singleton's field that's not stored into inside a loop is
           // invariant throughout the loop. Nothing to do.
         } else {
-          // heap value is killed by loop side effects (stored into directly, or
-          // due to aliasing). Or the heap value may be needed after method return
-          // or deoptimization.
+          // heap value is killed by loop side effects.
           KeepIfIsStore(pre_header_heap_values[i]);
           heap_values[i] = kUnknownHeapValue;
         }
       }
+    } else {
+      // The loop doesn't kill any value.
     }
   }
 
@@ -300,45 +396,73 @@ class LSEVisitor : public HGraphDelegateVisitor {
     ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()];
     for (size_t i = 0; i < heap_values.size(); i++) {
       HInstruction* merged_value = nullptr;
+      // If we can merge the store itself from the predecessors, we keep
+      // the store as the heap value as long as possible. In case we cannot
+      // merge the store, we try to merge the values of the stores.
+      HInstruction* merged_store_value = nullptr;
       // Whether merged_value is a result that's merged from all predecessors.
       bool from_all_predecessors = true;
       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
+      HInstruction* ref = ref_info->GetReference();
       HInstruction* singleton_ref = nullptr;
       if (ref_info->IsSingleton()) {
-        // We do more analysis of liveness when merging heap values for such
-        // cases since stores into such references may potentially be eliminated.
-        singleton_ref = ref_info->GetReference();
+        // We do more analysis based on singleton's liveness when merging
+        // heap values for such cases.
+        singleton_ref = ref;
       }
 
       for (HBasicBlock* predecessor : predecessors) {
         HInstruction* pred_value = heap_values_for_[predecessor->GetBlockId()][i];
+        if (!IsStore(pred_value)) {
+          pred_value = FindSubstitute(pred_value);
+        }
+        DCHECK(pred_value != nullptr);
+        HInstruction* pred_store_value = GetRealHeapValue(pred_value);
         if ((singleton_ref != nullptr) &&
             !singleton_ref->GetBlock()->Dominates(predecessor)) {
-          // singleton_ref is not live in this predecessor. Skip this predecessor since
-          // it does not really have the location.
+          // singleton_ref is not live in this predecessor. No need to merge
+          // since singleton_ref is not live at the beginning of this block.
           DCHECK_EQ(pred_value, kUnknownHeapValue);
           from_all_predecessors = false;
-          continue;
+          break;
         }
         if (merged_value == nullptr) {
           // First seen heap value.
+          DCHECK(pred_value != nullptr);
           merged_value = pred_value;
         } else if (pred_value != merged_value) {
           // There are conflicting values.
           merged_value = kUnknownHeapValue;
+          // We may still be able to merge store values.
+        }
+
+        // Conflicting stores may be storing the same value. We do another merge
+        // of real stored values.
+        if (merged_store_value == nullptr) {
+          // First seen store value.
+          DCHECK(pred_store_value != nullptr);
+          merged_store_value = pred_store_value;
+        } else if (pred_store_value != merged_store_value) {
+          // There are conflicting store values.
+          merged_store_value = kUnknownHeapValue;
+          // There must be conflicting stores also.
+          DCHECK_EQ(merged_value, kUnknownHeapValue);
+          // No need to merge anymore.
           break;
         }
       }
 
-      if (ref_info->IsSingleton()) {
-        if (ref_info->IsSingletonAndNonRemovable() ||
-            (merged_value == kUnknownHeapValue &&
-             !block->IsSingleReturnOrReturnVoidAllowingPhis())) {
-          // The heap value may be needed after method return or deoptimization,
-          // or there are conflicting heap values from different predecessors and
-          // this block is not a single return,
-          // keep the last store in each predecessor since future loads may not
-          // be eliminated.
+      if (merged_value == nullptr) {
+        DCHECK(!from_all_predecessors);
+        DCHECK(singleton_ref != nullptr);
+      }
+      if (from_all_predecessors) {
+        if (ref_info->IsSingletonAndRemovable() &&
+            block->IsSingleReturnOrReturnVoidAllowingPhis()) {
+          // Values in the singleton are not needed anymore.
+        } else if (!IsStore(merged_value)) {
+          // We don't track merged value as a store anymore. We have to
+          // hold the stores in predecessors live here.
           for (HBasicBlock* predecessor : predecessors) {
             ScopedArenaVector<HInstruction*>& pred_values =
                 heap_values_for_[predecessor->GetBlockId()];
@@ -346,18 +470,33 @@ class LSEVisitor : public HGraphDelegateVisitor {
           }
         }
       } else {
-        // Currenctly we don't eliminate stores to non-singletons.
+        DCHECK(singleton_ref != nullptr);
+        // singleton_ref is non-existing at the beginning of the block. There is
+        // no need to keep the stores.
       }
 
-      if ((merged_value == nullptr) || !from_all_predecessors) {
+      if (!from_all_predecessors) {
         DCHECK(singleton_ref != nullptr);
         DCHECK((singleton_ref->GetBlock() == block) ||
-               !singleton_ref->GetBlock()->Dominates(block));
+               !singleton_ref->GetBlock()->Dominates(block))
+            << "method: " << GetGraph()->GetMethodName();
         // singleton_ref is not defined before block or defined only in some of its
         // predecessors, so block doesn't really have the location at its entry.
         heap_values[i] = kUnknownHeapValue;
-      } else {
+      } else if (predecessors.size() == 1) {
+        // Inherit heap value from the single predecessor.
+        DCHECK_EQ(heap_values_for_[predecessors[0]->GetBlockId()][i], merged_value);
         heap_values[i] = merged_value;
+      } else {
+        DCHECK(merged_value == kUnknownHeapValue ||
+               merged_value == kDefaultHeapValue ||
+               merged_value->GetBlock()->Dominates(block));
+        if (merged_value != kUnknownHeapValue) {
+          heap_values[i] = merged_value;
+        } else {
+          // Stores in different predecessors may be storing the same value.
+          heap_values[i] = merged_store_value;
+        }
       }
     }
   }
@@ -423,23 +562,12 @@ class LSEVisitor : public HGraphDelegateVisitor {
       heap_values[idx] = constant;
       return;
     }
-    if (heap_value != kUnknownHeapValue) {
-      if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) {
-        HInstruction* store = heap_value;
-        // This load must be from a singleton since it's from the same
-        // field/element that a "removed" store puts the value. That store
-        // must be to a singleton's field/element.
-        DCHECK(ref_info->IsSingleton());
-        // Get the real heap value of the store.
-        heap_value = heap_value->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2);
-        // heap_value may already have a substitute.
-        heap_value = FindSubstitute(heap_value);
-      }
-    }
+    heap_value = GetRealHeapValue(heap_value);
     if (heap_value == kUnknownHeapValue) {
       // Load isn't eliminated. Put the load as the value into the HeapLocation.
       // This acts like GVN but with better aliasing analysis.
       heap_values[idx] = instruction;
+      KeepStoresIfAliasedToLocation(heap_values, idx);
     } else {
       if (DataType::Kind(heap_value->GetType()) != DataType::Kind(instruction->GetType())) {
         // The only situation where the same heap location has different type is when
@@ -452,6 +580,10 @@ class LSEVisitor : public HGraphDelegateVisitor {
           DCHECK(heap_value->IsArrayGet()) << heap_value->DebugName();
           DCHECK(instruction->IsArrayGet()) << instruction->DebugName();
         }
+        // Load isn't eliminated. Put the load as the value into the HeapLocation.
+        // This acts like GVN but with better aliasing analysis.
+        heap_values[idx] = instruction;
+        KeepStoresIfAliasedToLocation(heap_values, idx);
         return;
       }
       AddRemovedLoad(instruction, heap_value);
@@ -460,12 +592,21 @@ class LSEVisitor : public HGraphDelegateVisitor {
   }
 
   bool Equal(HInstruction* heap_value, HInstruction* value) {
+    DCHECK(!IsStore(value)) << value->DebugName();
+    if (heap_value == kUnknownHeapValue) {
+      // Don't compare kUnknownHeapValue with other values.
+      return false;
+    }
     if (heap_value == value) {
       return true;
     }
     if (heap_value == kDefaultHeapValue && GetDefaultValue(value->GetType()) == value) {
       return true;
     }
+    HInstruction* real_heap_value = GetRealHeapValue(heap_value);
+    if (real_heap_value != heap_value) {
+      return Equal(real_heap_value, value);
+    }
     return false;
   }
 
@@ -476,6 +617,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
                         size_t vector_length,
                         int16_t declaring_class_def_index,
                         HInstruction* value) {
+    DCHECK(!IsStore(value)) << value->DebugName();
     // value may already have a substitute.
     value = FindSubstitute(value);
     HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref);
@@ -486,59 +628,47 @@ class LSEVisitor : public HGraphDelegateVisitor {
     ScopedArenaVector<HInstruction*>& heap_values =
         heap_values_for_[instruction->GetBlock()->GetBlockId()];
     HInstruction* heap_value = heap_values[idx];
-    bool same_value = false;
     bool possibly_redundant = false;
+
     if (Equal(heap_value, value)) {
       // Store into the heap location with the same value.
-      same_value = true;
-    } else if (index != nullptr &&
-               heap_location_collector_.GetHeapLocation(idx)->HasAliasedLocations()) {
-      // For array element, don't eliminate stores if the location can be aliased
-      // (due to either ref or index aliasing).
-    } else if (ref_info->IsSingleton()) {
-      // Store into a field/element of a singleton. The value cannot be killed due to
-      // aliasing/invocation. It can be redundant since future loads can
-      // directly get the value set by this instruction. The value can still be killed due to
-      // merging or loop side effects. Stores whose values are killed due to merging/loop side
-      // effects later will be removed from possibly_removed_stores_ when that is detected.
-      // Stores whose values may be needed after method return or deoptimization
-      // are also removed from possibly_removed_stores_ when that is detected.
-      possibly_redundant = true;
+      // This store can be eliminated right away.
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      return;
+    } else {
       HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
-      if (loop_info != nullptr) {
-        // instruction is a store in the loop so the loop must does write.
+      if (loop_info == nullptr) {
+        // Store is not in a loop. We try to precisely track the heap value by
+        // the store.
+        possibly_redundant = true;
+      } else if (!loop_info->IsIrreducible()) {
+        // instruction is a store in the loop so the loop must do write.
         DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
-
-        if (loop_info->IsDefinedOutOfTheLoop(original_ref)) {
-          DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
-          // Keep the store since its value may be needed at the loop header.
-          possibly_redundant = false;
-        } else {
-          // The singleton is created inside the loop. Value stored to it isn't needed at
+        if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(original_ref)) {
+          // original_ref is created inside the loop. Value stored to it isn't needed at
           // the loop header. This is true for outer loops also.
+          possibly_redundant = true;
+        } else {
+          // Keep the store since its value may be needed at the loop header.
         }
+      } else {
+        // Keep the store inside irreducible loops.
       }
     }
-    if (same_value || possibly_redundant) {
+    if (possibly_redundant) {
       possibly_removed_stores_.push_back(instruction);
     }
 
-    if (!same_value) {
-      if (possibly_redundant) {
-        DCHECK(instruction->IsInstanceFieldSet() || instruction->IsArraySet());
-        // Put the store as the heap value. If the value is loaded from heap
-        // by a load later, this store isn't really redundant.
-        heap_values[idx] = instruction;
-      } else {
-        heap_values[idx] = value;
-      }
-    }
+    // Put the store as the heap value. If the value is loaded or needed after
+    // return/deoptimization later, this store isn't really redundant.
+    heap_values[idx] = instruction;
+
     // This store may kill values in other heap locations due to aliasing.
     for (size_t i = 0; i < heap_values.size(); i++) {
       if (i == idx) {
         continue;
       }
-      if (heap_values[i] == value) {
+      if (Equal(heap_values[i], value)) {
         // Same value should be kept even if aliasing happens.
         continue;
       }
@@ -547,7 +677,9 @@ class LSEVisitor : public HGraphDelegateVisitor {
         continue;
       }
       if (heap_location_collector_.MayAlias(i, idx)) {
-        // Kill heap locations that may alias.
+        // Kill heap locations that may alias and as a result if the heap value
+        // is a store, the store needs to be kept.
+        KeepIfIsStore(heap_values[i]);
         heap_values[i] = kUnknownHeapValue;
       }
     }
@@ -633,24 +765,35 @@ class LSEVisitor : public HGraphDelegateVisitor {
     const ScopedArenaVector<HInstruction*>& heap_values =
         heap_values_for_[instruction->GetBlock()->GetBlockId()];
     for (HInstruction* heap_value : heap_values) {
-      // Filter out fake instructions before checking instruction kind below.
-      if (heap_value == kUnknownHeapValue || heap_value == kDefaultHeapValue) {
-        continue;
-      }
       // A store is kept as the heap value for possibly removed stores.
-      if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) {
-        // Check whether the reference for a store is used by an environment local of
-        // HDeoptimize.
+      // That value stored is generally observeable after deoptimization, except
+      // for singletons that don't escape after deoptimization.
+      if (IsStore(heap_value)) {
+        if (heap_value->IsStaticFieldSet()) {
+          KeepIfIsStore(heap_value);
+          continue;
+        }
         HInstruction* reference = heap_value->InputAt(0);
-        DCHECK(heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton());
-        for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) {
-          HEnvironment* user = use.GetUser();
-          if (user->GetHolder() == instruction) {
-            // The singleton for the store is visible at this deoptimization
-            // point. Need to keep the store so that the heap value is
-            // seen by the interpreter.
+        if (heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton()) {
+          if (reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable()) {
+            // Finalizable objects alway escape.
             KeepIfIsStore(heap_value);
+            continue;
+          }
+          // Check whether the reference for a store is used by an environment local of
+          // HDeoptimize. If not, the singleton is not observed after
+          // deoptimizion.
+          for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) {
+            HEnvironment* user = use.GetUser();
+            if (user->GetHolder() == instruction) {
+              // The singleton for the store is visible at this deoptimization
+              // point. Need to keep the store so that the heap value is
+              // seen by the interpreter.
+              KeepIfIsStore(heap_value);
+            }
           }
+        } else {
+          KeepIfIsStore(heap_value);
         }
       }
     }
@@ -691,9 +834,12 @@ class LSEVisitor : public HGraphDelegateVisitor {
         // Singleton references cannot be seen by the callee.
       } else {
         if (side_effects.DoesAnyRead()) {
+          // Invocation may read the heap value.
           KeepIfIsStore(heap_values[i]);
         }
         if (side_effects.DoesAnyWrite()) {
+          // Keep the store since it's not used to track the heap value anymore.
+          KeepIfIsStore(heap_values[i]);
           heap_values[i] = kUnknownHeapValue;
         }
       }
@@ -758,7 +904,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
       return;
     }
     if (ref_info->IsSingletonAndRemovable()) {
-      singleton_new_arrays_.push_back(new_array);
+      singleton_new_instances_.push_back(new_array);
     }
     ScopedArenaVector<HInstruction*>& heap_values =
         heap_values_for_[new_array->GetBlock()->GetBlockId()];
@@ -791,7 +937,6 @@ class LSEVisitor : public HGraphDelegateVisitor {
   ScopedArenaVector<HInstruction*> possibly_removed_stores_;
 
   ScopedArenaVector<HInstruction*> singleton_new_instances_;
-  ScopedArenaVector<HInstruction*> singleton_new_arrays_;
 
   DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
 };
diff --git a/compiler/utils/atomic_dex_ref_map-inl.h b/compiler/utils/atomic_dex_ref_map-inl.h
index 203e484fb7..7023b9a0e8 100644
--- a/compiler/utils/atomic_dex_ref_map-inl.h
+++ b/compiler/utils/atomic_dex_ref_map-inl.h
@@ -75,6 +75,18 @@ inline bool AtomicDexRefMap<DexFileReferenceType, Value>::Get(const DexFileRefer
 }
 
 template <typename DexFileReferenceType, typename Value>
+inline bool AtomicDexRefMap<DexFileReferenceType, Value>::Remove(const DexFileReferenceType& ref,
+                                                                 Value* out) {
+  ElementArray* const array = GetArray(ref.dex_file);
+  if (array == nullptr) {
+    return false;
+  }
+  *out = (*array)[ref.index].LoadRelaxed();
+  (*array)[ref.index].StoreSequentiallyConsistent(nullptr);
+  return true;
+}
+
+template <typename DexFileReferenceType, typename Value>
 inline void AtomicDexRefMap<DexFileReferenceType, Value>::AddDexFile(const DexFile* dex_file) {
   arrays_.Put(dex_file, std::move(ElementArray(NumberOfDexIndices(dex_file))));
 }
diff --git a/compiler/utils/atomic_dex_ref_map.h b/compiler/utils/atomic_dex_ref_map.h
index 9ff506d6a4..3474e16b8d 100644
--- a/compiler/utils/atomic_dex_ref_map.h
+++ b/compiler/utils/atomic_dex_ref_map.h
@@ -45,6 +45,9 @@ class AtomicDexRefMap {
   // Retreive an item, returns false if the dex file is not added.
   bool Get(const DexFileReferenceType& ref, Value* out) const;
 
+  // Remove an item and return the existing value. Returns false if the dex file is not added.
+  bool Remove(const DexFileReferenceType& ref, Value* out);
+
   // Dex files must be added before method references belonging to them can be used as keys. Not
   // thread safe.
   void AddDexFile(const DexFile* dex_file);
diff --git a/dexlayout/compact_dex_writer.cc b/dexlayout/compact_dex_writer.cc
index 08438c4f4a..ca13f7588b 100644
--- a/dexlayout/compact_dex_writer.cc
+++ b/dexlayout/compact_dex_writer.cc
@@ -162,11 +162,18 @@ void CompactDexWriter::WriteCodeItem(Stream* stream,
 
   static constexpr size_t kPayloadInstructionRequiredAlignment = 4;
   const uint32_t current_code_item_start = stream->Tell() + preheader_bytes;
-  if (!IsAlignedParam(current_code_item_start, kPayloadInstructionRequiredAlignment)) {
+  if (!IsAlignedParam(current_code_item_start, kPayloadInstructionRequiredAlignment) ||
+      kIsDebugBuild) {
     // If the preheader is going to make the code unaligned, consider adding 2 bytes of padding
     // before if required.
-    for (const DexInstructionPcPair& instruction : code_item->Instructions()) {
-      const Instruction::Code opcode = instruction->Opcode();
+    IterationRange<DexInstructionIterator> instructions = code_item->Instructions();
+    SafeDexInstructionIterator it(instructions.begin(), instructions.end());
+    for (; !it.IsErrorState() && it < instructions.end(); ++it) {
+      // In case the instruction goes past the end of the code item, make sure to not process it.
+      if (std::next(it).IsErrorState()) {
+        break;
+      }
+      const Instruction::Code opcode = it->Opcode();
       // Payload instructions possibly require special alignment for their data.
       if (opcode == Instruction::FILL_ARRAY_DATA ||
           opcode == Instruction::PACKED_SWITCH ||
@@ -460,6 +467,11 @@ void CompactDexWriter::Write(DexContainer* output)  {
     // Rewrite the header with the calculated checksum.
     WriteHeader(main_stream);
   }
+
+  // Clear the dedupe to prevent interdex code item deduping. This does not currently work well with
+  // dex2oat's class unloading. The issue is that verification encounters quickened opcodes after
+  // the first dex gets unloaded.
+  code_item_dedupe_->Clear();
 }
 
 std::unique_ptr<DexContainer> CompactDexWriter::CreateDexContainer() const {
diff --git a/dexlayout/compact_dex_writer.h b/dexlayout/compact_dex_writer.h
index 24d0fbf61d..ea9f7d13db 100644
--- a/dexlayout/compact_dex_writer.h
+++ b/dexlayout/compact_dex_writer.h
@@ -44,6 +44,11 @@ class CompactDexWriter : public DexWriter {
     // Returns the offset of the deduplicated data or kDidNotDedupe did deduplication did not occur.
     uint32_t Dedupe(uint32_t data_start, uint32_t data_end, uint32_t item_offset);
 
+    // Clear dedupe state to prevent deduplication against existing items in the future.
+    void Clear() {
+      dedupe_map_.clear();
+    }
+
    private:
     class HashedMemoryRange {
      public:
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index d2f9cb9ce5..5635271dc1 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -66,8 +66,7 @@ class Options {
   bool visualize_pattern_ = false;
   bool update_checksum_ = false;
   CompactDexLevel compact_dex_level_ = CompactDexLevel::kCompactDexLevelNone;
-  // Disabled until dex2oat properly handles quickening of deduped code items.
-  bool dedupe_code_items_ = false;
+  bool dedupe_code_items_ = true;
   OutputFormat output_format_ = kOutputPlain;
   const char* output_dex_directory_ = nullptr;
   const char* output_file_name_ = nullptr;
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index be272fcf2c..bebdc202e7 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -221,6 +221,12 @@ static const char kDuplicateCodeItemInputDex[] =
     "AHAAAAACAAAAAwAAAIwAAAADAAAAAQAAAJgAAAAFAAAABAAAAKQAAAAGAAAAAQAAAMQAAAABIAAA"
     "AwAAAOQAAAACIAAABwAAACQBAAADIAAAAwAAAFYBAAAAIAAAAQAAAGUBAAAAEAAAAQAAAHgBAAA=";
 
+// Returns the default compact dex option for dexlayout based on kDefaultCompactDexLevel.
+static std::vector<std::string> DefaultCompactDexOption() {
+  return (kDefaultCompactDexLevel == CompactDexLevel::kCompactDexLevelFast) ?
+      std::vector<std::string>{"-x", "fast"} : std::vector<std::string>{"-x", "none"};
+}
+
 static void WriteBase64ToFile(const char* base64, File* file) {
   // Decode base64.
   CHECK(base64 != nullptr);
@@ -289,7 +295,7 @@ class DexLayoutTest : public CommonRuntimeTest {
     for (const std::string &dex_file : GetLibCoreDexFileNames()) {
       std::vector<std::string> dexlayout_args =
           { "-w", tmp_dir, "-o", tmp_name, dex_file };
-      if (!DexLayoutExec(dexlayout_args, error_msg)) {
+      if (!DexLayoutExec(dexlayout_args, error_msg, /*pass_default_cdex_option*/ false)) {
         return false;
       }
       size_t dex_file_last_slash = dex_file.rfind('/');
@@ -304,12 +310,10 @@ class DexLayoutTest : public CommonRuntimeTest {
       if (!::art::Exec(diff_exec_argv, error_msg)) {
         return false;
       }
-      std::vector<std::string> rm_zip_exec_argv = { "/bin/rm", tmp_dir + "classes.dex" };
-      if (!::art::Exec(rm_zip_exec_argv, error_msg)) {
+      if (!UnlinkFile(tmp_dir + "classes.dex")) {
         return false;
       }
-      std::vector<std::string> rm_out_exec_argv = { "/bin/rm", tmp_dir + dex_file_name };
-      if (!::art::Exec(rm_out_exec_argv, error_msg)) {
+      if (!UnlinkFile(tmp_dir + dex_file_name)) {
         return false;
       }
     }
@@ -426,10 +430,7 @@ class DexLayoutTest : public CommonRuntimeTest {
     }
 
     // -v makes sure that the layout did not corrupt the dex file.
-
-    std::vector<std::string> rm_exec_argv =
-        { "/bin/rm", dex_file, profile_file, output_dex };
-    if (!::art::Exec(rm_exec_argv, error_msg)) {
+    if (!UnlinkFile(dex_file) || !UnlinkFile(profile_file) || !UnlinkFile(output_dex)) {
       return false;
     }
     return true;
@@ -467,7 +468,7 @@ class DexLayoutTest : public CommonRuntimeTest {
     // -v makes sure that the layout did not corrupt the dex file.
     std::vector<std::string> dexlayout_args =
         { "-i", "-v", "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, dex_file };
-    if (!DexLayoutExec(dexlayout_args, error_msg)) {
+    if (!DexLayoutExec(dexlayout_args, error_msg, /*pass_default_cdex_option*/ false)) {
       return false;
     }
 
@@ -479,7 +480,7 @@ class DexLayoutTest : public CommonRuntimeTest {
     // -i since the checksum won't match from the first layout.
     std::vector<std::string> second_dexlayout_args =
         { "-i", "-v", "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, output_dex };
-    if (!DexLayoutExec(second_dexlayout_args, error_msg)) {
+    if (!DexLayoutExec(second_dexlayout_args, error_msg, /*pass_default_cdex_option*/ false)) {
       return false;
     }
 
@@ -490,10 +491,11 @@ class DexLayoutTest : public CommonRuntimeTest {
       diff_result = false;
     }
 
-    std::vector<std::string> rm_exec_argv =
-        { "/bin/rm", dex_file, profile_file, output_dex, second_output_dex };
-    if (!::art::Exec(rm_exec_argv, error_msg)) {
-      return false;
+    std::vector<std::string> test_files = { dex_file, profile_file, output_dex, second_output_dex };
+    for (auto test_file : test_files) {
+      if (!UnlinkFile(test_file)) {
+        return false;
+      }
     }
 
     return diff_result;
@@ -512,7 +514,7 @@ class DexLayoutTest : public CommonRuntimeTest {
     std::string output_dex = tmp_dir + "classes.dex.new";
 
     std::vector<std::string> dexlayout_args = { "-w", tmp_dir, "-o", "/dev/null", input_dex };
-    if (!DexLayoutExec(dexlayout_args, error_msg)) {
+    if (!DexLayoutExec(dexlayout_args, error_msg, /*pass_default_cdex_option*/ false)) {
       return false;
     }
 
@@ -522,9 +524,11 @@ class DexLayoutTest : public CommonRuntimeTest {
       return false;
     }
 
-    std::vector<std::string> rm_exec_argv = { "/bin/rm", input_dex, output_dex };
-    if (!::art::Exec(rm_exec_argv, error_msg)) {
-      return false;
+    std::vector<std::string> dex_files = { input_dex, output_dex };
+    for (auto dex_file : dex_files) {
+      if (!UnlinkFile(dex_file)) {
+        return false;
+      }
     }
     return true;
   }
@@ -550,17 +554,27 @@ class DexLayoutTest : public CommonRuntimeTest {
     return true;
   }
 
-  bool DexLayoutExec(const std::vector<std::string>& dexlayout_args, std::string* error_msg) {
+  bool DexLayoutExec(const std::vector<std::string>& dexlayout_args,
+                     std::string* error_msg,
+                     bool pass_default_cdex_option = true) {
     std::vector<std::string> argv;
 
     std::string dexlayout = GetDexLayoutPath();
     CHECK(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
     argv.push_back(dexlayout);
+    if (pass_default_cdex_option) {
+      std::vector<std::string> cdex_level = DefaultCompactDexOption();
+      argv.insert(argv.end(), cdex_level.begin(), cdex_level.end());
+    }
 
     argv.insert(argv.end(), dexlayout_args.begin(), dexlayout_args.end());
 
     return ::art::Exec(argv, error_msg);
   }
+
+  bool UnlinkFile(const std::string& file_path) {
+    return unix_file::FdFile(file_path, 0, false).Unlink();
+  }
 };
 
 
@@ -730,11 +744,29 @@ TEST_F(DexLayoutTest, CodeItemOverrun) {
     CHECK(mutated_successfully)
         << "Failed to find candidate code item with only one code unit in last instruction.";
   });
-  std::vector<std::string> dexlayout_args = { "-i", "-o", "/dev/null", temp_dex.GetFilename() };
+
+  std::string error_msg;
+
+  ScratchFile tmp_file;
+  const std::string& tmp_name = tmp_file.GetFilename();
+  size_t tmp_last_slash = tmp_name.rfind('/');
+  std::string tmp_dir = tmp_name.substr(0, tmp_last_slash + 1);
+  ScratchFile profile_file;
+
+  std::vector<std::string> dexlayout_args =
+      { "-i",
+        "-v",
+        "-w", tmp_dir,
+        "-o", tmp_name,
+        "-p", profile_file.GetFilename(),
+        temp_dex.GetFilename()
+      };
+  // -v makes sure that the layout did not corrupt the dex file.
   ASSERT_TRUE(DexLayoutExec(&temp_dex,
                             /*dex_filename*/ nullptr,
-                            nullptr /* profile_file */,
+                            &profile_file,
                             dexlayout_args));
+  ASSERT_TRUE(UnlinkFile(temp_dex.GetFilename() + ".new"));
 }
 
 // Test that link data is written out (or at least the header is updated).
@@ -772,11 +804,7 @@ TEST_F(DexLayoutTest, LinkData) {
                             /*dex_filename*/ nullptr,
                             &profile_file,
                             dexlayout_args));
-
-  std::string output_dex = temp_dex.GetFilename() + ".new";
-  std::vector<std::string> rm_exec_argv =
-      { "/bin/rm", output_dex };
-  ASSERT_TRUE(::art::Exec(rm_exec_argv, &error_msg));
+  ASSERT_TRUE(UnlinkFile(temp_dex.GetFilename() + ".new"));
 }
 
 TEST_F(DexLayoutTest, ClassFilter) {
diff --git a/openjdkjvmti/ti_class_definition.h b/openjdkjvmti/ti_class_definition.h
index e0b9b31622..31c3611e72 100644
--- a/openjdkjvmti/ti_class_definition.h
+++ b/openjdkjvmti/ti_class_definition.h
@@ -49,8 +49,7 @@ namespace openjdkjvmti {
 class ArtClassDefinition {
  public:
   // If we support doing a on-demand dex-dequickening using signal handlers.
-  // TODO Make this true. We currently have some ASAN issues with this.
-  static constexpr bool kEnableOnDemandDexDequicken = false;
+  static constexpr bool kEnableOnDemandDexDequicken = true;
 
   ArtClassDefinition()
       : klass_(nullptr),
diff --git a/openjdkjvmti/transform.cc b/openjdkjvmti/transform.cc
index 7b19a24ba4..dc9f69a96a 100644
--- a/openjdkjvmti/transform.cc
+++ b/openjdkjvmti/transform.cc
@@ -128,6 +128,11 @@ class TransformationFaultHandler FINAL : public art::FaultHandler {
       }
     }
 
+    if (LIKELY(self != nullptr)) {
+      CHECK_EQ(self->GetState(), art::ThreadState::kNative)
+          << "Transformation fault handler occurred outside of native mode";
+    }
+
     VLOG(signals) << "Lazy initialization of dex file for transformation of " << res->GetName()
                   << " during SEGV";
     res->InitializeMemory();
@@ -250,6 +255,10 @@ void Transformer::TransformSingleClassDirect(EventHandler* event_handler,
   static_assert(kEvent == ArtJvmtiEvent::kClassFileLoadHookNonRetransformable ||
                 kEvent == ArtJvmtiEvent::kClassFileLoadHookRetransformable,
                 "bad event type");
+  // We don't want to do transitions between calling the event and setting the new data so change to
+  // native state early. This also avoids any problems that the FaultHandler might have in
+  // determining if an access to the dex_data is from generated code or not.
+  art::ScopedThreadStateChange stsc(self, art::ThreadState::kNative);
   ScopedDefinitionHandler handler(def);
   jint new_len = -1;
   unsigned char* new_data = nullptr;
diff --git a/runtime/dex/dex_file_layout.h b/runtime/dex/dex_file_layout.h
index a7b9051f24..793e3b5de7 100644
--- a/runtime/dex/dex_file_layout.h
+++ b/runtime/dex/dex_file_layout.h
@@ -83,7 +83,7 @@ class DexLayoutSection {
     }
 
     void CombineSection(uint32_t start_offset, uint32_t end_offset) {
-      DCHECK_LT(start_offset, end_offset);
+      DCHECK_LE(start_offset, end_offset);
       if (start_offset_ == end_offset_) {
         start_offset_ = start_offset;
         end_offset_ = end_offset;
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index 172472a957..3adbf18a7a 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -166,7 +166,6 @@ TEST_F(MemMapTest, Start) {
 // We need mremap to be able to test ReplaceMapping at all
 #if HAVE_MREMAP_SYSCALL
 TEST_F(MemMapTest, ReplaceMapping_SameSize) {
-  TEST_DISABLED_FOR_MEMORY_TOOL_VALGRIND();
   std::string error_msg;
   std::unique_ptr<MemMap> dest(MemMap::MapAnonymous("MapAnonymousEmpty-atomic-replace-dest",
                                                     nullptr,
@@ -204,7 +203,6 @@ TEST_F(MemMapTest, ReplaceMapping_SameSize) {
 }
 
 TEST_F(MemMapTest, ReplaceMapping_MakeLarger) {
-  TEST_DISABLED_FOR_MEMORY_TOOL_VALGRIND();
   std::string error_msg;
   std::unique_ptr<MemMap> dest(MemMap::MapAnonymous("MapAnonymousEmpty-atomic-replace-dest",
                                                     nullptr,
@@ -253,7 +251,6 @@ TEST_F(MemMapTest, ReplaceMapping_MakeLarger) {
 }
 
 TEST_F(MemMapTest, ReplaceMapping_MakeSmaller) {
-  TEST_DISABLED_FOR_MEMORY_TOOL_VALGRIND();
   std::string error_msg;
   std::unique_ptr<MemMap> dest(MemMap::MapAnonymous("MapAnonymousEmpty-atomic-replace-dest",
                                                     nullptr,
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index bdfb44a87e..6cb5aec191 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -83,7 +83,7 @@ static bool CheckStack(Backtrace* bt, const std::vector<std::string>& seq) {
   printf("Cannot find %s in backtrace:\n", seq[cur_search_index].c_str());
   for (Backtrace::const_iterator it = bt->begin(); it != bt->end(); ++it) {
     if (BacktraceMap::IsValid(it->map)) {
-      printf("  %s\n", it->func_name.c_str());
+      printf("  %s\n", Backtrace::FormatFrameData(&*it).c_str());
     }
   }
 
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index f6332b5503..98838c5089 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -398,7 +398,6 @@ public class Main {
   /// CHECK-START: int Main.test15() load_store_elimination (after)
   /// CHECK: <<Const2:i\d+>> IntConstant 2
   /// CHECK: StaticFieldSet
-  /// CHECK: StaticFieldSet
   /// CHECK-NOT: StaticFieldGet
   /// CHECK: Return [<<Const2>>]
 
@@ -773,6 +772,127 @@ public class Main {
     return obj;
   }
 
+  /// CHECK-START: void Main.testStoreStore2(TestClass2) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: void Main.testStoreStore2(TestClass2) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
+
+  private static void testStoreStore2(TestClass2 obj) {
+    obj.i = 41;
+    obj.j = 42;
+    obj.i = 43;
+    obj.j = 44;
+  }
+
+  /// CHECK-START: void Main.testStoreStore3(TestClass2, boolean) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: void Main.testStoreStore3(TestClass2, boolean) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
+
+  private static void testStoreStore3(TestClass2 obj, boolean flag) {
+    obj.i = 41;
+    obj.j = 42;    // redundant since it's overwritten in both branches below.
+    if (flag) {
+      obj.j = 43;
+    } else {
+      obj.j = 44;
+    }
+  }
+
+  /// CHECK-START: void Main.testStoreStore4() load_store_elimination (before)
+  /// CHECK: StaticFieldSet
+  /// CHECK: StaticFieldSet
+
+  /// CHECK-START: void Main.testStoreStore4() load_store_elimination (after)
+  /// CHECK: StaticFieldSet
+  /// CHECK-NOT: StaticFieldSet
+
+  private static void testStoreStore4() {
+    TestClass.si = 61;
+    TestClass.si = 62;
+  }
+
+  /// CHECK-START: int Main.testStoreStore5(TestClass2, TestClass2) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: int Main.testStoreStore5(TestClass2, TestClass2) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+
+  private static int testStoreStore5(TestClass2 obj1, TestClass2 obj2) {
+    obj1.i = 71;      // This store is needed since obj2.i may load from it.
+    int i = obj2.i;
+    obj1.i = 72;
+    return i;
+  }
+
+  /// CHECK-START: int Main.testStoreStore6(TestClass2, TestClass2) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: int Main.testStoreStore6(TestClass2, TestClass2) load_store_elimination (after)
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+
+  private static int testStoreStore6(TestClass2 obj1, TestClass2 obj2) {
+    obj1.i = 81;      // This store is not needed since obj2.j cannot load from it.
+    int j = obj2.j;
+    obj1.i = 82;
+    return j;
+  }
+
+  /// CHECK-START: int Main.testNoSideEffects(int[]) load_store_elimination (before)
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+
+  /// CHECK-START: int Main.testNoSideEffects(int[]) load_store_elimination (after)
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK-NOT: ArraySet
+  /// CHECK-NOT: ArrayGet
+
+  private static int testNoSideEffects(int[] array) {
+    array[0] = 101;
+    array[1] = 102;
+    int bitCount = Integer.bitCount(0x3456);
+    array[1] = 103;
+    return array[0] + bitCount;
+  }
+
+  /// CHECK-START: void Main.testThrow(TestClass2, java.lang.Exception) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: Throw
+
+  /// CHECK-START: void Main.testThrow(TestClass2, java.lang.Exception) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: Throw
+
+  // Make sure throw keeps the store.
+  private static void testThrow(TestClass2 obj, Exception e) throws Exception {
+    obj.i = 55;
+    throw e;
+  }
+
   /// CHECK-START: int Main.testStoreStoreWithDeoptimize(int[]) load_store_elimination (before)
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
@@ -814,23 +934,6 @@ public class Main {
     return arr[0] + arr[1] + arr[2] + arr[3];
   }
 
-  /// CHECK-START: int Main.testNoSideEffects(int[]) load_store_elimination (before)
-  /// CHECK: ArraySet
-  /// CHECK: ArraySet
-  /// CHECK: ArrayGet
-
-  /// CHECK-START: int Main.testNoSideEffects(int[]) load_store_elimination (after)
-  /// CHECK: ArraySet
-  /// CHECK: ArraySet
-  /// CHECK-NOT: ArrayGet
-
-  private static int testNoSideEffects(int[] array) {
-    array[0] = 101;
-    int bitCount = Integer.bitCount(0x3456);
-    array[1] = array[0] + 1;
-    return array[0] + bitCount;
-  }
-
   /// CHECK-START: double Main.getCircleArea(double, boolean) load_store_elimination (before)
   /// CHECK: NewInstance
 
@@ -1105,16 +1208,46 @@ public class Main {
 
     assertIntEquals(testStoreStore().i, 41);
     assertIntEquals(testStoreStore().j, 43);
-    assertIntEquals(testStoreStoreWithDeoptimize(new int[4]), 4);
 
     assertIntEquals(testExitMerge(true), 2);
     assertIntEquals(testExitMerge2(true), 2);
     assertIntEquals(testExitMerge2(false), 2);
 
-    int ret = testNoSideEffects(iarray);
+    TestClass2 testclass2 = new TestClass2();
+    testStoreStore2(testclass2);
+    assertIntEquals(testclass2.i, 43);
+    assertIntEquals(testclass2.j, 44);
+
+    testStoreStore3(testclass2, true);
+    assertIntEquals(testclass2.i, 41);
+    assertIntEquals(testclass2.j, 43);
+    testStoreStore3(testclass2, false);
+    assertIntEquals(testclass2.i, 41);
+    assertIntEquals(testclass2.j, 44);
+
+    testStoreStore4();
+    assertIntEquals(TestClass.si, 62);
+
+    int ret = testStoreStore5(testclass2, testclass2);
+    assertIntEquals(testclass2.i, 72);
+    assertIntEquals(ret, 71);
+
+    testclass2.j = 88;
+    ret = testStoreStore6(testclass2, testclass2);
+    assertIntEquals(testclass2.i, 82);
+    assertIntEquals(ret, 88);
+
+    ret = testNoSideEffects(iarray);
     assertIntEquals(iarray[0], 101);
-    assertIntEquals(iarray[1], 102);
+    assertIntEquals(iarray[1], 103);
     assertIntEquals(ret, 108);
+
+    try {
+      testThrow(testclass2, new Exception());
+    } catch (Exception e) {}
+    assertIntEquals(testclass2.i, 55);
+
+    assertIntEquals(testStoreStoreWithDeoptimize(new int[4]), 4);
   }
 
   static boolean sFlag;
diff --git a/test/530-regression-lse/expected.txt b/test/530-regression-lse/expected.txt
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/test/530-regression-lse/expected.txt
diff --git a/test/530-regression-lse/info.txt b/test/530-regression-lse/info.txt
new file mode 100644
index 0000000000..688d0c8d1b
--- /dev/null
+++ b/test/530-regression-lse/info.txt
@@ -0,0 +1,2 @@
+Regression test (b/72440777) for load store elimination across invocation
+that has only write side effects.
diff --git a/test/530-regression-lse/src/Main.java b/test/530-regression-lse/src/Main.java
new file mode 100644
index 0000000000..7aec21c86a
--- /dev/null
+++ b/test/530-regression-lse/src/Main.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+
+public class Main {
+  public static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  private static void testRelativePositions(ByteBuffer b) throws Exception {
+    // This goes into Memory.pokeByte(), which is an intrinsic that has
+    // kWriteSideEffects. Stores before this call need to be kept.
+    b.put((byte) 0);
+    assertEquals(1, b.position());
+  }
+
+  private static ByteBuffer allocateMapped(int size) throws Exception {
+    File f = File.createTempFile("mapped", "tmp");
+    f.deleteOnExit();
+    RandomAccessFile raf = new RandomAccessFile(f, "rw");
+    raf.setLength(size);
+    FileChannel ch = raf.getChannel();
+    MappedByteBuffer result = ch.map(FileChannel.MapMode.READ_WRITE, 0, size);
+    ch.close();
+    return result;
+  }
+
+  public static void testRelativePositionsMapped() throws Exception {
+    testRelativePositions(allocateMapped(10));
+  }
+
+  public static void main(String[] args) throws Exception {
+    testRelativePositionsMapped();
+  }
+}
diff --git a/test/608-checker-unresolved-lse/src/Main.java b/test/608-checker-unresolved-lse/src/Main.java
index c6f8854b49..a39dd51bdf 100644
--- a/test/608-checker-unresolved-lse/src/Main.java
+++ b/test/608-checker-unresolved-lse/src/Main.java
@@ -88,7 +88,6 @@ public class Main extends MissingSuperClass {
 
   /// CHECK-START: void Main.staticFieldTest() load_store_elimination (after)
   /// CHECK:        StaticFieldSet
-  /// CHECK:        StaticFieldSet
   /// CHECK:        UnresolvedStaticFieldGet
   public static void staticFieldTest() {
     // Ensure Foo is initialized.
diff --git a/test/639-checker-code-sinking/expected.txt b/test/639-checker-code-sinking/expected.txt
index 52e756c231..5d4833aca8 100644
--- a/test/639-checker-code-sinking/expected.txt
+++ b/test/639-checker-code-sinking/expected.txt
@@ -1,3 +1,3 @@
 0
 class java.lang.Object
-43
+42
diff --git a/test/639-checker-code-sinking/src/Main.java b/test/639-checker-code-sinking/src/Main.java
index 7496925adc..a1c30f7b4e 100644
--- a/test/639-checker-code-sinking/src/Main.java
+++ b/test/639-checker-code-sinking/src/Main.java
@@ -337,7 +337,7 @@ public class Main {
   public static void testStoreStore(boolean doThrow) {
     Main m = new Main();
     m.intField = 42;
-    m.intField = 43;
+    m.intField2 = 43;
     if (doThrow) {
       throw new Error(m.$opt$noinline$toString());
     }
@@ -349,6 +349,7 @@ public class Main {
 
   volatile int volatileField;
   int intField;
+  int intField2;
   Object objectField;
   static boolean doThrow;
   static boolean doLoop;
diff --git a/test/672-checker-throw-method/src/Main.java b/test/672-checker-throw-method/src/Main.java
index ceb5eb784c..a507133b91 100644
--- a/test/672-checker-throw-method/src/Main.java
+++ b/test/672-checker-throw-method/src/Main.java
@@ -37,6 +37,12 @@ public class Main {
       doThrow(par);
   }
 
+  static private void checkNotNullSplitAlt(Object obj, String par) {
+    if (obj != null)
+      return;
+    doThrow(par);
+  }
+
   //
   // Various ways of enforcing non-null parameter.
   // In all cases, par should be subject to code sinking.
@@ -174,6 +180,60 @@ public class Main {
   }
 
   //
+  // Various ways of exploiting non-null parameter.
+  // In all cases, implicit null checks are redundant.
+  //
+
+  /// CHECK-START: int Main.deleteNullCheck(int[]) dead_code_elimination$after_inlining (before)
+  /// CHECK:   <<Par:l\d+>>   ParameterValue
+  /// CHECK:   <<Zero:i\d+>>  IntConstant 0
+  /// CHECK:   <<Null:l\d+>>  NullCheck [<<Par>>]
+  /// CHECK:   <<Len:i\d+>>   ArrayLength [<<Null>>]
+  /// CHECK:   <<Check:i\d+>> BoundsCheck [<<Zero>>,<<Len>>]
+  /// CHECK:   <<Get:i\d+>>   ArrayGet [<<Null>>,<<Check>>]
+  /// CHECK:                  Return [<<Get>>]
+  //
+  /// CHECK-START: int Main.deleteNullCheck(int[]) dead_code_elimination$after_inlining (after)
+  /// CHECK:   <<Par:l\d+>>   ParameterValue
+  /// CHECK:   <<Zero:i\d+>>  IntConstant 0
+  /// CHECK:   <<BT:l\d+>>    BoundType [<<Par>>]
+  /// CHECK:   <<Len:i\d+>>   ArrayLength [<<BT>>]
+  /// CHECK:   <<Check:i\d+>> BoundsCheck [<<Zero>>,<<Len>>]
+  /// CHECK:   <<Get:i\d+>>   ArrayGet [<<BT>>,<<Check>>]
+  /// CHECK:                  Return [<<Get>>]
+  //
+  /// CHECK-START: int Main.deleteNullCheck(int[]) dead_code_elimination$after_inlining (after)
+  /// CHECK-NOT:              NullCheck
+  static public int deleteNullCheck(int[] a) {
+    checkNotNullSplit(a, "a");
+    return a[0];
+  }
+
+  /// CHECK-START: int Main.deleteNullCheckAlt(int[]) dead_code_elimination$after_inlining (before)
+  /// CHECK:     NullCheck
+  //
+  /// CHECK-START: int Main.deleteNullCheckAlt(int[]) dead_code_elimination$after_inlining (after)
+  /// CHECK-NOT: NullCheck
+  static public int deleteNullCheckAlt(int[] a) {
+    checkNotNullSplitAlt(a, "a");
+    return a[0];
+  }
+
+  /// CHECK-START: int Main.deleteNullChecks3(int[], int[], int[]) dead_code_elimination$after_inlining (before)
+  /// CHECK:     NullCheck
+  /// CHECK:     NullCheck
+  /// CHECK:     NullCheck
+  //
+  /// CHECK-START: int Main.deleteNullChecks3(int[], int[], int[]) dead_code_elimination$after_inlining (after)
+  /// CHECK-NOT: NullCheck
+  static public int deleteNullChecks3(int[] a, int[] b, int[] c) {
+    checkNotNullSplit(a, "a");
+    checkNotNullSplit(b, "b");
+    checkNotNullSplit(c, "c");
+    return a[0] + b[0] + c[0];
+  }
+
+  //
   // Test driver.
   //
 
@@ -233,6 +293,18 @@ public class Main {
       expectEquals(5, a[i]);
     }
 
+    int[] x = { 11 } ;
+    expectEquals(11, deleteNullCheck(x));
+    int[] y = { 55 } ;
+    int[] z = { 22 } ;
+    expectEquals(88, deleteNullChecks3(x, y, z));
+
+    try {
+      deleteNullCheck(null);
+      System.out.println("should not reach this!");
+    } catch (Error e) {
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/ManyMethods/ManyMethods.java b/test/ManyMethods/ManyMethods.java
index b3a57f6b3b..98b9faffcd 100644
--- a/test/ManyMethods/ManyMethods.java
+++ b/test/ManyMethods/ManyMethods.java
@@ -26,6 +26,8 @@ class ManyMethods {
     public static String msg7 = "Hello World7";
     public static String msg8 = "Hello World8";
     public static String msg9 = "Hello World9";
+    public static String msg10 = "Hello World10";
+    public static String msg11 = "Hello World11";
   }
 
   static class Printer {
@@ -57,35 +59,35 @@ class ManyMethods {
   }
 
   public static void Print4() {
-    Printer.Print(Strings.msg2);
+    Printer.Print(Strings.msg4);
   }
 
   public static void Print5() {
-    Printer.Print(Strings.msg3);
+    Printer.Print(Strings.msg5);
   }
 
   public static void Print6() {
-    Printer2.Print(Strings.msg4);
+    Printer2.Print(Strings.msg6);
   }
 
   public static void Print7() {
-    Printer.Print(Strings.msg5);
+    Printer.Print(Strings.msg7);
   }
 
   public static void Print8() {
-    Printer.Print(Strings.msg6);
+    Printer.Print(Strings.msg8);
   }
 
   public static void Print9() {
-    Printer2.Print(Strings.msg7);
+    Printer2.Print(Strings.msg9);
   }
 
   public static void Print10() {
-    Printer2.Print(Strings.msg8);
+    Printer2.Print(Strings.msg10);
   }
 
   public static void Print11() {
-    Printer.Print(Strings.msg9);
+    Printer.Print(Strings.msg11);
   }
 
   public static void main(String args[]) {
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 4b99003130..2b28409a1f 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -384,6 +384,12 @@
         "variant": "jvmti-stress & jit | redefine-stress & jit"
     },
     {
+        "test_patterns": ["674-hiddenapi"],
+        "description": ["hiddenapi test is failing with redefine stress cdex"],
+        "bug": "http://b/72610009",
+        "variant": "redefine-stress & cdex-fast"
+    },
+    {
         "test_patterns": ["616-cha"],
         "description": ["The test assumes a boot image exists."],
         "bug": "http://b/34193647",
@@ -425,6 +431,7 @@
     },
     {
         "tests": [
+            "714-invoke-custom-lambda-metafactory",
             "950-redefine-intrinsic",
             "951-threaded-obsolete",
             "952-invoke-custom",
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index 8a1e06cabf..3d173f5571 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -357,11 +357,12 @@ def run_tests(tests):
         while threading.active_count() > 2:
           time.sleep(0.1)
           return
+      # NB The order of components here should match the order of
+      # components in the regex parser in parse_test_name.
       test_name = 'test-art-'
       test_name += target + '-run-test-'
       test_name += run + '-'
       test_name += prebuild + '-'
-      test_name += cdex_level + '-'
       test_name += compiler + '-'
       test_name += relocate + '-'
       test_name += trace + '-'
@@ -371,6 +372,7 @@ def run_tests(tests):
       test_name += pictest + '-'
       test_name += debuggable + '-'
       test_name += jvmti + '-'
+      test_name += cdex_level + '-'
       test_name += test
       test_name += address_size
 
diff --git a/test/valgrind-suppressions.txt b/test/valgrind-suppressions.txt
index 086a856f51..a97d03c2d4 100644
--- a/test/valgrind-suppressions.txt
+++ b/test/valgrind-suppressions.txt
@@ -75,3 +75,13 @@
    process_vm_readv(lvec[...])
    fun:process_vm_readv
 }
+
+# Suppressions for IsAddressMapped check in MemMapTest
+{
+   MemMapTest_IsAddressMapped
+   Memcheck:Param
+   msync(start)
+   ...
+   fun:_ZN3art10MemMapTest15IsAddressMappedEPv
+   ...
+}
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 8956e98ed0..a20175531d 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -74,14 +74,7 @@ if [[ $mode == "host" ]]; then
   make_command+=" dx-tests"
   mode_suffix="-host"
 elif [[ $mode == "target" ]]; then
-  # Create dummy hidden API lists which are normally generated by the framework
-  # but which we do not have in the buildbot manifest. These are empty because
-  # we do not want to enforce these rules in the buildbots anyway.
-  hiddenapi_out_dir=${out_dir}/target/common/obj/PACKAGING
-  make_command="mkdir -p ${hiddenapi_out_dir} && "
-  make_command+="touch ${hiddenapi_out_dir}/hiddenapi-{blacklist,dark-greylist,light-greylist}.txt && "
-
-  make_command+="make $j_arg $extra_args $showcommands build-art-target-tests $common_targets"
+  make_command="make $j_arg $extra_args $showcommands build-art-target-tests $common_targets"
   make_command+=" libjavacrypto-target libnetd_client-target linker toybox toolbox sh"
   make_command+=" ${out_dir}/host/linux-x86/bin/adb libstdc++ "
   make_command+=" ${out_dir}/target/product/${TARGET_PRODUCT}/system/etc/public.libraries.txt"