Diffstat (limited to 'compiler')
65 files changed, 2586 insertions, 1033 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index f0bf4997c6..458973684e 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -108,7 +108,8 @@ LIBART_COMPILER_SRC_FILES := \
 	elf_writer_debug.cc \
 	elf_writer_quick.cc \
 	image_writer.cc \
-	oat_writer.cc
+	oat_writer.cc \
+	profile_assistant.cc
 
 LIBART_COMPILER_SRC_FILES_arm := \
 	dex/quick/arm/assemble_arm.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 278c49017e..b5fd1e074f 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -208,8 +208,8 @@ void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, InstructionSe
                                             false,
                                             timer_.get(),
                                             -1,
-                                            /* profile_file */ "",
-                                            /* dex_to_oat_map */ nullptr));
+                                            /* dex_to_oat_map */ nullptr,
+                                            /* profile_compilation_info */ nullptr));
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
 }
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index bcf20c7efa..12568a4ad4 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -92,7 +92,7 @@ class QuickCFITest : public CFITest {
                           false,
                           0,
                           -1,
-                          "",
+                          nullptr,
                           nullptr);
     ClassLinker* linker = nullptr;
     CompilationUnit cu(&pool, isa, &driver, linker);
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 3260a7a050..ebc9a2c9ea 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -520,7 +520,12 @@ static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set)
 // In the rare cases we compile experimental opcodes, the runtime has an option to enable it,
 // which will force scanning for any unsupported opcodes.
 static bool SkipScanningUnsupportedOpcodes(InstructionSet instruction_set) {
-  if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) {
+  Runtime* runtime = Runtime::Current();
+  if (UNLIKELY(runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods))) {
+    // Always need to scan opcodes if we have default methods since invoke-super for interface
+    // methods is never going to be supported in the quick compiler.
+    return false;
+  } else if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) {
     // All opcodes are supported no matter what. Usually not the case
     // since experimental opcodes are not implemented in the quick compiler.
     return true;
@@ -538,8 +543,28 @@ static bool SkipScanningUnsupportedOpcodes(InstructionSet instruction_set) {
   }
 }
 
+bool QuickCompiler::CanCompileInstruction(const MIR* mir,
+                                          const DexFile& dex_file) const {
+  switch (mir->dalvikInsn.opcode) {
+    // Quick compiler won't support new instruction semantics to invoke-super into an interface
+    // method
+    case Instruction::INVOKE_SUPER:  // Fall-through
+    case Instruction::INVOKE_SUPER_RANGE: {
+      DCHECK(mir->dalvikInsn.IsInvoke());
+      uint32_t invoke_method_idx = mir->dalvikInsn.vB;
+      const DexFile::MethodId& method_id = dex_file.GetMethodId(invoke_method_idx);
+      const DexFile::ClassDef* class_def = dex_file.FindClassDef(method_id.class_idx_);
+      // False if we are an interface i.e. !(java_access_flags & kAccInterface)
+      return class_def != nullptr && ((class_def->GetJavaAccessFlags() & kAccInterface) == 0);
+    }
+    default:
+      return true;
+  }
+}
+
 // Skip the method that we do not support currently.
-bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
+bool QuickCompiler::CanCompileMethod(uint32_t method_idx,
+                                     const DexFile& dex_file,
                                      CompilationUnit* cu) const {
   // This is a limitation in mir_graph. See MirGraph::SetNumSSARegs.
   if (cu->mir_graph->GetNumOfCodeAndTempVRs() > kMaxAllowedDalvikRegisters) {
@@ -580,6 +605,9 @@ bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_fil
             << MIRGraph::extended_mir_op_names_[opcode - kMirOpFirst];
       }
       return false;
+    } else if (!CanCompileInstruction(mir, dex_file)) {
+      VLOG(compiler) << "Cannot compile dalvik opcode : " << mir->dalvikInsn.opcode;
+      return false;
     }
     // Check if it invokes a prototype that we cannot support.
     if (std::find(kInvokeOpcodes, kInvokeOpcodes + arraysize(kInvokeOpcodes), opcode)
diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h
index d512b256cd..55f45f1ab0 100644
--- a/compiler/dex/quick/quick_compiler.h
+++ b/compiler/dex/quick/quick_compiler.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_DEX_QUICK_QUICK_COMPILER_H_
 
 #include "compiler.h"
+#include "dex/mir_graph.h"
 
 namespace art {
 
@@ -74,6 +75,8 @@ class QuickCompiler : public Compiler {
   explicit QuickCompiler(CompilerDriver* driver);
 
  private:
+  bool CanCompileInstruction(const MIR* mir, const DexFile& dex_file) const;
+
   std::unique_ptr<PassManager> pre_opt_pass_manager_;
   std::unique_ptr<PassManager> post_opt_pass_manager_;
   DISALLOW_COPY_AND_ASSIGN(QuickCompiler);
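Note on the invoke-super change above: the Quick backend now rejects methods containing an invoke-super that resolves into an interface (default) method, since Quick never learned the new semantics for that case. A minimal sketch of the underlying dex-file predicate, using only the ART-internal DexFile API visible in the hunk (the standalone function name is illustrative):

    // Returns true when the invoke-super target is declared on a regular
    // class; false when the declaring type is an interface (a default
    // method), which the Quick compiler cannot handle.
    static bool IsSupportedInvokeSuperTarget(const art::DexFile& dex_file, uint32_t method_idx) {
      const art::DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
      // FindClassDef returns null for types defined in another dex file.
      const art::DexFile::ClassDef* class_def = dex_file.FindClassDef(method_id.class_idx_);
      return class_def != nullptr &&
             (class_def->GetJavaAccessFlags() & art::kAccInterface) == 0;
    }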
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index 9deabc02e9..b39fe4da4f 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -73,7 +73,7 @@ class QuickAssembleX86TestBase : public testing::Test {
         false,
         0,
         -1,
-        "",
+        nullptr,
         nullptr));
     cu_.reset(new CompilationUnit(pool_.get(), isa_, compiler_driver_.get(), nullptr));
     DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index 84fb4324b5..f18fa67ea5 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -45,7 +45,7 @@ TEST(CompiledMethodStorage, Deduplicate) {
                         false,
                         nullptr,
                         -1,
-                        "",
+                        nullptr,
                         nullptr);
   CompiledMethodStorage* storage = driver.GetCompiledMethodStorage();
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index afb4b71ccf..043bd93bd7 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -347,8 +347,8 @@ CompilerDriver::CompilerDriver(
     size_t thread_count, bool dump_stats, bool dump_passes,
     const std::string& dump_cfg_file_name, bool dump_cfg_append,
     CumulativeLogger* timer, int swap_fd,
-    const std::string& profile_file,
-    const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map)
+    const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
+    const ProfileCompilationInfo* profile_compilation_info)
     : compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
@@ -377,7 +377,8 @@ CompilerDriver::CompilerDriver(
       support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
       dex_files_for_oat_file_(nullptr),
       dex_file_oat_filename_map_(dex_to_oat_map),
-      compiled_method_storage_(swap_fd) {
+      compiled_method_storage_(swap_fd),
+      profile_compilation_info_(profile_compilation_info) {
   DCHECK(compiler_options_ != nullptr);
   DCHECK(verification_results_ != nullptr);
   DCHECK(method_inliner_map_ != nullptr);
@@ -385,12 +386,6 @@ CompilerDriver::CompilerDriver(
   compiler_->Init();
 
   CHECK_EQ(boot_image_, image_classes_.get() != nullptr);
-
-  // Read the profile file if one is provided.
-  if (!profile_file.empty()) {
-    profile_compilation_info_.reset(new ProfileCompilationInfo(profile_file));
-    LOG(INFO) << "Using profile data from file " << profile_file;
-  }
 }
 
 CompilerDriver::~CompilerDriver() {
@@ -2306,15 +2301,11 @@ void CompilerDriver::InitializeClasses(jobject class_loader,
 void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool, TimingLogger* timings) {
-  if (profile_compilation_info_ != nullptr) {
-    if (!profile_compilation_info_->Load(dex_files)) {
-      LOG(WARNING) << "Failed to load offline profile info from "
-          << profile_compilation_info_->GetFilename()
-          << ". No methods will be compiled";
-    } else if (kDebugProfileGuidedCompilation) {
-      LOG(INFO) << "[ProfileGuidedCompilation] "
-          << profile_compilation_info_->DumpInfo();
-    }
+  if (kDebugProfileGuidedCompilation) {
+    LOG(INFO) << "[ProfileGuidedCompilation] " <<
+        ((profile_compilation_info_ == nullptr)
+            ? "null"
+            : profile_compilation_info_->DumpInfo(&dex_files));
   }
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index fa0cb9a412..3847c8183e 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -97,8 +97,8 @@ class CompilerDriver {
                  size_t thread_count, bool dump_stats, bool dump_passes,
                  const std::string& dump_cfg_file_name, bool dump_cfg_append,
                  CumulativeLogger* timer, int swap_fd,
-                 const std::string& profile_file,
-                 const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map);
+                 const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
+                 const ProfileCompilationInfo* profile_compilation_info);
 
   ~CompilerDriver();
 
@@ -657,9 +657,6 @@ class CompilerDriver {
   // This option may be restricted to the boot image, depending on a flag in the implementation.
   std::unique_ptr<std::unordered_set<std::string>> methods_to_compile_;
 
-  // Info for profile guided compilation.
-  std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
-
   bool had_hard_verifier_failure_;
 
   size_t thread_count_;
@@ -689,6 +686,9 @@ class CompilerDriver {
 
   CompiledMethodStorage compiled_method_storage_;
 
+  // Info for profile guided compilation.
+  const ProfileCompilationInfo* const profile_compilation_info_;
+
   friend class CompileClassVisitor;
   DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
 };
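With this change the driver no longer opens or loads the profile itself; ownership of the ProfileCompilationInfo moves to the caller (dex2oat and the new profile_assistant.cc), and the driver keeps only a read-only pointer. A hedged sketch of the resulting call-site shape (earlier constructor arguments elided; the profile object must outlive the driver):

    // Caller-side sketch: load the profile first, then hand the driver a
    // const pointer, or nullptr when no profile-guided data is available.
    ProfileCompilationInfo profile_info;  // assumed populated by the caller
    CompilerDriver driver(/* ...other options as before... */,
                          /* swap_fd */ -1,
                          /* dex_to_oat_map */ nullptr,
                          /* profile_compilation_info */ &profile_info);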
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 209bb5a3c2..385f34a9f9 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -211,11 +211,9 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa
     generate_debug_info_ = false;
   } else if (option == "--debuggable") {
     debuggable_ = true;
-    generate_debug_info_ = true;
   } else if (option == "--native-debuggable") {
     native_debuggable_ = true;
     debuggable_ = true;
-    generate_debug_info_ = true;
   } else if (option.starts_with("--top-k-profile-threshold=")) {
     ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold_, Usage);
   } else if (option == "--include-patch-information") {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index f8032bb514..f14bdc4a2f 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -50,7 +50,7 @@ class CompilerOptions FINAL {
   static const size_t kDefaultNumDexMethodsThreshold = 900;
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
   static const bool kDefaultNativeDebuggable = false;
-  static const bool kDefaultGenerateDebugInfo = kIsDebugBuild;
+  static const bool kDefaultGenerateDebugInfo = false;
   static const bool kDefaultIncludePatchInformation = false;
   static const size_t kDefaultInlineDepthLimit = 3;
   static const size_t kDefaultInlineMaxCodeUnits = 32;
diff --git a/compiler/dwarf/method_debug_info.h b/compiler/dwarf/method_debug_info.h
index a391e4d08a..e8ba9148e8 100644
--- a/compiler/dwarf/method_debug_info.h
+++ b/compiler/dwarf/method_debug_info.h
@@ -30,8 +30,8 @@ struct MethodDebugInfo {
   uint32_t access_flags_;
   const DexFile::CodeItem* code_item_;
   bool deduped_;
-  uint32_t low_pc_;
-  uint32_t high_pc_;
+  uintptr_t low_pc_;
+  uintptr_t high_pc_;
   CompiledMethod* compiled_method_;
 };
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index bb07cc2913..a7461a5525 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -148,6 +148,12 @@ class ElfBuilder FINAL {
       }
     }
 
+    // Returns true if the section was written to disk.
+    // (Used to check whether we have .text when writing JIT debug info)
+    bool Exists() const {
+      return finished_;
+    }
+
     // Get the location of this section in virtual memory.
     Elf_Addr GetAddress() const {
      CHECK(started_);
@@ -247,16 +253,18 @@
     }
 
     // Buffer symbol for this section. It will be written later.
+    // If the symbol's section is null, it will be considered absolute (SHN_ABS).
+    // (we use this in JIT to reference code which is stored outside the debug ELF file)
     void Add(Elf_Word name, const Section* section, Elf_Addr addr,
              bool is_relative, Elf_Word size, uint8_t binding, uint8_t type,
              uint8_t other = 0) {
-      CHECK(section != nullptr);
       Elf_Sym sym = Elf_Sym();
       sym.st_name = name;
       sym.st_value = addr + (is_relative ? section->GetAddress() : 0);
       sym.st_size = size;
       sym.st_other = other;
-      sym.st_shndx = section->GetSectionIndex();
+      sym.st_shndx = (section != nullptr ? section->GetSectionIndex()
                                          : static_cast<Elf_Word>(SHN_ABS));
       sym.st_info = (binding << 4) + (type & 0xf);
       symbols_.push_back(sym);
     }
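The nullable-section case in Add() above exists so the JIT can emit symbols whose st_value is an absolute runtime address rather than an offset into a section of the debug ELF file itself. A sketch of such a call, using the signature from the hunk (address and size values are made up):

    // JIT-side sketch: the code lives in the code cache, outside this
    // in-memory ELF file, so pass a null section to get st_shndx = SHN_ABS.
    symtab->Add(strtab->Write("jitted_method"),
                /* section */ nullptr,    // null -> absolute (SHN_ABS)
                /* addr */ 0x70001000u,   // absolute address of the code
                /* is_relative */ false,  // no section base to add
                /* size */ 0x80u,
                STB_GLOBAL,
                STT_FUNC);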
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 2bc8c89f73..dd50f69b71 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -22,16 +22,20 @@
 #include "base/casts.h"
 #include "base/stl_util.h"
 #include "compiled_method.h"
-#include "driver/compiler_driver.h"
 #include "dex_file-inl.h"
+#include "driver/compiler_driver.h"
 #include "dwarf/dedup_vector.h"
 #include "dwarf/headers.h"
 #include "dwarf/method_debug_info.h"
 #include "dwarf/register.h"
 #include "elf_builder.h"
+#include "linker/vector_output_stream.h"
+#include "mirror/array.h"
+#include "mirror/class-inl.h"
+#include "mirror/class.h"
 #include "oat_writer.h"
-#include "utils.h"
 #include "stack_map.h"
+#include "utils.h"
 
 namespace art {
 namespace dwarf {
@@ -219,6 +223,10 @@ void WriteCFISection(ElfBuilder<ElfTypes>* builder,
   CHECK(format == DW_DEBUG_FRAME_FORMAT || format == DW_EH_FRAME_FORMAT);
   typedef typename ElfTypes::Addr Elf_Addr;
 
+  if (method_infos.empty()) {
+    return;
+  }
+
   std::vector<uint32_t> binary_search_table;
   std::vector<uintptr_t> patch_locations;
   if (format == DW_EH_FRAME_FORMAT) {
@@ -234,7 +242,9 @@ void WriteCFISection(ElfBuilder<ElfTypes>* builder,
   {
     cfi_section->Start();
     const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
-    const Elf_Addr text_address = builder->GetText()->GetAddress();
+    const Elf_Addr text_address = builder->GetText()->Exists()
+        ? builder->GetText()->GetAddress()
+        : 0;
     const Elf_Addr cfi_address = cfi_section->GetAddress();
     const Elf_Addr cie_address = cfi_address;
     Elf_Addr buffer_address = cfi_address;
@@ -305,8 +315,8 @@ namespace {
 struct CompilationUnit {
   std::vector<const MethodDebugInfo*> methods_;
   size_t debug_line_offset_ = 0;
-  uint32_t low_pc_ = 0xFFFFFFFFU;
-  uint32_t high_pc_ = 0;
+  uintptr_t low_pc_ = std::numeric_limits<uintptr_t>::max();
+  uintptr_t high_pc_ = 0;
 };
 
 typedef std::vector<DexFile::LocalInfo> LocalInfos;
@@ -439,14 +449,17 @@ class DebugInfoWriter {
 
   void Write(const CompilationUnit& compilation_unit) {
     CHECK(!compilation_unit.methods_.empty());
-    const Elf_Addr text_address = owner_->builder_->GetText()->GetAddress();
+    const Elf_Addr text_address = owner_->builder_->GetText()->Exists()
+        ? owner_->builder_->GetText()->GetAddress()
+        : 0;
+    const uintptr_t cu_size = compilation_unit.high_pc_ - compilation_unit.low_pc_;
 
     info_.StartTag(DW_TAG_compile_unit);
     info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
     info_.WriteData1(DW_AT_language, DW_LANG_Java);
     info_.WriteStrp(DW_AT_comp_dir, owner_->WriteString("$JAVA_SRC_ROOT"));
     info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
-    info_.WriteUdata(DW_AT_high_pc, compilation_unit.high_pc_ - compilation_unit.low_pc_);
+    info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(cu_size));
     info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
 
     const char* last_dex_class_desc = nullptr;
@@ -464,8 +477,16 @@ class DebugInfoWriter {
         if (last_dex_class_desc != nullptr) {
           EndClassTag(last_dex_class_desc);
         }
-        size_t offset = StartClassTag(dex_class_desc);
-        type_cache_.emplace(dex_class_desc, offset);
+        // Write reference tag for the class we are about to declare.
+        size_t reference_tag_offset = info_.StartTag(DW_TAG_reference_type);
+        type_cache_.emplace(std::string(dex_class_desc), reference_tag_offset);
+        size_t type_attrib_offset = info_.size();
+        info_.WriteRef4(DW_AT_type, 0);
+        info_.EndTag();
+        // Declare the class that owns this method.
+        size_t class_offset = StartClassTag(dex_class_desc);
+        info_.UpdateUint32(type_attrib_offset, class_offset);
+        info_.WriteFlag(DW_AT_declaration, true);
         // Check that each class is defined only once.
         bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second;
         CHECK(unique) << "Redefinition of " << dex_class_desc;
@@ -476,7 +497,7 @@ class DebugInfoWriter {
       info_.StartTag(DW_TAG_subprogram);
       WriteName(dex->GetMethodName(dex_method));
       info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc_);
-      info_.WriteUdata(DW_AT_high_pc, mi->high_pc_ - mi->low_pc_);
+      info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(mi->high_pc_ - mi->low_pc_));
       uint8_t frame_base[] = { DW_OP_call_frame_cfa };
       info_.WriteExprLoc(DW_AT_frame_base, &frame_base, sizeof(frame_base));
       WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
@@ -562,6 +583,92 @@ class DebugInfoWriter {
     owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
   }
 
+  void Write(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+    info_.StartTag(DW_TAG_compile_unit);
+    info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+    info_.WriteData1(DW_AT_language, DW_LANG_Java);
+
+    for (mirror::Class* type : types) {
+      if (type->IsPrimitive()) {
+        // For primitive types the definition and the declaration is the same.
+        if (type->GetPrimitiveType() != Primitive::kPrimVoid) {
+          WriteTypeDeclaration(type->GetDescriptor(nullptr));
+        }
+      } else if (type->IsArrayClass()) {
+        mirror::Class* element_type = type->GetComponentType();
+        uint32_t component_size = type->GetComponentSize();
+        uint32_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
+        uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+        info_.StartTag(DW_TAG_array_type);
+        std::string descriptor_string;
+        WriteLazyType(element_type->GetDescriptor(&descriptor_string));
+        info_.WriteUdata(DW_AT_data_member_location, data_offset);
+        info_.StartTag(DW_TAG_subrange_type);
+        DCHECK_LT(length_offset, 32u);
+        uint8_t count[] = {
+          DW_OP_push_object_address,
+          static_cast<uint8_t>(DW_OP_lit0 + length_offset),
+          DW_OP_plus,
+          DW_OP_deref_size,
+          4  // Array length is always 32-bit wide.
+        };
+        info_.WriteExprLoc(DW_AT_count, &count, sizeof(count));
+        info_.EndTag();  // DW_TAG_subrange_type.
+        info_.EndTag();  // DW_TAG_array_type.
+      } else {
+        std::string descriptor_string;
+        const char* desc = type->GetDescriptor(&descriptor_string);
+        StartClassTag(desc);
+
+        if (!type->IsVariableSize()) {
+          info_.WriteUdata(DW_AT_byte_size, type->GetObjectSize());
+        }
+
+        // Base class.
+        mirror::Class* base_class = type->GetSuperClass();
+        if (base_class != nullptr) {
+          info_.StartTag(DW_TAG_inheritance);
+          WriteLazyType(base_class->GetDescriptor(&descriptor_string));
+          info_.WriteUdata(DW_AT_data_member_location, 0);
+          info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+          info_.EndTag();  // DW_TAG_inheritance.
+        }
+
+        // Member variables.
+        for (uint32_t i = 0, count = type->NumInstanceFields(); i < count; ++i) {
+          ArtField* field = type->GetInstanceField(i);
+          info_.StartTag(DW_TAG_member);
+          WriteName(field->GetName());
+          WriteLazyType(field->GetTypeDescriptor());
+          info_.WriteUdata(DW_AT_data_member_location, field->GetOffset().Uint32Value());
+          uint32_t access_flags = field->GetAccessFlags();
+          if (access_flags & kAccPublic) {
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+          } else if (access_flags & kAccProtected) {
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_protected);
+          } else if (access_flags & kAccPrivate) {
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_private);
+          }
+          info_.EndTag();  // DW_TAG_member.
+        }
+
+        EndClassTag(desc);
+      }
+    }
+
+    CHECK_EQ(info_.Depth(), 1);
+    FinishLazyTypes();
+    info_.EndTag();  // DW_TAG_compile_unit.
+    std::vector<uint8_t> buffer;
+    buffer.reserve(info_.data()->size() + KB);
+    const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+    const size_t debug_abbrev_offset =
+        owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+    WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
+    owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+  }
+
   // Write table into .debug_loc which describes location of dex register.
   // The dex register might be valid only at some points and it might
   // move between machine registers and stack.
@@ -715,14 +822,14 @@ class DebugInfoWriter {
   // just define all types lazily at the end of compilation unit.
   void WriteLazyType(const char* type_descriptor) {
     if (type_descriptor != nullptr && type_descriptor[0] != 'V') {
-      lazy_types_.emplace(type_descriptor, info_.size());
+      lazy_types_.emplace(std::string(type_descriptor), info_.size());
       info_.WriteRef4(DW_AT_type, 0);
     }
   }
 
   void FinishLazyTypes() {
     for (const auto& lazy_type : lazy_types_) {
-      info_.UpdateUint32(lazy_type.second, WriteType(lazy_type.first));
+      info_.UpdateUint32(lazy_type.second, WriteTypeDeclaration(lazy_type.first));
     }
     lazy_types_.clear();
   }
@@ -747,30 +854,39 @@ class DebugInfoWriter {
 
   // Convert dex type descriptor to DWARF.
   // Returns offset in the compilation unit.
-  size_t WriteType(const char* desc) {
+  size_t WriteTypeDeclaration(const std::string& desc) {
+    DCHECK(!desc.empty());
     const auto& it = type_cache_.find(desc);
     if (it != type_cache_.end()) {
       return it->second;
     }
 
     size_t offset;
-    if (*desc == 'L') {
+    if (desc[0] == 'L') {
       // Class type. For example: Lpackage/name;
-      offset = StartClassTag(desc);
+      size_t class_offset = StartClassTag(desc.c_str());
       info_.WriteFlag(DW_AT_declaration, true);
-      EndClassTag(desc);
+      EndClassTag(desc.c_str());
+      // Reference to the class type.
+      offset = info_.StartTag(DW_TAG_reference_type);
+      info_.WriteRef(DW_AT_type, class_offset);
+      info_.EndTag();
-    } else if (*desc == '[') {
+    } else if (desc[0] == '[') {
       // Array type.
-      size_t element_type = WriteType(desc + 1);
-      offset = info_.StartTag(DW_TAG_array_type);
+      size_t element_type = WriteTypeDeclaration(desc.substr(1));
+      size_t array_type = info_.StartTag(DW_TAG_array_type);
+      info_.WriteFlag(DW_AT_declaration, true);
       info_.WriteRef(DW_AT_type, element_type);
       info_.EndTag();
+      offset = info_.StartTag(DW_TAG_reference_type);
+      info_.WriteRef4(DW_AT_type, array_type);
+      info_.EndTag();
     } else {
       // Primitive types.
       const char* name;
       uint32_t encoding;
       uint32_t byte_size;
-      switch (*desc) {
+      switch (desc[0]) {
         case 'B':
          name = "byte";
          encoding = DW_ATE_signed;
@@ -815,7 +931,7 @@ class DebugInfoWriter {
           LOG(FATAL) << "Void type should not be encoded";
           UNREACHABLE();
         default:
-          LOG(FATAL) << "Unknown dex type descriptor: " << desc;
+          LOG(FATAL) << "Unknown dex type descriptor: \"" << desc << "\"";
           UNREACHABLE();
       }
       offset = info_.StartTag(DW_TAG_base_type);
@@ -865,9 +981,10 @@ class DebugInfoWriter {
   // Temporary buffer to create and store the entries.
   DebugInfoEntryWriter<> info_;
   // Cache of already translated type descriptors.
-  std::map<const char*, size_t, CStringLess> type_cache_;  // type_desc -> definition_offset.
+  std::map<std::string, size_t> type_cache_;  // type_desc -> definition_offset.
   // 32-bit references which need to be resolved to a type later.
-  std::multimap<const char*, size_t, CStringLess> lazy_types_;  // type_desc -> patch_offset.
+  // Given type may be used multiple times. Therefore we need a multimap.
+  std::multimap<std::string, size_t> lazy_types_;  // type_desc -> patch_offset.
 };

 public:
@@ -883,6 +1000,11 @@ class DebugInfoWriter {
     writer.Write(compilation_unit);
   }
 
+  void WriteTypes(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+    CompilationUnitWriter writer(this);
+    writer.Write(types);
+  }
+
   void End() {
     builder_->GetDebugInfo()->End();
     builder_->WritePatches(".debug_info.oat_patches",
@@ -924,7 +1046,9 @@ class DebugLineWriter {
   // Returns the number of bytes written.
   size_t WriteCompilationUnit(CompilationUnit& compilation_unit) {
     const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
-    const Elf_Addr text_address = builder_->GetText()->GetAddress();
+    const Elf_Addr text_address = builder_->GetText()->Exists()
+        ? builder_->GetText()->GetAddress()
+        : 0;
 
     compilation_unit.debug_line_offset_ = builder_->GetDebugLine()->GetSize();
@@ -1102,9 +1226,27 @@ class DebugLineWriter {
   std::vector<uintptr_t> debug_line_patches;
 };
 
+// Get all types loaded by the runtime.
+static std::vector<mirror::Class*> GetLoadedRuntimeTypes() SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::vector<mirror::Class*> result;
+  class CollectClasses : public ClassVisitor {
+   public:
+    virtual bool Visit(mirror::Class* klass) {
+      classes_->push_back(klass);
+      return true;
+    }
+    std::vector<mirror::Class*>* classes_;
+  };
+  CollectClasses visitor;
+  visitor.classes_ = &result;
+  Runtime::Current()->GetClassLinker()->VisitClasses(&visitor);
+  return result;
+}
+
 template<typename ElfTypes>
-void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const ArrayRef<const MethodDebugInfo>& method_infos) {
+static void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
+                               bool write_loaded_runtime_types,
+                               const ArrayRef<const MethodDebugInfo>& method_infos) {
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
@@ -1122,7 +1264,7 @@ void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
   }
 
   // Write .debug_line section.
-  {
+  if (!compilation_units.empty()) {
     DebugLineWriter<ElfTypes> line_writer(builder);
     line_writer.Start();
     for (auto& compilation_unit : compilation_units) {
@@ -1132,12 +1274,19 @@ void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
   }
 
   // Write .debug_info section.
-  {
+  if (!compilation_units.empty() || write_loaded_runtime_types) {
     DebugInfoWriter<ElfTypes> info_writer(builder);
     info_writer.Start();
     for (const auto& compilation_unit : compilation_units) {
       info_writer.WriteCompilationUnit(compilation_unit);
     }
+    if (write_loaded_runtime_types) {
+      Thread* self = Thread::Current();
+      // The lock prevents the classes being moved by the GC.
+      ReaderMutexLock mu(self, *Locks::mutator_lock_);
+      std::vector<mirror::Class*> types = GetLoadedRuntimeTypes();
+      info_writer.WriteTypes(ArrayRef<mirror::Class*>(types.data(), types.size()));
+    }
     info_writer.End();
   }
 }
@@ -1173,11 +1322,13 @@ void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
       name += " [DEDUPED]";
     }
 
+    const auto* text = builder->GetText()->Exists() ? builder->GetText() : nullptr;
+    const bool is_relative = (text != nullptr);
     uint32_t low_pc = info.low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
     low_pc += info.compiled_method_->CodeDelta();
-    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
-                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
+    symtab->Add(strtab->Write(name), text, low_pc,
+                is_relative, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
     // requires it to match function symbol. Just address 0 does not work.
     if (info.compiled_method_->GetInstructionSet() == kThumb2) {
       if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
-                    true, 0, STB_LOCAL, STT_NOTYPE);
+        symtab->Add(strtab->Write("$t"), text, info.low_pc_ & ~1,
+                    is_relative, 0, STB_LOCAL, STT_NOTYPE);
         generated_mapping_symbol = true;
       }
     }
@@ -1202,25 +1353,89 @@
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    bool write_loaded_runtime_types,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     CFIFormat cfi_format) {
-  if (!method_infos.empty()) {
-    // Add methods to .symtab.
-    WriteDebugSymbols(builder, method_infos);
-    // Generate CFI (stack unwinding information).
-    WriteCFISection(builder, method_infos, cfi_format);
-    // Write DWARF .debug_* sections.
-    WriteDebugSections(builder, method_infos);
-  }
+  // Add methods to .symtab.
+  WriteDebugSymbols(builder, method_infos);
+  // Generate CFI (stack unwinding information).
+  WriteCFISection(builder, method_infos, cfi_format);
+  // Write DWARF .debug_* sections.
+  WriteDebugSections(builder, write_loaded_runtime_types, method_infos);
+}
+
+template <typename ElfTypes>
+static ArrayRef<const uint8_t> WriteDebugElfFileForMethodInternal(
+    const dwarf::MethodDebugInfo& method_info) {
+  const InstructionSet isa = method_info.compiled_method_->GetInstructionSet();
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Debug ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  builder->Start();
+  WriteDebugInfo(builder.get(),
+                 false,
+                 ArrayRef<const MethodDebugInfo>(&method_info, 1),
+                 DW_DEBUG_FRAME_FORMAT);
+  builder->End();
+  CHECK(builder->Good());
+  // Make a copy of the buffer. We want to shrink it anyway.
+  uint8_t* result = new uint8_t[buffer.size()];
+  CHECK(result != nullptr);
+  memcpy(result, buffer.data(), buffer.size());
+  return ArrayRef<const uint8_t>(result, buffer.size());
+}
+
+ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info) {
+  const InstructionSet isa = method_info.compiled_method_->GetInstructionSet();
+  if (Is64BitInstructionSet(isa)) {
+    return WriteDebugElfFileForMethodInternal<ElfTypes64>(method_info);
+  } else {
+    return WriteDebugElfFileForMethodInternal<ElfTypes32>(method_info);
+  }
+}
+
+template <typename ElfTypes>
+static ArrayRef<const uint8_t> WriteDebugElfFileForClassInternal(const InstructionSet isa,
+                                                                 mirror::Class* type)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Debug ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  builder->Start();
+
+  DebugInfoWriter<ElfTypes> info_writer(builder.get());
+  info_writer.Start();
+  info_writer.WriteTypes(ArrayRef<mirror::Class*>(&type, 1));
+  info_writer.End();
+
+  builder->End();
+  CHECK(builder->Good());
+  // Make a copy of the buffer. We want to shrink it anyway.
+  uint8_t* result = new uint8_t[buffer.size()];
+  CHECK(result != nullptr);
+  memcpy(result, buffer.data(), buffer.size());
+  return ArrayRef<const uint8_t>(result, buffer.size());
+}
+
+ArrayRef<const uint8_t> WriteDebugElfFileForClass(const InstructionSet isa, mirror::Class* type) {
+  if (Is64BitInstructionSet(isa)) {
+    return WriteDebugElfFileForClassInternal<ElfTypes64>(isa, type);
+  } else {
+    return WriteDebugElfFileForClassInternal<ElfTypes32>(isa, type);
+  }
 }
 
 // Explicit instantiations
 template void WriteDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
+    bool write_loaded_runtime_types,
     const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat cfi_format);
 template void WriteDebugInfo<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
+    bool write_loaded_runtime_types,
     const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat cfi_format);
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index 7ec0be185a..91da00f97a 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -17,19 +17,30 @@
 #ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
 #define ART_COMPILER_ELF_WRITER_DEBUG_H_
 
-#include "elf_builder.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "dwarf/dwarf_constants.h"
-#include "oat_writer.h"
+#include "elf_builder.h"
 #include "utils/array_ref.h"
 
 namespace art {
+namespace mirror {
+class Class;
+}
 namespace dwarf {
+struct MethodDebugInfo;
 
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    bool write_loaded_runtime_types,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     CFIFormat cfi_format);
 
+ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info);
+
+ArrayRef<const uint8_t> WriteDebugElfFileForClass(const InstructionSet isa, mirror::Class* type)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
 }  // namespace dwarf
 }  // namespace art
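WriteDebugElfFileForMethod above returns a new[]-allocated buffer wrapped in an ArrayRef, so the caller takes ownership. A hedged usage sketch; the registration hook is hypothetical, standing in for whatever debugger interface the JIT wires this into:

    // Sketch: build a one-method in-memory ELF file and hand it to a
    // (hypothetical) debugger-registration hook.
    void PublishJitDebugInfo(const art::dwarf::MethodDebugInfo& info) {
      art::ArrayRef<const uint8_t> elf = art::dwarf::WriteDebugElfFileForMethod(info);
      RegisterElfWithDebugger(elf.data(), elf.size());  // hypothetical hook
      // The buffer came from new[]; delete[] elf.data() once unregistered.
    }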
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 7b1bdd72e5..a67f3bd1a9 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -152,7 +152,7 @@ template <typename ElfTypes>
 void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
     const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   if (compiler_options_->GetGenerateDebugInfo()) {
-    dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat);
+    dwarf::WriteDebugInfo(builder_.get(), /* write_types */ true, method_infos, kCFIFormat);
   }
 }
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 17d0f61a34..d0bb201d69 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -76,23 +76,35 @@ static constexpr bool kBinObjects = true;
 
 // Return true if an object is already in an image space.
 bool ImageWriter::IsInBootImage(const void* obj) const {
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
   if (!compile_app_image_) {
-    DCHECK(boot_image_space_ == nullptr);
+    DCHECK(heap->GetBootImageSpaces().empty());
     return false;
   }
-  const uint8_t* image_begin = boot_image_space_->Begin();
-  // Real image end including ArtMethods and ArtField sections.
-  const uint8_t* image_end = image_begin + boot_image_space_->GetImageHeader().GetImageSize();
-  return image_begin <= obj && obj < image_end;
+  for (gc::space::ImageSpace* boot_image_space : heap->GetBootImageSpaces()) {
+    const uint8_t* image_begin = boot_image_space->Begin();
+    // Real image end including ArtMethods and ArtField sections.
+    const uint8_t* image_end = image_begin + boot_image_space->GetImageHeader().GetImageSize();
+    if (image_begin <= obj && obj < image_end) {
+      return true;
+    }
+  }
+  return false;
 }
 
 bool ImageWriter::IsInBootOatFile(const void* ptr) const {
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
   if (!compile_app_image_) {
-    DCHECK(boot_image_space_ == nullptr);
+    DCHECK(heap->GetBootImageSpaces().empty());
     return false;
   }
-  const ImageHeader& image_header = boot_image_space_->GetImageHeader();
-  return image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd();
+  for (gc::space::ImageSpace* boot_image_space : heap->GetBootImageSpaces()) {
+    const ImageHeader& image_header = boot_image_space->GetImageHeader();
+    if (image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd()) {
+      return true;
+    }
+  }
+  return false;
 }
 
 static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED)
@@ -109,14 +121,6 @@ static void CheckNoDexObjects() {
 bool ImageWriter::PrepareImageAddressSpace() {
   target_ptr_size_ = InstructionSetPointerSize(compiler_driver_.GetInstructionSet());
   gc::Heap* const heap = Runtime::Current()->GetHeap();
-  // Cache boot image space.
-  for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
-    if (space->IsImageSpace()) {
-      CHECK(compile_app_image_);
-      CHECK(boot_image_space_ == nullptr) << "Multiple image spaces";
-      boot_image_space_ = space->AsImageSpace();
-    }
-  }
   {
     ScopedObjectAccess soa(Thread::Current());
     PruneNonImageClasses();  // Remove junk
@@ -205,9 +209,6 @@ bool ImageWriter::Write(int image_fd,
         oat_header.GetQuickResolutionTrampolineOffset();
     image_info.oat_address_offsets_[kOatAddressQuickToInterpreterBridge] =
         oat_header.GetQuickToInterpreterBridgeOffset();
-  } else {
-    // Other oat files use the primary trampolines.
-    // TODO: Dummy values to protect usage? b/26317072
   }
@@ -635,11 +636,11 @@ ImageWriter::BinSlot ImageWriter::GetImageBinSlot(mirror::Object* object) const
 bool ImageWriter::AllocMemory() {
   for (const char* oat_filename : oat_filenames_) {
     ImageInfo& image_info = GetImageInfo(oat_filename);
-    const size_t length = RoundUp(image_objects_offset_begin_ +
-                                      GetBinSizeSum(image_info) +
-                                      intern_table_bytes_ +
-                                      class_table_bytes_,
-                                  kPageSize);
+    ImageSection unused_sections[ImageHeader::kSectionCount];
+    const size_t length = RoundUp(
+        image_info.CreateImageSections(target_ptr_size_, unused_sections),
+        kPageSize);
+
     std::string error_msg;
     image_info.image_.reset(MemMap::MapAnonymous("image writer image",
                                                  nullptr,
@@ -909,14 +910,17 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) {
   DCHECK(obj != nullptr);
   // if it is a string, we want to intern it if its not interned.
   if (obj->GetClass()->IsStringClass()) {
+    const char* oat_filename = GetOatFilename(obj);
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+
     // we must be an interned string that was forward referenced and already assigned
     if (IsImageBinSlotAssigned(obj)) {
-      DCHECK_EQ(obj, obj->AsString()->Intern());
+      DCHECK_EQ(obj, image_info.intern_table_->InternStrongImageString(obj->AsString()));
       return;
     }
     // InternImageString allows us to intern while holding the heap bitmap lock. This is safe since
     // we are guaranteed to not have GC during image writing.
-    mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrongImageString(
+    mirror::String* const interned = image_info.intern_table_->InternStrongImageString(
         obj->AsString());
     if (obj != interned) {
       if (!IsImageBinSlotAssigned(interned)) {
@@ -1067,6 +1071,13 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) {
       };
       const char* oat_file = GetOatFilenameForDexCache(dex_cache);
       ImageInfo& image_info = GetImageInfo(oat_file);
+      {
+        // Note: This table is only accessed from the image writer, so the lock is technically
+        // unnecessary.
+        WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+        // Insert in the class table for this image.
+        image_info.class_table_->Insert(as_klass);
+      }
       for (LengthPrefixedArray<ArtField>* cur_fields : fields) {
         // Total array length including header.
         if (cur_fields != nullptr) {
@@ -1249,6 +1260,18 @@ void ImageWriter::CalculateNewObjectOffsets() {
   // Calculate size of the dex cache arrays slot and prepare offsets.
   PrepareDexCacheArraySlots();
 
+  // Calculate the sizes of the intern tables and class tables.
+  for (const char* oat_filename : oat_filenames_) {
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+    // Calculate how big the intern table will be after being serialized.
+    InternTable* const intern_table = image_info.intern_table_.get();
+    CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
+    image_info.intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
+    // Calculate the size of the class table.
+    ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    image_info.class_table_bytes_ += image_info.class_table_->WriteToMemory(nullptr);
+  }
+
   // Calculate bin slot offsets.
   for (const char* oat_filename : oat_filenames_) {
     ImageInfo& image_info = GetImageInfo(oat_filename);
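The sizing added above leans on the two-pass serialization convention used by InternTable and ClassTable: WriteToMemory(nullptr) is a dry run that only measures. A minimal sketch of the pattern, assuming any table following that convention:

    // Pass 1 measures, pass 2 writes; the CHECK mirrors the ones in
    // CopyAndFixupNativeData() further down.
    const size_t expected = table->WriteToMemory(nullptr);  // dry run, no writes
    std::vector<uint8_t> buffer(expected);
    const size_t written = table->WriteToMemory(buffer.data());
    CHECK_EQ(written, expected);  // both passes must agree on the size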
@@ -1275,18 +1298,11 @@ void ImageWriter::CalculateNewObjectOffsets() {
     ImageInfo& image_info = GetImageInfo(oat_filename);
     image_info.image_begin_ = global_image_begin_ + image_offset;
     image_info.image_offset_ = image_offset;
-    size_t native_sections_size = image_info.bin_slot_sizes_[kBinArtField] +
-                                  image_info.bin_slot_sizes_[kBinArtMethodDirty] +
-                                  image_info.bin_slot_sizes_[kBinArtMethodClean] +
-                                  image_info.bin_slot_sizes_[kBinDexCacheArray] +
-                                  intern_table_bytes_ +
-                                  class_table_bytes_;
-    size_t image_objects = RoundUp(image_info.image_end_, kPageSize);
-    size_t bitmap_size =
-        RoundUp(gc::accounting::ContinuousSpaceBitmap::ComputeBitmapSize(image_objects), kPageSize);
-    size_t heap_size = gc::accounting::ContinuousSpaceBitmap::ComputeHeapSize(bitmap_size);
-    size_t max = std::max(heap_size, image_info.image_end_ + native_sections_size + bitmap_size);
-    image_info.image_size_ = RoundUp(max, kPageSize);
+    ImageSection unused_sections[ImageHeader::kSectionCount];
+    image_info.image_size_ = RoundUp(
+        image_info.CreateImageSections(target_ptr_size_, unused_sections),
+        kPageSize);
+    // There should be no gaps until the next image.
     image_offset += image_info.image_size_;
   }
@@ -1310,89 +1326,69 @@ void ImageWriter::CalculateNewObjectOffsets() {
     relocation.offset += image_info.bin_slot_offsets_[bin_type];
   }
 
-  /* TODO: Reenable the intern table and class table. b/26317072
-  // Calculate how big the intern table will be after being serialized.
-  InternTable* const intern_table = runtime->GetInternTable();
-  CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
-  intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
-
-  // Write out the class table.
-  ClassLinker* class_linker = runtime->GetClassLinker();
-  if (boot_image_space_ == nullptr) {
-    // Compiling the boot image, add null class loader.
-    class_loaders_.insert(nullptr);
-  }
-  // class_loaders_ usually will not be empty, but may be empty if we attempt to create an image
-  // with no classes.
-  if (class_loaders_.size() == 1u) {
-    // Only write the class table if we have exactly one class loader. There may be cases where
-    // there are multiple class loaders if a class path is passed to dex2oat.
-    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-    for (mirror::ClassLoader* loader : class_loaders_) {
-      ClassTable* table = class_linker->ClassTableForClassLoader(loader);
-      CHECK(table != nullptr);
-      class_table_bytes_ += table->WriteToMemory(nullptr);
-    }
-  }
-  */
-
   // Note that image_info.image_end_ is left at end of used mirror object section.
 }
 
-void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) {
-  CHECK_NE(0U, oat_loaded_size);
-  const char* oat_filename = oat_file_->GetLocation().c_str();
-  ImageInfo& image_info = GetImageInfo(oat_filename);
-  const uint8_t* oat_file_begin = GetOatFileBegin(oat_filename);
-  const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size;
-  image_info.oat_data_begin_ = const_cast<uint8_t*>(oat_file_begin) + oat_data_offset;
-  const uint8_t* oat_data_end = image_info.oat_data_begin_ + oat_file_->Size();
-  image_info.oat_size_ = oat_file_->Size();
-
-  // Create the image sections.
-  ImageSection sections[ImageHeader::kSectionCount];
+size_t ImageWriter::ImageInfo::CreateImageSections(size_t target_ptr_size,
+                                                   ImageSection* out_sections) const {
+  DCHECK(out_sections != nullptr);
   // Objects section
-  auto* objects_section = &sections[ImageHeader::kSectionObjects];
-  *objects_section = ImageSection(0u, image_info.image_end_);
+  auto* objects_section = &out_sections[ImageHeader::kSectionObjects];
+  *objects_section = ImageSection(0u, image_end_);
   size_t cur_pos = objects_section->End();
   // Add field section.
-  auto* field_section = &sections[ImageHeader::kSectionArtFields];
-  *field_section = ImageSection(cur_pos, image_info.bin_slot_sizes_[kBinArtField]);
-  CHECK_EQ(image_info.bin_slot_offsets_[kBinArtField], field_section->Offset());
+  auto* field_section = &out_sections[ImageHeader::kSectionArtFields];
+  *field_section = ImageSection(cur_pos, bin_slot_sizes_[kBinArtField]);
+  CHECK_EQ(bin_slot_offsets_[kBinArtField], field_section->Offset());
   cur_pos = field_section->End();
   // Round up to the alignment the required by the method section.
-  cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size_));
+  cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size));
   // Add method section.
-  auto* methods_section = &sections[ImageHeader::kSectionArtMethods];
+  auto* methods_section = &out_sections[ImageHeader::kSectionArtMethods];
   *methods_section = ImageSection(cur_pos,
-                                  image_info.bin_slot_sizes_[kBinArtMethodClean] +
-                                  image_info.bin_slot_sizes_[kBinArtMethodDirty]);
-  CHECK_EQ(image_info.bin_slot_offsets_[kBinArtMethodClean], methods_section->Offset());
+                                  bin_slot_sizes_[kBinArtMethodClean] +
+                                  bin_slot_sizes_[kBinArtMethodDirty]);
+  CHECK_EQ(bin_slot_offsets_[kBinArtMethodClean], methods_section->Offset());
   cur_pos = methods_section->End();
   // Add dex cache arrays section.
-  auto* dex_cache_arrays_section = &sections[ImageHeader::kSectionDexCacheArrays];
-  *dex_cache_arrays_section = ImageSection(cur_pos, image_info.bin_slot_sizes_[kBinDexCacheArray]);
-  CHECK_EQ(image_info.bin_slot_offsets_[kBinDexCacheArray], dex_cache_arrays_section->Offset());
+  auto* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays];
+  *dex_cache_arrays_section = ImageSection(cur_pos, bin_slot_sizes_[kBinDexCacheArray]);
+  CHECK_EQ(bin_slot_offsets_[kBinDexCacheArray], dex_cache_arrays_section->Offset());
   cur_pos = dex_cache_arrays_section->End();
   // Round up to the alignment the string table expects. See HashSet::WriteToMemory.
   cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
   // Calculate the size of the interned strings.
-  auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings];
+  auto* interned_strings_section = &out_sections[ImageHeader::kSectionInternedStrings];
   *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
   cur_pos = interned_strings_section->End();
   // Round up to the alignment the class table expects. See HashSet::WriteToMemory.
   cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
   // Calculate the size of the class table section.
-  auto* class_table_section = &sections[ImageHeader::kSectionClassTable];
+  auto* class_table_section = &out_sections[ImageHeader::kSectionClassTable];
   *class_table_section = ImageSection(cur_pos, class_table_bytes_);
   cur_pos = class_table_section->End();
   // Image end goes right before the start of the image bitmap.
-  const size_t image_end = static_cast<uint32_t>(cur_pos);
+  return cur_pos;
+}
+
+void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) {
+  CHECK_NE(0U, oat_loaded_size);
+  const char* oat_filename = oat_file_->GetLocation().c_str();
+  ImageInfo& image_info = GetImageInfo(oat_filename);
+  const uint8_t* oat_file_begin = GetOatFileBegin(oat_filename);
+  const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size;
+  image_info.oat_data_begin_ = const_cast<uint8_t*>(oat_file_begin) + oat_data_offset;
+  const uint8_t* oat_data_end = image_info.oat_data_begin_ + oat_file_->Size();
+  image_info.oat_size_ = oat_file_->Size();
+
+  // Create the image sections.
+  ImageSection sections[ImageHeader::kSectionCount];
+  const size_t image_end = image_info.CreateImageSections(target_ptr_size_, sections);
+
   // Finally bitmap section.
   const size_t bitmap_bytes = image_info.image_bitmap_->Size();
   auto* bitmap_section = &sections[ImageHeader::kSectionImageBitmap];
-  *bitmap_section = ImageSection(RoundUp(cur_pos, kPageSize), RoundUp(bitmap_bytes, kPageSize));
-  cur_pos = bitmap_section->End();
+  *bitmap_section = ImageSection(RoundUp(image_end, kPageSize), RoundUp(bitmap_bytes, kPageSize));
   if (VLOG_IS_ON(compiler)) {
     LOG(INFO) << "Creating header for " << oat_filename;
     size_t idx = 0;
@@ -1444,7 +1440,7 @@ class FixupRootVisitor : public RootVisitor {
   void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     for (size_t i = 0; i < count; ++i) {
-      *roots[i] = ImageAddress(*roots[i]);
+      *roots[i] = image_writer_->GetImageAddress(*roots[i]);
     }
   }
 
@@ -1452,19 +1448,12 @@ class FixupRootVisitor : public RootVisitor {
                   const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     for (size_t i = 0; i < count; ++i) {
-      roots[i]->Assign(ImageAddress(roots[i]->AsMirrorPtr()));
+      roots[i]->Assign(image_writer_->GetImageAddress(roots[i]->AsMirrorPtr()));
     }
   }
 
  private:
   ImageWriter* const image_writer_;
-
-  mirror::Object* ImageAddress(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
-    const size_t offset = image_writer_->GetImageOffset(obj);
-    auto* const dest = reinterpret_cast<Object*>(image_writer_->global_image_begin_ + offset);
-    VLOG(compiler) << "Update root from " << obj << " to " << dest;
-    return dest;
-  }
 };
 
 void ImageWriter::CopyAndFixupNativeData() {
@@ -1536,54 +1525,48 @@ void ImageWriter::CopyAndFixupNativeData() {
   }
   FixupRootVisitor root_visitor(this);
 
-  /* TODO: Reenable the intern table and class table
   // Write the intern table into the image.
-  const ImageSection& intern_table_section = image_header->GetImageSection(
-      ImageHeader::kSectionInternedStrings);
-  Runtime* const runtime = Runtime::Current();
-  InternTable* const intern_table = runtime->GetInternTable();
-  uint8_t* const intern_table_memory_ptr =
-      image_info.image_->Begin() + intern_table_section.Offset();
-  const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr);
-  CHECK_EQ(intern_table_bytes, intern_table_bytes_);
-  // Fixup the pointers in the newly written intern table to contain image addresses.
-  InternTable temp_intern_table;
-  // Note that we require that ReadFromMemory does not make an internal copy of the elements so that
-  // the VisitRoots() will update the memory directly rather than the copies.
-  // This also relies on visit roots not doing any verification which could fail after we update
-  // the roots to be the image addresses.
-  temp_intern_table.ReadFromMemory(intern_table_memory_ptr);
-  CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
-  temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
-
+  if (image_info.intern_table_bytes_ > 0) {
+    const ImageSection& intern_table_section = image_header->GetImageSection(
+        ImageHeader::kSectionInternedStrings);
+    InternTable* const intern_table = image_info.intern_table_.get();
+    uint8_t* const intern_table_memory_ptr =
+        image_info.image_->Begin() + intern_table_section.Offset();
+    const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr);
+    CHECK_EQ(intern_table_bytes, image_info.intern_table_bytes_);
+    // Fixup the pointers in the newly written intern table to contain image addresses.
+    InternTable temp_intern_table;
+    // Note that we require that ReadFromMemory does not make an internal copy of the elements so that
+    // the VisitRoots() will update the memory directly rather than the copies.
+    // This also relies on visit roots not doing any verification which could fail after we update
+    // the roots to be the image addresses.
+    temp_intern_table.AddTableFromMemory(intern_table_memory_ptr);
+    CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
+    temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
+  }
   // Write the class table(s) into the image. class_table_bytes_ may be 0 if there are multiple
   // class loaders. Writing multiple class tables into the image is currently unsupported.
-  if (class_table_bytes_ > 0u) {
-    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  if (image_info.class_table_bytes_ > 0u) {
     const ImageSection& class_table_section = image_header->GetImageSection(
         ImageHeader::kSectionClassTable);
     uint8_t* const class_table_memory_ptr =
         image_info.image_->Begin() + class_table_section.Offset();
     ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-    size_t class_table_bytes = 0;
-    for (mirror::ClassLoader* loader : class_loaders_) {
-      ClassTable* table = class_linker->ClassTableForClassLoader(loader);
-      CHECK(table != nullptr);
-      uint8_t* memory_ptr = class_table_memory_ptr + class_table_bytes;
-      class_table_bytes += table->WriteToMemory(memory_ptr);
-      // Fixup the pointers in the newly written class table to contain image addresses. See
-      // above comment for intern tables.
-      ClassTable temp_class_table;
-      temp_class_table.ReadFromMemory(memory_ptr);
-      CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
-               table->NumZygoteClasses());
-      BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
-                                                                      RootInfo(kRootUnknown));
-      temp_class_table.VisitRoots(buffered_visitor);
-    }
-    CHECK_EQ(class_table_bytes, class_table_bytes_);
+
+    ClassTable* table = image_info.class_table_.get();
+    CHECK(table != nullptr);
+    const size_t class_table_bytes = table->WriteToMemory(class_table_memory_ptr);
+    CHECK_EQ(class_table_bytes, image_info.class_table_bytes_);
+    // Fixup the pointers in the newly written class table to contain image addresses. See
+    // above comment for intern tables.
+    ClassTable temp_class_table;
+    temp_class_table.ReadFromMemory(class_table_memory_ptr);
+    CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
+             table->NumZygoteClasses());
+    BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
+                                                                    RootInfo(kRootUnknown));
+    temp_class_table.VisitRoots(buffered_visitor);
   }
-  */
 }
 
 void ImageWriter::CopyAndFixupObjects() {
@@ -1991,7 +1974,7 @@ void ImageWriter::CopyAndFixupMethod(ArtMethod* orig,
   copy->SetDeclaringClass(GetImageAddress(orig->GetDeclaringClassUnchecked()));
 
   const char* oat_filename;
-  if (orig->IsRuntimeMethod()) {
+  if (orig->IsRuntimeMethod() || compile_app_image_) {
     oat_filename = default_oat_filename_;
   } else {
     auto it = dex_file_oat_filename_map_.find(orig->GetDexFile());
@@ -2110,7 +2093,6 @@ uint32_t ImageWriter::BinSlot::GetIndex() const {
 }
 
 uint8_t* ImageWriter::GetOatFileBegin(const char* oat_filename) const {
-  // DCHECK_GT(intern_table_bytes_, 0u); TODO: Reenable intern table and class table.
   uintptr_t last_image_end = 0;
   for (const char* oat_fn : oat_filenames_) {
     const ImageInfo& image_info = GetConstImageInfo(oat_fn);
@@ -2197,4 +2179,37 @@ void ImageWriter::UpdateOatFile(const char* oat_filename) {
   }
 }
 
+ImageWriter::ImageWriter(
+    const CompilerDriver& compiler_driver,
+    uintptr_t image_begin,
+    bool compile_pic,
+    bool compile_app_image,
+    ImageHeader::StorageMode image_storage_mode,
+    const std::vector<const char*> oat_filenames,
+    const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map)
+    : compiler_driver_(compiler_driver),
+      global_image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
+      image_objects_offset_begin_(0),
+      oat_file_(nullptr),
+      compile_pic_(compile_pic),
+      compile_app_image_(compile_app_image),
+      target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
+      image_method_array_(ImageHeader::kImageMethodsCount),
+      dirty_methods_(0u),
+      clean_methods_(0u),
+      image_storage_mode_(image_storage_mode),
+      dex_file_oat_filename_map_(dex_file_oat_filename_map),
+      oat_filenames_(oat_filenames),
+      default_oat_filename_(oat_filenames[0]) {
+  CHECK_NE(image_begin, 0U);
+  for (const char* oat_filename : oat_filenames) {
+    image_info_map_.emplace(oat_filename, ImageInfo());
+  }
+  std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
+}
+
+ImageWriter::ImageInfo::ImageInfo()
+    : intern_table_(new InternTable),
+      class_table_(new ClassTable) {}
+
 }  // namespace art
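Factoring CreateImageSections out means AllocMemory, CalculateNewObjectOffsets, and CreateHeader now all derive the layout from one function instead of three hand-maintained size formulas. For reference, the layout it produces for a single image, with the alignment rules taken from the code above:

    ImageSection sections[ImageHeader::kSectionCount];
    size_t image_end = image_info.CreateImageSections(target_ptr_size_, sections);
    // sections[kSectionObjects]          offset 0, mirror objects
    // sections[kSectionArtFields]        immediately after objects
    // sections[kSectionArtMethods]       aligned to ArtMethod::Alignment()
    // sections[kSectionDexCacheArrays]   immediately after methods
    // sections[kSectionInternedStrings]  8-byte aligned (HashSet::WriteToMemory)
    // sections[kSectionClassTable]       8-byte aligned
    // image_end == end of the class table; the bitmap section is added
    // separately, page-aligned, and is not part of the returned size.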
@@ -58,33 +60,7 @@ class ImageWriter FINAL { bool compile_app_image, ImageHeader::StorageMode image_storage_mode, const std::vector<const char*> oat_filenames, - const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map) - : compiler_driver_(compiler_driver), - global_image_begin_(reinterpret_cast<uint8_t*>(image_begin)), - image_objects_offset_begin_(0), - oat_file_(nullptr), - compile_pic_(compile_pic), - compile_app_image_(compile_app_image), - boot_image_space_(nullptr), - target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), - intern_table_bytes_(0u), - image_method_array_(ImageHeader::kImageMethodsCount), - dirty_methods_(0u), - clean_methods_(0u), - class_table_bytes_(0u), - image_storage_mode_(image_storage_mode), - dex_file_oat_filename_map_(dex_file_oat_filename_map), - oat_filenames_(oat_filenames), - default_oat_filename_(oat_filenames[0]) { - CHECK_NE(image_begin, 0U); - for (const char* oat_filename : oat_filenames) { - image_info_map_.emplace(oat_filename, ImageInfo()); - } - std::fill_n(image_methods_, arraysize(image_methods_), nullptr); - } - - ~ImageWriter() { - } + const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map); bool PrepareImageAddressSpace(); @@ -237,41 +213,40 @@ class ImageWriter FINAL { }; struct ImageInfo { - explicit ImageInfo() - : image_begin_(nullptr), - image_end_(RoundUp(sizeof(ImageHeader), kObjectAlignment)), - image_roots_address_(0), - image_offset_(0), - image_size_(0), - oat_offset_(0), - bin_slot_sizes_(), - bin_slot_offsets_(), - bin_slot_count_() {} + ImageInfo(); + ImageInfo(ImageInfo&&) = default; + + // Create the image sections into the out sections variable, returns the size of the image + // excluding the bitmap. + size_t CreateImageSections(size_t target_ptr_size, ImageSection* out_sections) const; std::unique_ptr<MemMap> image_; // Memory mapped for generating the image. // Target begin of this image. Notes: It is not valid to write here, this is the address // of the target image, not necessarily where image_ is mapped. The address is only valid // after layouting (otherwise null). - uint8_t* image_begin_; + uint8_t* image_begin_ = nullptr; - size_t image_end_; // Offset to the free space in image_, initially size of image header. - uint32_t image_roots_address_; // The image roots address in the image. - size_t image_offset_; // Offset of this image from the start of the first image. + // Offset to the free space in image_, initially size of image header. + size_t image_end_ = RoundUp(sizeof(ImageHeader), kObjectAlignment); + uint32_t image_roots_address_ = 0; // The image roots address in the image. + size_t image_offset_ = 0; // Offset of this image from the start of the first image. // Image size is the *address space* covered by this image. As the live bitmap is aligned // to the page size, the live bitmap will cover more address space than necessary. But live // bitmaps may not overlap, so an image has a "shadow," which is accounted for in the size. // The next image may only start at image_begin_ + image_size_ (which is guaranteed to be // page-aligned). - size_t image_size_; + size_t image_size_ = 0; // Oat data. - size_t oat_offset_; // Offset of the oat file for this image from start of oat files. This is - // valid when the previous oat file has been written. - uint8_t* oat_data_begin_; // Start of oatdata in the corresponding oat file. This is - // valid when the images have been layed out. 
- size_t oat_size_; // Size of the corresponding oat data. + // Offset of the oat file for this image from start of oat files. This is + // valid when the previous oat file has been written. + size_t oat_offset_ = 0; + // Start of oatdata in the corresponding oat file. This is + // valid when the images have been laid out. + uint8_t* oat_data_begin_ = nullptr; + size_t oat_size_ = 0; // Size of the corresponding oat data. // Image bitmap which lets us know where the objects inside of the image reside. std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_; @@ -280,12 +255,24 @@ class ImageWriter FINAL { SafeMap<const DexFile*, size_t> dex_cache_array_starts_; // Offset from oat_data_begin_ to the stubs. - uint32_t oat_address_offsets_[kOatAddressCount]; + uint32_t oat_address_offsets_[kOatAddressCount] = {}; // Bin slot tracking for dirty object packing. - size_t bin_slot_sizes_[kBinSize]; // Number of bytes in a bin. - size_t bin_slot_offsets_[kBinSize]; // Number of bytes in previous bins. - size_t bin_slot_count_[kBinSize]; // Number of objects in a bin. + size_t bin_slot_sizes_[kBinSize] = {}; // Number of bytes in a bin. + size_t bin_slot_offsets_[kBinSize] = {}; // Number of bytes in previous bins. + size_t bin_slot_count_[kBinSize] = {}; // Number of objects in a bin. + + // Cached size of the intern table for when we allocate memory. + size_t intern_table_bytes_ = 0; + + // Number of image class table bytes. + size_t class_table_bytes_ = 0; + + // Intern table associated with this image for serialization. + std::unique_ptr<InternTable> intern_table_; + + // Class table associated with this image for serialization. + std::unique_ptr<ClassTable> class_table_; }; // We use the lock word to store the offset of the object in the image. @@ -483,18 +470,12 @@ class ImageWriter FINAL { const bool compile_pic_; const bool compile_app_image_; - // Cache the boot image space in this class for faster lookups. - gc::space::ImageSpace* boot_image_space_; - // Size of pointers on the target architecture. size_t target_ptr_size_; // Mapping of oat filename to image data. std::unordered_map<std::string, ImageInfo> image_info_map_; - // Cached size of the intern table for when we allocate memory. - size_t intern_table_bytes_; - // ArtField, ArtMethod relocating map. These are allocated as an array of structs but we want to // have one entry per art field for convenience. ArtFields are placed right after the end of the // image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields. @@ -528,9 +509,6 @@ class ImageWriter FINAL { // null is a valid entry. std::unordered_set<mirror::ClassLoader*> class_loaders_; - // Number of image class table bytes.
- size_t class_table_bytes_; - // Which mode the image is stored as, see image.h const ImageHeader::StorageMode image_storage_mode_; diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index b323d24038..bc51ed6e6a 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -22,6 +22,7 @@ #include "base/stringpiece.h" #include "base/time_utils.h" #include "base/timing_logger.h" +#include "base/unix_file/fd_file.h" #include "compiler_callbacks.h" #include "dex/pass_manager.h" #include "dex/quick_compiler_callbacks.h" @@ -42,11 +43,12 @@ JitCompiler* JitCompiler::Create() { return new JitCompiler(); } -extern "C" void* jit_load(CompilerCallbacks** callbacks) { +extern "C" void* jit_load(CompilerCallbacks** callbacks, bool* generate_debug_info) { VLOG(jit) << "loading jit compiler"; auto* const jit_compiler = JitCompiler::Create(); CHECK(jit_compiler != nullptr); *callbacks = jit_compiler->GetCompilerCallbacks(); + *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo(); VLOG(jit) << "Done loading jit compiler"; return jit_compiler; } @@ -155,14 +157,33 @@ JitCompiler::JitCompiler() : total_time_(0) { /* dump_cfg_append */ false, cumulative_logger_.get(), /* swap_fd */ -1, - /* profile_file */ "", - /* dex to oat map */ nullptr)); + /* dex to oat map */ nullptr, + /* profile_compilation_info */ nullptr)); // Disable dedupe so we can remove compiled methods. compiler_driver_->SetDedupeEnabled(false); compiler_driver_->SetSupportBootImageFixup(false); + + if (compiler_options_->GetGenerateDebugInfo()) { +#ifdef __ANDROID__ + const char* prefix = GetAndroidData(); +#else + const char* prefix = "/tmp"; +#endif + DCHECK_EQ(compiler_driver_->GetThreadCount(), 1u) + << "Generating debug info only works with one compiler thread"; + std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map"; + perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str())); + if (perf_file_ == nullptr) { + LOG(FATAL) << "Could not create perf file at " << perf_filename; + } + } } JitCompiler::~JitCompiler() { + if (perf_file_ != nullptr) { + UNUSED(perf_file_->Flush()); + UNUSED(perf_file_->Close()); + } } bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { @@ -188,6 +209,20 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*)); JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache(); success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile); + if (success && compiler_options_->GetGenerateDebugInfo()) { + const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode(); + std::ostringstream stream; + stream << std::hex + << reinterpret_cast<uintptr_t>(ptr) + << " " + << code_cache->GetMemorySizeOfCodePointer(ptr) + << " " + << PrettyMethod(method_to_compile) + << std::endl; + std::string str = stream.str(); + bool res = perf_file_->WriteFully(str.c_str(), str.size()); + CHECK(res); + } } // Trim maps to reduce memory usage. 
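The perf map emitted above follows the plain-text format Linux perf expects for JIT-compiled code: one line per method, "start size name", with start and size in hex (std::hex stays in effect for both stream insertions in the hunk). A self-contained sketch of formatting one such entry; the helper name is illustrative:

    #include <cstdint>
    #include <sstream>
    #include <string>

    // Formats one perf map entry ("<start> <size> <name>\n", hex fields),
    // mirroring the inline stream code in JitCompiler::CompileMethod above.
    std::string MakePerfMapLine(uintptr_t code_begin, size_t code_size,
                                const std::string& method_name) {
      std::ostringstream stream;
      stream << std::hex << code_begin << " " << code_size << " "
             << method_name << std::endl;
      return stream.str();
    }

At runtime the file lands at /tmp/perf-<pid>.map (or under the Android data directory on device), which is where perf looks when symbolizing anonymous JIT code regions.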
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h index 913a6d00ae..037a18ac7a 100644 --- a/compiler/jit/jit_compiler.h +++ b/compiler/jit/jit_compiler.h @@ -43,6 +43,9 @@ class JitCompiler { size_t GetTotalCompileTime() const { return total_time_; } + CompilerOptions* GetCompilerOptions() const { + return compiler_options_.get(); + } private: uint64_t total_time_; @@ -53,6 +56,7 @@ class JitCompiler { std::unique_ptr<CompilerCallbacks> callbacks_; std::unique_ptr<CompilerDriver> compiler_driver_; std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; + std::unique_ptr<File> perf_file_; JitCompiler(); diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h index 877a674674..b10cc3534c 100644 --- a/compiler/linker/relative_patcher_test.h +++ b/compiler/linker/relative_patcher_test.h @@ -47,7 +47,7 @@ class RelativePatcherTest : public testing::Test { driver_(&compiler_options_, &verification_results_, &inliner_map_, Compiler::kQuick, instruction_set, nullptr, false, nullptr, nullptr, nullptr, 1u, - false, false, "", false, nullptr, -1, "", nullptr), + false, false, "", false, nullptr, -1, nullptr, nullptr), error_msg_(), instruction_set_(instruction_set), features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)), diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 58f46d69a2..9f7ffa5ace 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -121,7 +121,7 @@ class OatTest : public CommonCompilerTest { false, timer_.get(), -1, - "", + nullptr, nullptr)); } diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index da2f9cbed5..eee6116098 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1142,7 +1142,7 @@ class BCEVisitor : public HGraphVisitor { loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) { SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader()); if (!array_get->GetSideEffects().MayDependOn(loop_effects)) { - HoistToPreheaderOrDeoptBlock(loop, array_get); + HoistToPreHeaderOrDeoptBlock(loop, array_get); } } } @@ -1280,7 +1280,8 @@ class BCEVisitor : public HGraphVisitor { // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests // correctly guard against any possible OOB (including arithmetic wrap-around cases). 
- HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + HBasicBlock* block = GetPreHeader(loop, instruction); induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper); if (lower != nullptr) { InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper)); @@ -1358,7 +1359,7 @@ class BCEVisitor : public HGraphVisitor { return true; } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) { if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) { - HoistToPreheaderOrDeoptBlock(loop, length); + HoistToPreHeaderOrDeoptBlock(loop, length); return true; } } @@ -1376,7 +1377,8 @@ class BCEVisitor : public HGraphVisitor { HInstruction* array = check->InputAt(0); if (loop->IsDefinedOutOfTheLoop(array)) { // Generate: if (array == null) deoptimize; - HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + HBasicBlock* block = GetPreHeader(loop, check); HInstruction* cond = new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant()); InsertDeopt(loop, block, cond); @@ -1423,6 +1425,28 @@ class BCEVisitor : public HGraphVisitor { return true; } + /** + * Returns the appropriate preheader for the loop, depending on whether the + * instruction appears in the loop header or in the loop body proper. + */ + HBasicBlock* GetPreHeader(HLoopInformation* loop, HInstruction* instruction) { + // Use the preheader unless there is an earlier generated deoptimization block, since + // hoisted expressions may depend on and/or be used by the deoptimization tests. + HBasicBlock* header = loop->GetHeader(); + const uint32_t loop_id = header->GetBlockId(); + auto it = taken_test_loop_.find(loop_id); + if (it != taken_test_loop_.end()) { + HBasicBlock* block = it->second; + // If always taken, keep it that way by returning the original preheader, + // which can be found by following the predecessor of the true-block twice. + if (instruction->GetBlock() == header) { + return block->GetSinglePredecessor()->GetSinglePredecessor(); + } + return block; + } + return loop->GetPreHeader(); + } + /** Inserts a deoptimization test. */ void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) { HInstruction* suspend = loop->GetSuspendCheck(); @@ -1437,28 +1461,17 @@ class BCEVisitor : public HGraphVisitor { } /** Hoists instruction out of the loop to preheader or deoptimization block. */ - void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) { - // Use preheader unless there is an earlier generated deoptimization block since - // hoisted expressions may depend on and/or used by the deoptimization tests. - const uint32_t loop_id = loop->GetHeader()->GetBlockId(); - HBasicBlock* preheader = loop->GetPreHeader(); - HBasicBlock* block = preheader; - auto it = taken_test_loop_.find(loop_id); - if (it != taken_test_loop_.end()) { - block = it->second; - } - // Hoist the instruction. + void HoistToPreHeaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) { + HBasicBlock* block = GetPreHeader(loop, instruction); DCHECK(!instruction->HasEnvironment()); instruction->MoveBefore(block->GetLastInstruction()); } /** - * Adds a new taken-test structure to a loop if needed (and not already done). + * Adds a new taken-test structure to a loop if needed and not already done.
* The taken-test protects range analysis evaluation code to avoid any * deoptimization caused by incorrect trip-count evaluation in non-taken loops. * - * Returns block in which deoptimizations/invariants can be put. - * * old_preheader * | * if_block <- taken-test protects deoptimization block @@ -1490,16 +1503,11 @@ class BCEVisitor : public HGraphVisitor { * array[i] = 0; * } */ - HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) { - // Not needed (can use preheader), or already done (can reuse)? + void TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) { + // Not needed (can use preheader) or already done (can reuse)? const uint32_t loop_id = loop->GetHeader()->GetBlockId(); - if (!needs_taken_test) { - return loop->GetPreHeader(); - } else { - auto it = taken_test_loop_.find(loop_id); - if (it != taken_test_loop_.end()) { - return it->second; - } + if (!needs_taken_test || taken_test_loop_.find(loop_id) != taken_test_loop_.end()) { + return; } // Generate top test structure. @@ -1528,7 +1536,6 @@ class BCEVisitor : public HGraphVisitor { if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition)); taken_test_loop_.Put(loop_id, true_block); - return true_block; } /** @@ -1543,7 +1550,7 @@ class BCEVisitor : public HGraphVisitor { * \ / * x_1 = phi(x_0, null) <- synthetic phi * | - * header + * new_preheader */ void InsertPhiNodes() { // Scan all new deoptimization blocks. diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 53d3615a41..ea0b9eca9a 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -997,6 +997,12 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, stack_map_stream_.EndStackMapEntry(); } +bool CodeGenerator::HasStackMapAtCurrentPc() { + uint32_t pc = GetAssembler()->CodeSize(); + size_t count = stack_map_stream_.GetNumberOfStackMaps(); + return count > 0 && stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc; +} + void CodeGenerator::RecordCatchBlockInfo() { ArenaAllocator* arena = graph_->GetArena(); @@ -1320,12 +1326,6 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod << "instruction->DebugName()=" << instruction->DebugName() << " slow_path->GetDescription()=" << slow_path->GetDescription(); DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || - // Control flow would not come back into the code if a fatal slow - // path is taken, so we do not care if it triggers GC. - slow_path->IsFatal() || - // HDeoptimize is a special case: we know we are not coming back from - // it into the code. - instruction->IsDeoptimize() || // When read barriers are enabled, some instructions use a // slow path to emit a read barrier, which does not trigger // GC, is not fatal, nor is emitted by HDeoptimize diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index eade05d7b6..5958cd89bc 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -269,6 +269,8 @@ class CodeGenerator { // Record native to dex mapping for a suspend point. Required by runtime. void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); + // Check whether we have already recorded mapping at this PC. 
+ bool HasStackMapAtCurrentPc(); bool CanMoveNullCheckToUser(HNullCheck* null_check); void MaybeRecordImplicitNullCheck(HInstruction* instruction); @@ -611,7 +613,7 @@ class CodeGenerator { ArenaVector<SlowPathCode*> slow_paths_; - // The current slow path that we're generating code for. + // The current slow-path that we're generating code for. SlowPathCode* current_slow_path_; // The current block index in `block_order_` of the block @@ -672,6 +674,122 @@ class CallingConvention { DISALLOW_COPY_AND_ASSIGN(CallingConvention); }; +/** + * A templated class SlowPathGenerator with a templated method NewSlowPath() + * that can be used by any code generator to share equivalent slow-paths with + * the objective of reducing generated code size. + * + * InstructionType: instruction that requires SlowPathCodeType + * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType*) + */ +template <typename InstructionType> +class SlowPathGenerator { + static_assert(std::is_base_of<HInstruction, InstructionType>::value, + "InstructionType is not a subclass of art::HInstruction"); + + public: + SlowPathGenerator(HGraph* graph, CodeGenerator* codegen) + : graph_(graph), + codegen_(codegen), + slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {} + + // Creates and adds a new slow-path, if needed, or returns an existing one otherwise. + // Templating the method (rather than the whole class) on the slow-path type enables + // keeping this code at a generic, non-architecture-specific place. + // + // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType. + // To relax this requirement, we would need some RTTI on the stored slow-paths, + // or template the class as a whole on SlowPathCodeType. + template <typename SlowPathCodeType> + SlowPathCodeType* NewSlowPath(InstructionType* instruction) { + static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value, + "SlowPathCodeType is not a subclass of art::SlowPathCode"); + static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value, + "SlowPathCodeType is not constructible from InstructionType*"); + // Iterate over potential candidates for sharing. Currently, only same-typed + // slow-paths with exactly the same dex-pc are viable candidates. + // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing? + const uint32_t dex_pc = instruction->GetDexPc(); + auto iter = slow_path_map_.find(dex_pc); + if (iter != slow_path_map_.end()) { + const auto& candidates = iter->second; + for (const auto& it : candidates) { + InstructionType* other_instruction = it.first; + SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second); + // Determine if the instructions allow for slow-path sharing. + if (HaveSameLiveRegisters(instruction, other_instruction) && + HaveSameStackMap(instruction, other_instruction)) { + // Can share: reuse existing one. + return other_slow_path; + } + } + } else { + // First time this dex-pc is seen. + iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}}); + } + // Cannot share: create and add new slow-path for this particular dex-pc. + SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction); + iter->second.emplace_back(std::make_pair(instruction, slow_path)); + codegen_->AddSlowPath(slow_path); + return slow_path; + } + + private: + // Tests if both instructions have the same set of live physical registers.
This ensures + // that the slow-path has exactly the same preamble for saving these registers to the stack. + bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const { + const uint32_t core_spill = ~codegen_->GetCoreSpillMask(); + const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask(); + RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters(); + RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters(); + return (((live1->GetCoreRegisters() & core_spill) == + (live2->GetCoreRegisters() & core_spill)) && + ((live1->GetFloatingPointRegisters() & fpu_spill) == + (live2->GetFloatingPointRegisters() & fpu_spill))); + } + + // Tests if both instructions have the same stack map. This ensures the interpreter + // will find exactly the same dex-registers at the same entries. + bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const { + DCHECK(i1->HasEnvironment()); + DCHECK(i2->HasEnvironment()); + // We conservatively test if the two instructions have exactly the same instruction + // and location in each dex-register. This guarantees they will have the same stack map. + HEnvironment* e1 = i1->GetEnvironment(); + HEnvironment* e2 = i2->GetEnvironment(); + if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) { + return false; + } + for (size_t i = 0, sz = e1->Size(); i < sz; ++i) { + if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) || + !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) { + return false; + } + } + return true; + } + + HGraph* const graph_; + CodeGenerator* const codegen_; + + // Map from dex-pc to vector of already existing instruction/slow-path pairs. + ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator); +}; + +class InstructionCodeGenerator : public HGraphVisitor { + public: + InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen) + : HGraphVisitor(graph), + deopt_slow_paths_(graph, codegen) {} + + protected: + // Add a slow-path generator for each instruction/slow-path combination that desires sharing. + // TODO: under the current regime, only deopt sharing makes sense; extend later.
+ SlowPathGenerator<HDeoptimize> deopt_slow_paths_; +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 9a1f2b8717..d64b8784e1 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -350,24 +350,24 @@ class TypeCheckSlowPathARM : public SlowPathCode { class DeoptimizationSlowPathARM : public SlowPathCode { public: - explicit DeoptimizationSlowPathARM(HInstruction* instruction) + explicit DeoptimizationSlowPathARM(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + instruction_, + instruction_->GetDexPc(), + this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); }; @@ -417,6 +417,56 @@ class ArraySetSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; +// Slow path marking an object during a read barrier. +class ReadBarrierMarkSlowPathARM : public SlowPathCode { + public: + ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj) + : instruction_(instruction), out_(out), obj_(obj) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast()) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>(); + arm_codegen->Move32(out_, Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); + } + + private: + HInstruction* const instruction_; + const Location out_; + const Location obj_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); +}; + // Slow path generating a read barrier for a heap reference. 
class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { public: @@ -438,7 +488,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { // to be instrumented, e.g.: // // __ LoadFromOffset(kLoadWord, out, out, offset); - // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); // // In that case, we have lost the information about the original // object, and the emitted read barrier cannot work properly. @@ -454,7 +504,9 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); DCHECK(!instruction_->IsInvoke() || (instruction_->IsInvokeStaticOrDirect() && - instruction_->GetLocations()->Intrinsified())); + instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -596,14 +648,18 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { class ReadBarrierForRootSlowPathARM : public SlowPathCode { public: ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) {} + : instruction_(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -857,7 +913,7 @@ void CodeGeneratorARM::UpdateBlockedPairRegisters() const { } InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1358,17 +1414,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARM::GenerateCompareWithImmediate(Register left, int32_t right) { - ShifterOperand operand; - if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, right, &operand)) { - __ cmp(left, operand); - } else { - Register temp = IP; - __ LoadImmediate(temp, right); - __ cmp(left, ShifterOperand(temp)); - } -} - void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label) { @@ -1434,7 +1479,7 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, int32_t val_low = Low32Bits(value); int32_t val_high = High32Bits(value); - GenerateCompareWithImmediate(left_high, val_high); + __ CmpConstant(left_high, val_high); if (if_cond == kCondNE) { __ b(true_label, ARMCondition(true_high_cond)); } else if (if_cond == kCondEQ) { @@ -1444,7 +1489,7 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, __ b(false_label, ARMCondition(false_high_cond)); } // Must be equal high, so compare the lows. 
- GenerateCompareWithImmediate(left_low, val_low); + __ CmpConstant(left_low, val_low); } else { Register right_high = right.AsRegisterPairHigh<Register>(); Register right_low = right.AsRegisterPairLow<Register>(); @@ -1568,7 +1613,7 @@ void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instructio __ cmp(left, ShifterOperand(right.AsRegister<Register>())); } else { DCHECK(right.IsConstant()); - GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); + __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); } if (true_target == nullptr) { __ b(false_target, ARMCondition(condition->GetOppositeCondition())); @@ -1610,8 +1655,7 @@ void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -1623,6 +1667,10 @@ void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -1675,8 +1723,8 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { __ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>())); } else { DCHECK(right.IsConstant()); - GenerateCompareWithImmediate(left.AsRegister<Register>(), - CodeGenerator::GetInt32ValueOf(right.GetConstant())); + __ CmpConstant(left.AsRegister<Register>(), + CodeGenerator::GetInt32ValueOf(right.GetConstant())); } __ it(ARMCondition(cond->GetCondition()), kItElse); __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1), @@ -1891,7 +1939,7 @@ void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { } void InstructionCodeGeneratorARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); + codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); } void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) { @@ -2846,8 +2894,7 @@ void InstructionCodeGeneratorARM::DivRemByPowerOfTwo(HBinaryOperation* instructi Register dividend = locations->InAt(0).AsRegister<Register>(); Register temp = locations->GetTemp(0).AsRegister<Register>(); int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); if (ctz_imm == 1) { @@ -2923,7 +2970,7 @@ void InstructionCodeGeneratorARM::GenerateDivRemConstantIntegral(HBinaryOperatio // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
} else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -2952,12 +2999,12 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue()); - if (abs_imm <= 1) { + int32_t value = div->InputAt(1)->AsIntConstant()->GetValue(); + if (value == 1 || value == 0 || value == -1) { // No temp register required. } else { locations->AddTemp(Location::RequiresRegister()); - if (!IsPowerOfTwo(abs_imm)) { + if (!IsPowerOfTwo(AbsOrMin(value))) { locations->AddTemp(Location::RequiresRegister()); } } @@ -3078,12 +3125,12 @@ void LocationsBuilderARM::VisitRem(HRem* rem) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue()); - if (abs_imm <= 1) { + int32_t value = rem->InputAt(1)->AsIntConstant()->GetValue(); + if (value == 1 || value == 0 || value == -1) { // No temp register required. } else { locations->AddTemp(Location::RequiresRegister()); - if (!IsPowerOfTwo(abs_imm)) { + if (!IsPowerOfTwo(AbsOrMin(value))) { locations->AddTemp(Location::RequiresRegister()); } } @@ -3437,7 +3484,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { Register first_reg = first.AsRegister<Register>(); if (second.IsRegister()) { Register second_reg = second.AsRegister<Register>(); - // Arm doesn't mask the shift count so we need to do it ourselves. + // ARM doesn't mask the shift count so we need to do it ourselves. __ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue)); if (op->IsShl()) { __ Lsl(out_reg, first_reg, out_reg); @@ -3449,7 +3496,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { } else { int32_t cst = second.GetConstant()->AsIntConstant()->GetValue(); uint32_t shift_value = static_cast<uint32_t>(cst & kMaxIntShiftValue); - if (shift_value == 0) { // arm does not support shifting with 0 immediate. + if (shift_value == 0) { // ARM does not support shifting with 0 immediate. __ Mov(out_reg, first_reg); } else if (op->IsShl()) { __ Lsl(out_reg, first_reg, shift_value); @@ -3796,9 +3843,9 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) { - // TODO (ported from quick): revisit Arm barrier kinds - DmbOptions flavor = DmbOptions::ISH; // quiet c++ warnings +void CodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) { + // TODO (ported from quick): revisit ARM barrier kinds. + DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings. switch (kind) { case MemBarrierKind::kAnyStore: case MemBarrierKind::kLoadAny: @@ -3879,11 +3926,11 @@ void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldI locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 
locations->AddTemp(Location::RequiresRegister()); } else if (generate_volatile) { - // Arm encoding have some additional constraints for ldrexd/strexd: + // The ARM encoding has some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. - // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever - // enable Arm encoding. + // We don't test for ARM yet, and the assertion makes sure that we + // revisit this if we ever enable ARM encoding. DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); locations->AddTemp(Location::RequiresRegister()); @@ -3913,7 +3960,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } switch (field_type) { @@ -4005,7 +4052,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, } if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } } @@ -4039,14 +4086,18 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } if (volatile_for_double) { - // Arm encoding have some additional constraints for ldrexd/strexd: + // The ARM encoding has some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. - // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever - // enable Arm encoding. + // We don't test for ARM yet, and the assertion makes sure that we + // revisit this if we ever enable ARM encoding. DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister()); } } @@ -4105,33 +4156,52 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (field_type) { - case Primitive::kPrimBoolean: { + case Primitive::kPrimBoolean: __ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset); break; - } - case Primitive::kPrimByte: { + case Primitive::kPrimByte: __ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset); break; - } - case Primitive::kPrimShort: { + case Primitive::kPrimShort: __ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset); break; - } - case Primitive::kPrimChar: { + case Primitive::kPrimChar: __ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset); break; - } case Primitive::kPrimInt: - case Primitive::kPrimNot: { __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset); break; + + case Primitive::kPrimNot: { + // /* HeapReference<Object> */ out = *(base + offset) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp_loc = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier call. + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + } else { + __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); + } + break; } - case Primitive::kPrimLong: { + case Primitive::kPrimLong: if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicLoad(base, offset, out.AsRegisterPairLow<Register>(), @@ -4140,12 +4210,10 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset); } break; - } - case Primitive::kPrimFloat: { + case Primitive::kPrimFloat: __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset); break; - } case Primitive::kPrimDouble: { DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()); @@ -4167,17 +4235,20 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, UNREACHABLE(); } - // Doubles are handled in the switch. - if (field_type != Primitive::kPrimDouble) { + if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) { + // Potential implicit null checks, in the case of reference or + // double fields, are handled in the previous switch statement. + } else { codegen_->MaybeRecordImplicitNullCheck(instruction); } if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); - } - - if (field_type == Primitive::kPrimNot) { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); + if (field_type == Primitive::kPrimNot) { + // Memory barriers, in the case of references, are also handled + // in the previous switch statement. 
+ } else { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } } @@ -4340,6 +4411,11 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->AddTemp(Location::RequiresRegister()); + } } void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { @@ -4347,12 +4423,13 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); - Primitive::Type type = instruction->GetType(); + Location out_loc = locations->Out(); + Primitive::Type type = instruction->GetType(); switch (type) { case Primitive::kPrimBoolean: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -4366,7 +4443,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -4380,7 +4457,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimShort: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; @@ -4394,7 +4471,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimChar: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; @@ -4406,13 +4483,9 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); + case Primitive::kPrimInt: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; @@ -4424,44 +4497,79 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { break; } + case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + 
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call. + codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + } else { + Register out = out_loc.AsRegister<Register>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadFromOffset(kLoadWord, out, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ LoadFromOffset(kLoadWord, out, IP, data_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow( + instruction, out_loc, out_loc, obj_loc, data_offset, index); + } + } + break; + } + case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - Location out = locations->Out(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset); + __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), obj, offset); } else { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), IP, data_offset); + __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), IP, data_offset); } break; } case Primitive::kPrimFloat: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - Location out = locations->Out(); - DCHECK(out.IsFpuRegister()); + SRegister out = out_loc.AsFpuRegister<SRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset); + __ LoadSFromOffset(out, obj, offset); } else { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); - __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset); + __ LoadSFromOffset(out, IP, data_offset); } break; } case Primitive::kPrimDouble: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - Location out = locations->Out(); - DCHECK(out.IsFpuRegisterPair()); + SRegister out = out_loc.AsFpuRegisterPairLow<SRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset); + __ 
LoadDFromOffset(FromLowSToD(out), obj, offset); } else { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); - __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset); + __ LoadDFromOffset(FromLowSToD(out), IP, data_offset); } break; } @@ -4470,20 +4578,12 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Location out = locations->Out(); - if (index.IsConstant()) { - uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); - } else { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); - } + // Potential implicit null checks, in the case of reference + // arrays, are handled in the previous switch statement. + } else { + codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -4574,6 +4674,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, source, IP, data_offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); DCHECK(!may_need_runtime_call_for_type_check); break; @@ -4615,12 +4716,12 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { // __ Mov(temp2, temp1); // // /* HeapReference<Class> */ temp1 = temp1->component_type_ // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset); // // // /* HeapReference<Class> */ temp2 = value->klass_ // __ LoadFromOffset(kLoadWord, temp2, value, class_offset); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc); // // __ cmp(temp1, ShifterOperand(temp2)); @@ -4717,8 +4818,6 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, value, IP, data_offset); } - - codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4770,8 +4869,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { UNREACHABLE(); } - // Ints and objects are handled in the switch. - if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) { + // Objects are handled in the switch. 
+ if (value_type != Primitive::kPrimNot) { codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -5140,16 +5239,9 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ AddConstant(out, current_method, declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); - } + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ @@ -5157,17 +5249,8 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value()); - - size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &out[type_index] - __ AddConstant(out, out, cache_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = out[type_index] - __ LoadFromOffset(kLoadWord, out, out, cache_offset); - } + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); @@ -5230,30 +5313,14 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ AddConstant(out, current_method, declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); - } - + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - - size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::String>* */ out = &out[string_index] - __ AddConstant(out, out, cache_offset); - // /* mirror::String* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::String> */ out = out[string_index] - __ LoadFromOffset(kLoadWord, out, out, cache_offset); - } + // /* GcRoot<mirror::String> 
*/ out = out[string_index] + GenerateGcRootFieldLoad( + load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); if (!load->IsInDexCache()) { SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); @@ -5300,6 +5367,14 @@ void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } +static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + return kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); +} + void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); @@ -5326,21 +5401,22 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); // When read barriers are enabled, we need a temporary register for // some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); + Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(0) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5355,10 +5431,9 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: { __ cmp(out, ShifterOperand(cls)); // Classes must be equal for the instanceof to succeed. @@ -5373,17 +5448,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { // object to avoid doing a comparison we know will fail. Label loop; __ Bind(&loop); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. 
- Register temp = temp_loc.AsRegister<Register>(); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadWord, out, out, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ cmp(out, ShifterOperand(cls)); @@ -5401,17 +5467,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ Bind(&loop); __ cmp(out, ShifterOperand(cls)); __ b(&success, EQ); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = temp_loc.AsRegister<Register>(); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadWord, out, out, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ b(&done); @@ -5429,17 +5486,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ cmp(out, ShifterOperand(cls)); __ b(&exact_check, EQ); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = temp_loc.AsRegister<Register>(); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->component_type_ - __ LoadFromOffset(kLoadWord, out, out, component_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -5478,6 +5526,13 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { // HInstanceOf instruction (following the runtime calling // convention), which might be cluttered by the potential first // read barrier emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. 
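The instanceof and checkcast hunks above fold the repeated kEmitCompilerReadBarrier conditions into the new TypeCheckNeedsATemporary() predicate, which now also reserves a temporary whenever Baker read barriers are in use. A minimal standalone model of the predicate follows; the flag values and the main() driver are illustrative, and only the predicate body mirrors the diff:

  #include <iostream>

  // Illustrative stand-ins for ART's compile-time read-barrier flags.
  constexpr bool kEmitCompilerReadBarrier = true;
  constexpr bool kUseBakerReadBarrier = false;

  enum class TypeCheckKind {
    kExactCheck,
    kAbstractClassCheck,
    kClassHierarchyCheck,
    kArrayObjectCheck,
  };

  // Mirrors the new helper: with Baker read barriers every check kind
  // needs a temp; with other read barriers only the loop-based checks
  // do; with read barriers disabled, none do.
  static bool TypeCheckNeedsATemporary(TypeCheckKind kind) {
    return kEmitCompilerReadBarrier &&
        (kUseBakerReadBarrier ||
         kind == TypeCheckKind::kAbstractClassCheck ||
         kind == TypeCheckKind::kClassHierarchyCheck ||
         kind == TypeCheckKind::kArrayObjectCheck);
  }

  int main() {
    std::cout << TypeCheckNeedsATemporary(TypeCheckKind::kExactCheck)          // 0
              << TypeCheckNeedsATemporary(TypeCheckKind::kAbstractClassCheck)  // 1
              << '\n';
  }
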
DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, /* is_fatal */ false); @@ -5532,27 +5587,27 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { locations->AddTemp(Location::RequiresRegister()); // When read barriers are enabled, we need an additional temporary // register for some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); + Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(1) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); bool is_type_check_slow_path_fatal = (type_check_kind == TypeCheckKind::kExactCheck || type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -5571,8 +5626,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -5589,18 +5643,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // object to avoid doing a comparison we know will fail. Label loop, compare_classes; __ Bind(&loop); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // to the `compare_classes` label to compare it with the checked @@ -5612,8 +5656,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -5629,18 +5672,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // back at the beginning of the loop. @@ -5651,8 +5684,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -5664,19 +5696,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ b(&done, EQ); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->component_type_ - __ LoadFromOffset(kLoadWord, temp, temp, component_offset); - codegen_->MaybeGenerateReadBarrier( - instruction, temp_loc, temp_loc, temp2_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc); // If the component type is not null (i.e. the object is indeed // an array), jump to label `check_non_primitive_component_type` @@ -5689,8 +5710,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
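To see what the loop-based paths above actually compute once the read-barrier plumbing is stripped away, here is a plain C++ sketch of the superclass walk (kAbstractClassCheck / kClassHierarchyCheck) and of the array-object check. The struct is a toy: the field names echo the mirror::Class members whose offsets the generated code loads, everything else is invented for illustration:

  #include <cstdint>

  // Toy stand-in for mirror::Class; field names echo the offsets the
  // generated code loads (super_class_, component_type_, primitive_type_).
  struct Class {
    const Class* super_class_ = nullptr;
    const Class* component_type_ = nullptr;  // non-null only for arrays
    uint16_t primitive_type_ = 0;            // 0 means "not primitive"
  };

  // kAbstractClassCheck / kClassHierarchyCheck: follow super_class_ until
  // the target class is found or the chain ends in null (failure), which
  // is exactly the compare-and-loop structure emitted above.
  bool IsSubclassOf(const Class* klass, const Class* target) {
    for (const Class* k = klass; k != nullptr; k = k->super_class_) {
      if (k == target) return true;
    }
    return false;
  }

  // kArrayObjectCheck: the object passes if its class is an array of
  // references, i.e. component_type_ is non-null and not primitive.
  bool IsObjectArrayClass(const Class& klass) {
    return klass.component_type_ != nullptr &&
           klass.component_type_->primitive_type_ == 0;
  }

  int main() {
    Class object_cls;
    Class string_cls{&object_cls};
    Class string_array{nullptr, &string_cls};
    bool ok = IsSubclassOf(&string_cls, &object_cls) &&
              IsObjectArrayClass(string_array);
    return ok ? 0 : 1;
  }
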
// /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -5699,8 +5719,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ CompareAndBranchIfZero(temp, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -5717,6 +5736,13 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // instruction (following the runtime calling convention), which // might be cluttered by the potential first read barrier // emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -5901,14 +5927,249 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } -void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp) { + Register out_reg = out.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, out_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ Mov(temp.AsRegister<Register>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp) { + Register out_reg = out.AsRegister<Register>(); + Register obj_reg = obj.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. 
+ // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, obj_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset) { + Register root_reg = root.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barrier are used: + // + // root = obj.field; + // if (Thread::Current()->GetIsGcMarking()) { + // root = ReadBarrier::Mark(root) + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path used to mark the GC root `root`. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root); + codegen_->AddSlowPath(slow_path); + + __ LoadFromOffset( + kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value()); + __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ AddConstant(root_reg, obj, offset); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. 
+ } +} + +void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Location no_index = Location::NoLocation(); + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, offset, no_index, temp, needs_null_check); +} + +void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, data_offset, index, temp, needs_null_check); +} + +void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as: + // - it implements the load-load fence using a data dependency on + // the high-bits of rb_state, which are expected to be all zeroes; + // - it performs additional checks that we do not do here for + // performance reasons. + + Register ref_reg = ref.AsRegister<Register>(); + Register temp_reg = temp.AsRegister<Register>(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + // /* uint32_t */ rb_state = lock_word.ReadBarrierState() + __ Lsr(temp_reg, temp_reg, LockWord::kReadBarrierStateShift); + __ and_(temp_reg, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask)); + static_assert( + LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, + "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); + + // Introduce a dependency on the high bits of rb_state, which shall + // be all zeroes, to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). 
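The comment block above is the heart of this change. For reference, the same fast path written as portable C++: the lock-word layout constants below are placeholders rather than ART's real bit positions, and the acquire load stands in for the lfence / artificial address dependency used by the emitted code. (The GC-root variant earlier in the hunk differs in that it checks Thread::Current()->GetIsGcMarking() instead of a per-object bit.)

  #include <atomic>
  #include <cstdint>

  // Placeholder lock-word layout; the real shift/mask values come from
  // art::LockWord and art::ReadBarrier.
  constexpr uint32_t kReadBarrierStateShift = 28;
  constexpr uint32_t kReadBarrierStateMask = 0x3;
  constexpr uint32_t kGrayState = 1;  // stand-in for ReadBarrier::gray_ptr_

  struct Object {
    std::atomic<uint32_t> monitor_{0};
    Object* field_ = nullptr;
  };

  Object* Mark(Object* ref) { return ref; }  // no-op stand-in for the runtime entrypoint

  // Models GenerateReferenceLoadWithBakerReadBarrier: the lock word is
  // read *before* the reference, the two loads are kept ordered, and a
  // gray object routes the loaded reference through the mark entrypoint.
  Object* LoadFieldWithBakerBarrier(Object* obj) {
    uint32_t rb_state =
        (obj->monitor_.load(std::memory_order_acquire) >> kReadBarrierStateShift) &
        kReadBarrierStateMask;
    Object* ref = obj->field_;  // the original reference load
    if (rb_state == kGrayState) {
      ref = Mark(ref);  // slow path: the concurrent copying GC may move `ref`
    }
    return ref;
  }

  int main() {
    Object child, parent;
    parent.field_ = &child;
    return LoadFieldWithBakerBarrier(&parent) == &child ? 0 : 1;
  }
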
+ // IP = rb_state & ~LockWord::kReadBarrierStateMask = 0 + __ bic(IP, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask)); + // obj is unchanged by this operation, but its value now depends on + // IP, which depends on temp_reg. + __ add(obj, obj, ShifterOperand(IP)); + + // The actual reference load. + if (index.IsValid()) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ ref = + // *(obj + offset + index * sizeof(HeapReference<Object>)) + if (index.IsConstant()) { + size_t computed_offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset; + __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ LoadFromOffset(kLoadWord, ref_reg, IP, offset); + } + } else { + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadWord, ref_reg, obj, offset); + } + + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + + // Slow path used to mark the object `ref` when it is gray. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref, ref); + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + __ cmp(temp_reg, ShifterOperand(ReadBarrier::gray_ptr_)); + __ b(slow_path->GetEntryLabel(), EQ); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the reference load. + // // If heap poisoning is enabled, the unpoisoning of the loaded // reference will be carried out by the runtime within the slow // path. @@ -5922,57 +6183,41 @@ void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction, ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); - // TODO: When read barrier has a fast path, add it here. - /* Currently the read barrier call is inserted after the original load. - * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the - * original load. This load-load ordering is required by the read barrier. - * The fast path/slow path (for Baker's algorithm) should look like: - * - * bool isGray = obj.LockWord & kReadBarrierMask; - * lfence; // load fence or artificial data dependence to prevent load-load reordering - * ref = obj.field; // this is the original load - * if (isGray) { - * ref = Mark(ref); // ideally the slow path just does Mark(ref) - * } - */ - __ b(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } -void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void CodeGeneratorARM::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier). 
+ DCHECK(!kUseBakerReadBarrier); // If heap poisoning is enabled, unpoisoning will be taken care of // by the runtime within the slow path. - GenerateReadBarrier(instruction, out, ref, obj, offset, index); + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); } else if (kPoisonHeapReferences) { __ UnpoisonHeapReference(out.AsRegister<Register>()); } } -void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction, - Location out, - Location root) { +void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the GC root load. + // // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root); AddSlowPath(slow_path); - // TODO: Implement a fast path for ReadBarrierForRoot, performing - // the following operation (for Baker's algorithm): - // - // if (thread.tls32_.is_gc_marking) { - // root = Mark(root); - // } - __ b(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -6304,7 +6549,7 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) } if (num_entries - last_index == 2) { // The last missing case_value. - GenerateCompareWithImmediate(temp_reg, 1); + __ CmpConstant(temp_reg, 1); __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ); } @@ -6364,7 +6609,7 @@ void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysB void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) { if (!trg.IsValid()) { - DCHECK(type == Primitive::kPrimVoid); + DCHECK_EQ(type, Primitive::kPrimVoid); return; } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index b7c58e1248..26d6d63b31 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -188,7 +188,7 @@ class LocationsBuilderARM : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM); }; -class InstructionCodeGeneratorARM : public HGraphVisitor { +class InstructionCodeGeneratorARM : public InstructionCodeGenerator { public: InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen); @@ -222,24 +222,57 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void HandleLongRotate(LocationSummary* locations); void HandleRotate(HRor* ror); void HandleShift(HBinaryOperation* operation); - void GenerateMemoryBarrier(MemBarrierKind kind); + void GenerateWideAtomicStore(Register addr, uint32_t offset, Register value_lo, Register value_hi, Register temp1, Register temp2, HInstruction* instruction); void GenerateWideAtomicLoad(Register addr, uint32_t offset, Register out_lo, Register out_hi); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a read barrier. 
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a Baker's read barrier. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset); + void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, Label* true_target, Label* false_target); - void GenerateCompareWithImmediate(Register left, int32_t right); void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, Label* false_target); @@ -346,6 +379,8 @@ class CodeGeneratorARM : public CodeGenerator { // Emit a write barrier. void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null); + void GenerateMemoryBarrier(MemBarrierKind kind); + Label* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<Label>(block_labels_, block); } @@ -406,7 +441,26 @@ class CodeGeneratorARM : public CodeGenerator { return &it->second; } - // Generate a read barrier for a heap reference within `instruction`. + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. // // A read barrier for an object reference read from the heap is // implemented as a call to the artReadBarrierSlow runtime entry @@ -423,23 +477,25 @@ class CodeGeneratorARM : public CodeGenerator { // When `index` is provided (i.e. for array accesses), the offset // value passed to artReadBarrierSlow is adjusted to take `index` // into account. - void GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // If read barriers are enabled, generate a read barrier for a heap reference. - // If heap poisoning is enabled, also unpoison the reference in `out`. - void MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // Generate a read barrier for a GC root within `instruction`. 
+ void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. // // A read barrier for an object reference GC root is implemented as // a call to the artReadBarrierForRootSlow runtime entry point, @@ -449,9 +505,19 @@ class CodeGeneratorARM : public CodeGenerator { // // The `out` location contains the value returned by // artReadBarrierForRootSlow. - void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); private: + // Factored implementation of GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check); + Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index b49f42b6c8..a3150d3d22 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -477,24 +477,24 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit DeoptimizationSlowPathARM64(HInstruction* instruction) + explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + instruction_, + instruction_->GetDexPc(), + this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); }; @@ -1605,7 +1605,7 @@ void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruct InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -2534,8 +2534,7 @@ void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruc Register out = 
OutputRegister(instruction); Register dividend = InputRegisterAt(instruction, 0); int64_t imm = Int64FromConstant(second.GetConstant()); - uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); UseScratchRegisterScope temps(GetVIXLAssembler()); @@ -2627,7 +2626,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* ins // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -2940,9 +2939,8 @@ void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathARM64(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCodeARM64* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -2954,6 +2952,10 @@ void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ Nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 0e90ac6345..f2ff89488e 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -186,7 +186,7 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention { DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64); }; -class InstructionCodeGeneratorARM64 : public HGraphVisitor { +class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { public: InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 4648606da8..322912976e 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -444,19 +444,16 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit DeoptimizationSlowPathMIPS(HInstruction* instruction) + explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, - dex_pc, + instruction_->GetDexPc(), this, IsDirectEntrypoint(kQuickDeoptimize)); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); @@ -465,7 +462,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS 
{ const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS); }; @@ -608,9 +605,9 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) { // then swap the high 32 bits of the same FPR. mtc1 makes the high 32 bits of an FPR // unpredictable and the following mfch1 will fail. __ Mfc1(TMP, f1); - __ Mfhc1(AT, f1); + __ MoveFromFpuHigh(AT, f1); __ Mtc1(r2_l, f1); - __ Mthc1(r2_h, f1); + __ MoveToFpuHigh(r2_h, f1); __ Move(r2_l, TMP); __ Move(r2_h, AT); } else if (loc1.IsStackSlot() && loc2.IsStackSlot()) { @@ -862,7 +859,7 @@ void CodeGeneratorMIPS::Move64(Location destination, Location source) { Register dst_low = destination.AsRegisterPairLow<Register>(); FRegister src = source.AsFpuRegister<FRegister>(); __ Mfc1(dst_low, src); - __ Mfhc1(dst_high, src); + __ MoveFromFpuHigh(dst_high, src); } else { DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination; int32_t off = source.GetStackIndex(); @@ -875,7 +872,7 @@ void CodeGeneratorMIPS::Move64(Location destination, Location source) { Register src_high = source.AsRegisterPairHigh<Register>(); Register src_low = source.AsRegisterPairLow<Register>(); __ Mtc1(src_low, dst); - __ Mthc1(src_high, dst); + __ MoveToFpuHigh(src_high, dst); } else if (source.IsFpuRegister()) { __ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); } else { @@ -1241,7 +1238,7 @@ void InstructionCodeGeneratorMIPS::GenerateSuspendCheck(HSuspendCheck* instructi InstructionCodeGeneratorMIPS::InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1511,7 +1508,7 @@ void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction) } void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); Primitive::Type type = instr->GetResultType(); @@ -1534,7 +1531,7 @@ void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) { static constexpr size_t kMipsBitsPerWord = kMipsWordSize * kBitsPerByte; void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = instr->GetLocations(); Primitive::Type type = instr->GetType(); @@ -1542,30 +1539,58 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { bool use_imm = rhs_location.IsConstant(); Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>(); int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0; - uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue; - uint32_t shift_value = rhs_imm & shift_mask; - // Is the INS (Insert Bit Field) instruction supported? - bool has_ins = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + const uint32_t shift_mask = (type == Primitive::kPrimInt) + ? 
kMaxIntShiftValue + : kMaxLongShiftValue; + const uint32_t shift_value = rhs_imm & shift_mask; + // Are the INS (Insert Bit Field) and ROTR instructions supported? + bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); switch (type) { case Primitive::kPrimInt: { Register dst = locations->Out().AsRegister<Register>(); Register lhs = locations->InAt(0).AsRegister<Register>(); if (use_imm) { - if (instr->IsShl()) { + if (shift_value == 0) { + if (dst != lhs) { + __ Move(dst, lhs); + } + } else if (instr->IsShl()) { __ Sll(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Sra(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Srl(dst, lhs, shift_value); + } else { + if (has_ins_rotr) { + __ Rotr(dst, lhs, shift_value); + } else { + __ Sll(TMP, lhs, (kMipsBitsPerWord - shift_value) & shift_mask); + __ Srl(dst, lhs, shift_value); + __ Or(dst, dst, TMP); + } } } else { if (instr->IsShl()) { __ Sllv(dst, lhs, rhs_reg); } else if (instr->IsShr()) { __ Srav(dst, lhs, rhs_reg); - } else { + } else if (instr->IsUShr()) { __ Srlv(dst, lhs, rhs_reg); + } else { + if (has_ins_rotr) { + __ Rotrv(dst, lhs, rhs_reg); + } else { + __ Subu(TMP, ZERO, rhs_reg); + // 32-bit shift instructions use the 5 least significant bits of the shift count, so + // shifting by `-rhs_reg` is equivalent to shifting by `(32 - rhs_reg) & 31`. The case + // when `rhs_reg & 31 == 0` is OK even though we don't shift `lhs` left all the way out + // by 32, because the result in this case is computed as `(lhs >> 0) | (lhs << 0)`, + // IOW, the OR'd values are equal. + __ Sllv(TMP, lhs, TMP); + __ Srlv(dst, lhs, rhs_reg); + __ Or(dst, dst, TMP); + } } } break; @@ -1580,7 +1605,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { if (shift_value == 0) { codegen_->Move64(locations->Out(), locations->InAt(0)); } else if (shift_value < kMipsBitsPerWord) { - if (has_ins) { + if (has_ins_rotr) { if (instr->IsShl()) { __ Srl(dst_high, lhs_low, kMipsBitsPerWord - shift_value); __ Ins(dst_high, lhs_high, shift_value, kMipsBitsPerWord - shift_value); @@ -1589,10 +1614,15 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Srl(dst_low, lhs_low, shift_value); __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value); __ Sra(dst_high, lhs_high, shift_value); + } else if (instr->IsUShr()) { + __ Srl(dst_low, lhs_low, shift_value); + __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value); + __ Srl(dst_high, lhs_high, shift_value); } else { __ Srl(dst_low, lhs_low, shift_value); __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value); __ Srl(dst_high, lhs_high, shift_value); + __ Ins(dst_high, lhs_low, kMipsBitsPerWord - shift_value, shift_value); } } else { if (instr->IsShl()) { @@ -1605,24 +1635,51 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value); __ Srl(dst_low, lhs_low, shift_value); __ Or(dst_low, dst_low, TMP); - } else { + } else if (instr->IsUShr()) { __ Srl(dst_high, lhs_high, shift_value); __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value); __ Srl(dst_low, lhs_low, shift_value); __ Or(dst_low, dst_low, TMP); + } else { + __ Srl(TMP, lhs_low, shift_value); + __ Sll(dst_low, lhs_high, kMipsBitsPerWord - shift_value); + __ Or(dst_low, dst_low, TMP); + __ Srl(TMP, lhs_high, shift_value); + __ Sll(dst_high, lhs_low, kMipsBitsPerWord - shift_value); + __ Or(dst_high, dst_high, TMP); } } } else { - 
shift_value -= kMipsBitsPerWord; + const uint32_t shift_value_high = shift_value - kMipsBitsPerWord; if (instr->IsShl()) { - __ Sll(dst_high, lhs_low, shift_value); + __ Sll(dst_high, lhs_low, shift_value_high); __ Move(dst_low, ZERO); } else if (instr->IsShr()) { - __ Sra(dst_low, lhs_high, shift_value); + __ Sra(dst_low, lhs_high, shift_value_high); __ Sra(dst_high, dst_low, kMipsBitsPerWord - 1); - } else { - __ Srl(dst_low, lhs_high, shift_value); + } else if (instr->IsUShr()) { + __ Srl(dst_low, lhs_high, shift_value_high); __ Move(dst_high, ZERO); + } else { + if (shift_value == kMipsBitsPerWord) { + // 64-bit rotation by 32 is just a swap. + __ Move(dst_low, lhs_high); + __ Move(dst_high, lhs_low); + } else { + if (has_ins_rotr) { + __ Srl(dst_low, lhs_high, shift_value_high); + __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value_high, shift_value_high); + __ Srl(dst_high, lhs_low, shift_value_high); + __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value_high, shift_value_high); + } else { + __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value_high); + __ Srl(dst_low, lhs_high, shift_value_high); + __ Or(dst_low, dst_low, TMP); + __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value_high); + __ Srl(dst_high, lhs_low, shift_value_high); + __ Or(dst_high, dst_high, TMP); + } + } } } } else { @@ -1649,7 +1706,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Beqz(TMP, &done); __ Move(dst_low, dst_high); __ Sra(dst_high, dst_high, 31); - } else { + } else if (instr->IsUShr()) { __ Srlv(dst_high, lhs_high, rhs_reg); __ Nor(AT, ZERO, rhs_reg); __ Sll(TMP, lhs_high, 1); @@ -1660,6 +1717,21 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Beqz(TMP, &done); __ Move(dst_low, dst_high); __ Move(dst_high, ZERO); + } else { + __ Nor(AT, ZERO, rhs_reg); + __ Srlv(TMP, lhs_low, rhs_reg); + __ Sll(dst_low, lhs_high, 1); + __ Sllv(dst_low, dst_low, AT); + __ Or(dst_low, dst_low, TMP); + __ Srlv(TMP, lhs_high, rhs_reg); + __ Sll(dst_high, lhs_low, 1); + __ Sllv(dst_high, dst_high, AT); + __ Or(dst_high, dst_high, TMP); + __ Andi(TMP, rhs_reg, kMipsBitsPerWord); + __ Beqz(TMP, &done); + __ Move(TMP, dst_high); + __ Move(dst_high, dst_low); + __ Move(dst_low, TMP); } __ Bind(&done); } @@ -2314,8 +2386,7 @@ void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruct Register out = locations->Out().AsRegister<Register>(); Register dividend = locations->InAt(0).AsRegister<Register>(); int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); if (instruction->IsDiv()) { @@ -2418,7 +2489,7 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* inst // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
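Stepping back from the Ror lowering above: pre-R2 MIPS has no ROTR/ROTRV, so the rotate is synthesized from shifts, and the 64-bit variant operates on two 32-bit halves, swapping them when bit 5 of the shift amount is set. The same arithmetic in portable C++, including the shift-by-one-then-shift-by-~r trick that keeps an amount of zero well defined:

  #include <cassert>
  #include <cstdint>
  #include <utility>

  // 32-bit rotate-right without ROTR: (x >> r) | (x << (32 - r)), with
  // amounts taken mod 32; r == 0 is fine because the OR'd values are equal.
  uint32_t Rotr32(uint32_t x, unsigned r) {
    r &= 31;
    return (x >> r) | (x << ((32 - r) & 31));
  }

  // x << (32 - s) for s in [0, 32) without an out-of-range shift:
  // (x << 1) << (31 - s), matching the Sll-by-1 + Sllv-by-~r sequence
  // above. For s == 0 this correctly yields 0.
  uint32_t ShlBy32Minus(uint32_t x, unsigned s) {
    return (x << 1) << (31 - s);
  }

  // 64-bit rotate-right from two 32-bit halves: rotate by the low five
  // bits, then swap halves if bit 5 is set ("rotation by 32 is a swap").
  uint64_t Rotr64(uint32_t lo, uint32_t hi, unsigned r) {
    unsigned s = r & 31;
    uint32_t new_lo = (lo >> s) | ShlBy32Minus(hi, s);
    uint32_t new_hi = (hi >> s) | ShlBy32Minus(lo, s);
    if (r & 32) std::swap(new_lo, new_hi);
    return (uint64_t{new_hi} << 32) | new_lo;
  }

  int main() {
    assert(Rotr32(0x80000001u, 1) == 0xC0000000u);
    assert(Rotr64(0x00000001u, 0x00000000u, 1) == 0x8000000000000000ull);
    assert(Rotr64(0xDEADBEEFu, 0x12345678u, 32) == 0xDEADBEEF12345678ull);
    return 0;
  }
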
} else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -3358,8 +3429,8 @@ void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCodeMIPS* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -3371,6 +3442,10 @@ void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ Nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -3457,8 +3532,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, // Need to move to FP regs since FP results are returned in core registers. __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), locations->Out().AsFpuRegister<FRegister>()); - __ Mthc1(locations->GetTemp(2).AsRegister<Register>(), - locations->Out().AsFpuRegister<FRegister>()); + __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(), + locations->Out().AsFpuRegister<FRegister>()); } } else { if (!Primitive::IsFloatingPointType(type)) { @@ -3578,8 +3653,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, // Pass FP parameters in core registers. 
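One more note on the div/rem hunks above: replacing std::abs(imm) with AbsOrMin(imm) matters because the most negative integer has no representable absolute value, so std::abs on it is undefined behavior. Assuming AbsOrMin does what its call sites here suggest (absolute value, except that the minimum maps to itself), the power-of-two test still holds for that corner case once the result is viewed as unsigned. A small sketch under that assumption:

  #include <cstdint>
  #include <limits>

  // Presumed behavior of art::AbsOrMin: |v| for every value except the
  // minimum, which maps to itself (its true absolute value overflows).
  int64_t AbsOrMin(int64_t v) {
    return (v == std::numeric_limits<int64_t>::min()) ? v : (v < 0 ? -v : v);
  }

  bool IsPowerOfTwo(uint64_t x) {
    return x != 0 && (x & (x - 1)) == 0;
  }

  int main() {
    // INT64_MIN reinterpreted as unsigned is 2^63, a power of two, so
    // DivRemByPowerOfTwo's shift-based lowering still applies to it.
    uint64_t abs_imm =
        static_cast<uint64_t>(AbsOrMin(std::numeric_limits<int64_t>::min()));
    return IsPowerOfTwo(abs_imm) ? 0 : 1;
  }
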
__ Mfc1(locations->GetTemp(1).AsRegister<Register>(), locations->InAt(1).AsFpuRegister<FRegister>()); - __ Mfhc1(locations->GetTemp(2).AsRegister<Register>(), - locations->InAt(1).AsFpuRegister<FRegister>()); + __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(), + locations->InAt(1).AsFpuRegister<FRegister>()); } codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store), instruction, @@ -4536,14 +4611,12 @@ void InstructionCodeGeneratorMIPS::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UN codegen_->GenerateFrameExit(); } -void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void LocationsBuilderMIPS::VisitRor(HRor* ror) { + HandleShift(ror); } -void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror) { + HandleShift(ror); } void LocationsBuilderMIPS::VisitShl(HShl* shl) { @@ -4731,6 +4804,7 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { Primitive::Type input_type = conversion->GetInputType(); Primitive::Type result_type = conversion->GetResultType(); DCHECK_NE(input_type, result_type); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) || (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) { @@ -4738,8 +4812,9 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { } LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || - (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) { + if (!isR6 && + ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || + (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) { call_kind = LocationSummary::kCall; } @@ -4777,6 +4852,8 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); DCHECK_NE(input_type, result_type); @@ -4822,7 +4899,37 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi << " to " << result_type; } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { - if (input_type != Primitive::kPrimLong) { + if (input_type == Primitive::kPrimLong) { + if (isR6) { + // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary + // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. + Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register src_low = locations->InAt(0).AsRegisterPairLow<Register>(); + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + __ Mtc1(src_low, FTMP); + __ Mthc1(src_high, FTMP); + if (result_type == Primitive::kPrimFloat) { + __ Cvtsl(dst, FTMP); + } else { + __ Cvtdl(dst, FTMP); + } + } else { + int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? 
QUICK_ENTRY_POINT(pL2f) + : QUICK_ENTRY_POINT(pL2d); + bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f) + : IsDirectEntrypoint(kQuickL2d); + codegen_->InvokeRuntime(entry_offset, + conversion, + conversion->GetDexPc(), + nullptr, + direct); + if (result_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + } else { + CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + } + } + } else { Register src = locations->InAt(0).AsRegister<Register>(); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); __ Mtc1(src, FTMP); @@ -4831,54 +4938,168 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi } else { __ Cvtdw(dst, FTMP); } - } else { - int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) - : QUICK_ENTRY_POINT(pL2d); - bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f) - : IsDirectEntrypoint(kQuickL2d); - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr, - direct); - if (result_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); - } else { - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); - } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); - int32_t entry_offset; - bool direct; - if (result_type != Primitive::kPrimLong) { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz) - : QUICK_ENTRY_POINT(pD2iz); - direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2iz) - : IsDirectEntrypoint(kQuickD2iz); + if (result_type == Primitive::kPrimLong) { + if (isR6) { + // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary + // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); + Register dst_low = locations->Out().AsRegisterPairLow<Register>(); + MipsLabel truncate; + MipsLabel done; + + // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive + // value when the input is either a NaN or is outside of the range of the output type + // after the truncation. IOW, the three special cases (NaN, too small, too big) produce + // the same result. + // + // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum + // value of the output type if the input is outside of the range after the truncation or + // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct + // results. This matches the desired float/double-to-int/long conversion exactly. + // + // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction. + // + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // The code takes care of the different behaviors by first comparing the input to the + // minimum output value (-2**-63 for truncating to long, -2**-31 for truncating to int). 
+ // If the input is greater than or equal to the minimum, it procedes to the truncate + // instruction, which will handle such an input the same way irrespective of NAN2008. + // Otherwise the input is compared to itself to determine whether it is a NaN or not + // in order to return either zero or the minimum value. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6. + if (input_type == Primitive::kPrimFloat) { + uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); + __ CmpLeS(FTMP, FTMP, src); + } else { + uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min()); + __ LoadConst32(TMP, High32Bits(min_val)); + __ Mtc1(ZERO, FTMP); + __ Mthc1(TMP, FTMP); + __ CmpLeD(FTMP, FTMP, src); + } + + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + __ Move(dst_low, ZERO); + __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min()); + __ Mfc1(TMP, FTMP); + __ And(dst_high, dst_high, TMP); + + __ B(&done); + + __ Bind(&truncate); + + if (input_type == Primitive::kPrimFloat) { + __ TruncLS(FTMP, src); + } else { + __ TruncLD(FTMP, src); + } + __ Mfc1(dst_low, FTMP); + __ Mfhc1(dst_high, FTMP); + + __ Bind(&done); + } else { + int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) + : QUICK_ENTRY_POINT(pD2l); + bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l) + : IsDirectEntrypoint(kQuickD2l); + codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct); + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + } + } } else { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) - : QUICK_ENTRY_POINT(pD2l); - direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l) - : IsDirectEntrypoint(kQuickD2l); - } - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr, - direct); - if (result_type != Primitive::kPrimLong) { + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + Register dst = locations->Out().AsRegister<Register>(); + MipsLabel truncate; + MipsLabel done; + + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // For details see the large comment above for the truncation of float/double to long on R6. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6. 
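Summing up the long NAN2008 comment above: whichever truncate behavior the hardware (or the emulator) implements, the emitted compare-before-truncate sequence pins down the Java conversion semantics. The net effect, written as portable C++ for the double-to-long case (the generated code reaches the same results branchlessly, via a compare mask, rather than with isnan):

  #include <cmath>
  #include <cstdint>
  #include <limits>

  // Net effect of the guarded truncation above for double -> long:
  // NaN -> 0, below range -> min, above range -> max, otherwise truncate.
  int64_t DoubleToLong(double v) {
    constexpr int64_t kMin = std::numeric_limits<int64_t>::min();
    constexpr int64_t kMax = std::numeric_limits<int64_t>::max();
    if (std::isnan(v)) return 0;                      // the CmpEqD(src, src) leg
    if (v < static_cast<double>(kMin)) return kMin;   // the pre-truncate compare
    if (v >= static_cast<double>(kMax)) return kMax;  // trunc saturates up here
    return static_cast<int64_t>(v);                   // TruncLD on in-range input
  }

  int main() {
    bool ok = DoubleToLong(std::nan("")) == 0 &&
              DoubleToLong(-1e30) == std::numeric_limits<int64_t>::min() &&
              DoubleToLong(1e30) == std::numeric_limits<int64_t>::max() &&
              DoubleToLong(-2.5) == -2;
    return ok ? 0 : 1;
  }
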
if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); } else { - CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, High32Bits(min_val)); + __ Mtc1(ZERO, FTMP); + if (fpu_32bit) { + __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1)); + } else { + __ Mthc1(TMP, FTMP); + } } - } else { + + if (isR6) { + if (input_type == Primitive::kPrimFloat) { + __ CmpLeS(FTMP, FTMP, src); + } else { + __ CmpLeD(FTMP, FTMP, src); + } + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + __ Mfc1(TMP, FTMP); + __ And(dst, dst, TMP); + } else { + if (input_type == Primitive::kPrimFloat) { + __ ColeS(0, FTMP, src); + } else { + __ ColeD(0, FTMP, src); + } + __ Bc1t(0, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CeqS(0, src, src); + } else { + __ CeqD(0, src, src); + } + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + __ Movf(dst, ZERO, 0); + } + + __ B(&done); + + __ Bind(&truncate); + if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + __ TruncWS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + __ TruncWD(FTMP, src); } + __ Mfc1(dst, FTMP); + + __ Bind(&done); } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 38302ad315..c3d4851ee9 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -197,7 +197,7 @@ class LocationsBuilderMIPS : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS); }; -class InstructionCodeGeneratorMIPS : public HGraphVisitor { +class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { public: InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 05834ff063..38c32cad06 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -391,24 +391,24 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit DeoptimizationSlowPathMIPS64(HInstruction* instruction) + explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + instruction_, + instruction_->GetDexPc(), + this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); 
} const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64); }; @@ -1113,7 +1113,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1247,7 +1247,7 @@ void InstructionCodeGeneratorMIPS64::HandleBinaryOp(HBinaryOperation* instructio } void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); Primitive::Type type = instr->GetResultType(); @@ -1265,7 +1265,7 @@ void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { } void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = instr->GetLocations(); Primitive::Type type = instr->GetType(); @@ -1290,13 +1290,19 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { ? static_cast<uint32_t>(rhs_imm & kMaxIntShiftValue) : static_cast<uint32_t>(rhs_imm & kMaxLongShiftValue); - if (type == Primitive::kPrimInt) { + if (shift_value == 0) { + if (dst != lhs) { + __ Move(dst, lhs); + } + } else if (type == Primitive::kPrimInt) { if (instr->IsShl()) { __ Sll(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Sra(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Srl(dst, lhs, shift_value); + } else { + __ Rotr(dst, lhs, shift_value); } } else { if (shift_value < 32) { @@ -1304,8 +1310,10 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { __ Dsll(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Dsra(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Dsrl(dst, lhs, shift_value); + } else { + __ Drotr(dst, lhs, shift_value); } } else { shift_value -= 32; @@ -1313,8 +1321,10 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { __ Dsll32(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Dsra32(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Dsrl32(dst, lhs, shift_value); + } else { + __ Drotr32(dst, lhs, shift_value); } } } @@ -1324,16 +1334,20 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { __ Sllv(dst, lhs, rhs_reg); } else if (instr->IsShr()) { __ Srav(dst, lhs, rhs_reg); - } else { + } else if (instr->IsUShr()) { __ Srlv(dst, lhs, rhs_reg); + } else { + __ Rotrv(dst, lhs, rhs_reg); } } else { if (instr->IsShl()) { __ Dsllv(dst, lhs, rhs_reg); } else if (instr->IsShr()) { __ Dsrav(dst, lhs, rhs_reg); - } else { + } else if (instr->IsUShr()) { __ Dsrlv(dst, lhs, rhs_reg); + } else { + __ Drotrv(dst, lhs, rhs_reg); } } } @@ -1955,8 +1969,7 @@ void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instru GpuRegister out = locations->Out().AsRegister<GpuRegister>(); GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); int64_t imm = Int64FromConstant(second.GetConstant()); - 
uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); if (instruction->IsDiv()) { @@ -2138,7 +2151,7 @@ void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* in // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -2736,9 +2749,8 @@ void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathMIPS64(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCodeMIPS64* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -2750,6 +2762,10 @@ void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ Nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -3722,14 +3738,12 @@ void InstructionCodeGeneratorMIPS64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_ codegen_->GenerateFrameExit(); } -void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void LocationsBuilderMIPS64::VisitRor(HRor* ror) { + HandleShift(ror); } -void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror) { + HandleShift(ror); } void LocationsBuilderMIPS64::VisitShl(HShl* shl) { @@ -3918,36 +3932,18 @@ void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) { LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; } - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || - (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) { - call_kind = LocationSummary::kCall; - } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion); - if (call_kind == LocationSummary::kNoCall) { - if (Primitive::IsFloatingPointType(input_type)) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - } - - if (Primitive::IsFloatingPointType(result_type)) { - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } + if (Primitive::IsFloatingPointType(input_type)) { + locations->SetInAt(0, Location::RequiresFpuRegister()); } else { - InvokeRuntimeCallingConvention calling_convention; - - if (Primitive::IsFloatingPointType(input_type)) { - locations->SetInAt(0, 
Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
- } else {
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- }
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
- locations->SetOut(calling_convention.GetReturnLocation(result_type));
+ if (Primitive::IsFloatingPointType(result_type)) {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ } else {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
}
@@ -3992,55 +3988,107 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver
<< " to " << result_type;
}
} else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
- if (input_type != Primitive::kPrimLong) {
- FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
- GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
- __ Mtc1(src, FTMP);
+ FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+ GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+ if (input_type == Primitive::kPrimLong) {
+ __ Dmtc1(src, FTMP);
if (result_type == Primitive::kPrimFloat) {
- __ Cvtsw(dst, FTMP);
+ __ Cvtsl(dst, FTMP);
} else {
- __ Cvtdw(dst, FTMP);
+ __ Cvtdl(dst, FTMP);
}
} else {
- int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
- : QUICK_ENTRY_POINT(pL2d);
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr);
+ __ Mtc1(src, FTMP);
if (result_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+ __ Cvtsw(dst, FTMP);
} else {
- CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+ __ Cvtdw(dst, FTMP);
}
}
} else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
- int32_t entry_offset;
- if (result_type != Primitive::kPrimLong) {
- entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
- : QUICK_ENTRY_POINT(pD2iz);
+ GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+ FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ Mips64Label truncate;
+ Mips64Label done;
+
+ // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+ // value when the input is either a NaN or is outside of the range of the output type
+ // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+ // the same result.
+ //
+ // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+ // value of the output type if the input is outside of the range after the truncation or
+ // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+ // results. This matches the desired float/double-to-int/long conversion exactly.
+ //
+ // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+ //
+ // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+ // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+ // even though it must be NAN2008=1 on R6.
+ //
+ // The code takes care of the different behaviors by first comparing the input to the
+ // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+ // If the input is greater than or equal to the minimum, it proceeds to the truncate
+ // instruction, which will handle such an input the same way irrespective of NAN2008.
+ // Otherwise the input is compared to itself to determine whether it is a NaN or not
+ // in order to return either zero or the minimum value.
+ //
+ // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+ // truncate instruction for MIPS64R6.
+ if (input_type == Primitive::kPrimFloat) {
+ uint32_t min_val = (result_type == Primitive::kPrimLong)
+ ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min())
+ : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+ __ LoadConst32(TMP, min_val);
+ __ Mtc1(TMP, FTMP);
+ __ CmpLeS(FTMP, FTMP, src);
} else {
- entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
- : QUICK_ENTRY_POINT(pD2l);
+ uint64_t min_val = (result_type == Primitive::kPrimLong)
+ ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min())
+ : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+ __ LoadConst64(TMP, min_val);
+ __ Dmtc1(TMP, FTMP);
+ __ CmpLeD(FTMP, FTMP, src);
}
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr);
- if (result_type != Primitive::kPrimLong) {
+
+ __ Bc1nez(FTMP, &truncate);
+
+ if (input_type == Primitive::kPrimFloat) {
+ __ CmpEqS(FTMP, src, src);
+ } else {
+ __ CmpEqD(FTMP, src, src);
+ }
+ if (result_type == Primitive::kPrimLong) {
+ __ LoadConst64(dst, std::numeric_limits<int64_t>::min());
+ } else {
+ __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+ }
+ __ Mfc1(TMP, FTMP);
+ __ And(dst, dst, TMP);
+
+ __ Bc(&done);
+
+ __ Bind(&truncate);
+
+ if (result_type == Primitive::kPrimLong) {
if (input_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+ __ TruncLS(FTMP, src);
} else {
- CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+ __ TruncLD(FTMP, src);
}
+ __ Dmfc1(dst, FTMP);
} else {
if (input_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+ __ TruncWS(FTMP, src);
} else {
- CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+ __ TruncWD(FTMP, src);
}
+ __ Mfc1(dst, FTMP);
}
+
+ __ Bind(&done);
} else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) {
FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 60ff96dc43..7182e8e987 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -201,7 +201,7 @@ class LocationsBuilderMIPS64 : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS64);
};
-class InstructionCodeGeneratorMIPS64 : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 86327fb741..6ab3aaff4b 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -365,11 +365,10 @@ class TypeCheckSlowPathX86 : public SlowPathCode {
class DeoptimizationSlowPathX86 : public SlowPathCode {
public:
- explicit DeoptimizationSlowPathX86(HInstruction* instruction)
+ explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
:
instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - DCHECK(instruction_->IsDeoptimize()); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); @@ -383,7 +382,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); }; @@ -892,7 +891,7 @@ void CodeGeneratorX86::UpdateBlockedPairRegisters() const { } InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1611,9 +1610,7 @@ void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathX86(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -1625,6 +1622,10 @@ void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -3223,11 +3224,12 @@ void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) { Register out_register = locations->Out().AsRegister<Register>(); Register input_register = locations->InAt(0).AsRegister<Register>(); int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); - DCHECK(IsPowerOfTwo(std::abs(imm))); Register num = locations->GetTemp(0).AsRegister<Register>(); - __ leal(num, Address(input_register, std::abs(imm) - 1)); + __ leal(num, Address(input_register, abs_imm - 1)); __ testl(input_register, input_register); __ cmovl(kGreaterEqual, num, input_register); int shift = CTZ(imm); @@ -3340,7 +3342,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr // Do not generate anything for 0. DivZeroCheck would forbid any generated code. 
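The std::abs-to-AbsOrMin changes here in the x86 backend, and the matching ones in the MIPS64 and x86-64 hunks, exist because std::abs is undefined on the most negative value (e.g. INT_MIN), which is a legal power-of-two divisor. Assuming ART's helper follows the usual shape (a sketch, not the verbatim utility):

    #include <cstdlib>
    #include <limits>

    // Absolute value, except that the non-representable |min| case returns
    // min itself; the unsigned cast at the call site then yields the
    // 0x8000... power-of-two magnitude needed for the CTZ/shift computation.
    template <typename T>
    T AbsOrMin(T value) {
      return (value == std::numeric_limits<T>::min()) ? value : std::abs(value);
    }

For example, AbsOrMin(INT32_MIN) returns INT32_MIN, whose uint32_t bit pattern 0x80000000 still satisfies IsPowerOfTwo, so the DCHECKs and the shift-based division remain valid without invoking undefined behavior.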
} else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (is_div && IsPowerOfTwo(std::abs(imm))) { + } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) { DivByPowerOfTwo(instruction->AsDiv()); } else { DCHECK(imm <= -2 || imm >= 2); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index df7347658b..c65c423eae 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -178,7 +178,7 @@ class LocationsBuilderX86 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86); }; -class InstructionCodeGeneratorX86 : public HGraphVisitor { +class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { public: InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 76a4ce2e93..294b40e3d4 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -387,18 +387,16 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { class DeoptimizationSlowPathX86_64 : public SlowPathCode { public: - explicit DeoptimizationSlowPathX86_64(HInstruction* instruction) + explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - deoptimize, - deoptimize->GetDexPc(), + instruction_, + instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } @@ -406,7 +404,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); }; @@ -1000,7 +998,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1594,9 +1592,7 @@ void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathX86_64(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -1608,6 +1604,10 @@ void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. 
+ __ nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -3350,13 +3350,13 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); int64_t imm = Int64FromConstant(second.GetConstant()); - - DCHECK(IsPowerOfTwo(std::abs(imm))); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint64_t abs_imm = AbsOrMin(imm); CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); if (instruction->GetResultType() == Primitive::kPrimInt) { - __ leal(tmp, Address(numerator, std::abs(imm) - 1)); + __ leal(tmp, Address(numerator, abs_imm - 1)); __ testl(numerator, numerator); __ cmov(kGreaterEqual, tmp, numerator); int shift = CTZ(imm); @@ -3371,7 +3371,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); - codegen_->Load64BitValue(rdx, std::abs(imm) - 1); + codegen_->Load64BitValue(rdx, abs_imm - 1); __ addq(rdx, numerator); __ testq(numerator, numerator); __ cmov(kGreaterEqual, rdx, numerator); @@ -3529,7 +3529,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) { + } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) { DivByPowerOfTwo(instruction->AsDiv()); } else { DCHECK(imm <= -2 || imm >= 2); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index c5e8a04da6..505c9dcdad 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -183,7 +183,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64); }; -class InstructionCodeGeneratorX86_64 : public HGraphVisitor { +class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { public: InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index c504ded54c..b90afb1d73 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -211,19 +211,6 @@ bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op, // Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation. bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) { - // This simplification is currently supported on x86, x86_64, ARM and ARM64. - // TODO: Implement it for MIPS/64. 
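The backend gating removed in this hunk (and from SimplifyRotate further down) was only needed while the MIPS backends lacked rotate support; with the Rotr/Drotr handling added to HandleShift above, every backend can now accept HRor. For reference, the shift pair that TryReplaceWithRotate collapses, written as plain C++ (illustrative names only, not ART code):

    #include <cstdint>

    // The simplifier rewrites (x >>> d) | (x << (32 - d)) into a single
    // rotate-right by d; this function is the scalar meaning of both forms.
    uint32_t RotateRight32(uint32_t x, uint32_t d) {
      d &= 31;  // Only the low five bits of the distance are significant.
      return (x >> d) | (x << ((32 - d) & 31));
    }

After the rewrite, the whole expression lowers to one rotrv (or drotrv in the 64-bit case) instead of two shifts and an or.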
- const InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - switch (instruction_set) { - case kArm: - case kArm64: - case kThumb2: - case kX86: - case kX86_64: - break; - default: - return false; - } DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); HInstruction* left = op->GetLeft(); HInstruction* right = op->GetRight(); @@ -1261,19 +1248,6 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) { DCHECK(invoke->IsInvokeStaticOrDirect()); DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic); - // This simplification is currently supported on x86, x86_64, ARM and ARM64. - // TODO: Implement it for MIPS/64. - const InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - switch (instruction_set) { - case kArm: - case kArm64: - case kThumb2: - case kX86: - case kX86_64: - break; - default: - return; - } HInstruction* value = invoke->InputAt(0); HInstruction* distance = invoke->InputAt(1); // Replace the invoke with an HRor. diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 4683aee603..b1fbf28204 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -502,9 +502,6 @@ static void GenUnsafeGet(HInvoke* invoke, bool is_volatile, CodeGeneratorARM* codegen) { LocationSummary* locations = invoke->GetLocations(); - DCHECK((type == Primitive::kPrimInt) || - (type == Primitive::kPrimLong) || - (type == Primitive::kPrimNot)); ArmAssembler* assembler = codegen->GetAssembler(); Location base_loc = locations->InAt(1); Register base = base_loc.AsRegister<Register>(); // Object pointer. @@ -512,30 +509,67 @@ static void GenUnsafeGet(HInvoke* invoke, Register offset = offset_loc.AsRegisterPairLow<Register>(); // Long offset, lo part only. 
Location trg_loc = locations->Out(); - if (type == Primitive::kPrimLong) { - Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); - __ add(IP, base, ShifterOperand(offset)); - if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { - Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); - __ ldrexd(trg_lo, trg_hi, IP); - } else { - __ ldrd(trg_lo, Address(IP)); + switch (type) { + case Primitive::kPrimInt: { + Register trg = trg_loc.AsRegister<Register>(); + __ ldr(trg, Address(base, offset)); + if (is_volatile) { + __ dmb(ISH); + } + break; } - } else { - Register trg = trg_loc.AsRegister<Register>(); - __ ldr(trg, Address(base, offset)); - } - if (is_volatile) { - __ dmb(ISH); - } + case Primitive::kPrimNot: { + Register trg = trg_loc.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + codegen->GenerateArrayLoadWithBakerReadBarrier( + invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + if (is_volatile) { + __ dmb(ISH); + } + } else { + __ ldr(trg, Address(base, offset)); + if (is_volatile) { + __ dmb(ISH); + } + codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + } + } else { + __ ldr(trg, Address(base, offset)); + if (is_volatile) { + __ dmb(ISH); + } + __ MaybeUnpoisonHeapReference(trg); + } + break; + } - if (type == Primitive::kPrimNot) { - codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + case Primitive::kPrimLong: { + Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); + __ add(IP, base, ShifterOperand(offset)); + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); + __ ldrexd(trg_lo, trg_hi, IP); + } else { + __ ldrd(trg_lo, Address(IP)); + } + if (is_volatile) { + __ dmb(ISH); + } + break; + } + + default: + LOG(FATAL) << "Unexpected type " << type; + UNREACHABLE(); } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); @@ -548,25 +582,30 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in InstructionCodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. 
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
@@ -808,6 +847,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
}
// Prevent reordering with prior memory operations.
+ // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
+ // latter allows a preceding load to be delayed past the STXR
+ // instruction below.
__ dmb(ISH);
__ add(tmp_ptr, base, ShifterOperand(offset));
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index f723940444..81cab86c83 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1035,7 +1035,11 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
__ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
__ Cbnz(tmp_32, &loop_head);
} else {
- __ Dmb(InnerShareable, BarrierWrites);
+ // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction
+ // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST)
+ // one, as the latter allows a preceding load to be delayed past
+ // the STXR instruction below.
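Both barrier strengthenings in this pair of files close the same window: the DMB ISH emitted for ARM above and the BarrierAll form that follows for ARM64 order the earlier load of the old field value against the exclusive store of the CAS loop, which a store-store barrier cannot do. An illustrative analogue in C++11 atomics (hypothetical code, not ART's):

    #include <atomic>
    #include <cstdint>

    // The full fence plays the role of DMB ISH: it keeps the earlier load
    // from being delayed past the compare-and-swap. DMB ISHST orders only
    // store-store pairs, so it would not provide this guarantee.
    bool CasAfterLoad(std::atomic<int32_t>& field, int32_t desired) {
      int32_t expected = field.load(std::memory_order_relaxed);  // Prior load.
      std::atomic_thread_fence(std::memory_order_seq_cst);       // Full barrier.
      return field.compare_exchange_strong(expected, desired,
                                           std::memory_order_relaxed);
    }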
+ __ Dmb(InnerShareable, BarrierAll); __ Bind(&loop_head); // TODO: When `type == Primitive::kPrimNot`, add a read barrier for // the reference stored in the object before attempting the CAS, diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 06fab616ad..bc126a2716 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -43,14 +43,18 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS::GetAllocator() { return codegen_->GetGraph()->GetArena(); } -inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() { +inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() const { return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); } -inline bool IntrinsicCodeGeneratorMIPS::IsR6() { +inline bool IntrinsicCodeGeneratorMIPS::IsR6() const { return codegen_->GetInstructionSetFeatures().IsR6(); } +inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const { + return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -162,7 +166,7 @@ static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); __ Mfc1(out_lo, in); - __ Mfhc1(out_hi, in); + __ MoveFromFpuHigh(out_hi, in); } else { Register out = locations->Out().AsRegister<Register>(); @@ -204,7 +208,7 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); __ Mtc1(in_lo, out); - __ Mthc1(in_hi, out); + __ MoveToFpuHigh(in_hi, out); } else { Register in = locations->InAt(0).AsRegister<Register>(); diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index f86b0efe4a..575a7d0a23 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -67,8 +67,9 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS - bool IsR2OrNewer(void); - bool IsR6(void); + bool IsR2OrNewer() const; + bool IsR6() const; + bool Is32BitFPU() const; private: MipsAssembler* GetAssembler(); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index f5a7048b01..b80c6bde82 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2183,10 +2183,7 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic, IntrinsicExceptions exceptions) { intrinsic_ = intrinsic; IntrinsicOptimizations opt(this); - if (needs_env_or_cache == kNoEnvironmentOrCache) { - opt.SetDoesNotNeedDexCache(); - opt.SetDoesNotNeedEnvironment(); - } + // Adjust method's side effects from intrinsic table. switch (side_effects) { case kNoSideEffects: SetSideEffects(SideEffects::None()); break; @@ -2194,6 +2191,14 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic, case kWriteSideEffects: SetSideEffects(SideEffects::AllWrites()); break; case kAllSideEffects: SetSideEffects(SideEffects::AllExceptGCDependency()); break; } + + if (needs_env_or_cache == kNoEnvironmentOrCache) { + opt.SetDoesNotNeedDexCache(); + opt.SetDoesNotNeedEnvironment(); + } else { + // If we need an environment, that means there will be a call, which can trigger GC. + SetSideEffects(GetSideEffects().Union(SideEffects::CanTriggerGC())); + } // Adjust method's exception status from intrinsic table. 
switch (exceptions) { case kNoThrow: SetCanThrow(false); break; @@ -2325,4 +2330,19 @@ HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* } } +std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs) { + os << "[" + << " source=" << rhs.GetSource() + << " destination=" << rhs.GetDestination() + << " type=" << rhs.GetType() + << " instruction="; + if (rhs.GetInstruction() != nullptr) { + os << rhs.GetInstruction()->DebugName() << ' ' << rhs.GetInstruction()->GetId(); + } else { + os << "null"; + } + os << " ]"; + return os; +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 59c07690b1..23132308f0 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1881,6 +1881,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return false; } + virtual bool IsActualObject() const { + return GetType() == Primitive::kPrimNot; + } + void SetReferenceTypeInfo(ReferenceTypeInfo rti); ReferenceTypeInfo GetReferenceTypeInfo() const { @@ -2500,8 +2504,10 @@ class HTryBoundary : public HTemplateInstruction<0> { // Deoptimize to interpreter, upon checking a condition. class HDeoptimize : public HTemplateInstruction<1> { public: + // We set CanTriggerGC to prevent any intermediate address to be live + // at the point of the `HDeoptimize`. HDeoptimize(HInstruction* cond, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, cond); } @@ -4017,8 +4023,10 @@ class HRem : public HBinaryOperation { class HDivZeroCheck : public HExpression<1> { public: + // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException` + // constructor. HDivZeroCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::None(), dex_pc) { + : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } @@ -4539,8 +4547,10 @@ class HPhi : public HInstruction { class HNullCheck : public HExpression<1> { public: + // `HNullCheck` can trigger GC, as it may call the `NullPointerException` + // constructor. HNullCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::None(), dex_pc) { + : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } @@ -4861,8 +4871,10 @@ class HArrayLength : public HExpression<1> { class HBoundsCheck : public HExpression<2> { public: + // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException` + // constructor. HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc) - : HExpression(index->GetType(), SideEffects::None(), dex_pc) { + : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { DCHECK(index->GetType() == Primitive::kPrimInt); SetRawInputAt(0, index); SetRawInputAt(1, length); @@ -5626,8 +5638,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> { } bool IsPending() const { - DCHECK(!source_.IsInvalid() || destination_.IsInvalid()); - return destination_.IsInvalid() && !source_.IsInvalid(); + DCHECK(source_.IsValid() || destination_.IsInvalid()); + return destination_.IsInvalid() && source_.IsValid(); } // True if this blocks a move from the given location. 
@@ -5671,6 +5683,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> { HInstruction* instruction_; }; +std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs); + static constexpr size_t kDefaultNumberOfMoves = 4; class HParallelMove : public HTemplateInstruction<0> { diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 18405f2623..445cdab191 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -107,6 +107,7 @@ class HArm64IntermediateAddress : public HExpression<2> { bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; } + bool IsActualObject() const OVERRIDE { return false; } HInstruction* GetBaseAddress() const { return InputAt(0); } HInstruction* GetOffset() const { return InputAt(1); } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index cafc6c5440..bb840eabdd 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -17,6 +17,7 @@ #include "optimizing_compiler.h" #include <fstream> +#include <memory> #include <stdint.h> #ifdef ART_ENABLE_CODEGEN_arm64 @@ -52,6 +53,8 @@ #include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" +#include "dwarf/method_debug_info.h" +#include "elf_writer_debug.h" #include "elf_writer_quick.h" #include "graph_checker.h" #include "graph_visualizer.h" @@ -60,6 +63,7 @@ #include "inliner.h" #include "instruction_simplifier.h" #include "intrinsics.h" +#include "jit/debugger_interface.h" #include "jit/jit_code_cache.h" #include "licm.h" #include "jni/quick/jni_compiler.h" @@ -68,6 +72,7 @@ #include "prepare_for_register_allocation.h" #include "reference_type_propagation.h" #include "register_allocator.h" +#include "oat_quick_method_header.h" #include "sharpening.h" #include "side_effects_analysis.h" #include "ssa_builder.h" @@ -965,6 +970,39 @@ bool OptimizingCompiler::JitCompile(Thread* self, return false; } + if (GetCompilerDriver()->GetCompilerOptions().GetGenerateDebugInfo()) { + const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); + const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); + CompiledMethod compiled_method( + GetCompilerDriver(), + codegen->GetInstructionSet(), + ArrayRef<const uint8_t>(code_allocator.GetMemory()), + codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + codegen->GetFpuSpillMask(), + ArrayRef<const SrcMapElem>(), + ArrayRef<const uint8_t>(), // mapping_table. + ArrayRef<const uint8_t>(stack_map_data, stack_map_size), + ArrayRef<const uint8_t>(), // native_gc_map. + ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), + ArrayRef<const LinkerPatch>()); + dwarf::MethodDebugInfo method_debug_info { + dex_file, + class_def_idx, + method_idx, + access_flags, + code_item, + false, // deduped. 
+ code_address, + code_address + code_allocator.GetSize(), + &compiled_method + }; + ArrayRef<const uint8_t> elf_file = dwarf::WriteDebugElfFileForMethod(method_debug_info); + CreateJITCodeEntryForAddress(code_address, + std::unique_ptr<const uint8_t[]>(elf_file.data()), + elf_file.size()); + } + return true; } diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 176c50ce21..9d136f3ae6 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include <iostream> #include "parallel_move_resolver.h" @@ -172,7 +171,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { i = -1; } else if (required_swap != nullptr) { // A move is required to swap. We walk back the cycle to find the - // move by just returning from this `PerforrmMove`. + // move by just returning from this `PerformMove`. moves_[index]->ClearPending(destination); return required_swap; } @@ -201,7 +200,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { } else { for (MoveOperands* other_move : moves_) { if (other_move->Blocks(destination)) { - DCHECK(other_move->IsPending()); + DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move; if (!move->Is64BitMove() && other_move->Is64BitMove()) { // We swap 64bits moves before swapping 32bits moves. Go back from the // cycle by returning the move that must be swapped. diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index d1770b75ab..63ef600756 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -96,7 +96,7 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { if (can_merge_with_load_class && !load_class->HasUses()) { load_class->GetBlock()->RemoveInstruction(load_class); } - } else if (can_merge_with_load_class) { + } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) { // Pass the initialization duty to the `HLoadClass` instruction, // and remove the instruction from the graph. 
load_class->SetMustGenerateClinitCheck(true); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 5ab4547e22..2bae4bc5c8 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1679,6 +1679,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { LocationSummary* locations = safepoint_position->GetLocations(); if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) { + DCHECK(interval->GetDefinedBy()->IsActualObject()) + << interval->GetDefinedBy()->DebugName() + << "@" << safepoint_position->GetInstruction()->DebugName(); locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); } @@ -1691,6 +1694,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { maximum_number_of_live_fp_registers_); } if (current->GetType() == Primitive::kPrimNot) { + DCHECK(interval->GetDefinedBy()->IsActualObject()) + << interval->GetDefinedBy()->DebugName() + << "@" << safepoint_position->GetInstruction()->DebugName(); locations->SetRegisterBit(source.reg()); } break; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index c60a4eacaa..4784de1380 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -270,7 +270,7 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask); } - if (entry.num_dex_registers == 0) { + if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) { // No dex map available. stack_map.SetDexRegisterMapOffset(stack_map_encoding_, StackMap::kNoDexRegisterMap); } else { diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 560502fde6..604787fd92 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -614,6 +614,10 @@ TEST(StackMapTest, TestNoDexRegisterMap) { stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.EndStackMapEntry(); + number_of_dex_registers = 1; + stream.BeginStackMapEntry(1, 67, 0x4, &sp_mask, number_of_dex_registers, 0); + stream.EndStackMapEntry(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -622,7 +626,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) { CodeInfo code_info(region); StackMapEncoding encoding = code_info.ExtractEncoding(); ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask()); - ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(0u, number_of_location_catalog_entries); @@ -638,6 +642,16 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding)); ASSERT_FALSE(stack_map.HasInlineInfo(encoding)); + + stack_map = code_info.GetStackMapAt(1, encoding); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(67, encoding))); + ASSERT_EQ(1u, stack_map.GetDexPc(encoding)); + ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding)); + ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding)); + + ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo(encoding)); } TEST(StackMapTest, InlineTest) { diff --git 
a/compiler/profile_assistant.cc b/compiler/profile_assistant.cc new file mode 100644 index 0000000000..81f2a5692d --- /dev/null +++ b/compiler/profile_assistant.cc @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "profile_assistant.h" + +namespace art { + +// Minimum number of new methods that profiles must contain to enable recompilation. +static constexpr const uint32_t kMinNewMethodsForCompilation = 10; + +bool ProfileAssistant::ProcessProfiles( + const std::vector<std::string>& profile_files, + const std::vector<std::string>& reference_profile_files, + /*out*/ ProfileCompilationInfo** profile_compilation_info) { + DCHECK(!profile_files.empty()); + DCHECK(reference_profile_files.empty() || + (profile_files.size() == reference_profile_files.size())); + + std::vector<ProfileCompilationInfo> new_info(profile_files.size()); + bool should_compile = false; + // Read the main profile files. + for (size_t i = 0; i < profile_files.size(); i++) { + if (!new_info[i].Load(profile_files[i])) { + LOG(WARNING) << "Could not load profile file: " << profile_files[i]; + return false; + } + // Do we have enough new profiled methods that will make the compilation worthwhile? + should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation); + } + if (!should_compile) { + *profile_compilation_info = nullptr; + return true; + } + + std::unique_ptr<ProfileCompilationInfo> result(new ProfileCompilationInfo()); + for (size_t i = 0; i < new_info.size(); i++) { + // Merge all data into a single object. + result->Load(new_info[i]); + // If we have any reference profile information merge their information with + // the current profiles and save them back to disk. + if (!reference_profile_files.empty()) { + if (!new_info[i].Load(reference_profile_files[i])) { + LOG(WARNING) << "Could not load reference profile file: " << reference_profile_files[i]; + return false; + } + if (!new_info[i].Save(reference_profile_files[i])) { + LOG(WARNING) << "Could not save reference profile file: " << reference_profile_files[i]; + return false; + } + } + } + *profile_compilation_info = result.release(); + return true; +} + +} // namespace art diff --git a/compiler/profile_assistant.h b/compiler/profile_assistant.h new file mode 100644 index 0000000000..088c8bd1c7 --- /dev/null +++ b/compiler/profile_assistant.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_PROFILE_ASSISTANT_H_
+#define ART_COMPILER_PROFILE_ASSISTANT_H_
+
+#include <string>
+#include <vector>
+
+#include "jit/offline_profiling_info.h"
+
+namespace art {
+
+class ProfileAssistant {
+ public:
+ // Process the profile information present in the given files. Returns true
+ // if the analysis ended up successfully (i.e. no errors during reading,
+ // merging or writing of profile files).
+ //
+ // If the returned value is true and there is a significant difference between
+ // profile_files and reference_profile_files:
+ // - profile_compilation_info is set to a non-null object that
+ // can be used to drive compilation. It will be the merge of all the data
+ // found in profile_files and reference_profile_files.
+ // - the data from profile_files[i] is merged into
+ // reference_profile_files[i] and the corresponding backing file is
+ // updated.
+ //
+ // If the returned value is false or the difference is insignificant,
+ // profile_compilation_info will be set to null.
+ //
+ // Additional notes:
+ // - as mentioned above, this function may update the content of the files
+ // passed with the reference_profile_files.
+ // - if reference_profile_files is not empty it must be the same size as
+ // profile_files.
+ static bool ProcessProfiles(
+ const std::vector<std::string>& profile_files,
+ const std::vector<std::string>& reference_profile_files,
+ /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_PROFILE_ASSISTANT_H_
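As a usage sketch of the contract documented above, a hypothetical call site (the function and the file paths are invented for illustration):

    #include <string>
    #include <vector>

    #include "profile_assistant.h"

    // Merge the current profiles into the reference profiles and decide
    // whether recompilation is worthwhile.
    bool MaybeRecompile() {
      std::vector<std::string> profiles = { "/data/misc/profiles/cur/app.prof" };
      std::vector<std::string> reference = { "/data/misc/profiles/ref/app.prof" };
      art::ProfileCompilationInfo* info = nullptr;
      if (!art::ProfileAssistant::ProcessProfiles(profiles, reference, &info)) {
        return false;  // Read/merge/write error; do not compile.
      }
      if (info == nullptr) {
        return true;   // Difference insignificant; nothing to recompile.
      }
      // Hand `info` to the compiler, then release it.
      delete info;
      return true;
    }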
rd : IP; - if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, set_cc, &shifter_op)) { + if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, kCcKeep, &shifter_op)) { mvn(temp, shifter_op, cond, kCcKeep); add(rd, rn, ShifterOperand(temp), cond, set_cc); - } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), set_cc, &shifter_op)) { + } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), kCcKeep, &shifter_op)) { mvn(temp, shifter_op, cond, kCcKeep); sub(rd, rn, ShifterOperand(temp), cond, set_cc); } else if (High16Bits(-value) == 0) { @@ -3449,22 +3449,32 @@ void Thumb2Assembler::AddConstant(Register rd, Register rn, int32_t value, } void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) { - // We prefer to select the shorter code sequence rather than selecting add for - // positive values and sub for negatives ones, which would slightly improve - // the readability of generated code for some constants. + // We prefer to select the shorter code sequence rather than using plain cmp and cmn + // which would slightly improve the readability of generated code for some constants. ShifterOperand shifter_op; if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, kCcSet, &shifter_op)) { cmp(rn, shifter_op, cond); - } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, kCcSet, &shifter_op)) { + } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, -value, kCcSet, &shifter_op)) { cmn(rn, shifter_op, cond); } else { CHECK(rn != IP); - movw(IP, Low16Bits(value), cond); - uint16_t value_high = High16Bits(value); - if (value_high != 0) { - movt(IP, value_high, cond); + if (ShifterOperandCanHold(IP, kNoRegister, MVN, ~value, kCcKeep, &shifter_op)) { + mvn(IP, shifter_op, cond, kCcKeep); + cmp(rn, ShifterOperand(IP), cond); + } else if (ShifterOperandCanHold(IP, kNoRegister, MVN, ~(-value), kCcKeep, &shifter_op)) { + mvn(IP, shifter_op, cond, kCcKeep); + cmn(rn, ShifterOperand(IP), cond); + } else if (High16Bits(-value) == 0) { + movw(IP, Low16Bits(-value), cond); + cmn(rn, ShifterOperand(IP), cond); + } else { + movw(IP, Low16Bits(value), cond); + uint16_t value_high = High16Bits(value); + if (value_high != 0) { + movt(IP, value_high, cond); + } + cmp(rn, ShifterOperand(IP), cond); } - cmp(rn, ShifterOperand(IP), cond); } } diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 0ef0dc19e6..2df9b177bf 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -1626,6 +1626,76 @@ TEST(Thumb2AssemblerTest, AddConstant) { EmitAndCheck(&assembler, "AddConstant"); } +TEST(Thumb2AssemblerTest, CmpConstant) { + arm::Thumb2Assembler assembler; + + __ CmpConstant(R0, 0); // 16-bit CMP. + __ CmpConstant(R1, 1); // 16-bit CMP. + __ CmpConstant(R0, 7); // 16-bit CMP. + __ CmpConstant(R1, 8); // 16-bit CMP. + __ CmpConstant(R0, 255); // 16-bit CMP. + __ CmpConstant(R1, 256); // 32-bit CMP. + __ CmpConstant(R0, 257); // MNV+CMN. + __ CmpConstant(R1, 0xfff); // MOVW+CMP. + __ CmpConstant(R0, 0x1000); // 32-bit CMP. + __ CmpConstant(R1, 0x1001); // MNV+CMN. + __ CmpConstant(R0, 0x1002); // MOVW+CMP. + __ CmpConstant(R1, 0xffff); // MOVW+CMP. + __ CmpConstant(R0, 0x10000); // 32-bit CMP. + __ CmpConstant(R1, 0x10001); // 32-bit CMP. + __ CmpConstant(R0, 0x10002); // MVN+CMN. + __ CmpConstant(R1, 0x10003); // MOVW+MOVT+CMP. + __ CmpConstant(R0, -1); // 32-bit CMP. + __ CmpConstant(R1, -7); // CMN. + __ CmpConstant(R0, -8); // CMN. + __ CmpConstant(R1, -255); // CMN. 
+ __ CmpConstant(R0, -256); // CMN. + __ CmpConstant(R1, -257); // MVN+CMP. + __ CmpConstant(R0, -0xfff); // MOVW+CMN. + __ CmpConstant(R1, -0x1000); // CMN. + __ CmpConstant(R0, -0x1001); // MVN+CMP. + __ CmpConstant(R1, -0x1002); // MOVW+CMN. + __ CmpConstant(R0, -0xffff); // MOVW+CMN. + __ CmpConstant(R1, -0x10000); // CMN. + __ CmpConstant(R0, -0x10001); // CMN. + __ CmpConstant(R1, -0x10002); // MVN+CMP. + __ CmpConstant(R0, -0x10003); // MOVW+MOVT+CMP. + + __ CmpConstant(R8, 0); // 32-bit CMP. + __ CmpConstant(R9, 1); // 32-bit CMP. + __ CmpConstant(R8, 7); // 32-bit CMP. + __ CmpConstant(R9, 8); // 32-bit CMP. + __ CmpConstant(R8, 255); // 32-bit CMP. + __ CmpConstant(R9, 256); // 32-bit CMP. + __ CmpConstant(R8, 257); // MVN+CMN. + __ CmpConstant(R9, 0xfff); // MOVW+CMP. + __ CmpConstant(R8, 0x1000); // 32-bit CMP. + __ CmpConstant(R9, 0x1001); // MVN+CMN. + __ CmpConstant(R8, 0x1002); // MOVW+CMP. + __ CmpConstant(R9, 0xffff); // MOVW+CMP. + __ CmpConstant(R8, 0x10000); // 32-bit CMP. + __ CmpConstant(R9, 0x10001); // 32-bit CMP. + __ CmpConstant(R8, 0x10002); // MVN+CMN. + __ CmpConstant(R9, 0x10003); // MOVW+MOVT+CMP. + __ CmpConstant(R8, -1); // 32-bit CMP. + __ CmpConstant(R9, -7); // CMN. + __ CmpConstant(R8, -8); // CMN. + __ CmpConstant(R9, -255); // CMN. + __ CmpConstant(R8, -256); // CMN. + __ CmpConstant(R9, -257); // MVN+CMP. + __ CmpConstant(R8, -0xfff); // MOVW+CMN. + __ CmpConstant(R9, -0x1000); // CMN. + __ CmpConstant(R8, -0x1001); // MVN+CMP. + __ CmpConstant(R9, -0x1002); // MOVW+CMN. + __ CmpConstant(R8, -0xffff); // MOVW+CMN. + __ CmpConstant(R9, -0x10000); // CMN. + __ CmpConstant(R8, -0x10001); // CMN. + __ CmpConstant(R9, -0x10002); // MVN+CMP. + __ CmpConstant(R8, -0x10003); // MOVW+MOVT+CMP. + + EmitAndCheck(&assembler, "CmpConstant"); +} + #undef __ } // namespace arm } // namespace art diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index f07f8c74d7..6736015bf1 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -1,4 +1,4 @@ -const char* SimpleMovResults[] = { +const char* const SimpleMovResults[] = { " 0: 0008 movs r0, r1\n", " 2: 4608 mov r0, r1\n", " 4: 46c8 mov r8, r9\n", @@ -6,18 +6,18 @@ const char* SimpleMovResults[] = { " 8: f04f 0809 mov.w r8, #9\n", nullptr }; -const char* SimpleMov32Results[] = { +const char* const SimpleMov32Results[] = { " 0: ea4f 0001 mov.w r0, r1\n", " 4: ea4f 0809 mov.w r8, r9\n", nullptr }; -const char* SimpleMovAddResults[] = { +const char* const SimpleMovAddResults[] = { " 0: 4608 mov r0, r1\n", " 2: 1888 adds r0, r1, r2\n", " 4: 1c08 adds r0, r1, #0\n", nullptr }; -const char* DataProcessingRegisterResults[] = { +const char* const DataProcessingRegisterResults[] = { " 0: ea6f 0001 mvn.w r0, r1\n", " 4: eb01 0002 add.w r0, r1, r2\n", " 8: eba1 0002 sub.w r0, r1, r2\n", @@ -129,7 +129,7 @@ const char* DataProcessingRegisterResults[] = { " 120: eb01 0c00 add.w ip, r1, r0\n", nullptr }; -const char* DataProcessingImmediateResults[] = { +const char* const DataProcessingImmediateResults[] = { " 0: 2055 movs r0, #85 ; 0x55\n", " 2: f06f 0055 mvn.w r0, #85 ; 0x55\n", " 6: f101 0055 add.w r0, r1, #85 ; 0x55\n", @@ -154,7 +154,7 @@ const char* DataProcessingImmediateResults[] = { " 48: 1f48 subs r0, r1, #5\n", nullptr }; -const char* DataProcessingModifiedImmediateResults[] = { +const char* const DataProcessingModifiedImmediateResults[] = { " 0: f04f 1055 mov.w r0, #5570645 ;
0x550055\n", " 4: f06f 1055 mvn.w r0, #5570645 ; 0x550055\n", " 8: f101 1055 add.w r0, r1, #5570645 ; 0x550055\n", @@ -173,7 +173,7 @@ const char* DataProcessingModifiedImmediateResults[] = { " 3c: f110 1f55 cmn.w r0, #5570645 ; 0x550055\n", nullptr }; -const char* DataProcessingModifiedImmediatesResults[] = { +const char* const DataProcessingModifiedImmediatesResults[] = { " 0: f04f 1055 mov.w r0, #5570645 ; 0x550055\n", " 4: f04f 2055 mov.w r0, #1426085120 ; 0x55005500\n", " 8: f04f 3055 mov.w r0, #1431655765 ; 0x55555555\n", @@ -183,7 +183,7 @@ const char* DataProcessingModifiedImmediatesResults[] = { " 18: f44f 70d4 mov.w r0, #424 ; 0x1a8\n", nullptr }; -const char* DataProcessingShiftedRegisterResults[] = { +const char* const DataProcessingShiftedRegisterResults[] = { " 0: 0123 lsls r3, r4, #4\n", " 2: 0963 lsrs r3, r4, #5\n", " 4: 11a3 asrs r3, r4, #6\n", @@ -201,7 +201,7 @@ const char* DataProcessingShiftedRegisterResults[] = { " 32: ea5f 0834 movs.w r8, r4, rrx\n", nullptr }; -const char* ShiftImmediateResults[] = { +const char* const ShiftImmediateResults[] = { " 0: 0123 lsls r3, r4, #4\n", " 2: 0963 lsrs r3, r4, #5\n", " 4: 11a3 asrs r3, r4, #6\n", @@ -219,7 +219,7 @@ const char* ShiftImmediateResults[] = { " 32: ea5f 0834 movs.w r8, r4, rrx\n", nullptr }; -const char* BasicLoadResults[] = { +const char* const BasicLoadResults[] = { " 0: 69a3 ldr r3, [r4, #24]\n", " 2: 7e23 ldrb r3, [r4, #24]\n", " 4: 8b23 ldrh r3, [r4, #24]\n", @@ -233,7 +233,7 @@ const char* BasicLoadResults[] = { " 20: f9b4 8018 ldrsh.w r8, [r4, #24]\n", nullptr }; -const char* BasicStoreResults[] = { +const char* const BasicStoreResults[] = { " 0: 61a3 str r3, [r4, #24]\n", " 2: 7623 strb r3, [r4, #24]\n", " 4: 8323 strh r3, [r4, #24]\n", @@ -243,7 +243,7 @@ const char* BasicStoreResults[] = { " 10: f8a4 8018 strh.w r8, [r4, #24]\n", nullptr }; -const char* ComplexLoadResults[] = { +const char* const ComplexLoadResults[] = { " 0: 69a3 ldr r3, [r4, #24]\n", " 2: f854 3f18 ldr.w r3, [r4, #24]!\n", " 6: f854 3b18 ldr.w r3, [r4], #24\n", @@ -276,7 +276,7 @@ const char* ComplexLoadResults[] = { " 6e: f934 3918 ldrsh.w r3, [r4], #-24\n", nullptr }; -const char* ComplexStoreResults[] = { +const char* const ComplexStoreResults[] = { " 0: 61a3 str r3, [r4, #24]\n", " 2: f844 3f18 str.w r3, [r4, #24]!\n", " 6: f844 3b18 str.w r3, [r4], #24\n", @@ -297,7 +297,7 @@ const char* ComplexStoreResults[] = { " 3e: f824 3918 strh.w r3, [r4], #-24\n", nullptr }; -const char* NegativeLoadStoreResults[] = { +const char* const NegativeLoadStoreResults[] = { " 0: f854 3c18 ldr.w r3, [r4, #-24]\n", " 4: f854 3d18 ldr.w r3, [r4, #-24]!\n", " 8: f854 3918 ldr.w r3, [r4], #-24\n", @@ -348,12 +348,12 @@ const char* NegativeLoadStoreResults[] = { " bc: f824 3b18 strh.w r3, [r4], #24\n", nullptr }; -const char* SimpleLoadStoreDualResults[] = { +const char* const SimpleLoadStoreDualResults[] = { " 0: e9c0 2306 strd r2, r3, [r0, #24]\n", " 4: e9d0 2306 ldrd r2, r3, [r0, #24]\n", nullptr }; -const char* ComplexLoadStoreDualResults[] = { +const char* const ComplexLoadStoreDualResults[] = { " 0: e9c0 2306 strd r2, r3, [r0, #24]\n", " 4: e9e0 2306 strd r2, r3, [r0, #24]!\n", " 8: e8e0 2306 strd r2, r3, [r0], #24\n", @@ -368,7 +368,7 @@ const char* ComplexLoadStoreDualResults[] = { " 2c: e870 2306 ldrd r2, r3, [r0], #-24\n", nullptr }; -const char* NegativeLoadStoreDualResults[] = { +const char* const NegativeLoadStoreDualResults[] = { " 0: e940 2306 strd r2, r3, [r0, #-24]\n", " 4: e960 2306 strd r2, r3, [r0, #-24]!\n", " 8: e860 2306 strd r2, 
r3, [r0], #-24\n", @@ -383,7 +383,7 @@ const char* NegativeLoadStoreDualResults[] = { " 2c: e8f0 2306 ldrd r2, r3, [r0], #24\n", nullptr }; -const char* SimpleBranchResults[] = { +const char* const SimpleBranchResults[] = { " 0: 2002 movs r0, #2\n", " 2: 2101 movs r1, #1\n", " 4: e7fd b.n 2 <SimpleBranch+0x2>\n", @@ -403,7 +403,7 @@ const char* SimpleBranchResults[] = { " 20: 2006 movs r0, #6\n", nullptr }; -const char* LongBranchResults[] = { +const char* const LongBranchResults[] = { " 0: f04f 0002 mov.w r0, #2\n", " 4: f04f 0101 mov.w r1, #1\n", " 8: f7ff bffc b.w 4 <LongBranch+0x4>\n", @@ -423,14 +423,14 @@ const char* LongBranchResults[] = { " 40: f04f 0006 mov.w r0, #6\n", nullptr }; -const char* LoadMultipleResults[] = { +const char* const LoadMultipleResults[] = { " 0: cc09 ldmia r4!, {r0, r3}\n", " 2: e934 4800 ldmdb r4!, {fp, lr}\n", " 6: e914 4800 ldmdb r4, {fp, lr}\n", " a: f854 5b04 ldr.w r5, [r4], #4\n", nullptr }; -const char* StoreMultipleResults[] = { +const char* const StoreMultipleResults[] = { " 0: c409 stmia r4!, {r0, r3}\n", " 2: e8a4 4800 stmia.w r4!, {fp, lr}\n", " 6: e884 4800 stmia.w r4, {fp, lr}\n", @@ -438,7 +438,7 @@ const char* StoreMultipleResults[] = { " e: f844 5d04 str.w r5, [r4, #-4]!\n", nullptr }; -const char* MovWMovTResults[] = { +const char* const MovWMovTResults[] = { " 0: f240 0400 movw r4, #0\n", " 4: f240 0434 movw r4, #52 ; 0x34\n", " 8: f240 0934 movw r9, #52 ; 0x34\n", @@ -449,7 +449,7 @@ const char* MovWMovTResults[] = { " 1c: f6cf 71ff movt r1, #65535 ; 0xffff\n", nullptr }; -const char* SpecialAddSubResults[] = { +const char* const SpecialAddSubResults[] = { " 0: aa14 add r2, sp, #80 ; 0x50\n", " 2: b014 add sp, #80 ; 0x50\n", " 4: f10d 0850 add.w r8, sp, #80 ; 0x50\n", @@ -463,7 +463,7 @@ const char* SpecialAddSubResults[] = { " 22: f6ad 7dfc subw sp, sp, #4092 ; 0xffc\n", nullptr }; -const char* LoadFromOffsetResults[] = { +const char* const LoadFromOffsetResults[] = { " 0: 68e2 ldr r2, [r4, #12]\n", " 2: f8d4 2fff ldr.w r2, [r4, #4095] ; 0xfff\n", " 6: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", @@ -514,7 +514,7 @@ const char* LoadFromOffsetResults[] = { " 9e: f9b4 200c ldrsh.w r2, [r4, #12]\n", nullptr }; -const char* StoreToOffsetResults[] = { +const char* const StoreToOffsetResults[] = { " 0: 60e2 str r2, [r4, #12]\n", " 2: f8c4 2fff str.w r2, [r4, #4095] ; 0xfff\n", " 6: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", @@ -563,7 +563,7 @@ const char* StoreToOffsetResults[] = { " a4: 7322 strb r2, [r4, #12]\n", nullptr }; -const char* IfThenResults[] = { +const char* const IfThenResults[] = { " 0: bf08 it eq\n", " 2: 2101 moveq r1, #1\n", " 4: bf04 itt eq\n", @@ -587,7 +587,7 @@ const char* IfThenResults[] = { " 28: 2404 movne r4, #4\n", nullptr }; -const char* CbzCbnzResults[] = { +const char* const CbzCbnzResults[] = { " 0: b10a cbz r2, 6 <CbzCbnz+0x6>\n", " 2: 2103 movs r1, #3\n", " 4: 2203 movs r2, #3\n", @@ -598,7 +598,7 @@ const char* CbzCbnzResults[] = { " 10: 2204 movs r2, #4\n", nullptr }; -const char* MultiplyResults[] = { +const char* const MultiplyResults[] = { " 0: 4348 muls r0, r1\n", " 2: fb01 f002 mul.w r0, r1, r2\n", " 6: fb09 f808 mul.w r8, r9, r8\n", @@ -611,21 +611,21 @@ const char* MultiplyResults[] = { " 22: fbaa 890b umull r8, r9, sl, fp\n", nullptr }; -const char* DivideResults[] = { +const char* const DivideResults[] = { " 0: fb91 f0f2 sdiv r0, r1, r2\n", " 4: fb99 f8fa sdiv r8, r9, sl\n", " 8: fbb1 f0f2 udiv r0, r1, r2\n", " c: fbb9 f8fa udiv r8, r9, sl\n", nullptr }; -const char* VMovResults[] = { +const char* 
const VMovResults[] = { " 0: eef7 0a00 vmov.f32 s1, #112 ; 0x70\n", " 4: eeb7 1b00 vmov.f64 d1, #112 ; 0x70\n", " 8: eef0 0a41 vmov.f32 s1, s2\n", " c: eeb0 1b42 vmov.f64 d1, d2\n", nullptr }; -const char* BasicFloatingPointResults[] = { +const char* const BasicFloatingPointResults[] = { " 0: ee30 0a81 vadd.f32 s0, s1, s2\n", " 4: ee30 0ac1 vsub.f32 s0, s1, s2\n", " 8: ee20 0a81 vmul.f32 s0, s1, s2\n", @@ -646,7 +646,7 @@ const char* BasicFloatingPointResults[] = { " 44: eeb1 0bc1 vsqrt.f64 d0, d1\n", nullptr }; -const char* FloatingPointConversionsResults[] = { +const char* const FloatingPointConversionsResults[] = { " 0: eeb7 1bc2 vcvt.f32.f64 s2, d2\n", " 4: eeb7 2ac1 vcvt.f64.f32 d2, s2\n", " 8: eefd 0ac1 vcvt.s32.f32 s1, s2\n", @@ -659,35 +659,35 @@ const char* FloatingPointConversionsResults[] = { " 24: eeb8 1b41 vcvt.f64.u32 d1, s2\n", nullptr }; -const char* FloatingPointComparisonsResults[] = { +const char* const FloatingPointComparisonsResults[] = { " 0: eeb4 0a60 vcmp.f32 s0, s1\n", " 4: eeb4 0b41 vcmp.f64 d0, d1\n", " 8: eeb5 1a40 vcmp.f32 s2, #0.0\n", " c: eeb5 2b40 vcmp.f64 d2, #0.0\n", nullptr }; -const char* CallsResults[] = { +const char* const CallsResults[] = { " 0: 47f0 blx lr\n", " 2: 4770 bx lr\n", nullptr }; -const char* BreakpointResults[] = { +const char* const BreakpointResults[] = { " 0: be00 bkpt 0x0000\n", nullptr }; -const char* StrR1Results[] = { +const char* const StrR1Results[] = { " 0: 9111 str r1, [sp, #68] ; 0x44\n", " 2: f8cd 142c str.w r1, [sp, #1068] ; 0x42c\n", nullptr }; -const char* VPushPopResults[] = { +const char* const VPushPopResults[] = { " 0: ed2d 1a04 vpush {s2-s5}\n", " 4: ed2d 2b08 vpush {d2-d5}\n", " 8: ecbd 1a04 vpop {s2-s5}\n", " c: ecbd 2b08 vpop {d2-d5}\n", nullptr }; -const char* Max16BitBranchResults[] = { +const char* const Max16BitBranchResults[] = { " 0: e3ff b.n 802 <Max16BitBranch+0x802>\n", " 2: 2300 movs r3, #0\n", " 4: 2302 movs r3, #2\n", @@ -1716,7 +1716,7 @@ const char* Max16BitBranchResults[] = { " 802: 4611 mov r1, r2\n", nullptr }; -const char* Branch32Results[] = { +const char* const Branch32Results[] = { " 0: f000 bc01 b.w 806 <Branch32+0x806>\n", " 4: 2300 movs r3, #0\n", " 6: 2302 movs r3, #2\n", @@ -2746,7 +2746,7 @@ const char* Branch32Results[] = { " 806: 4611 mov r1, r2\n", nullptr }; -const char* CompareAndBranchMaxResults[] = { +const char* const CompareAndBranchMaxResults[] = { " 0: b3fc cbz r4, 82 <CompareAndBranchMax+0x82>\n", " 2: 2300 movs r3, #0\n", " 4: 2302 movs r3, #2\n", @@ -2815,7 +2815,7 @@ const char* CompareAndBranchMaxResults[] = { " 82: 4611 mov r1, r2\n", nullptr }; -const char* CompareAndBranchRelocation16Results[] = { +const char* const CompareAndBranchRelocation16Results[] = { " 0: 2c00 cmp r4, #0\n", " 2: d040 beq.n 86 <CompareAndBranchRelocation16+0x86>\n", " 4: 2300 movs r3, #0\n", @@ -2886,7 +2886,7 @@ const char* CompareAndBranchRelocation16Results[] = { " 86: 4611 mov r1, r2\n", nullptr }; -const char* CompareAndBranchRelocation32Results[] = { +const char* const CompareAndBranchRelocation32Results[] = { " 0: 2c00 cmp r4, #0\n", " 2: f000 8401 beq.w 808 <CompareAndBranchRelocation32+0x808>\n", " 6: 2300 movs r3, #0\n", @@ -3917,7 +3917,7 @@ const char* CompareAndBranchRelocation32Results[] = { " 808: 4611 mov r1, r2\n", nullptr }; -const char* MixedBranch32Results[] = { +const char* const MixedBranch32Results[] = { " 0: f000 bc03 b.w 80a <MixedBranch32+0x80a>\n", " 4: 2300 movs r3, #0\n", " 6: 2302 movs r3, #2\n", @@ -4948,7 +4948,7 @@ const char* MixedBranch32Results[] = { " 80a: 
4611 mov r1, r2\n", nullptr }; -const char* ShiftsResults[] = { +const char* const ShiftsResults[] = { " 0: 0148 lsls r0, r1, #5\n", " 2: 0948 lsrs r0, r1, #5\n", " 4: 1148 asrs r0, r1, #5\n", @@ -4997,7 +4997,7 @@ const char* ShiftsResults[] = { " 98: fa51 f008 asrs.w r0, r1, r8\n", nullptr }; -const char* LoadStoreRegOffsetResults[] = { +const char* const LoadStoreRegOffsetResults[] = { " 0: 5888 ldr r0, [r1, r2]\n", " 2: 5088 str r0, [r1, r2]\n", " 4: f851 0012 ldr.w r0, [r1, r2, lsl #1]\n", @@ -5012,7 +5012,7 @@ const char* LoadStoreRegOffsetResults[] = { " 28: f841 0008 str.w r0, [r1, r8]\n", nullptr }; -const char* LoadStoreLiteralResults[] = { +const char* const LoadStoreLiteralResults[] = { " 0: 4801 ldr r0, [pc, #4] ; (8 <LoadStoreLiteral+0x8>)\n", " 2: f8cf 0004 str.w r0, [pc, #4] ; 8 <LoadStoreLiteral+0x8>\n", " 6: f85f 0008 ldr.w r0, [pc, #-8] ; 0 <LoadStoreLiteral>\n", @@ -5023,7 +5023,7 @@ const char* LoadStoreLiteralResults[] = { " 18: f8cf 07ff str.w r0, [pc, #2047] ; 81b <LoadStoreLiteral+0x81b>\n", nullptr }; -const char* LoadStoreLimitsResults[] = { +const char* const LoadStoreLimitsResults[] = { " 0: 6fe0 ldr r0, [r4, #124] ; 0x7c\n", " 2: f8d4 0080 ldr.w r0, [r4, #128] ; 0x80\n", " 6: 7fe0 ldrb r0, [r4, #31]\n", @@ -5042,7 +5042,7 @@ const char* LoadStoreLimitsResults[] = { " 30: f8a4 0040 strh.w r0, [r4, #64] ; 0x40\n", nullptr }; -const char* CompareAndBranchResults[] = { +const char* const CompareAndBranchResults[] = { " 0: b130 cbz r0, 10 <CompareAndBranch+0x10>\n", " 2: f1bb 0f00 cmp.w fp, #0\n", " 6: d003 beq.n 10 <CompareAndBranch+0x10>\n", @@ -5052,7 +5052,7 @@ const char* CompareAndBranchResults[] = { nullptr }; -const char* AddConstantResults[] = { +const char* const AddConstantResults[] = { " 0: 4608 mov r0, r1\n", " 2: 1c48 adds r0, r1, #1\n", " 4: 1dc8 adds r0, r1, #7\n", @@ -5370,6 +5370,104 @@ const char* AddConstantResults[] = { nullptr }; +const char* const CmpConstantResults[] = { + " 0: 2800 cmp r0, #0\n", + " 2: 2901 cmp r1, #1\n", + " 4: 2807 cmp r0, #7\n", + " 6: 2908 cmp r1, #8\n", + " 8: 28ff cmp r0, #255 ; 0xff\n", + " a: f5b1 7f80 cmp.w r1, #256 ; 0x100\n", + " e: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 12: eb10 0f0c cmn.w r0, ip\n", + " 16: f640 7cff movw ip, #4095 ; 0xfff\n", + " 1a: 4561 cmp r1, ip\n", + " 1c: f5b0 5f80 cmp.w r0, #4096 ; 0x1000\n", + " 20: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 24: eb11 0f0c cmn.w r1, ip\n", + " 28: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 2c: 4560 cmp r0, ip\n", + " 2e: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 32: 4561 cmp r1, ip\n", + " 34: f5b0 3f80 cmp.w r0, #65536 ; 0x10000\n", + " 38: f1b1 1f01 cmp.w r1, #65537 ; 0x10001\n", + " 3c: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 40: eb10 0f0c cmn.w r0, ip\n", + " 44: f240 0c03 movw ip, #3\n", + " 48: f2c0 0c01 movt ip, #1\n", + " 4c: 4561 cmp r1, ip\n", + " 4e: f1b0 3fff cmp.w r0, #4294967295 ; 0xffffffff\n", + " 52: f111 0f07 cmn.w r1, #7\n", + " 56: f110 0f08 cmn.w r0, #8\n", + " 5a: f111 0fff cmn.w r1, #255 ; 0xff\n", + " 5e: f510 7f80 cmn.w r0, #256 ; 0x100\n", + " 62: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 66: 4561 cmp r1, ip\n", + " 68: f640 7cff movw ip, #4095 ; 0xfff\n", + " 6c: eb10 0f0c cmn.w r0, ip\n", + " 70: f511 5f80 cmn.w r1, #4096 ; 0x1000\n", + " 74: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 78: 4560 cmp r0, ip\n", + " 7a: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 7e: eb11 0f0c cmn.w r1, ip\n", + " 82: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 86: eb10 0f0c cmn.w r0, ip\n", + " 8a: f511 3f80 cmn.w r1, #65536 ; 0x10000\n", + 
" 8e: f110 1f01 cmn.w r0, #65537 ; 0x10001\n", + " 92: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 96: 4561 cmp r1, ip\n", + " 98: f64f 7cfd movw ip, #65533 ; 0xfffd\n", + " 9c: f6cf 7cfe movt ip, #65534 ; 0xfffe\n", + " a0: 4560 cmp r0, ip\n", + " a2: f1b8 0f00 cmp.w r8, #0\n", + " a6: f1b9 0f01 cmp.w r9, #1\n", + " aa: f1b8 0f07 cmp.w r8, #7\n", + " ae: f1b9 0f08 cmp.w r9, #8\n", + " b2: f1b8 0fff cmp.w r8, #255 ; 0xff\n", + " b6: f5b9 7f80 cmp.w r9, #256 ; 0x100\n", + " ba: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " be: eb18 0f0c cmn.w r8, ip\n", + " c2: f640 7cff movw ip, #4095 ; 0xfff\n", + " c6: 45e1 cmp r9, ip\n", + " c8: f5b8 5f80 cmp.w r8, #4096 ; 0x1000\n", + " cc: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " d0: eb19 0f0c cmn.w r9, ip\n", + " d4: f241 0c02 movw ip, #4098 ; 0x1002\n", + " d8: 45e0 cmp r8, ip\n", + " da: f64f 7cff movw ip, #65535 ; 0xffff\n", + " de: 45e1 cmp r9, ip\n", + " e0: f5b8 3f80 cmp.w r8, #65536 ; 0x10000\n", + " e4: f1b9 1f01 cmp.w r9, #65537 ; 0x10001\n", + " e8: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " ec: eb18 0f0c cmn.w r8, ip\n", + " f0: f240 0c03 movw ip, #3\n", + " f4: f2c0 0c01 movt ip, #1\n", + " f8: 45e1 cmp r9, ip\n", + " fa: f1b8 3fff cmp.w r8, #4294967295 ; 0xffffffff\n", + " fe: f119 0f07 cmn.w r9, #7\n", + " 102: f118 0f08 cmn.w r8, #8\n", + " 106: f119 0fff cmn.w r9, #255 ; 0xff\n", + " 10a: f518 7f80 cmn.w r8, #256 ; 0x100\n", + " 10e: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 112: 45e1 cmp r9, ip\n", + " 114: f640 7cff movw ip, #4095 ; 0xfff\n", + " 118: eb18 0f0c cmn.w r8, ip\n", + " 11c: f519 5f80 cmn.w r9, #4096 ; 0x1000\n", + " 120: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 124: 45e0 cmp r8, ip\n", + " 126: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 12a: eb19 0f0c cmn.w r9, ip\n", + " 12e: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 132: eb18 0f0c cmn.w r8, ip\n", + " 136: f519 3f80 cmn.w r9, #65536 ; 0x10000\n", + " 13a: f118 1f01 cmn.w r8, #65537 ; 0x10001\n", + " 13e: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 142: 45e1 cmp r9, ip\n", + " 144: f64f 7cfd movw ip, #65533 ; 0xfffd\n", + " 148: f6cf 7cfe movt ip, #65534 ; 0xfffe\n", + " 14c: 45e0 cmp r8, ip\n", + nullptr +}; + std::map<std::string, const char* const*> test_results; void setup_results() { test_results["SimpleMov"] = SimpleMovResults; @@ -5421,4 +5519,5 @@ void setup_results() { test_results["LoadStoreLimits"] = LoadStoreLimitsResults; test_results["CompareAndBranch"] = CompareAndBranchResults; test_results["AddConstant"] = AddConstantResults; + test_results["CmpConstant"] = CmpConstantResults; } diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 0dc307c9ac..ac9c097892 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -1035,6 +1035,22 @@ void MipsAssembler::Movt(Register rd, Register rs, int cc) { EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01); } +void MipsAssembler::TruncLS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09); +} + +void MipsAssembler::TruncLD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09); +} + +void MipsAssembler::TruncWS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D); +} + +void MipsAssembler::TruncWD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D); +} + void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) { EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20); 
} @@ -1051,6 +1067,14 @@ void MipsAssembler::Cvtds(FRegister fd, FRegister fs) { EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21); } +void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20); +} + +void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21); +} + void MipsAssembler::Mfc1(Register rt, FRegister fs) { EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); } @@ -1067,6 +1091,24 @@ void MipsAssembler::Mthc1(Register rt, FRegister fs) { EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); } +void MipsAssembler::MoveFromFpuHigh(Register rt, FRegister fs) { + if (Is32BitFPU()) { + CHECK_EQ(fs % 2, 0) << fs; + Mfc1(rt, static_cast<FRegister>(fs + 1)); + } else { + Mfhc1(rt, fs); + } +} + +void MipsAssembler::MoveToFpuHigh(Register rt, FRegister fs) { + if (Is32BitFPU()) { + CHECK_EQ(fs % 2, 0) << fs; + Mtc1(rt, static_cast<FRegister>(fs + 1)); + } else { + Mthc1(rt, fs); + } +} + void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) { EmitI(0x31, rs, static_cast<Register>(ft), imm16); } @@ -1213,10 +1255,10 @@ void MipsAssembler::LoadDConst64(FRegister rd, int64_t value, Register temp) { Mtc1(temp, rd); } if (high == 0) { - Mthc1(ZERO, rd); + MoveToFpuHigh(ZERO, rd); } else { LoadConst32(temp, high); - Mthc1(temp, rd); + MoveToFpuHigh(temp, rd); } } diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 066e7b0014..01c6490f88 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -265,15 +265,23 @@ class MipsAssembler FINAL : public Assembler { void Movf(Register rd, Register rs, int cc); // R2 void Movt(Register rd, Register rs, int cc); // R2 + void TruncLS(FRegister fd, FRegister fs); // R2+, FR=1 + void TruncLD(FRegister fd, FRegister fs); // R2+, FR=1 + void TruncWS(FRegister fd, FRegister fs); + void TruncWD(FRegister fd, FRegister fs); void Cvtsw(FRegister fd, FRegister fs); void Cvtdw(FRegister fd, FRegister fs); void Cvtsd(FRegister fd, FRegister fs); void Cvtds(FRegister fd, FRegister fs); + void Cvtsl(FRegister fd, FRegister fs); // R2+, FR=1 + void Cvtdl(FRegister fd, FRegister fs); // R2+, FR=1 void Mfc1(Register rt, FRegister fs); void Mtc1(Register rt, FRegister fs); void Mfhc1(Register rt, FRegister fs); void Mthc1(Register rt, FRegister fs); + void MoveFromFpuHigh(Register rt, FRegister fs); + void MoveToFpuHigh(Register rt, FRegister fs); void Lwc1(FRegister ft, Register rs, uint16_t imm16); void Ldc1(FRegister ft, Register rs, uint16_t imm16); void Swc1(FRegister ft, Register rs, uint16_t imm16); diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index 4361843c54..5fc3deebd3 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -599,6 +599,14 @@ TEST_F(AssemblerMIPSTest, CvtDW) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "CvtDW"); } +TEST_F(AssemblerMIPSTest, CvtSL) { + DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsl, "cvt.s.l ${reg1}, ${reg2}"), "CvtSL"); +} + +TEST_F(AssemblerMIPSTest, CvtDL) { + DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "CvtDL"); +} + TEST_F(AssemblerMIPSTest, CvtSD) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "CvtSD"); } @@ -607,6 +615,22 @@ 
TEST_F(AssemblerMIPSTest, CvtDS) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "CvtDS"); } +TEST_F(AssemblerMIPSTest, TruncWS) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "TruncWS"); +} + +TEST_F(AssemblerMIPSTest, TruncWD) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "TruncWD"); +} + +TEST_F(AssemblerMIPSTest, TruncLS) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "TruncLS"); +} + +TEST_F(AssemblerMIPSTest, TruncLD) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "TruncLD"); +} + TEST_F(AssemblerMIPSTest, Mfc1) { DriverStr(RepeatRF(&mips::MipsAssembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1"); } diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index cfd8421e93..f9ff2df8bb 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -771,6 +771,22 @@ void Mips64Assembler::RoundWD(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xc); } +void Mips64Assembler::TruncLS(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x9); +} + +void Mips64Assembler::TruncLD(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x9); +} + +void Mips64Assembler::TruncWS(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xd); +} + +void Mips64Assembler::TruncWD(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xd); +} + void Mips64Assembler::CeilLS(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xa); } diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 883f013f87..3262640ce7 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -250,6 +250,10 @@ class Mips64Assembler FINAL : public Assembler { void RoundLD(FpuRegister fd, FpuRegister fs); void RoundWS(FpuRegister fd, FpuRegister fs); void RoundWD(FpuRegister fd, FpuRegister fs); + void TruncLS(FpuRegister fd, FpuRegister fs); + void TruncLD(FpuRegister fd, FpuRegister fs); + void TruncWS(FpuRegister fd, FpuRegister fs); + void TruncWD(FpuRegister fd, FpuRegister fs); void CeilLS(FpuRegister fd, FpuRegister fs); void CeilLD(FpuRegister fd, FpuRegister fs); void CeilWS(FpuRegister fd, FpuRegister fs); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index bac4375b35..7d79be2731 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -527,6 +527,22 @@ TEST_F(AssemblerMIPS64Test, CvtSW) { DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "cvt.s.w"); } +TEST_F(AssemblerMIPS64Test, TruncWS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "trunc.w.s"); +} + +TEST_F(AssemblerMIPS64Test, TruncWD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "trunc.w.d"); +} + +TEST_F(AssemblerMIPS64Test, TruncLS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "trunc.l.s"); +} + +TEST_F(AssemblerMIPS64Test, TruncLD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLD, "trunc.l.d 
${reg1}, ${reg2}"), "trunc.l.d"); +} + //////////////// // CALL / JMP // //////////////// diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc index 42ed8810f8..244a5fedbe 100644 --- a/compiler/utils/swap_space.cc +++ b/compiler/utils/swap_space.cc @@ -18,6 +18,7 @@ #include <algorithm> #include <numeric> +#include <sys/mman.h> #include "base/logging.h" #include "base/macros.h" @@ -44,23 +45,17 @@ static void DumpFreeMap(const FreeBySizeSet& free_by_size) { } } -template <typename FreeByStartSet, typename FreeBySizeSet> -static void RemoveChunk(FreeByStartSet* free_by_start, - FreeBySizeSet* free_by_size, - typename FreeBySizeSet::const_iterator free_by_size_pos) { +void SwapSpace::RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) { auto free_by_start_pos = free_by_size_pos->second; - free_by_size->erase(free_by_size_pos); - free_by_start->erase(free_by_start_pos); + free_by_size_.erase(free_by_size_pos); + free_by_start_.erase(free_by_start_pos); } -template <typename FreeByStartSet, typename FreeBySizeSet> -static void InsertChunk(FreeByStartSet* free_by_start, - FreeBySizeSet* free_by_size, - const SpaceChunk& chunk) { +inline void SwapSpace::InsertChunk(const SpaceChunk& chunk) { DCHECK_NE(chunk.size, 0u); - auto insert_result = free_by_start->insert(chunk); + auto insert_result = free_by_start_.insert(chunk); DCHECK(insert_result.second); - free_by_size->emplace(chunk.size, insert_result.first); + free_by_size_.emplace(chunk.size, insert_result.first); } SwapSpace::SwapSpace(int fd, size_t initial_size) @@ -69,10 +64,18 @@ SwapSpace::SwapSpace(int fd, size_t initial_size) lock_("SwapSpace lock", static_cast<LockLevel>(LockLevel::kDefaultMutexLevel - 1)) { // Assume that the file is unlinked. - InsertChunk(&free_by_start_, &free_by_size_, NewFileChunk(initial_size)); + InsertChunk(NewFileChunk(initial_size)); } SwapSpace::~SwapSpace() { + // Unmap all mmapped chunks. Nothing should be allocated anymore at + // this point, so there should be only full size chunks in free_by_start_. + for (const SpaceChunk& chunk : free_by_start_) { + if (munmap(chunk.ptr, chunk.size) != 0) { + PLOG(ERROR) << "Failed to unmap swap space chunk at " + << static_cast<const void*>(chunk.ptr) << " size=" << chunk.size; + } + } // All arenas are backed by the same file. Just close the descriptor. close(fd_); } @@ -113,7 +116,7 @@ void* SwapSpace::Alloc(size_t size) { : free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() }); if (it != free_by_size_.end()) { old_chunk = *it->second; - RemoveChunk(&free_by_start_, &free_by_size_, it); + RemoveChunk(it); } else { // Not a big enough free chunk, need to increase file size. old_chunk = NewFileChunk(size); @@ -124,13 +127,13 @@ void* SwapSpace::Alloc(size_t size) { if (old_chunk.size != size) { // Insert the remainder. SpaceChunk new_chunk = { old_chunk.ptr + size, old_chunk.size - size }; - InsertChunk(&free_by_start_, &free_by_size_, new_chunk); + InsertChunk(new_chunk); } return ret; } -SpaceChunk SwapSpace::NewFileChunk(size_t min_size) { +SwapSpace::SpaceChunk SwapSpace::NewFileChunk(size_t min_size) { #if !defined(__APPLE__) size_t next_part = std::max(RoundUp(min_size, kPageSize), RoundUp(kMininumMapSize, kPageSize)); int result = TEMP_FAILURE_RETRY(ftruncate64(fd_, size_ + next_part)); @@ -159,7 +162,7 @@ SpaceChunk SwapSpace::NewFileChunk(size_t min_size) { } // TODO: Full coalescing. 
-void SwapSpace::Free(void* ptrV, size_t size) { +void SwapSpace::Free(void* ptr, size_t size) { MutexLock lock(Thread::Current(), lock_); size = RoundUp(size, 8U); @@ -168,7 +171,7 @@ void SwapSpace::Free(void* ptrV, size_t size) { free_before = CollectFree(free_by_start_, free_by_size_); } - SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptrV), size }; + SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptr), size }; auto it = free_by_start_.lower_bound(chunk); if (it != free_by_start_.begin()) { auto prev = it; @@ -180,7 +183,7 @@ void SwapSpace::Free(void* ptrV, size_t size) { chunk.ptr -= prev->size; auto erase_pos = free_by_size_.find(FreeBySizeEntry { prev->size, prev }); DCHECK(erase_pos != free_by_size_.end()); - RemoveChunk(&free_by_start_, &free_by_size_, erase_pos); + RemoveChunk(erase_pos); // "prev" is invalidated but "it" remains valid. } } @@ -191,11 +194,11 @@ void SwapSpace::Free(void* ptrV, size_t size) { chunk.size += it->size; auto erase_pos = free_by_size_.find(FreeBySizeEntry { it->size, it }); DCHECK(erase_pos != free_by_size_.end()); - RemoveChunk(&free_by_start_, &free_by_size_, erase_pos); + RemoveChunk(erase_pos); // "it" is invalidated but we don't need it anymore. } } - InsertChunk(&free_by_start_, &free_by_size_, chunk); + InsertChunk(chunk); if (kCheckFreeMaps) { size_t free_after = CollectFree(free_by_start_, free_by_size_); diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h index 9127b6b096..b659f1d3c7 100644 --- a/compiler/utils/swap_space.h +++ b/compiler/utils/swap_space.h @@ -19,42 +19,17 @@ #include <cstdlib> #include <list> +#include <vector> #include <set> #include <stdint.h> #include <stddef.h> -#include "base/debug_stack.h" #include "base/logging.h" #include "base/macros.h" #include "base/mutex.h" -#include "mem_map.h" namespace art { -// Chunk of space. -struct SpaceChunk { - uint8_t* ptr; - size_t size; - - uintptr_t Start() const { - return reinterpret_cast<uintptr_t>(ptr); - } - uintptr_t End() const { - return reinterpret_cast<uintptr_t>(ptr) + size; - } -}; - -inline bool operator==(const SpaceChunk& lhs, const SpaceChunk& rhs) { - return (lhs.size == rhs.size) && (lhs.ptr == rhs.ptr); -} - -class SortChunkByPtr { - public: - bool operator()(const SpaceChunk& a, const SpaceChunk& b) const { - return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr); - } -}; - // An arena pool that creates arenas backed by an mmaped file. class SwapSpace { public: @@ -68,17 +43,27 @@ class SwapSpace { } private: - SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_); + // Chunk of space. + struct SpaceChunk { + uint8_t* ptr; + size_t size; - int fd_; - size_t size_; - std::list<SpaceChunk> maps_; + uintptr_t Start() const { + return reinterpret_cast<uintptr_t>(ptr); + } + uintptr_t End() const { + return reinterpret_cast<uintptr_t>(ptr) + size; + } + }; - // NOTE: Boost.Bimap would be useful for the two following members. + class SortChunkByPtr { + public: + bool operator()(const SpaceChunk& a, const SpaceChunk& b) const { + return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr); + } + }; - // Map start of a free chunk to its size. typedef std::set<SpaceChunk, SortChunkByPtr> FreeByStartSet; - FreeByStartSet free_by_start_ GUARDED_BY(lock_); // Map size to an iterator to free_by_start_'s entry. 
typedef std::pair<size_t, FreeByStartSet::const_iterator> FreeBySizeEntry; @@ -92,6 +77,21 @@ class SwapSpace { } }; typedef std::set<FreeBySizeEntry, FreeBySizeComparator> FreeBySizeSet; + + SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_); + + void RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) REQUIRES(lock_); + void InsertChunk(const SpaceChunk& chunk) REQUIRES(lock_); + + int fd_; + size_t size_; + std::list<SpaceChunk> maps_; + + // NOTE: Boost.Bimap would be useful for the two following members. + + // Map start of a free chunk to its size. + FreeByStartSet free_by_start_ GUARDED_BY(lock_); + + // Free chunks ordered by size. FreeBySizeSet free_by_size_ GUARDED_BY(lock_); mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; @@ -126,6 +126,9 @@ class SwapAllocator<void> { template <typename U> friend class SwapAllocator; + + template <typename U> + friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs); }; template <typename T> @@ -201,9 +204,22 @@ class SwapAllocator { template <typename U> friend class SwapAllocator; + + template <typename U> + friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs); }; template <typename T> +inline bool operator==(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) { + return lhs.swap_space_ == rhs.swap_space_; +} + +template <typename T> +inline bool operator!=(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) { + return !(lhs == rhs); +} + +template <typename T> using SwapVector = std::vector<T, SwapAllocator<T>>; template <typename T, typename Comparator> using SwapSet = std::set<T, Comparator, SwapAllocator<T>>;
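The ProcessProfiles() contract in the new profile_assistant.h distinguishes three outcomes: an error, success with no significant profile change, and success with a merged ProfileCompilationInfo. A minimal caller sketch follows; it assumes the out-parameter transfers ownership to the caller (the header comment does not spell this out), and ShouldCompileWithProfiles is a hypothetical helper, not ART code.

#include <memory>
#include <string>
#include <vector>

#include "jit/offline_profiling_info.h"
#include "profile_assistant.h"

// Returns true only when the new profile data is worth acting on.
static bool ShouldCompileWithProfiles(
    const std::vector<std::string>& profile_files,
    const std::vector<std::string>& reference_profile_files) {
  art::ProfileCompilationInfo* info = nullptr;
  if (!art::ProfileAssistant::ProcessProfiles(profile_files,
                                              reference_profile_files,
                                              &info)) {
    return false;  // Reading, merging, or writing a profile file failed.
  }
  // A null result means the difference against the reference profiles was
  // insignificant; a non-null object carries the merged data for compilation.
  std::unique_ptr<art::ProfileCompilationInfo> merged(info);
  return merged != nullptr;
}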
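The MVN fallbacks added to Thumb2Assembler::CmpConstant() lean on a two's-complement identity: mvn ip, #imm materializes ~imm, and choosing imm = ~(-value) (which equals value - 1) leaves exactly -value in IP, so cmn rn, ip sets flags from rn + (-value), just as the unencodable cmp rn, #value would. A standalone sketch of the arithmetic (plain C++, not assembler):

#include <cassert>
#include <cstdint>

int main() {
  int32_t value = 257;  // Not expressible as a Thumb2 modified immediate.
  // ~(-257) == 256, which IS encodable, so the assembler emits mvn.w ip, #256.
  uint32_t imm = ~static_cast<uint32_t>(-value);
  assert(imm == 256u);
  uint32_t ip = ~imm;  // What "mvn ip, #256" leaves in IP: 0xfffffeff == -257.
  // "cmn rn, ip" computes the flags of rn + (-257), i.e. behaves like
  // cmp rn, #257, matching the "mvn.w ip, #256" / "cmn.w r0, ip" pair in
  // CmpConstantResults above.
  assert(static_cast<int32_t>(ip) == -value);
  return 0;
}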
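The new MIPS and MIPS64 Trunc*/Cvt* emitters all funnel into EmitFR() with COP1 (0x11) as the opcode, a format field (0x10 = single, 0x11 = double, 0x15 = long), and a function code (0x09 = trunc.l, 0x0d = trunc.w, 0x20/0x21 = cvt.s/cvt.d). A sketch of that bit layout, assuming the standard MIPS FPU R-type encoding; this EncodeFR helper is illustrative, not the ART EmitFR implementation:

#include <cstdint>
#include <cstdio>

// Standard MIPS FPU R-type word: opcode(6) | fmt(5) | ft(5) | fs(5) | fd(5) | funct(6).
static uint32_t EncodeFR(uint32_t opcode, uint32_t fmt, uint32_t ft,
                         uint32_t fs, uint32_t fd, uint32_t funct) {
  return (opcode << 26) | (fmt << 21) | (ft << 16) | (fs << 11) | (fd << 6) | funct;
}

int main() {
  // trunc.w.s $f0, $f2: COP1 (0x11), fmt S (0x10), funct 0x0d -> 0x4600100d.
  std::printf("trunc.w.s f0, f2 = 0x%08x\n", EncodeFR(0x11, 0x10, 0, 2, 0, 0x0d));
  // trunc.l.d $f4, $f6: COP1 (0x11), fmt D (0x11), funct 0x09.
  std::printf("trunc.l.d f4, f6 = 0x%08x\n", EncodeFR(0x11, 0x11, 0, 6, 4, 0x09));
  return 0;
}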
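The operator== overloads added to swap_space.h implement the C++ allocator-equality requirement: storage obtained from one allocator may be released through another only if the two compare equal, which here means sharing the same backing SwapSpace (or both holding the null, malloc/free fallback state). A usage sketch; it assumes SwapAllocator's SwapSpace* constructor and CHECK() from base/logging.h, and the include path and swap_fd parameter are illustrative:

#include "base/logging.h"
#include "swap_space.h"

void SwapAllocatorEqualityDemo(int swap_fd) {
  art::SwapSpace swap(swap_fd, /* initial_size */ 1024 * 1024);
  art::SwapAllocator<int> a(&swap);
  art::SwapAllocator<int> b(&swap);
  // Same backing SwapSpace: memory from `a` may be freed through `b`, so
  // e.g. swapping two SwapVector<int> instances built on them is well-defined.
  CHECK(a == b);
  art::SwapAllocator<int> fallback(nullptr);  // Null space: plain malloc/free.
  CHECK(a != fallback);
  art::SwapVector<int> v(a);
  v.push_back(42);  // Backed by the mmapped swap file, not the native heap.
}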