Diffstat (limited to 'compiler')
179 files changed, 12157 insertions, 3505 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index c663fcbf89..ac95abdd8d 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -41,6 +41,7 @@ LIBART_COMPILER_SRC_FILES := \ dex/quick/gen_common.cc \ dex/quick/gen_invoke.cc \ dex/quick/gen_loadstore.cc \ + dex/quick/lazy_debug_frame_opcode_writer.cc \ dex/quick/local_optimizations.cc \ dex/quick/mips/assemble_mips.cc \ dex/quick/mips/call_mips.cc \ @@ -79,6 +80,13 @@ LIBART_COMPILER_SRC_FILES := \ driver/compiler_driver.cc \ driver/compiler_options.cc \ driver/dex_compilation_unit.cc \ + linker/relative_patcher.cc \ + linker/arm/relative_patcher_arm_base.cc \ + linker/arm/relative_patcher_thumb2.cc \ + linker/arm64/relative_patcher_arm64.cc \ + linker/x86/relative_patcher_x86_base.cc \ + linker/x86/relative_patcher_x86.cc \ + linker/x86_64/relative_patcher_x86_64.cc \ jit/jit_compiler.cc \ jni/quick/arm/calling_convention_arm.cc \ jni/quick/arm64/calling_convention_arm64.cc \ @@ -132,7 +140,6 @@ LIBART_COMPILER_SRC_FILES := \ utils/arm64/assembler_arm64.cc \ utils/arm64/managed_register_arm64.cc \ utils/assembler.cc \ - utils/dwarf_cfi.cc \ utils/mips/assembler_mips.cc \ utils/mips/managed_register_mips.cc \ utils/mips64/assembler_mips64.cc \ @@ -145,6 +152,7 @@ LIBART_COMPILER_SRC_FILES := \ buffered_output_stream.cc \ compiler.cc \ elf_writer.cc \ + elf_writer_debug.cc \ elf_writer_quick.cc \ file_output_stream.cc \ image_writer.cc \ diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h new file mode 100644 index 0000000000..918179290b --- /dev/null +++ b/compiler/cfi_test.h @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_CFI_TEST_H_ +#define ART_COMPILER_CFI_TEST_H_ + +#include <vector> +#include <memory> +#include <sstream> + +#include "arch/instruction_set.h" +#include "dwarf/dwarf_test.h" +#include "dwarf/headers.h" +#include "disassembler/disassembler.h" +#include "gtest/gtest.h" + +namespace art { + +class CFITest : public dwarf::DwarfTest { + public: + void GenerateExpected(FILE* f, InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& actual_asm, + const std::vector<uint8_t>& actual_cfi) { + std::vector<std::string> lines; + // Print the raw bytes. + fprintf(f, "static constexpr uint8_t expected_asm_%s[] = {", isa_str); + HexDump(f, actual_asm); + fprintf(f, "\n};\n"); + fprintf(f, "static constexpr uint8_t expected_cfi_%s[] = {", isa_str); + HexDump(f, actual_cfi); + fprintf(f, "\n};\n"); + // Pretty-print CFI opcodes. + constexpr bool is64bit = false; + dwarf::DebugFrameOpCodeWriter<> initial_opcodes; + dwarf::WriteEhFrameCIE(is64bit, dwarf::Reg(8), initial_opcodes, &eh_frame_data_); + dwarf::WriteEhFrameFDE(is64bit, 0, 0, actual_asm.size(), &actual_cfi, &eh_frame_data_); + ReformatCfi(Objdump(false, "-W"), &lines); + // Pretty-print assembly. 
+ auto* opts = new DisassemblerOptions(false, actual_asm.data(), true); + std::unique_ptr<Disassembler> disasm(Disassembler::Create(isa, opts)); + std::stringstream stream; + const uint8_t* base = actual_asm.data() + (isa == kThumb2 ? 1 : 0); + disasm->Dump(stream, base, base + actual_asm.size()); + ReformatAsm(&stream, &lines); + // Print CFI and assembly interleaved. + std::stable_sort(lines.begin(), lines.end(), CompareByAddress); + for (const std::string& line : lines) { + fprintf(f, "// %s\n", line.c_str()); + } + fprintf(f, "\n"); + } + + private: + // Helper - get offset just past the end of given string. + static size_t FindEndOf(const std::string& str, const char* substr) { + size_t pos = str.find(substr); + CHECK_NE(std::string::npos, pos); + return pos + strlen(substr); + } + + // Spit to lines and remove raw instruction bytes. + static void ReformatAsm(std::stringstream* stream, + std::vector<std::string>* output) { + std::string line; + while (std::getline(*stream, line)) { + line = line.substr(0, FindEndOf(line, ": ")) + + line.substr(FindEndOf(line, "\t")); + size_t pos; + while ((pos = line.find(" ")) != std::string::npos) { + line = line.replace(pos, 2, " "); + } + while (!line.empty() && line.back() == ' ') { + line.pop_back(); + } + output->push_back(line); + } + } + + // Find interesting parts of objdump output and prefix the lines with address. + static void ReformatCfi(const std::vector<std::string>& lines, + std::vector<std::string>* output) { + std::string address; + for (const std::string& line : lines) { + if (line.find("DW_CFA_nop") != std::string::npos) { + // Ignore. + } else if (line.find("DW_CFA_advance_loc") != std::string::npos) { + // The last 8 characters are the address. + address = "0x" + line.substr(line.size() - 8); + } else if (line.find("DW_CFA_") != std::string::npos) { + std::string new_line(line); + // "bad register" warning is caused by always using host (x86) objdump. + const char* bad_reg = "bad register: "; + size_t pos; + if ((pos = new_line.find(bad_reg)) != std::string::npos) { + new_line = new_line.replace(pos, strlen(bad_reg), ""); + } + // Remove register names in parentheses since they have x86 names. + if ((pos = new_line.find(" (")) != std::string::npos) { + new_line = new_line.replace(pos, FindEndOf(new_line, ")") - pos, ""); + } + // Use the .cfi_ prefix. + new_line = ".cfi_" + new_line.substr(FindEndOf(new_line, "DW_CFA_")); + output->push_back(address + ": " + new_line); + } + } + } + + // Compare strings by the address prefix. + static bool CompareByAddress(const std::string& lhs, const std::string& rhs) { + EXPECT_EQ(lhs[10], ':'); + EXPECT_EQ(rhs[10], ':'); + return strncmp(lhs.c_str(), rhs.c_str(), 10) < 0; + } + + // Pretty-print byte array. 12 bytes per line. + static void HexDump(FILE* f, const std::vector<uint8_t>& data) { + for (size_t i = 0; i < data.size(); i++) { + fprintf(f, i % 12 == 0 ? "\n " : " "); // Whitespace. 
+ fprintf(f, "0x%02X,", data[i]); + } + } +}; + +} // namespace art + +#endif // ART_COMPILER_CFI_TEST_H_ diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index 1d0aad5425..96d90bb443 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -24,6 +24,7 @@ #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/verification_results.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "interpreter/interpreter.h" #include "mirror/art_method.h" #include "mirror/dex_cache.h" diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index 1849e7ef64..4f7a970fdd 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -132,7 +132,7 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& native_gc_map, const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<LinkerPatch>& patches) + const ArrayRef<const LinkerPatch>& patches) : CompiledCode(driver, instruction_set, quick_code, !driver->DedupeEnabled()), owns_arrays_(!driver->DedupeEnabled()), frame_size_in_bytes_(frame_size_in_bytes), core_spill_mask_(core_spill_mask), @@ -142,7 +142,6 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, if (src_mapping_table == nullptr) { src_mapping_table_ = new SwapSrcMap(driver->GetSwapSpaceAllocator()); } else { - src_mapping_table->Arrange(); src_mapping_table_ = new SwapSrcMap(src_mapping_table->begin(), src_mapping_table->end(), driver->GetSwapSpaceAllocator()); } @@ -159,7 +158,7 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, } else { src_mapping_table_ = src_mapping_table == nullptr ? driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>()) : - driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>(src_mapping_table->Arrange())); + driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>(*src_mapping_table)); mapping_table_ = mapping_table.empty() ? 
nullptr : driver->DeduplicateMappingTable(mapping_table); vmap_table_ = driver->DeduplicateVMapTable(vmap_table); @@ -180,7 +179,7 @@ CompiledMethod* CompiledMethod::SwapAllocCompiledMethod( const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& native_gc_map, const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<LinkerPatch>& patches) { + const ArrayRef<const LinkerPatch>& patches) { SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator()); CompiledMethod* ret = alloc.allocate(1); alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask, @@ -189,38 +188,6 @@ CompiledMethod* CompiledMethod::SwapAllocCompiledMethod( return ret; } -CompiledMethod* CompiledMethod::SwapAllocCompiledMethodStackMap( - CompilerDriver* driver, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& stack_map) { - SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator()); - CompiledMethod* ret = alloc.allocate(1); - alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask, - fp_spill_mask, nullptr, ArrayRef<const uint8_t>(), stack_map, - ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), ArrayRef<LinkerPatch>()); - return ret; -} - -CompiledMethod* CompiledMethod::SwapAllocCompiledMethodCFI( - CompilerDriver* driver, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& cfi_info) { - SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator()); - CompiledMethod* ret = alloc.allocate(1); - alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask, - fp_spill_mask, nullptr, ArrayRef<const uint8_t>(), - ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), - cfi_info, ArrayRef<LinkerPatch>()); - return ret; -} void CompiledMethod::ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m) { diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index d6a07f6226..480d021db0 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -94,20 +94,12 @@ class SrcMapElem { uint32_t from_; int32_t to_; - explicit operator int64_t() const { - return (static_cast<int64_t>(to_) << 32) | from_; - } - - bool operator<(const SrcMapElem& sme) const { - return int64_t(*this) < int64_t(sme); - } - - bool operator==(const SrcMapElem& sme) const { - return int64_t(*this) == int64_t(sme); - } - - explicit operator uint8_t() const { - return static_cast<uint8_t>(from_ + to_); + // Lexicographical compare. 
+ bool operator<(const SrcMapElem& other) const { + if (from_ != other.from_) { + return from_ < other.from_; + } + return to_ < other.to_; } }; @@ -129,49 +121,33 @@ class SrcMap FINAL : public std::vector<SrcMapElem, Allocator> { SrcMap(InputIt first, InputIt last, const Allocator& alloc) : std::vector<SrcMapElem, Allocator>(first, last, alloc) {} - void SortByFrom() { - std::sort(begin(), end(), [] (const SrcMapElem& lhs, const SrcMapElem& rhs) -> bool { - return lhs.from_ < rhs.from_; - }); - } - - const_iterator FindByTo(int32_t to) const { - return std::lower_bound(begin(), end(), SrcMapElem({0, to})); - } - - SrcMap& Arrange() { + void push_back(const SrcMapElem& elem) { if (!empty()) { - std::sort(begin(), end()); - resize(std::unique(begin(), end()) - begin()); - shrink_to_fit(); + // Check that the addresses are inserted in sorted order. + DCHECK_GE(elem.from_, this->back().from_); + // If two consequitive entries map to the same value, ignore the later. + // E.g. for map {{0, 1}, {4, 1}, {8, 2}}, all values in [0,8) map to 1. + if (elem.to_ == this->back().to_) { + return; + } } - return *this; + std::vector<SrcMapElem, Allocator>::push_back(elem); } - void DeltaFormat(const SrcMapElem& start, uint32_t highest_pc) { - // Convert from abs values to deltas. - if (!empty()) { - SortByFrom(); - - // TODO: one PC can be mapped to several Java src lines. - // do we want such a one-to-many correspondence? - - // get rid of the highest values - size_t i = size() - 1; - for (; i > 0 ; i--) { - if ((*this)[i].from_ < highest_pc) { - break; - } - } - this->resize(i + 1); - - for (i = size(); --i >= 1; ) { - (*this)[i].from_ -= (*this)[i-1].from_; - (*this)[i].to_ -= (*this)[i-1].to_; - } - DCHECK((*this)[0].from_ >= start.from_); - (*this)[0].from_ -= start.from_; - (*this)[0].to_ -= start.to_; + // Returns true and the corresponding "to" value if the mapping is found. + // Oterwise returns false and 0. + std::pair<bool, int32_t> Find(uint32_t from) const { + // Finds first mapping such that lb.from_ >= from. + auto lb = std::lower_bound(begin(), end(), SrcMapElem {from, INT32_MIN}); + if (lb != end() && lb->from_ == from) { + // Found exact match. + return std::make_pair(true, lb->to_); + } else if (lb != begin()) { + // The previous mapping is still in effect. + return std::make_pair(true, (--lb)->to_); + } else { + // Not found because 'from' is smaller than first entry in the map. + return std::make_pair(false, 0); } } }; @@ -185,6 +161,7 @@ enum LinkerPatchType { kLinkerPatchCall, kLinkerPatchCallRelative, // NOTE: Actual patching is instruction_set-dependent. kLinkerPatchType, + kLinkerPatchDexCacheArray, // NOTE: Actual patching is instruction_set-dependent. 
}; class LinkerPatch { @@ -192,28 +169,44 @@ class LinkerPatch { static LinkerPatch MethodPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_method_idx) { - return LinkerPatch(literal_offset, kLinkerPatchMethod, - target_method_idx, target_dex_file); + LinkerPatch patch(literal_offset, kLinkerPatchMethod, target_dex_file); + patch.method_idx_ = target_method_idx; + return patch; } static LinkerPatch CodePatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_method_idx) { - return LinkerPatch(literal_offset, kLinkerPatchCall, - target_method_idx, target_dex_file); + LinkerPatch patch(literal_offset, kLinkerPatchCall, target_dex_file); + patch.method_idx_ = target_method_idx; + return patch; } static LinkerPatch RelativeCodePatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_method_idx) { - return LinkerPatch(literal_offset, kLinkerPatchCallRelative, - target_method_idx, target_dex_file); + LinkerPatch patch(literal_offset, kLinkerPatchCallRelative, target_dex_file); + patch.method_idx_ = target_method_idx; + return patch; } static LinkerPatch TypePatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_type_idx) { - return LinkerPatch(literal_offset, kLinkerPatchType, target_type_idx, target_dex_file); + LinkerPatch patch(literal_offset, kLinkerPatchType, target_dex_file); + patch.type_idx_ = target_type_idx; + return patch; + } + + static LinkerPatch DexCacheArrayPatch(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + size_t element_offset) { + DCHECK(IsUint<32>(element_offset)); + LinkerPatch patch(literal_offset, kLinkerPatchDexCacheArray, target_dex_file); + patch.pc_insn_offset_ = pc_insn_offset; + patch.element_offset_ = element_offset; + return patch; } LinkerPatch(const LinkerPatch& other) = default; @@ -227,10 +220,14 @@ class LinkerPatch { return patch_type_; } + bool IsPcRelative() const { + return Type() == kLinkerPatchCallRelative || Type() == kLinkerPatchDexCacheArray; + } + MethodReference TargetMethod() const { DCHECK(patch_type_ == kLinkerPatchMethod || patch_type_ == kLinkerPatchCall || patch_type_ == kLinkerPatchCallRelative); - return MethodReference(target_dex_file_, target_idx_); + return MethodReference(target_dex_file_, method_idx_); } const DexFile* TargetTypeDexFile() const { @@ -240,22 +237,52 @@ class LinkerPatch { uint32_t TargetTypeIndex() const { DCHECK(patch_type_ == kLinkerPatchType); - return target_idx_; + return type_idx_; + } + + const DexFile* TargetDexCacheDexFile() const { + DCHECK(patch_type_ == kLinkerPatchDexCacheArray); + return target_dex_file_; + } + + size_t TargetDexCacheElementOffset() const { + DCHECK(patch_type_ == kLinkerPatchDexCacheArray); + return element_offset_; + } + + uint32_t PcInsnOffset() const { + DCHECK(patch_type_ == kLinkerPatchDexCacheArray); + return pc_insn_offset_; } private: - LinkerPatch(size_t literal_offset, LinkerPatchType patch_type, - uint32_t target_idx, const DexFile* target_dex_file) - : literal_offset_(literal_offset), - patch_type_(patch_type), - target_idx_(target_idx), - target_dex_file_(target_dex_file) { + LinkerPatch(size_t literal_offset, LinkerPatchType patch_type, const DexFile* target_dex_file) + : target_dex_file_(target_dex_file), + literal_offset_(literal_offset), + patch_type_(patch_type) { + cmp1_ = 0u; + cmp2_ = 0u; + // The compiler rejects methods that are too big, so the compiled code + // of a single method really shouln't be anywhere close to 16MiB. 
+ DCHECK(IsUint<24>(literal_offset)); } - size_t literal_offset_; - LinkerPatchType patch_type_; - uint32_t target_idx_; // Method index (Call/Method patches) or type index (Type patches). const DexFile* target_dex_file_; + uint32_t literal_offset_ : 24; // Method code size up to 16MiB. + LinkerPatchType patch_type_ : 8; + union { + uint32_t cmp1_; // Used for relational operators. + uint32_t method_idx_; // Method index for Call/Method patches. + uint32_t type_idx_; // Type index for Type patches. + uint32_t element_offset_; // Element offset in the dex cache arrays. + }; + union { + uint32_t cmp2_; // Used for relational operators. + // Literal offset of the insn loading PC (same as literal_offset if it's the same insn, + // may be different if the PC-relative addressing needs multiple insns). + uint32_t pc_insn_offset_; + static_assert(sizeof(pc_insn_offset_) == sizeof(cmp2_), "needed by relational operators"); + }; friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs); friend bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs); @@ -264,15 +291,17 @@ class LinkerPatch { inline bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs) { return lhs.literal_offset_ == rhs.literal_offset_ && lhs.patch_type_ == rhs.patch_type_ && - lhs.target_idx_ == rhs.target_idx_ && - lhs.target_dex_file_ == rhs.target_dex_file_; + lhs.target_dex_file_ == rhs.target_dex_file_ && + lhs.cmp1_ == rhs.cmp1_ && + lhs.cmp2_ == rhs.cmp2_; } inline bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs) { return (lhs.literal_offset_ != rhs.literal_offset_) ? lhs.literal_offset_ < rhs.literal_offset_ : (lhs.patch_type_ != rhs.patch_type_) ? lhs.patch_type_ < rhs.patch_type_ - : (lhs.target_idx_ != rhs.target_idx_) ? lhs.target_idx_ < rhs.target_idx_ - : lhs.target_dex_file_ < rhs.target_dex_file_; + : (lhs.target_dex_file_ != rhs.target_dex_file_) ? lhs.target_dex_file_ < rhs.target_dex_file_ + : (lhs.cmp1_ != rhs.cmp1_) ? 
lhs.cmp1_ < rhs.cmp1_ + : lhs.cmp2_ < rhs.cmp2_; } class CompiledMethod FINAL : public CompiledCode { @@ -291,7 +320,7 @@ class CompiledMethod FINAL : public CompiledCode { const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& native_gc_map, const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<LinkerPatch>& patches = ArrayRef<LinkerPatch>()); + const ArrayRef<const LinkerPatch>& patches); virtual ~CompiledMethod(); @@ -307,24 +336,7 @@ class CompiledMethod FINAL : public CompiledCode { const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& native_gc_map, const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<LinkerPatch>& patches = ArrayRef<LinkerPatch>()); - - static CompiledMethod* SwapAllocCompiledMethodStackMap( - CompilerDriver* driver, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& stack_map); - - static CompiledMethod* SwapAllocCompiledMethodCFI(CompilerDriver* driver, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& cfi_info); + const ArrayRef<const LinkerPatch>& patches); static void ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m); @@ -362,8 +374,8 @@ class CompiledMethod FINAL : public CompiledCode { return cfi_info_; } - const SwapVector<LinkerPatch>& GetPatches() const { - return patches_; + ArrayRef<const LinkerPatch> GetPatches() const { + return ArrayRef<const LinkerPatch>(patches_); } private: @@ -375,7 +387,7 @@ class CompiledMethod FINAL : public CompiledCode { const uint32_t core_spill_mask_; // For quick code, a bit mask describing spilled FPR callee-save registers. const uint32_t fp_spill_mask_; - // For quick code, a set of pairs (PC, Line) mapping from native PC offset to Java line + // For quick code, a set of pairs (PC, DEX) mapping from native PC offset to DEX offset. SwapSrcMap* src_mapping_table_; // For quick code, a uleb128 encoded map from native PC offset to dex PC aswell as dex PC to // native PC offset. Size prefixed. @@ -388,7 +400,7 @@ class CompiledMethod FINAL : public CompiledCode { // For quick code, a FDE entry for the debug_frame section. SwapVector<uint8_t>* cfi_info_; // For quick code, linker patches needed by the method. - SwapVector<LinkerPatch> patches_; + const SwapVector<LinkerPatch> patches_; }; } // namespace art diff --git a/compiler/compiler.h b/compiler/compiler.h index 6ec39f9605..a04641e3fa 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -107,6 +107,9 @@ class Compiler { return driver_; } + // Whether to produce 64-bit ELF files for 64-bit targets. Leave this off for now. 
+ static constexpr bool kProduce64BitELFFiles = false; + private: CompilerDriver* const driver_; const uint64_t maximum_compilation_time_before_warning_; diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index 93d83c6fd4..0850f42a9a 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -403,13 +403,6 @@ class SuspendCheckElimination : public PassME { DCHECK(bb != nullptr); return c_unit->mir_graph->EliminateSuspendChecks(bb); } - - void End(PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(c_unit != nullptr); - c_unit->mir_graph->EliminateSuspendChecksEnd(); - } }; } // namespace art diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 39725dee38..0acdd422df 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -99,14 +99,16 @@ std::ostream& operator<<(std::ostream& os, const BBType& code); // Shared pseudo opcodes - must be < 0. enum LIRPseudoOpcode { - kPseudoExportedPC = -16, - kPseudoSafepointPC = -15, - kPseudoIntrinsicRetry = -14, - kPseudoSuspendTarget = -13, - kPseudoThrowTarget = -12, - kPseudoCaseLabel = -11, - kPseudoMethodEntry = -10, - kPseudoMethodExit = -9, + kPseudoPrologueBegin = -18, + kPseudoPrologueEnd = -17, + kPseudoEpilogueBegin = -16, + kPseudoEpilogueEnd = -15, + kPseudoExportedPC = -14, + kPseudoSafepointPC = -13, + kPseudoIntrinsicRetry = -12, + kPseudoSuspendTarget = -11, + kPseudoThrowTarget = -10, + kPseudoCaseLabel = -9, kPseudoBarrier = -8, kPseudoEntryBlock = -7, kPseudoExitBlock = -6, diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc index 2d4c18ff49..ec12221f3c 100644 --- a/compiler/dex/gvn_dead_code_elimination.cc +++ b/compiler/dex/gvn_dead_code_elimination.cc @@ -1357,7 +1357,6 @@ bool GvnDeadCodeElimination::RecordMIR(MIR* mir) { default: LOG(FATAL) << "Unexpected opcode: " << opcode; UNREACHABLE(); - break; } if (mir->ssa_rep->num_defs != 0) { diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc index dc222b5211..cdf5e38a9c 100644 --- a/compiler/dex/local_value_numbering.cc +++ b/compiler/dex/local_value_numbering.cc @@ -166,9 +166,9 @@ class LocalValueNumbering::AliasingArrayVersions { return gvn->LookupValue(kAliasingArrayOp, type, location, memory_version); } - static uint16_t LookupMergeValue(GlobalValueNumbering* gvn ATTRIBUTE_UNUSED, + static uint16_t LookupMergeValue(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn, - uint16_t type ATTRIBUTE_UNUSED, uint16_t location) { + uint16_t type, uint16_t location) { // If the location is non-aliasing in lvn, use the non-aliasing value. 
uint16_t array = gvn->GetArrayLocationBase(location); if (lvn->IsNonAliasingArray(array, type)) { @@ -182,8 +182,6 @@ class LocalValueNumbering::AliasingArrayVersions { static bool HasNewBaseVersion(GlobalValueNumbering* gvn ATTRIBUTE_UNUSED, const LocalValueNumbering* lvn, uint16_t type ATTRIBUTE_UNUSED) { - UNUSED(gvn); - UNUSED(type); return lvn->global_memory_version_ == lvn->merge_new_memory_version_; } diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index f638b0bf4d..2a920a4e29 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -1396,6 +1396,13 @@ void MIRGraph::CompilerInitializeSSAConversion() { InitializeBasicBlockDataFlow(); } +uint32_t MIRGraph::GetUseCountWeight(BasicBlock* bb) const { + // Each level of nesting adds *100 to count, up to 3 levels deep. + uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth)); + uint32_t weight = std::max(1U, depth * 100); + return weight; +} + /* * Count uses, weighting by loop nesting depth. This code only * counts explicitly used s_regs. A later phase will add implicit @@ -1405,9 +1412,7 @@ void MIRGraph::CountUses(BasicBlock* bb) { if (bb->block_type != kDalvikByteCode) { return; } - // Each level of nesting adds *100 to count, up to 3 levels deep. - uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth)); - uint32_t weight = std::max(1U, depth * 100); + uint32_t weight = GetUseCountWeight(bb); for (MIR* mir = bb->first_mir_insn; (mir != NULL); mir = mir->next) { if (mir->ssa_rep == NULL) { continue; @@ -1417,23 +1422,6 @@ void MIRGraph::CountUses(BasicBlock* bb) { raw_use_counts_[s_reg] += 1u; use_counts_[s_reg] += weight; } - if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) { - uint64_t df_attributes = GetDataFlowAttributes(mir); - // Implicit use of Method* ? */ - if (df_attributes & DF_UMS) { - /* - * Some invokes will not use Method* - need to perform test similar - * to that found in GenInvoke() to decide whether to count refs - * for Method* on invoke-class opcodes. This is a relatively expensive - * operation, so should only be done once. - * TODO: refactor InvokeUsesMethodStar() to perform check at parse time, - * and save results for both here and GenInvoke. For now, go ahead - * and assume all invokes use method*. - */ - raw_use_counts_[method_sreg_] += 1u; - use_counts_[method_sreg_] += weight; - } - } } } diff --git a/compiler/dex/mir_field_info.cc b/compiler/dex/mir_field_info.cc index d2079a254d..a9ab3bb0d4 100644 --- a/compiler/dex/mir_field_info.cc +++ b/compiler/dex/mir_field_info.cc @@ -19,6 +19,7 @@ #include <string.h> #include "base/logging.h" +#include "dex/verified_method.h" #include "driver/compiler_driver.h" #include "driver/compiler_driver-inl.h" #include "mirror/class_loader.h" // Only to allow casts in Handle<ClassLoader>. diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 3103f96e4e..4d340387f2 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -688,7 +688,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse /* Parse a Dex method and insert it into the MIRGraph at the current insert point. 
*/ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_flags, - InvokeType invoke_type, uint16_t class_def_idx, + InvokeType invoke_type ATTRIBUTE_UNUSED, uint16_t class_def_idx, uint32_t method_idx, jobject class_loader, const DexFile& dex_file) { current_code_item_ = code_item; method_stack_.push_back(std::make_pair(current_method_, current_offset_)); @@ -726,13 +726,6 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ null_block->hidden = true; entry_block_ = CreateNewBB(kEntryBlock); exit_block_ = CreateNewBB(kExitBlock); - // TODO: deprecate all "cu->" fields; move what's left to wherever CompilationUnit is allocated. - cu_->dex_file = &dex_file; - cu_->class_def_idx = class_def_idx; - cu_->method_idx = method_idx; - cu_->access_flags = access_flags; - cu_->invoke_type = invoke_type; - cu_->shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx)); } else { UNIMPLEMENTED(FATAL) << "Nested inlining not implemented."; /* @@ -1616,8 +1609,8 @@ void MIRGraph::ReplaceSpecialChars(std::string& str) { } std::string MIRGraph::GetSSAName(int ssa_reg) { - // TODO: This value is needed for LLVM and debugging. Currently, we compute this and then copy to - // the arena. We should be smarter and just place straight into the arena, or compute the + // TODO: This value is needed for debugging. Currently, we compute this and then copy to the + // arena. We should be smarter and just place straight into the arena, or compute the // value more lazily. int vreg = SRegToVReg(ssa_reg); if (vreg >= static_cast<int>(GetFirstTempVR())) { diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 3298af1162..85b13448da 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -960,6 +960,12 @@ class MIRGraph { */ CompilerTemp* GetNewCompilerTemp(CompilerTempType ct_type, bool wide); + /** + * @brief Used to remove last created compiler temporary when it's not needed. + * @param temp the temporary to remove. + */ + void RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp); + bool MethodIsLeaf() { return attributes_ & METHOD_IS_LEAF; } @@ -1079,7 +1085,6 @@ class MIRGraph { void EliminateDeadCodeEnd(); bool EliminateSuspendChecksGate(); bool EliminateSuspendChecks(BasicBlock* bb); - void EliminateSuspendChecksEnd(); uint16_t GetGvnIFieldId(MIR* mir) const { DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode)); @@ -1185,6 +1190,12 @@ class MIRGraph { void DoConstantPropagation(BasicBlock* bb); /** + * @brief Get use count weight for a given block. + * @param bb the BasicBlock. + */ + uint32_t GetUseCountWeight(BasicBlock* bb) const; + + /** * @brief Count the uses in the BasicBlock * @param bb the BasicBlock */ @@ -1396,10 +1407,6 @@ class MIRGraph { uint16_t* sfield_ids; // Ditto. GvnDeadCodeElimination* dce; } gvn; - // Suspend check elimination. 
- struct { - DexFileMethodInliner* inliner; - } sce; } temp_; static const int kInvalidEntry = -1; ArenaVector<BasicBlock*> block_list_; @@ -1451,6 +1458,7 @@ class MIRGraph { friend class GvnDeadCodeEliminationTest; friend class LocalValueNumberingTest; friend class TopologicalSortOrderTest; + friend class QuickCFITest; }; } // namespace art diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc index 34fb1bf0e0..0c84b82edd 100644 --- a/compiler/dex/mir_method_info.cc +++ b/compiler/dex/mir_method_info.cc @@ -16,9 +16,13 @@ # include "mir_method_info.h" +#include "dex/quick/dex_file_method_inliner.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "dex/verified_method.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "driver/compiler_driver-inl.h" +#include "driver/compiler_options.h" #include "mirror/class_loader.h" // Only to allow casts in Handle<ClassLoader>. #include "mirror/dex_cache.h" // Only to allow casts in Handle<DexCache>. #include "scoped_thread_state_change.h" @@ -62,6 +66,9 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, const DexFile* const dex_file = mUnit->GetDexFile(); const bool use_jit = runtime->UseJit(); const VerifiedMethod* const verified_method = mUnit->GetVerifiedMethod(); + DexFileToMethodInlinerMap* inliner_map = compiler_driver->GetMethodInlinerMap(); + DexFileMethodInliner* default_inliner = + (inliner_map != nullptr) ? inliner_map->GetMethodInliner(dex_file) : nullptr; for (auto it = method_infos, end = method_infos + count; it != end; ++it) { // For quickened invokes, the dex method idx is actually the mir offset. @@ -120,6 +127,7 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, if (UNLIKELY(resolved_method == nullptr)) { continue; } + compiler_driver->GetResolvedMethodDexFileLocation(resolved_method, &it->declaring_dex_file_, &it->declaring_class_idx_, &it->declaring_method_idx_); if (!it->IsQuickened()) { @@ -131,6 +139,7 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, it->vtable_idx_ = compiler_driver->GetResolvedMethodVTableIndex(resolved_method, invoke_type); } + MethodReference target_method(it->target_dex_file_, it->target_method_idx_); int fast_path_flags = compiler_driver->IsFastInvoke( soa, current_dex_cache, class_loader, mUnit, referrer_class.Get(), resolved_method, @@ -138,10 +147,23 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, const bool is_referrers_class = referrer_class.Get() == resolved_method->GetDeclaringClass(); const bool is_class_initialized = compiler_driver->IsMethodsClassInitialized(referrer_class.Get(), resolved_method); + + // Check if the target method is intrinsic or special. + InlineMethodFlags is_intrinsic_or_special = kNoInlineMethodFlags; + if (inliner_map != nullptr) { + auto* inliner = (target_method.dex_file == dex_file) + ? default_inliner + : inliner_map->GetMethodInliner(target_method.dex_file); + is_intrinsic_or_special = inliner->IsIntrinsicOrSpecial(target_method.dex_method_index); + } + uint16_t other_flags = it->flags_ & - ~(kFlagFastPath | kFlagClassIsInitialized | (kInvokeTypeMask << kBitSharpTypeBegin)); + ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized | + (kInvokeTypeMask << kBitSharpTypeBegin)); it->flags_ = other_flags | (fast_path_flags != 0 ? kFlagFastPath : 0u) | + ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) | + ((is_intrinsic_or_special & kInlineSpecial) != 0 ? 
kFlagIsSpecial : 0u) | (static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) | (is_referrers_class ? kFlagIsReferrersClass : 0u) | (is_class_initialized ? kFlagClassIsInitialized : 0u); diff --git a/compiler/dex/mir_method_info.h b/compiler/dex/mir_method_info.h index e131c96a81..7230c462cd 100644 --- a/compiler/dex/mir_method_info.h +++ b/compiler/dex/mir_method_info.h @@ -127,6 +127,14 @@ class MirMethodLoweringInfo : public MirMethodInfo { return (flags_ & kFlagFastPath) != 0u; } + bool IsIntrinsic() const { + return (flags_ & kFlagIsIntrinsic) != 0u; + } + + bool IsSpecial() const { + return (flags_ & kFlagIsSpecial) != 0u; + } + bool IsReferrersClass() const { return (flags_ & kFlagIsReferrersClass) != 0; } @@ -188,9 +196,11 @@ class MirMethodLoweringInfo : public MirMethodInfo { private: enum { kBitFastPath = kMethodInfoBitEnd, + kBitIsIntrinsic, + kBitIsSpecial, kBitInvokeTypeBegin, kBitInvokeTypeEnd = kBitInvokeTypeBegin + 3, // 3 bits for invoke type. - kBitSharpTypeBegin, + kBitSharpTypeBegin = kBitInvokeTypeEnd, kBitSharpTypeEnd = kBitSharpTypeBegin + 3, // 3 bits for sharp type. kBitIsReferrersClass = kBitSharpTypeEnd, kBitClassIsInitialized, @@ -199,6 +209,8 @@ class MirMethodLoweringInfo : public MirMethodInfo { }; static_assert(kMethodLoweringInfoBitEnd <= 16, "Too many flags"); static constexpr uint16_t kFlagFastPath = 1u << kBitFastPath; + static constexpr uint16_t kFlagIsIntrinsic = 1u << kBitIsIntrinsic; + static constexpr uint16_t kFlagIsSpecial = 1u << kBitIsSpecial; static constexpr uint16_t kFlagIsReferrersClass = 1u << kBitIsReferrersClass; static constexpr uint16_t kFlagClassIsInitialized = 1u << kBitClassIsInitialized; static constexpr uint16_t kFlagQuickened = 1u << kBitQuickened; diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index c85c3b6f21..9d7b4b4dfd 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -318,9 +318,11 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) // Since VR temps cannot be requested once the BE temps are requested, we // allow reservation of VR temps as well for BE. We size_t available_temps = reserved_temps_for_backend_ + GetNumAvailableVRTemps(); - if (available_temps <= 0 || (available_temps <= 1 && wide)) { + size_t needed_temps = wide ? 2u : 1u; + if (available_temps < needed_temps) { if (verbose) { - LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str << " are available."; + LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str + << " are available."; } return nullptr; } @@ -328,12 +330,8 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) // Update the remaining reserved temps since we have now used them. // Note that the code below is actually subtracting to remove them from reserve // once they have been claimed. It is careful to not go below zero. - if (reserved_temps_for_backend_ >= 1) { - reserved_temps_for_backend_--; - } - if (wide && reserved_temps_for_backend_ >= 1) { - reserved_temps_for_backend_--; - } + reserved_temps_for_backend_ = + std::max(reserved_temps_for_backend_, needed_temps) - needed_temps; // The new non-special compiler temp must receive a unique v_reg. 
compiler_temp->v_reg = GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_; @@ -407,6 +405,36 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) return compiler_temp; } +void MIRGraph::RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp) { + // Once the compiler temps have been committed, it's too late for any modifications. + DCHECK_EQ(compiler_temps_committed_, false); + + size_t used_temps = wide ? 2u : 1u; + + if (ct_type == kCompilerTempBackend) { + DCHECK(requested_backend_temp_); + + // Make the temps available to backend again. + reserved_temps_for_backend_ += used_temps; + } else if (ct_type == kCompilerTempVR) { + DCHECK(!requested_backend_temp_); + } else { + UNIMPLEMENTED(FATAL) << "No handling for compiler temp type " << static_cast<int>(ct_type); + } + + // Reduce the number of non-special compiler temps. + DCHECK_LE(used_temps, num_non_special_compiler_temps_); + num_non_special_compiler_temps_ -= used_temps; + + // Check that this was really the last temp. + DCHECK_EQ(static_cast<size_t>(temp->v_reg), + GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_); + + if (cu_->verbose) { + LOG(INFO) << "Last temporary has been removed."; + } +} + static bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) { bool is_taken; switch (opcode) { @@ -1489,7 +1517,7 @@ void MIRGraph::InlineSpecialMethods(BasicBlock* bb) { continue; } const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(mir); - if (!method_info.FastPath()) { + if (!method_info.FastPath() || !method_info.IsSpecial()) { continue; } @@ -1631,10 +1659,6 @@ bool MIRGraph::EliminateSuspendChecksGate() { !HasInvokes()) { // No invokes to actually eliminate any suspend checks. return false; } - if (cu_->compiler_driver != nullptr && cu_->compiler_driver->GetMethodInlinerMap() != nullptr) { - temp_.sce.inliner = - cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file); - } suspend_checks_in_loops_ = arena_->AllocArray<uint32_t>(GetNumBlocks(), kArenaAllocMisc); return true; } @@ -1652,9 +1676,9 @@ bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) { uint32_t suspend_checks_in_loops = (1u << bb->nesting_depth) - 1u; // Start with all loop heads. bool found_invoke = false; for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { - if (IsInstructionInvoke(mir->dalvikInsn.opcode) && - (temp_.sce.inliner == nullptr || - !temp_.sce.inliner->IsIntrinsic(mir->dalvikInsn.vB, nullptr))) { + if ((IsInstructionInvoke(mir->dalvikInsn.opcode) || + IsInstructionQuickInvoke(mir->dalvikInsn.opcode)) && + !GetMethodLoweringInfo(mir).IsIntrinsic()) { // Non-intrinsic invoke, rely on a suspend point in the invoked method. 
found_invoke = true; break; @@ -1717,10 +1741,6 @@ bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) { return true; } -void MIRGraph::EliminateSuspendChecksEnd() { - temp_.sce.inliner = nullptr; -} - bool MIRGraph::CanThrow(MIR* mir) const { if ((mir->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) { return false; diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 9ce5ebbc1b..10a4337cf5 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -474,7 +474,6 @@ class SuspendCheckEliminationTest : public MirOptimizationTest { for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) { change = cu_.mir_graph->EliminateSuspendChecks(bb); } - cu_.mir_graph->EliminateSuspendChecksEnd(); } SuspendCheckEliminationTest() diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index 3e69878846..c5ac4c1508 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -1083,7 +1083,9 @@ void ArmMir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) { #define PADDING_MOV_R5_R5 0x1C2D uint8_t* ArmMir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { + uint8_t* const write_buffer = write_pos; for (; lir != NULL; lir = NEXT_LIR(lir)) { + lir->offset = (write_pos - write_buffer); if (!lir->flags.is_nop) { int opcode = lir->opcode; if (IsPseudoLirOp(opcode)) { diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index d46c25a8da..3d18af6169 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -23,11 +23,13 @@ #include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "gc/accounting/card_table.h" #include "mirror/art_method.h" #include "mirror/object_array-inl.h" #include "entrypoints/quick/quick_entrypoints.h" #include "utils.h" +#include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -353,7 +355,16 @@ void ArmMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::ArmCore(num); +} + +static dwarf::Reg DwarfFpReg(int num) { + return dwarf::Reg::ArmFp(num); +} + void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. int spill_count = num_core_spills_ + num_fp_spills_; /* * On entry, r0, r1, r2 & r3 are live. Let the register allocation @@ -371,7 +382,6 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { * a leaf *and* our frame size < fudge factor. */ bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm); - NewLIR0(kPseudoMethodEntry); const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm); bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes); bool generate_explicit_stack_overflow_check = large_frame || @@ -402,28 +412,32 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } } /* Spill core callee saves */ - if (core_spill_mask_ == 0u) { - // Nothing to spill. - } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) { - // Spilling only low regs and/or LR, use 16-bit PUSH. 
- constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8; - NewLIR1(kThumbPush, - (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | - ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift)); - } else if (IsPowerOfTwo(core_spill_mask_)) { - // kThumb2Push cannot be used to spill a single register. - NewLIR1(kThumb2Push1, CTZ(core_spill_mask_)); - } else { - NewLIR1(kThumb2Push, core_spill_mask_); + if (core_spill_mask_ != 0u) { + if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) { + // Spilling only low regs and/or LR, use 16-bit PUSH. + constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8; + NewLIR1(kThumbPush, + (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | + ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift)); + } else if (IsPowerOfTwo(core_spill_mask_)) { + // kThumb2Push cannot be used to spill a single register. + NewLIR1(kThumb2Push1, CTZ(core_spill_mask_)); + } else { + NewLIR1(kThumb2Push, core_spill_mask_); + } + cfi_.AdjustCFAOffset(num_core_spills_ * kArmPointerSize); + cfi_.RelOffsetForMany(DwarfCoreReg(0), 0, core_spill_mask_, kArmPointerSize); } /* Need to spill any FP regs? */ - if (num_fp_spills_) { + if (num_fp_spills_ != 0u) { /* * NOTE: fp spills are a little different from core spills in that * they are pushed as a contiguous block. When promoting from * the fp set, we must allocate all singles from s16..highest-promoted */ NewLIR1(kThumb2VPushCS, num_fp_spills_); + cfi_.AdjustCFAOffset(num_fp_spills_ * kArmPointerSize); + cfi_.RelOffsetForMany(DwarfFpReg(0), 0, fp_spill_mask_, kArmPointerSize); } const int spill_size = spill_count * 4; @@ -444,12 +458,14 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR); } m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow); // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes // codegen and target are in thumb2 mode. // NOTE: native pointer. m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -464,6 +480,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { // Need to restore LR since we used it as a temp. AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size)); OpRegCopy(rs_rARM_SP, rs_rARM_LR); // Establish stack + cfi_.AdjustCFAOffset(frame_size_without_spills); } else { /* * If the frame is small enough we are guaranteed to have enough space that remains to @@ -474,6 +491,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { MarkTemp(rs_rARM_LR); FreeTemp(rs_rARM_LR); OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); Clobber(rs_rARM_LR); UnmarkTemp(rs_rARM_LR); LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr); @@ -483,13 +501,23 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { // Implicit stack overflow check has already been done. Just make room on the // stack for the frame now. 
OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } } else { OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } FlushIns(ArgLocs, rl_method); + // We can promote a PC-relative reference to dex cache arrays to a register + // if it's used at least twice. Without investigating where we should lazily + // load the reference, we conveniently load it after flushing inputs. + if (dex_cache_arrays_base_reg_.Valid()) { + OpPcRelDexCacheArrayAddr(cu_->dex_file, dex_cache_arrays_min_offset_, + dex_cache_arrays_base_reg_); + } + FreeTemp(rs_r0); FreeTemp(rs_r1); FreeTemp(rs_r2); @@ -498,7 +526,9 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } void ArmMir2Lir::GenExitSequence() { + cfi_.RememberState(); int spill_count = num_core_spills_ + num_fp_spills_; + /* * In the exit path, r0/r1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -506,35 +536,47 @@ void ArmMir2Lir::GenExitSequence() { LockTemp(rs_r0); LockTemp(rs_r1); - NewLIR0(kPseudoMethodExit); - OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4)); + int adjust = frame_size_ - (spill_count * kArmPointerSize); + OpRegImm(kOpAdd, rs_rARM_SP, adjust); + cfi_.AdjustCFAOffset(-adjust); /* Need to restore any FP callee saves? */ if (num_fp_spills_) { NewLIR1(kThumb2VPopCS, num_fp_spills_); + cfi_.AdjustCFAOffset(-num_fp_spills_ * kArmPointerSize); + cfi_.RestoreMany(DwarfFpReg(0), fp_spill_mask_); } - if ((core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0) { - /* Unspill rARM_LR to rARM_PC */ + bool unspill_LR_to_PC = (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0; + if (unspill_LR_to_PC) { core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum()); core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum()); } - if (core_spill_mask_ == 0u) { - // Nothing to unspill. - } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) { - // Unspilling only low regs and/or PC, use 16-bit POP. - constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8; - NewLIR1(kThumbPop, - (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) | - ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift)); - } else if (IsPowerOfTwo(core_spill_mask_)) { - // kThumb2Pop cannot be used to unspill a single register. - NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_)); - } else { - NewLIR1(kThumb2Pop, core_spill_mask_); + if (core_spill_mask_ != 0u) { + if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) { + // Unspilling only low regs and/or PC, use 16-bit POP. + constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8; + NewLIR1(kThumbPop, + (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) | + ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift)); + } else if (IsPowerOfTwo(core_spill_mask_)) { + // kThumb2Pop cannot be used to unspill a single register. + NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_)); + } else { + NewLIR1(kThumb2Pop, core_spill_mask_); + } + // If we pop to PC, there is no further epilogue code. + if (!unspill_LR_to_PC) { + cfi_.AdjustCFAOffset(-num_core_spills_ * kArmPointerSize); + cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. 
+ } } - if ((core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum())) == 0) { + if (!unspill_LR_to_PC) { /* We didn't pop to rARM_PC, so must do a bv rARM_LR */ NewLIR1(kThumbBx, rs_rARM_LR.GetReg()); } + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void ArmMir2Lir::GenSpecialExitSequence() { @@ -556,11 +598,16 @@ void ArmMir2Lir::GenSpecialEntryForSuspend() { NewLIR1(kThumbPush, (1u << rs_r0.GetRegNum()) | // ArtMethod* (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | // Spills other than LR. (1u << 8)); // LR encoded for 16-bit push. + cfi_.AdjustCFAOffset(frame_size_); + // Do not generate CFI for scratch register r0. + cfi_.RelOffsetForMany(DwarfCoreReg(0), 4, core_spill_mask_, kArmPointerSize); } void ArmMir2Lir::GenSpecialExitForSuspend() { // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) NewLIR1(kThumb2Pop, (1u << rs_r0.GetRegNum()) | core_spill_mask_); // 32-bit because of LR. + cfi_.AdjustCFAOffset(-frame_size_); + cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_); } static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { @@ -572,12 +619,12 @@ static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& targe * Bit of a hack here - in the absence of a real scheduling pass, * emit the next instruction in static & direct invoke sequences. */ -static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, - int state, const MethodReference& target_method, - uint32_t unused_idx ATTRIBUTE_UNUSED, - uintptr_t direct_code, uintptr_t direct_method, - InvokeType type) { - Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); +int ArmMir2Lir::ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, + int state, const MethodReference& target_method, + uint32_t unused_idx ATTRIBUTE_UNUSED, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) { + ArmMir2Lir* cg = static_cast<ArmMir2Lir*>(cu->cg.get()); if (direct_code != 0 && direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] @@ -598,17 +645,24 @@ static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSE return -1; } } else { + bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad(); RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { case 0: // Get the current Method* [sets kArg0] // TUNING: we can save a reg copy if Method* has been promoted. - cg->LoadCurrMethodDirect(arg0_ref); - break; + if (!use_pc_rel) { + cg->LoadCurrMethodDirect(arg0_ref); + break; + } + ++state; + FALLTHROUGH_INTENDED; case 1: // Get method->dex_cache_resolved_methods_ - cg->LoadRefDisp(arg0_ref, - mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), + arg0_ref, + kNotVolatile); + } // Set up direct code if known. 
if (direct_code != 0) { if (direct_code != static_cast<uintptr_t>(-1)) { @@ -620,14 +674,23 @@ static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSE cg->LoadCodeAddress(target_method, type, kInvokeTgt); } } - break; + if (!use_pc_rel || direct_code != 0) { + break; + } + ++state; + FALLTHROUGH_INTENDED; case 2: // Grab target method* CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadRefDisp(arg0_ref, - mirror::ObjectArray<mirror::Object>::OffsetOfElement( - target_method.dex_method_index).Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ObjectArray<mirror::Object>::OffsetOfElement( + target_method.dex_method_index).Int32Value(), + arg0_ref, + kNotVolatile); + } else { + size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index); + cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref); + } break; case 3: // Grab the code from the method* if (direct_code == 0) { diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 4141bcfe98..83b27df939 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -82,6 +82,9 @@ class ArmMir2Lir FINAL : public Mir2Lir { /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage) void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE; + bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE; + void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE; + // Required for target - register utilities. RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE; RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE { @@ -257,6 +260,9 @@ class ArmMir2Lir FINAL : public Mir2Lir { */ LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE; + void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE; + void DoPromotion() OVERRIDE; + /* * @brief Handle ARM specific literals. */ @@ -300,6 +306,13 @@ class ArmMir2Lir FINAL : public Mir2Lir { ArenaVector<LIR*> call_method_insns_; + // Instructions needing patching with PC relative code addresses. + ArenaVector<LIR*> dex_cache_access_insns_; + + // Register with a reference to the dex cache arrays at dex_cache_arrays_min_offset_, + // if promoted. + RegStorage dex_cache_arrays_base_reg_; + /** * @brief Given float register pair, returns Solo64 float register. * @param reg #RegStorage containing a float register pair (e.g. @c s2 and @c s3). 
@@ -329,6 +342,14 @@ class ArmMir2Lir FINAL : public Mir2Lir { } int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; + + static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, + int state, const MethodReference& target_method, + uint32_t unused_idx ATTRIBUTE_UNUSED, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type); + + void OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest); }; } // namespace art diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 9193e1b23c..47669db979 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -1087,6 +1087,36 @@ void ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { lir->target = target; } +bool ArmMir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + return dex_cache_arrays_layout_.Valid(); +} + +void ArmMir2Lir::OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest) { + LIR* movw = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), 0); + LIR* movt = NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), 0); + ArmOpcode add_pc_opcode = (r_dest.GetRegNum() < 8) ? kThumbAddRRLH : kThumbAddRRHH; + LIR* add_pc = NewLIR2(add_pc_opcode, r_dest.GetReg(), rs_rARM_PC.GetReg()); + add_pc->flags.fixup = kFixupLabel; + movw->operands[2] = WrapPointer(dex_file); + movw->operands[3] = offset; + movw->operands[4] = WrapPointer(add_pc); + movt->operands[2] = movw->operands[2]; + movt->operands[3] = movw->operands[3]; + movt->operands[4] = movw->operands[4]; + dex_cache_access_insns_.push_back(movw); + dex_cache_access_insns_.push_back(movt); +} + +void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) { + if (dex_cache_arrays_base_reg_.Valid()) { + LoadRefDisp(dex_cache_arrays_base_reg_, offset - dex_cache_arrays_min_offset_, + r_dest, kNotVolatile); + } else { + OpPcRelDexCacheArrayAddr(dex_file, offset, r_dest); + LoadRefDisp(r_dest, 0, r_dest, kNotVolatile); + } +} + LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) { return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count); } diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 9812d9ff99..5f27338e6b 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -575,7 +575,9 @@ RegisterClass ArmMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil ArmMir2Lir::ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), - call_method_insns_(arena->Adapter()) { + call_method_insns_(arena->Adapter()), + dex_cache_access_insns_(arena->Adapter()), + dex_cache_arrays_base_reg_(RegStorage::InvalidReg()) { call_method_insns_.reserve(100); // Sanity check - make sure encoding map lines up. for (int i = 0; i < kArmLast; i++) { @@ -901,14 +903,28 @@ RegStorage ArmMir2Lir::AllocPreservedSingle(int s_reg) { } void ArmMir2Lir::InstallLiteralPools() { + patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size()); + // PC-relative calls to methods. 
- patches_.reserve(call_method_insns_.size()); for (LIR* p : call_method_insns_) { - DCHECK_EQ(p->opcode, kThumb2Bl); - uint32_t target_method_idx = p->operands[1]; - const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]); - patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset, - target_dex_file, target_method_idx)); + DCHECK_EQ(p->opcode, kThumb2Bl); + uint32_t target_method_idx = p->operands[1]; + const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]); + patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset, + target_dex_file, target_method_idx)); + } + + // PC-relative dex cache array accesses. + for (LIR* p : dex_cache_access_insns_) { + DCHECK(p->opcode == kThumb2MovImm16 || p->opcode == kThumb2MovImm16H); + const LIR* add_pc = UnwrapPointer<LIR>(p->operands[4]); + DCHECK(add_pc->opcode == kThumbAddRRLH || add_pc->opcode == kThumbAddRRHH); + const DexFile* dex_file = UnwrapPointer<DexFile>(p->operands[2]); + uint32_t offset = p->operands[3]; + DCHECK(!p->flags.is_nop); + DCHECK(!add_pc->flags.is_nop); + patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset, + dex_file, add_pc->offset, offset)); } // And do the normal processing. diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index e4bd2a33ae..25ea6941c0 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -19,6 +19,7 @@ #include "arch/arm/instruction_set_features_arm.h" #include "arm_lir.h" #include "base/logging.h" +#include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "dex/reg_storage_eq.h" #include "driver/compiler_driver.h" @@ -1266,4 +1267,39 @@ size_t ArmMir2Lir::GetInstructionOffset(LIR* lir) { return offset; } +void ArmMir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { + // Start with the default counts. + Mir2Lir::CountRefs(core_counts, fp_counts, num_regs); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is used only once outside any loops (weight = 1), + // avoid the promotion, otherwise boost the weight by factor 3 because the full PC-relative + // load sequence is 4 instructions long and by promoting the PC base we save up to 3 + // instructions per use. + int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low); + if (core_counts[p_map_idx].count == 1) { + core_counts[p_map_idx].count = 0; + } else { + core_counts[p_map_idx].count *= 3; + } + } +} + +void ArmMir2Lir::DoPromotion() { + if (CanUseOpPcRelDexCacheArrayLoad()) { + pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false); + } + + Mir2Lir::DoPromotion(); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is promoted, remember the register but + // always remove the temp's stack location to avoid unnecessarily bloating the stack. + dex_cache_arrays_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg; + DCHECK(!dex_cache_arrays_base_reg_.Valid() || !dex_cache_arrays_base_reg_.IsFloat()); + mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_); + pc_rel_temp_ = nullptr; + } +} + } // namespace art diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index d15412a1bd..f6fa9389d0 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -236,6 +236,7 @@ enum A64Opcode { kA64Add4rrro, // add [00001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
kA64Add4RRre, // add [00001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0]. kA64Adr2xd, // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0]. + kA64Adrp2xd, // adrp [1] immlo[30-29] [10000] immhi[23-5] rd[4-0]. kA64And3Rrl, // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0]. kA64And4rrro, // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0]. kA64Asr3rrd, // asr [0001001100] immr[21-16] imms[15-10] rn[9-5] rd[4-0]. diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index 329bb1e770..2f1ae66bfc 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -131,6 +131,10 @@ const A64EncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP, "adr", "!0x, #!1d", kFixupAdr), + ENCODING_MAP(kA64Adrp2xd, NO_VARIANTS(0x90000000), + kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP, + "adrp", "!0x, #!1d", kFixupLabel), ENCODING_MAP(WIDE(kA64And3Rrl), SF_VARIANTS(0x12000000), kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, @@ -682,7 +686,9 @@ void Arm64Mir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) #define PADDING_NOP (UINT32_C(0xd503201f)) uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { + uint8_t* const write_buffer = write_pos; for (; lir != nullptr; lir = NEXT_LIR(lir)) { + lir->offset = (write_pos - write_buffer); bool opcode_is_wide = IS_WIDE(lir->opcode); A64Opcode opcode = UNWIDE(lir->opcode); diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 823cb60d97..4abbd77d88 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -23,10 +23,12 @@ #include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "gc/accounting/card_table.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/art_method.h" #include "mirror/object_array-inl.h" +#include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -280,7 +282,13 @@ void Arm64Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::Arm64Core(num); +} + void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. + /* * On entry, x0 to x7 are live. 
Let the register allocation * mechanism know so it doesn't try to use any of them when @@ -310,8 +318,6 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm64); - NewLIR0(kPseudoMethodEntry); - const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm64); const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes; bool generate_explicit_stack_overflow_check = large_frame || @@ -345,6 +351,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) if (spilled_already != frame_size_) { OpRegImm(kOpSub, rs_sp, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } if (!skip_overflow_check) { @@ -361,12 +368,14 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) GenerateTargetLabel(kPseudoThrowTarget); // Unwinds stack. m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow); m2l_->LockTemp(rs_xIP0); m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0); m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg()); m2l_->FreeTemp(rs_xIP0); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -393,19 +402,20 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) } void Arm64Mir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, r0/r1 are live - make sure they aren't * allocated by the register utilities as temps. */ LockTemp(rs_x0); LockTemp(rs_x1); - - NewLIR0(kPseudoMethodExit); - UnspillRegs(rs_sp, core_spill_mask_, fp_spill_mask_, frame_size_); // Finally return. NewLIR0(kA64Ret); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void Arm64Mir2Lir::GenSpecialExitSequence() { @@ -422,11 +432,16 @@ void Arm64Mir2Lir::GenSpecialEntryForSuspend() { core_vmap_table_.clear(); fp_vmap_table_.clear(); NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8); + cfi_.AdjustCFAOffset(frame_size_); + // Do not generate CFI for scratch register x0. + cfi_.RelOffset(DwarfCoreReg(rxLR), 8); } void Arm64Mir2Lir::GenSpecialExitForSuspend() { // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8); + cfi_.AdjustCFAOffset(-frame_size_); + cfi_.Restore(DwarfCoreReg(rxLR)); } static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { @@ -438,13 +453,13 @@ static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& tar * Bit of a hack here - in the absence of a real scheduling pass, * emit the next instruction in static & direct invoke sequences. 
*/ -static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, - int state, const MethodReference& target_method, - uint32_t unused_idx, - uintptr_t direct_code, uintptr_t direct_method, - InvokeType type) { +int Arm64Mir2Lir::Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t unused_idx, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) { UNUSED(info, unused_idx); - Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); + Arm64Mir2Lir* cg = static_cast<Arm64Mir2Lir*>(cu->cg.get()); if (direct_code != 0 && direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] @@ -465,17 +480,24 @@ static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, return -1; } } else { + bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad(); RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { case 0: // Get the current Method* [sets kArg0] // TUNING: we can save a reg copy if Method* has been promoted. - cg->LoadCurrMethodDirect(arg0_ref); - break; + if (!use_pc_rel) { + cg->LoadCurrMethodDirect(arg0_ref); + break; + } + ++state; + FALLTHROUGH_INTENDED; case 1: // Get method->dex_cache_resolved_methods_ - cg->LoadRefDisp(arg0_ref, - mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), + arg0_ref, + kNotVolatile); + } // Set up direct code if known. if (direct_code != 0) { if (direct_code != static_cast<uintptr_t>(-1)) { @@ -487,14 +509,23 @@ static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, cg->LoadCodeAddress(target_method, type, kInvokeTgt); } } - break; + if (!use_pc_rel || direct_code != 0) { + break; + } + ++state; + FALLTHROUGH_INTENDED; case 2: // Grab target method* CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadRefDisp(arg0_ref, - mirror::ObjectArray<mirror::Object>::OffsetOfElement( - target_method.dex_method_index).Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ObjectArray<mirror::Object>::OffsetOfElement( + target_method.dex_method_index).Int32Value(), + arg0_ref, + kNotVolatile); + } else { + size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index); + cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref); + } break; case 3: // Grab the code from the method* if (direct_code == 0) { diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 54fd46de0e..8184f02287 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -78,6 +78,9 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage) void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE; + bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE; + void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE; + LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg, int offset, int check_value, LIR* target, LIR** compare) OVERRIDE; @@ -393,9 +396,16 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags); + static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const 
MethodReference& target_method, + uint32_t unused_idx, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type); + static const A64EncodingMap EncodingMap[kA64Last]; ArenaVector<LIR*> call_method_insns_; + ArenaVector<LIR*> dex_cache_access_insns_; int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; }; diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 2372ccc527..20f61f2261 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -943,6 +943,23 @@ void Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { lir->target = target; } +bool Arm64Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + return dex_cache_arrays_layout_.Valid(); +} + +void Arm64Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, + RegStorage r_dest) { + LIR* adrp = NewLIR2(kA64Adrp2xd, r_dest.GetReg(), 0); + adrp->operands[2] = WrapPointer(dex_file); + adrp->operands[3] = offset; + adrp->operands[4] = WrapPointer(adrp); + dex_cache_access_insns_.push_back(adrp); + LIR* ldr = LoadBaseDisp(r_dest, 0, r_dest, kReference, kNotVolatile); + ldr->operands[4] = adrp->operands[4]; + ldr->flags.fixup = kFixupLabel; + dex_cache_access_insns_.push_back(ldr); +} + LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) { UNUSED(r_base, count); LOG(FATAL) << "Unexpected use of OpVldm for Arm64"; @@ -1441,6 +1458,14 @@ static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) { return reg_mask; } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::Arm64Core(num); +} + +static dwarf::Reg DwarfFpReg(int num) { + return dwarf::Reg::Arm64Fp(num); +} + static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) { int reg1 = -1, reg2 = -1; const int reg_log2_size = 3; @@ -1449,9 +1474,12 @@ static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32 reg_mask = GenPairWise(reg_mask, & reg1, & reg2); if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), offset << reg_log2_size); } else { m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), offset << reg_log2_size); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (offset + 1) << reg_log2_size); } } } @@ -1466,9 +1494,12 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfFpReg(reg1), offset << reg_log2_size); } else { m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfFpReg(reg2), offset << reg_log2_size); + m2l->cfi().RelOffset(DwarfFpReg(reg1), (offset + 1) << reg_log2_size); } } } @@ -1476,6 +1507,7 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t static int SpillRegsPreSub(Arm64Mir2Lir* m2l, uint32_t core_reg_mask, uint32_t fp_reg_mask, int frame_size) { m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size); + m2l->cfi().AdjustCFAOffset(frame_size); int core_count = POPCOUNT(core_reg_mask); @@ -1535,11 +1567,15 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core RegStorage::FloatSolo64(reg1).GetReg(), 
RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } else { m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg1).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), 0); cur_offset = 0; // That core reg needs to go into the upper half. } } else { @@ -1547,10 +1583,15 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2); m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg2), 0); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } else { fp_reg_mask = ExtractReg(fp_reg_mask, &reg1); m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } } } else { @@ -1563,12 +1604,19 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = ExtractReg(core_reg_mask, &reg1); m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize); } else { core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2); m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), 0); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize); } } + DCHECK_EQ(m2l->cfi().GetCurrentCFAOffset(), + static_cast<int>(all_offset * kArm64PointerSize)); if (fp_count != 0) { for (; fp_reg_mask != 0;) { @@ -1577,10 +1625,13 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfFpReg(reg1), cur_offset * kArm64PointerSize); // Do not increment offset here, as the second half will be filled by a core reg. } else { m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfFpReg(reg2), cur_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), (cur_offset + 1) * kArm64PointerSize); cur_offset += 2; } } @@ -1593,6 +1644,7 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = ExtractReg(core_reg_mask, &reg1); m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset + 1); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize); cur_offset += 2; // Half-slot filled now.
} } @@ -1603,6 +1655,8 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2); m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), cur_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize); } DCHECK_EQ(cur_offset, all_offset); @@ -1633,10 +1687,13 @@ static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint reg_mask = GenPairWise(reg_mask, & reg1, & reg2); if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfCoreReg(reg1)); } else { DCHECK_LE(offset, 63); m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfCoreReg(reg2)); + m2l->cfi().Restore(DwarfCoreReg(reg1)); } } } @@ -1650,9 +1707,12 @@ static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32 if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfFpReg(reg1)); } else { m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfFpReg(reg2)); + m2l->cfi().Restore(DwarfFpReg(reg1)); } } } @@ -1694,6 +1754,7 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t early_drop = RoundDown(early_drop, 16); OpRegImm64(kOpAdd, rs_sp, early_drop); + cfi_.AdjustCFAOffset(-early_drop); } // Unspill. @@ -1707,7 +1768,9 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t } // Drop the (rest of) the frame. - OpRegImm64(kOpAdd, rs_sp, frame_size - early_drop); + int adjust = frame_size - early_drop; + OpRegImm64(kOpAdd, rs_sp, adjust); + cfi_.AdjustCFAOffset(-adjust); } bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 09a34bf022..c5c0dc5447 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -606,7 +606,8 @@ RegisterClass Arm64Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volat Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), - call_method_insns_(arena->Adapter()) { + call_method_insns_(arena->Adapter()), + dex_cache_access_insns_(arena->Adapter()) { // Sanity check - make sure encoding map lines up. for (int i = 0; i < kA64Last; i++) { DCHECK_EQ(UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode), i) @@ -846,8 +847,9 @@ RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(ShortyArg arg) { } void Arm64Mir2Lir::InstallLiteralPools() { + patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size()); + // PC-relative calls to methods. - patches_.reserve(call_method_insns_.size()); for (LIR* p : call_method_insns_) { DCHECK_EQ(p->opcode, kA64Bl1t); uint32_t target_method_idx = p->operands[1]; @@ -856,6 +858,18 @@ void Arm64Mir2Lir::InstallLiteralPools() { target_dex_file, target_method_idx)); } + // PC-relative references to dex cache arrays.
+ for (LIR* p : dex_cache_access_insns_) { + DCHECK(p->opcode == kA64Adrp2xd || p->opcode == kA64Ldr3rXD); + const LIR* adrp = UnwrapPointer<LIR>(p->operands[4]); + DCHECK_EQ(adrp->opcode, kA64Adrp2xd); + const DexFile* dex_file = UnwrapPointer<DexFile>(adrp->operands[2]); + uint32_t offset = adrp->operands[3]; + DCHECK(!p->flags.is_nop); + DCHECK(!adrp->flags.is_nop); + patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset, dex_file, adrp->offset, offset)); + } + // And do the normal processing. Mir2Lir::InstallLiteralPools(); } diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index f48290d6f7..e9ad8ba175 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -589,13 +589,11 @@ LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r DCHECK_EQ(shift, 0); // Binary, but rm is encoded twice. return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg()); - break; case kOpRevsh: // Binary, but rm is encoded twice. NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg()); // "sxth r1, r2" is "sbfm r1, r2, #0, #15" return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15); - break; case kOp2Byte: DCHECK_EQ(shift, ENCODE_NO_SHIFT); // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)". @@ -645,10 +643,9 @@ LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage // Note: intentional fallthrough case kOpSub: return OpRegRegRegExtend(op, r_dest_src1, r_dest_src1, r_src2, ext, amount); - break; default: LOG(FATAL) << "Bad Opcode: " << opcode; - break; + UNREACHABLE(); } DCHECK(!IsPseudoLirOp(opcode)); diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index df72830801..5ea36c2769 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -29,6 +29,7 @@ #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/verification_results.h" #include "dex/verified_method.h" +#include "utils/dex_cache_arrays_layout-inl.h" #include "verifier/dex_gc_map.h" #include "verifier/method_verifier.h" #include "vmap_table.h" @@ -202,12 +203,17 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) { /* Handle pseudo-ops individually, and all regular insns as a group */ switch (lir->opcode) { - case kPseudoMethodEntry: - LOG(INFO) << "-------- method entry " - << PrettyMethod(cu_->method_idx, *cu_->dex_file); + case kPseudoPrologueBegin: + LOG(INFO) << "-------- PrologueBegin"; break; - case kPseudoMethodExit: - LOG(INFO) << "-------- Method_Exit"; + case kPseudoPrologueEnd: + LOG(INFO) << "-------- PrologueEnd"; + break; + case kPseudoEpilogueBegin: + LOG(INFO) << "-------- EpilogueBegin"; + break; + case kPseudoEpilogueEnd: + LOG(INFO) << "-------- EpilogueEnd"; break; case kPseudoBarrier: LOG(INFO) << "-------- BARRIER"; @@ -266,8 +272,9 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) { lir, base_addr)); std::string op_operands(BuildInsnString(GetTargetInstFmt(lir->opcode), lir, base_addr)); - LOG(INFO) << StringPrintf("%5p: %-9s%s%s", + LOG(INFO) << StringPrintf("%5p|0x%02x: %-9s%s%s", base_addr + offset, + lir->dalvik_offset, op_name.c_str(), op_operands.c_str(), lir->flags.is_nop ? 
"(nop)" : ""); } @@ -534,13 +541,11 @@ void Mir2Lir::InstallSwitchTables() { DCHECK(tab_rec->anchor->flags.fixup != kFixupNone); bx_offset = tab_rec->anchor->offset + 4; break; - case kX86: - bx_offset = 0; - break; case kX86_64: // RIP relative to switch table. bx_offset = tab_rec->offset; break; + case kX86: case kArm64: case kMips: case kMips64: @@ -712,14 +717,17 @@ void Mir2Lir::CreateMappingTables() { DCHECK_EQ(static_cast<size_t>(write_pos - &encoded_mapping_table_[0]), hdr_data_size); uint8_t* write_pos2 = write_pos + pc2dex_data_size; + bool is_in_prologue_or_epilogue = false; pc2dex_offset = 0u; pc2dex_dalvik_offset = 0u; dex2pc_offset = 0u; dex2pc_dalvik_offset = 0u; for (LIR* tgt_lir = first_lir_insn_; tgt_lir != nullptr; tgt_lir = NEXT_LIR(tgt_lir)) { - if (generate_src_map && !tgt_lir->flags.is_nop) { - src_mapping_table_.push_back(SrcMapElem({tgt_lir->offset, - static_cast<int32_t>(tgt_lir->dalvik_offset)})); + if (generate_src_map && !tgt_lir->flags.is_nop && tgt_lir->opcode >= 0) { + if (!is_in_prologue_or_epilogue) { + src_mapping_table_.push_back(SrcMapElem({tgt_lir->offset, + static_cast<int32_t>(tgt_lir->dalvik_offset)})); + } } if (!tgt_lir->flags.is_nop && (tgt_lir->opcode == kPseudoSafepointPC)) { DCHECK(pc2dex_offset <= tgt_lir->offset); @@ -737,6 +745,12 @@ void Mir2Lir::CreateMappingTables() { dex2pc_offset = tgt_lir->offset; dex2pc_dalvik_offset = tgt_lir->dalvik_offset; } + if (tgt_lir->opcode == kPseudoPrologueBegin || tgt_lir->opcode == kPseudoEpilogueBegin) { + is_in_prologue_or_epilogue = true; + } + if (tgt_lir->opcode == kPseudoPrologueEnd || tgt_lir->opcode == kPseudoEpilogueEnd) { + is_in_prologue_or_epilogue = false; + } } DCHECK_EQ(static_cast<size_t>(write_pos - &encoded_mapping_table_[0]), hdr_data_size + pc2dex_data_size); @@ -1053,6 +1067,12 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena mem_ref_type_(ResourceMask::kHeapRef), mask_cache_(arena), safepoints_(arena->Adapter()), + dex_cache_arrays_layout_(cu->compiler_driver->GetDexCacheArraysLayout(cu->dex_file)), + pc_rel_temp_(nullptr), + dex_cache_arrays_min_offset_(std::numeric_limits<uint32_t>::max()), + cfi_(&last_lir_insn_, + cu->compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(), + arena), in_to_reg_storage_mapping_(arena) { switch_tables_.reserve(4); fill_array_data_.reserve(4); @@ -1137,14 +1157,6 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { return lhs.LiteralOffset() < rhs.LiteralOffset(); }); - std::unique_ptr<std::vector<uint8_t>> cfi_info( - cu_->compiler_driver->GetCompilerOptions().GetGenerateGDBInformation() ? 
- ReturnFrameDescriptionEntry() : - nullptr); - ArrayRef<const uint8_t> cfi_ref; - if (cfi_info.get() != nullptr) { - cfi_ref = ArrayRef<const uint8_t>(*cfi_info); - } return CompiledMethod::SwapAllocCompiledMethod( cu_->compiler_driver, cu_->instruction_set, ArrayRef<const uint8_t>(code_buffer_), @@ -1153,8 +1165,8 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { ArrayRef<const uint8_t>(encoded_mapping_table_), ArrayRef<const uint8_t>(vmap_encoder.GetData()), ArrayRef<const uint8_t>(native_gc_map_), - cfi_ref, - ArrayRef<LinkerPatch>(patches_)); + ArrayRef<const uint8_t>(*cfi_.Patch(code_buffer_.size())), + ArrayRef<const LinkerPatch>(patches_)); } size_t Mir2Lir::GetMaxPossibleCompilerTemps() const { @@ -1304,9 +1316,15 @@ void Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx, OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target); } -std::vector<uint8_t>* Mir2Lir::ReturnFrameDescriptionEntry() { - // Default case is to do nothing. - return nullptr; +bool Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + return false; +} + +void Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file ATTRIBUTE_UNUSED, + int offset ATTRIBUTE_UNUSED, + RegStorage r_dest ATTRIBUTE_UNUSED) { + LOG(FATAL) << "No generic implementation."; + UNREACHABLE(); } RegLocation Mir2Lir::NarrowRegLoc(RegLocation loc) { diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 8e3f4ef726..4ac6c0c5b5 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -413,6 +413,17 @@ bool DexFileMethodInliner::AnalyseMethodCode(verifier::MethodVerifier* verifier) return success && AddInlineMethod(verifier->GetMethodReference().dex_method_index, method); } +InlineMethodFlags DexFileMethodInliner::IsIntrinsicOrSpecial(uint32_t method_index) { + ReaderMutexLock mu(Thread::Current(), lock_); + auto it = inline_methods_.find(method_index); + if (it != inline_methods_.end()) { + DCHECK_NE(it->second.flags & (kInlineIntrinsic | kInlineSpecial), 0); + return it->second.flags; + } else { + return kNoInlineMethodFlags; + } +} + bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) { ReaderMutexLock mu(Thread::Current(), lock_); auto it = inline_methods_.find(method_index); diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h index cb521da9df..d1e562119c 100644 --- a/compiler/dex/quick/dex_file_method_inliner.h +++ b/compiler/dex/quick/dex_file_method_inliner.h @@ -65,6 +65,11 @@ class DexFileMethodInliner { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_); /** + * Check whether a particular method index corresponds to an intrinsic or special function. + */ + InlineMethodFlags IsIntrinsicOrSpecial(uint32_t method_index) LOCKS_EXCLUDED(lock_); + + /** * Check whether a particular method index corresponds to an intrinsic function. 
*/ bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) LOCKS_EXCLUDED(lock_); diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 2bcaaca822..b132c4cc54 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -24,12 +24,14 @@ #include "dex/mir_graph.h" #include "dex/quick/arm/arm_lir.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/array.h" #include "mirror/object_array-inl.h" #include "mirror/object-inl.h" #include "mirror/object_reference.h" #include "utils.h" +#include "utils/dex_cache_arrays_layout-inl.h" #include "verifier/method_verifier.h" namespace art { @@ -56,6 +58,133 @@ ALWAYS_INLINE static inline bool ForceSlowTypePath(CompilationUnit* cu) { return (cu->enable_debug & (1 << kDebugSlowTypePath)) != 0; } +void Mir2Lir::GenIfNullUseHelperImmMethod( + RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method) { + class CallHelperImmMethodSlowPath : public LIRSlowPath { + public: + CallHelperImmMethodSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, + QuickEntrypointEnum trampoline_in, int imm_in, + RegStorage r_method_in, RegStorage r_result_in) + : LIRSlowPath(m2l, fromfast, cont), trampoline_(trampoline_in), + imm_(imm_in), r_method_(r_method_in), r_result_(r_result_in) { + } + + void Compile() { + GenerateTargetLabel(); + if (r_method_.Valid()) { + m2l_->CallRuntimeHelperImmReg(trampoline_, imm_, r_method_, true); + } else { + m2l_->CallRuntimeHelperImmMethod(trampoline_, imm_, true); + } + m2l_->OpRegCopy(r_result_, m2l_->TargetReg(kRet0, kRef)); + m2l_->OpUnconditionalBranch(cont_); + } + + private: + QuickEntrypointEnum trampoline_; + const int imm_; + const RegStorage r_method_; + const RegStorage r_result_; + }; + + LIR* branch = OpCmpImmBranch(kCondEq, r_result, 0, NULL); + LIR* cont = NewLIR0(kPseudoTargetLabel); + + AddSlowPath(new (arena_) CallHelperImmMethodSlowPath(this, branch, cont, trampoline, imm, + r_method, r_result)); +} + +RegStorage Mir2Lir::GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info, + int opt_flags) { + DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex); + // May do runtime call so everything to home locations. + FlushAllRegs(); + RegStorage r_base = TargetReg(kArg0, kRef); + LockTemp(r_base); + RegStorage r_method = RegStorage::InvalidReg(); // Loaded lazily, maybe in the slow-path. + if (CanUseOpPcRelDexCacheArrayLoad()) { + uint32_t offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex()); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, r_base); + } else { + // Using fixed register to sync with possible call to runtime support. + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, + kNotVolatile); + int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); + LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); + } + // r_base now points at static storage (Class*) or nullptr if the type is not yet resolved. + LIR* unresolved_branch = nullptr; + if (!field_info.IsClassInDexCache() && (opt_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) { + // Check if r_base is nullptr. 
+ unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, nullptr); + } + LIR* uninit_branch = nullptr; + if (!field_info.IsClassInitialized() && (opt_flags & MIR_CLASS_IS_INITIALIZED) == 0) { + // Check if r_base is not yet initialized class. + RegStorage r_tmp = TargetReg(kArg2, kNotWide); + LockTemp(r_tmp); + uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base, + mirror::Class::StatusOffset().Int32Value(), + mirror::Class::kStatusInitialized, nullptr, nullptr); + FreeTemp(r_tmp); + } + if (unresolved_branch != nullptr || uninit_branch != nullptr) { + // + // Slow path to ensure a class is initialized for sget/sput. + // + class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath { + public: + // There are up to two branches to the static field slow path, the "unresolved" when the type + // entry in the dex cache is nullptr, and the "uninit" when the class is not yet initialized. + // At least one will be non-nullptr here, otherwise we wouldn't generate the slow path. + StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index, + RegStorage r_base_in, RegStorage r_method_in) + : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont), + second_branch_(unresolved != nullptr ? uninit : nullptr), + storage_index_(storage_index), r_base_(r_base_in), r_method_(r_method_in) { + } + + void Compile() { + LIR* target = GenerateTargetLabel(); + if (second_branch_ != nullptr) { + second_branch_->target = target; + } + if (r_method_.Valid()) { + // ArtMethod* was loaded in normal path - use it. + m2l_->CallRuntimeHelperImmReg(kQuickInitializeStaticStorage, storage_index_, r_method_, + true); + } else { + // ArtMethod* wasn't loaded in normal path - use a helper that loads it. + m2l_->CallRuntimeHelperImmMethod(kQuickInitializeStaticStorage, storage_index_, true); + } + // Copy helper's result into r_base, a no-op on all but MIPS. + m2l_->OpRegCopy(r_base_, m2l_->TargetReg(kRet0, kRef)); + + m2l_->OpUnconditionalBranch(cont_); + } + + private: + // Second branch to the slow path, or nullptr if there's only one branch. + LIR* const second_branch_; + + const int storage_index_; + const RegStorage r_base_; + RegStorage r_method_; + }; + + // The slow path is invoked if the r_base is nullptr or the class pointed + // to by it is not initialized. + LIR* cont = NewLIR0(kPseudoTargetLabel); + AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont, + field_info.StorageIndex(), r_base, r_method)); + } + if (IsTemp(r_method)) { + FreeTemp(r_method); + } + return r_base; +} + /* * Generate a kPseudoBarrier marker to indicate the boundary of special * blocks. @@ -571,41 +700,6 @@ void Mir2Lir::GenFillArrayData(MIR* mir, DexOffset table_offset, RegLocation rl_ CallRuntimeHelperImmRegLocation(kQuickHandleFillArrayData, table_offset_from_start, rl_src, true); } -// -// Slow path to ensure a class is initialized for sget/sput. -// -class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath { - public: - // There are up to two branches to the static field slow path, the "unresolved" when the type - // entry in the dex cache is null, and the "uninit" when the class is not yet initialized. - // At least one will be non-null here, otherwise we wouldn't generate the slow path. - StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index, - RegStorage r_base) - : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont), - second_branch_(unresolved != nullptr ? 
uninit : nullptr), - storage_index_(storage_index), r_base_(r_base) { - } - - void Compile() { - LIR* target = GenerateTargetLabel(); - if (second_branch_ != nullptr) { - second_branch_->target = target; - } - m2l_->CallRuntimeHelperImm(kQuickInitializeStaticStorage, storage_index_, true); - // Copy helper's result into r_base, a no-op on all but MIPS. - m2l_->OpRegCopy(r_base_, m2l_->TargetReg(kRet0, kRef)); - - m2l_->OpUnconditionalBranch(cont_); - } - - private: - // Second branch to the slow path, or null if there's only one branch. - LIR* const second_branch_; - - const int storage_index_; - const RegStorage r_base_; -}; - void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) { const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir); DCHECK_EQ(SPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType()); @@ -615,65 +709,23 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) { RegStorage r_base; if (field_info.IsReferrersClass()) { // Fast path, static storage base is this method's class - RegLocation rl_method = LoadCurrMethod(); r_base = AllocTempRef(); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, + RegStorage r_method = LoadCurrMethodWithHint(r_base); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, kNotVolatile); - if (IsTemp(rl_method.reg)) { - FreeTemp(rl_method.reg); - } } else { // Medium path, static storage base in a different class which requires checks that the other // class is initialized. - // TODO: remove initialized check now that we are initializing classes in the compiler driver. - DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex); - // May do runtime call so everything to home locations. - FlushAllRegs(); - // Using fixed register to sync with possible call to runtime support. - RegStorage r_method = TargetReg(kArg1, kRef); - LockTemp(r_method); - LoadCurrMethodDirect(r_method); - r_base = TargetReg(kArg0, kRef); - LockTemp(r_base); - LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, - kNotVolatile); - int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); - LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); - // r_base now points at static storage (Class*) or NULL if the type is not yet resolved. - LIR* unresolved_branch = nullptr; - if (!field_info.IsClassInDexCache() && - (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) { - // Check if r_base is NULL. - unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL); - } - LIR* uninit_branch = nullptr; + r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags); if (!field_info.IsClassInitialized() && (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) { - // Check if r_base is not yet initialized class. - RegStorage r_tmp = TargetReg(kArg2, kNotWide); - LockTemp(r_tmp); - uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base, - mirror::Class::StatusOffset().Int32Value(), - mirror::Class::kStatusInitialized, nullptr, nullptr); - FreeTemp(r_tmp); - } - if (unresolved_branch != nullptr || uninit_branch != nullptr) { - // The slow path is invoked if the r_base is NULL or the class pointed - // to by it is not initialized. 
- LIR* cont = NewLIR0(kPseudoTargetLabel); - AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont, - field_info.StorageIndex(), r_base)); - - if (uninit_branch != nullptr) { - // Ensure load of status and store of value don't re-order. - // TODO: Presumably the actual value store is control-dependent on the status load, - // and will thus not be reordered in any case, since stores are never speculated. - // Does later code "know" that the class is now initialized? If so, we still - // need the barrier to guard later static loads. - GenMemBarrier(kLoadAny); - } + // Ensure load of status and store of value don't re-order. + // TODO: Presumably the actual value store is control-dependent on the status load, + // and will thus not be reordered in any case, since stores are never speculated. + // Does later code "know" that the class is now initialized? If so, we still + // need the barrier to guard later static loads. + GenMemBarrier(kLoadAny); } - FreeTemp(r_method); } // rBase now holds static storage base RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile()); @@ -735,57 +787,19 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, OpSize size, Primitive::Typ RegStorage r_base; if (field_info.IsReferrersClass()) { // Fast path, static storage base is this method's class - RegLocation rl_method = LoadCurrMethod(); r_base = AllocTempRef(); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, + RegStorage r_method = LoadCurrMethodWithHint(r_base); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, kNotVolatile); } else { // Medium path, static storage base in a different class which requires checks that the other // class is initialized - DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex); - // May do runtime call so everything to home locations. - FlushAllRegs(); - // Using fixed register to sync with possible call to runtime support. - RegStorage r_method = TargetReg(kArg1, kRef); - LockTemp(r_method); - LoadCurrMethodDirect(r_method); - r_base = TargetReg(kArg0, kRef); - LockTemp(r_base); - LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, - kNotVolatile); - int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); - LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); - // r_base now points at static storage (Class*) or NULL if the type is not yet resolved. - LIR* unresolved_branch = nullptr; - if (!field_info.IsClassInDexCache() && - (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) { - // Check if r_base is NULL. - unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL); - } - LIR* uninit_branch = nullptr; + r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags); if (!field_info.IsClassInitialized() && (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) { - // Check if r_base is not yet initialized class. - RegStorage r_tmp = TargetReg(kArg2, kNotWide); - LockTemp(r_tmp); - uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base, - mirror::Class::StatusOffset().Int32Value(), - mirror::Class::kStatusInitialized, nullptr, nullptr); - FreeTemp(r_tmp); + // Ensure load of status and load of value don't re-order. + GenMemBarrier(kLoadAny); } - if (unresolved_branch != nullptr || uninit_branch != nullptr) { - // The slow path is invoked if the r_base is NULL or the class pointed - // to by it is not initialized. 
- LIR* cont = NewLIR0(kPseudoTargetLabel); - AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont, - field_info.StorageIndex(), r_base)); - - if (uninit_branch != nullptr) { - // Ensure load of status and load of value don't re-order. - GenMemBarrier(kLoadAny); - } - } - FreeTemp(r_method); } // r_base now holds static storage base RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile()); @@ -1022,64 +1036,41 @@ void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl } void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { - RegLocation rl_method = LoadCurrMethod(); - CheckRegLocation(rl_method); - RegStorage res_reg = AllocTempRef(); + RegLocation rl_result; if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, type_idx)) { // Call out to helper which resolves type and verifies access. // Resolved type returned in kRet0. - CallRuntimeHelperImmReg(kQuickInitializeTypeAndVerifyAccess, type_idx, rl_method.reg, true); - RegLocation rl_result = GetReturn(kRefReg); - StoreValue(rl_dest, rl_result); + CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true); + rl_result = GetReturn(kRefReg); } else { - RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); - // We're don't need access checks, load type from dex cache - int32_t dex_cache_offset = - mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(); - LoadRefDisp(rl_method.reg, dex_cache_offset, res_reg, kNotVolatile); - int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(res_reg, offset_of_type, rl_result.reg, kNotVolatile); + rl_result = EvalLoc(rl_dest, kRefReg, true); + // We don't need access checks, load type from dex cache + RegStorage r_method = RegStorage::InvalidReg(); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, rl_result.reg); + } else { + RegLocation rl_method = LoadCurrMethod(); + CheckRegLocation(rl_method); + r_method = rl_method.reg; + int32_t dex_cache_offset = + mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(); + RegStorage res_reg = AllocTempRef(); + LoadRefDisp(r_method, dex_cache_offset, res_reg, kNotVolatile); + int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); + LoadRefDisp(res_reg, offset_of_type, rl_result.reg, kNotVolatile); + FreeTemp(res_reg); + } if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx) || ForceSlowTypePath(cu_)) { // Slow path, at runtime test if type is null and if so initialize FlushAllRegs(); - LIR* branch = OpCmpImmBranch(kCondEq, rl_result.reg, 0, NULL); - LIR* cont = NewLIR0(kPseudoTargetLabel); - - // Object to generate the slow path for class resolution. 
- class SlowPath : public LIRSlowPath { - public: - SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in, - const RegLocation& rl_method_in, const RegLocation& rl_result_in) - : LIRSlowPath(m2l, fromfast, cont_in), - type_idx_(type_idx_in), rl_method_(rl_method_in), rl_result_(rl_result_in) { - } - - void Compile() { - GenerateTargetLabel(); - - m2l_->CallRuntimeHelperImmReg(kQuickInitializeType, type_idx_, rl_method_.reg, true); - m2l_->OpRegCopy(rl_result_.reg, m2l_->TargetReg(kRet0, kRef)); - m2l_->OpUnconditionalBranch(cont_); - } - - private: - const int type_idx_; - const RegLocation rl_method_; - const RegLocation rl_result_; - }; - - // Add to list for future. - AddSlowPath(new (arena_) SlowPath(this, branch, cont, type_idx, rl_method, rl_result)); - - StoreValue(rl_dest, rl_result); - } else { - // Fast path, we're done - just store result - StoreValue(rl_dest, rl_result); + GenIfNullUseHelperImmMethod(rl_result.reg, kQuickInitializeType, type_idx, r_method); } } + StoreValue(rl_dest, rl_result); } void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { @@ -1092,64 +1083,42 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { FlushAllRegs(); LockCallTemps(); // Using explicit registers - // If the Method* is already in a register, we can save a copy. - RegLocation rl_method = mir_graph_->GetMethodLoc(); - RegStorage r_method; - if (rl_method.location == kLocPhysReg) { - // A temp would conflict with register use below. - DCHECK(!IsTemp(rl_method.reg)); - r_method = rl_method.reg; - } else { - r_method = TargetReg(kArg2, kRef); - LoadCurrMethodDirect(r_method); - } - // Method to declaring class. - LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), - TargetReg(kArg0, kRef), kNotVolatile); - // Declaring class to dex cache strings. - LoadRefDisp(TargetReg(kArg0, kRef), mirror::Class::DexCacheStringsOffset().Int32Value(), - TargetReg(kArg0, kRef), kNotVolatile); - // Might call out to helper, which will return resolved string in kRet0 - LoadRefDisp(TargetReg(kArg0, kRef), offset_of_string, TargetReg(kRet0, kRef), kNotVolatile); - LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0, kRef), 0, NULL); - LIR* cont = NewLIR0(kPseudoTargetLabel); - - { - // Object to generate the slow path for string resolution. - class SlowPath : public LIRSlowPath { - public: - SlowPath(Mir2Lir* m2l, LIR* fromfast_in, LIR* cont_in, RegStorage r_method_in, - int32_t string_idx_in) - : LIRSlowPath(m2l, fromfast_in, cont_in), - r_method_(r_method_in), string_idx_(string_idx_in) { - } - - void Compile() { - GenerateTargetLabel(); - m2l_->CallRuntimeHelperImmReg(kQuickResolveString, string_idx_, r_method_, true); - m2l_->OpUnconditionalBranch(cont_); - } - - private: - const RegStorage r_method_; - const int32_t string_idx_; - }; - - AddSlowPath(new (arena_) SlowPath(this, fromfast, cont, r_method, string_idx)); + RegStorage ret0 = TargetReg(kRet0, kRef); + RegStorage r_method = RegStorage::InvalidReg(); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.StringOffset(string_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, ret0); + } else { + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + // Method to declaring class. + RegStorage arg0 = TargetReg(kArg0, kRef); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), + arg0, kNotVolatile); + // Declaring class to dex cache strings. 
+ LoadRefDisp(arg0, mirror::Class::DexCacheStringsOffset().Int32Value(), arg0, kNotVolatile); + + LoadRefDisp(arg0, offset_of_string, ret0, kNotVolatile); } + GenIfNullUseHelperImmMethod(ret0, kQuickResolveString, string_idx, r_method); GenBarrier(); StoreValue(rl_dest, GetReturn(kRefReg)); } else { - RegLocation rl_method = LoadCurrMethod(); - RegStorage res_reg = AllocTempRef(); RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), res_reg, - kNotVolatile); - LoadRefDisp(res_reg, mirror::Class::DexCacheStringsOffset().Int32Value(), res_reg, - kNotVolatile); - LoadRefDisp(res_reg, offset_of_string, rl_result.reg, kNotVolatile); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.StringOffset(string_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, rl_result.reg); + } else { + RegLocation rl_method = LoadCurrMethod(); + RegStorage res_reg = AllocTempRef(); + LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), res_reg, + kNotVolatile); + LoadRefDisp(res_reg, mirror::Class::DexCacheStringsOffset().Int32Value(), res_reg, + kNotVolatile); + LoadRefDisp(res_reg, offset_of_string, rl_result.reg, kNotVolatile); + FreeTemp(res_reg); + } StoreValue(rl_dest, rl_result); } } @@ -1224,14 +1193,20 @@ void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, Re RegStorage check_class = AllocTypedTemp(false, kRefReg); RegStorage object_class = AllocTypedTemp(false, kRefReg); - LoadCurrMethodDirect(check_class); if (use_declaring_class) { - LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), check_class, + RegStorage r_method = LoadCurrMethodWithHint(check_class); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), check_class, + kNotVolatile); + LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class, kNotVolatile); + } else if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, check_class); LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class, kNotVolatile); } else { - LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), + RegStorage r_method = LoadCurrMethodWithHint(check_class); + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), check_class, kNotVolatile); LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class, kNotVolatile); @@ -1267,20 +1242,19 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know FlushAllRegs(); // May generate a call - use explicit registers LockCallTemps(); - RegStorage method_reg = TargetReg(kArg1, kRef); - LoadCurrMethodDirect(method_reg); // kArg1 <= current Method* RegStorage class_reg = TargetReg(kArg2, kRef); // kArg2 will hold the Class* RegStorage ref_reg = TargetReg(kArg0, kRef); // kArg0 will hold the ref. 
RegStorage ret_reg = GetReturn(kRefReg).reg; if (needs_access_check) { // Check we have access to type_idx and if not throw IllegalAccessError, // returns Class* in kArg0 - CallRuntimeHelperImm(kQuickInitializeTypeAndVerifyAccess, type_idx, true); + CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true); OpRegCopy(class_reg, ret_reg); // Align usage with fast path LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref } else if (use_declaring_class) { + RegStorage r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref - LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg, kNotVolatile); } else { if (can_assume_type_is_in_dex_cache) { @@ -1288,42 +1262,23 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref } - // Load dex cache entry into class_reg (kArg2) - LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), - class_reg, kNotVolatile); - int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); + RegStorage r_method = RegStorage::InvalidReg(); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg); + } else { + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + // Load dex cache entry into class_reg (kArg2) + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), + class_reg, kNotVolatile); + int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); + LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); + } if (!can_assume_type_is_in_dex_cache) { - LIR* slow_path_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL); - LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); + GenIfNullUseHelperImmMethod(class_reg, kQuickInitializeType, type_idx, r_method); // Should load value here. LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref - - class InitTypeSlowPath : public Mir2Lir::LIRSlowPath { - public: - InitTypeSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont, uint32_t type_idx_in, - RegLocation rl_src_in) - : LIRSlowPath(m2l, branch, cont), type_idx_(type_idx_in), - rl_src_(rl_src_in) { - } - - void Compile() OVERRIDE { - GenerateTargetLabel(); - - m2l_->CallRuntimeHelperImm(kQuickInitializeType, type_idx_, true); - m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kRef), - m2l_->TargetReg(kRet0, kRef)); // Align usage with fast path - m2l_->OpUnconditionalBranch(cont_); - } - - private: - uint32_t type_idx_; - RegLocation rl_src_; - }; - - AddSlowPath(new (arena_) InitTypeSlowPath(this, slow_path_branch, slow_path_target, - type_idx, rl_src)); } } /* kArg0 is ref, kArg2 is class. 
If ref==null, use directly as bool result */ @@ -1426,55 +1381,34 @@ void Mir2Lir::GenCheckCast(int opt_flags, uint32_t insn_idx, uint32_t type_idx, FlushAllRegs(); // May generate a call - use explicit registers LockCallTemps(); - RegStorage method_reg = TargetReg(kArg1, kRef); - LoadCurrMethodDirect(method_reg); // kArg1 <= current Method* RegStorage class_reg = TargetReg(kArg2, kRef); // kArg2 will hold the Class* if (needs_access_check) { // Check we have access to type_idx and if not throw IllegalAccessError, // returns Class* in kRet0 // InitializeTypeAndVerifyAccess(idx, method) - CallRuntimeHelperImm(kQuickInitializeTypeAndVerifyAccess, type_idx, true); + CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true); OpRegCopy(class_reg, TargetReg(kRet0, kRef)); // Align usage with fast path } else if (use_declaring_class) { + RegStorage method_reg = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg, kNotVolatile); } else { // Load dex cache entry into class_reg (kArg2) - LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), - class_reg, kNotVolatile); - int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); + RegStorage r_method = RegStorage::InvalidReg(); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg); + } else { + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), + class_reg, kNotVolatile); + int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); + LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); + } if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) { // Need to test presence of type in dex cache at runtime - LIR* hop_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL); - LIR* cont = NewLIR0(kPseudoTargetLabel); - - // Slow path to initialize the type. Executed if the type is NULL. 
- class SlowPath : public LIRSlowPath { - public: - SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in, - const RegStorage class_reg_in) - : LIRSlowPath(m2l, fromfast, cont_in), - type_idx_(type_idx_in), class_reg_(class_reg_in) { - } - - void Compile() { - GenerateTargetLabel(); - - // Call out to helper, which will return resolved type in kArg0 - // InitializeTypeFromCode(idx, method) - m2l_->CallRuntimeHelperImmReg(kQuickInitializeType, type_idx_, - m2l_->TargetReg(kArg1, kRef), true); - m2l_->OpRegCopy(class_reg_, m2l_->TargetReg(kRet0, kRef)); // Align usage with fast path - m2l_->OpUnconditionalBranch(cont_); - } - - public: - const int type_idx_; - const RegStorage class_reg_; - }; - - AddSlowPath(new (arena_) SlowPath(this, hop_branch, cont, type_idx, class_reg)); + GenIfNullUseHelperImmMethod(class_reg, kQuickInitializeType, type_idx, r_method); } } // At this point, class_reg (kArg2) has class diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 2d41ba1795..db7095dafb 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -24,6 +24,7 @@ #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "invoke_type.h" #include "mirror/array.h" @@ -1434,10 +1435,12 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, void Mir2Lir::GenInvoke(CallInfo* info) { DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); - const DexFile* dex_file = info->method_ref.dex_file; - if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file) - ->GenIntrinsic(this, info)) { - return; + if (mir_graph_->GetMethodLoweringInfo(info->mir).IsIntrinsic()) { + const DexFile* dex_file = info->method_ref.dex_file; + auto* inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file); + if (inliner->GenIntrinsic(this, info)) { + return; + } } GenInvokeNoInline(info); } diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index b71691f20a..54e5742837 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -340,6 +340,20 @@ void Mir2Lir::LoadCurrMethodDirect(RegStorage r_tgt) { LoadValueDirectFixed(mir_graph_->GetMethodLoc(), r_tgt); } +RegStorage Mir2Lir::LoadCurrMethodWithHint(RegStorage r_hint) { + // If the method is promoted to a register, return that register, otherwise load it to r_hint. + // (Replacement for LoadCurrMethod() usually used when LockCallTemps() is in effect.) + DCHECK(r_hint.Valid()); + RegLocation rl_method = mir_graph_->GetMethodLoc(); + if (rl_method.location == kLocPhysReg) { + DCHECK(!IsTemp(rl_method.reg)); + return rl_method.reg; + } else { + LoadCurrMethodDirect(r_hint); + return r_hint; + } +} + RegLocation Mir2Lir::LoadCurrMethod() { return LoadValue(mir_graph_->GetMethodLoc(), kRefReg); } diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc new file mode 100644 index 0000000000..5cfb0ff557 --- /dev/null +++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lazy_debug_frame_opcode_writer.h" +#include "mir_to_lir.h" + +namespace art { +namespace dwarf { + +const ArenaVector<uint8_t>* LazyDebugFrameOpCodeWriter::Patch(size_t code_size) { + if (!this->enabled_) { + DCHECK(this->data()->empty()); + return this->data(); + } + if (!patched_) { + patched_ = true; + // Move our data buffer to temporary variable. + ArenaVector<uint8_t> old_opcodes(this->opcodes_.get_allocator()); + old_opcodes.swap(this->opcodes_); + // Refill our data buffer with patched opcodes. + this->opcodes_.reserve(old_opcodes.size() + advances_.size() + 4); + size_t pos = 0; + for (auto advance : advances_) { + DCHECK_GE(advance.pos, pos); + // Copy old data up to the point when advance was issued. + this->opcodes_.insert(this->opcodes_.end(), + old_opcodes.begin() + pos, + old_opcodes.begin() + advance.pos); + pos = advance.pos; + // This may be null if there is no slow-path code after return. + LIR* next_lir = NEXT_LIR(advance.last_lir_insn); + // Insert the advance command with its final offset. + Base::AdvancePC(next_lir != nullptr ? next_lir->offset : code_size); + } + // Copy the final segment. + this->opcodes_.insert(this->opcodes_.end(), + old_opcodes.begin() + pos, + old_opcodes.end()); + Base::AdvancePC(code_size); + } + return this->data(); +} + +} // namespace dwarf +} // namespace art diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h new file mode 100644 index 0000000000..94ffd7f957 --- /dev/null +++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ +#define ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ + +#include "base/arena_allocator.h" +#include "base/arena_containers.h" +#include "dwarf/debug_frame_opcode_writer.h" + +namespace art { +struct LIR; +namespace dwarf { + +// When we are generating the CFI code, we do not know the instruction offsets, +// so this class stores the LIR references and patches the instruction stream later. +class LazyDebugFrameOpCodeWriter FINAL + : public DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> { + typedef DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> Base; + public: + // This method is implicitly called by the opcode writers.
+ virtual void ImplicitlyAdvancePC() OVERRIDE { + DCHECK_EQ(patched_, false); + DCHECK_EQ(this->current_pc_, 0); + advances_.push_back({this->data()->size(), *last_lir_insn_}); + } + + const ArenaVector<uint8_t>* Patch(size_t code_size); + + explicit LazyDebugFrameOpCodeWriter(LIR** last_lir_insn, bool enable_writes, + ArenaAllocator* allocator) + : Base(enable_writes, allocator->Adapter()), + last_lir_insn_(last_lir_insn), + advances_(allocator->Adapter()), + patched_(false) { + } + + private: + typedef struct { + size_t pos; + LIR* last_lir_insn; + } Advance; + + using Base::data; // Hidden. Use Patch method instead. + + LIR** last_lir_insn_; + ArenaVector<Advance> advances_; + bool patched_; + + DISALLOW_COPY_AND_ASSIGN(LazyDebugFrameOpCodeWriter); +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc index e5738998a0..6cdf56773e 100644 --- a/compiler/dex/quick/local_optimizations.cc +++ b/compiler/dex/quick/local_optimizations.cc @@ -493,15 +493,14 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { /* Found a slot to hoist to */ if (slot >= 0) { LIR* cur_lir = prev_inst_list[slot]; - LIR* new_load_lir = - static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR)); - *new_load_lir = *this_lir; + LIR* prev_lir = PREV_LIR(this_lir); + UnlinkLIR(this_lir); /* * Insertion is guaranteed to succeed since check_lir * is never the first LIR on the list */ - InsertLIRBefore(cur_lir, new_load_lir); - NopLIR(this_lir); + InsertLIRBefore(cur_lir, this_lir); + this_lir = prev_lir; // Continue the loop with the next LIR. } } } diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index de66b35418..05570e4bde 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -238,7 +238,12 @@ void MipsMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::MipsCore(num); +} + void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); int spill_count = num_core_spills_ + num_fp_spills_; /* * On entry, A0, A1, A2 & A3 are live. On Mips64, A4, A5, A6 & A7 are also live. @@ -275,7 +280,6 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) */ skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, target); - NewLIR0(kPseudoMethodEntry); RegStorage check_reg = AllocPtrSizeTemp(); RegStorage new_sp = AllocPtrSizeTemp(); const RegStorage rs_sp = TargetPtrReg(kSp); @@ -305,10 +309,12 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) // RA is offset 0 since we push in reverse order. m2l_->LoadWordDisp(m2l_->TargetPtrReg(kSp), 0, m2l_->TargetPtrReg(kLr)); m2l_->OpRegImm(kOpAdd, m2l_->TargetPtrReg(kSp), sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); RegStorage r_tgt = m2l_->CallHelperSetup(kQuickThrowStackOverflow); // Doesn't clobber LR. 
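// The CFA bookkeeping convention adopted throughout this change (an illustrative
// summary, using the cfi() interface added by this patch): every emitted SP
// adjustment is mirrored by an AdjustCFAOffset() call so the unwind opcodes track
// the stack pointer, and mir_to_lir.cc DCHECKs that cfi_.GetCurrentCFAOffset()
// equals frame_size_ at the prologue/epilogue boundaries. The basic pairing is:
//
//   OpRegImm(kOpSub, rs_sp, frame_size_);   // prologue: SP -= frame_size_
//   cfi_.AdjustCFAOffset(frame_size_);      // unwinder: CFA = SP + frame_size_
//   ...
//   OpRegImm(kOpAdd, rs_sp, frame_size_);   // epilogue: SP += frame_size_
//   cfi_.AdjustCFAOffset(-frame_size_);     // unwinder: CFA = SP again
//
// Out-of-line slow paths like this one pop part of the frame before calling the
// throw entrypoint, so the tracked offset is adjusted down above and restored right
// after the helper call below, leaving the bookkeeping for the surrounding in-line
// code unchanged.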
m2l_->CallHelper(r_tgt, kQuickThrowStackOverflow, false /* MarkSafepointPC */, false /* UseLink */); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -319,8 +325,10 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_count * ptr_size)); // TODO: avoid copy for small frame sizes. OpRegCopy(rs_sp, new_sp); // Establish stack. + cfi_.AdjustCFAOffset(frame_sub); } else { OpRegImm(kOpSub, rs_sp, frame_sub); + cfi_.AdjustCFAOffset(frame_sub); } FlushIns(ArgLocs, rl_method); @@ -338,6 +346,7 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) } void MipsMir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, rMIPS_RET0/rMIPS_RET1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -345,9 +354,11 @@ void MipsMir2Lir::GenExitSequence() { LockTemp(TargetPtrReg(kRet0)); LockTemp(TargetPtrReg(kRet1)); - NewLIR0(kPseudoMethodExit); UnSpillCoreRegs(); OpReg(kOpBx, TargetPtrReg(kLr)); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void MipsMir2Lir::GenSpecialExitSequence() { @@ -366,15 +377,20 @@ void MipsMir2Lir::GenSpecialEntryForSuspend() { fp_vmap_table_.clear(); const RegStorage rs_sp = TargetPtrReg(kSp); OpRegImm(kOpSub, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(frame_size_); StoreWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr)); + cfi_.RelOffset(DwarfCoreReg(rRA), frame_size_ - (cu_->target64 ? 8 : 4)); StoreWordDisp(rs_sp, 0, TargetPtrReg(kArg0)); + // Do not generate CFI for scratch register A0. } void MipsMir2Lir::GenSpecialExitForSuspend() { // Pop the frame. Don't pop ArtMethod*, it's no longer needed. const RegStorage rs_sp = TargetPtrReg(kSp); LoadWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 
8 : 4), TargetPtrReg(kLr)); + cfi_.Restore(DwarfCoreReg(rRA)); OpRegImm(kOpAdd, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(-frame_size_); } /* @@ -387,73 +403,73 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); if (direct_code != 0 && direct_method != 0) { switch (state) { - case 0: // Get the current Method* [sets kArg0] - if (direct_code != static_cast<uintptr_t>(-1)) { - if (cu->target64) { - cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code); + case 0: // Get the current Method* [sets kArg0] + if (direct_code != static_cast<uintptr_t>(-1)) { + if (cu->target64) { + cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code); + } else { + cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + } } else { - cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + cg->LoadCodeAddress(target_method, type, kInvokeTgt); } - } else { - cg->LoadCodeAddress(target_method, type, kInvokeTgt); - } - if (direct_method != static_cast<uintptr_t>(-1)) { - if (cu->target64) { - cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method); + if (direct_method != static_cast<uintptr_t>(-1)) { + if (cu->target64) { + cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method); + } else { + cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method); + } } else { - cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method); + cg->LoadMethodAddress(target_method, type, kArg0); } - } else { - cg->LoadMethodAddress(target_method, type, kArg0); - } - break; - default: - return -1; + break; + default: + return -1; } } else { RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { - case 0: // Get the current Method* [sets kArg0] - // TUNING: we can save a reg copy if Method* has been promoted. - cg->LoadCurrMethodDirect(arg0_ref); - break; - case 1: // Get method->dex_cache_resolved_methods_ - cg->LoadRefDisp(arg0_ref, - mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - arg0_ref, - kNotVolatile); - // Set up direct code if known. - if (direct_code != 0) { - if (direct_code != static_cast<uintptr_t>(-1)) { - if (cu->target64) { - cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code); + case 0: // Get the current Method* [sets kArg0] + // TUNING: we can save a reg copy if Method* has been promoted. + cg->LoadCurrMethodDirect(arg0_ref); + break; + case 1: // Get method->dex_cache_resolved_methods_ + cg->LoadRefDisp(arg0_ref, + mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), + arg0_ref, + kNotVolatile); + // Set up direct code if known. 
+ if (direct_code != 0) { + if (direct_code != static_cast<uintptr_t>(-1)) { + if (cu->target64) { + cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code); + } else { + cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + } } else { - cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds()); + cg->LoadCodeAddress(target_method, type, kInvokeTgt); } - } else { - CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds()); - cg->LoadCodeAddress(target_method, type, kInvokeTgt); } - } - break; - case 2: // Grab target method* - CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadRefDisp(arg0_ref, - mirror::ObjectArray<mirror::Object>:: - OffsetOfElement(target_method.dex_method_index).Int32Value(), - arg0_ref, - kNotVolatile); - break; - case 3: // Grab the code from the method* - if (direct_code == 0) { - int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - InstructionSetPointerSize(cu->instruction_set)).Int32Value(); - // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt] - cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt)); - } - break; - default: - return -1; + break; + case 2: // Grab target method* + CHECK_EQ(cu->dex_file, target_method.dex_file); + cg->LoadRefDisp(arg0_ref, + mirror::ObjectArray<mirror::Object>:: + OffsetOfElement(target_method.dex_method_index).Int32Value(), + arg0_ref, + kNotVolatile); + break; + case 3: // Grab the code from the method* + if (direct_code == 0) { + int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + InstructionSetPointerSize(cu->instruction_set)).Int32Value(); + // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt] + cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt)); + } + break; + default: + return -1; } } return state + 1; diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc index 626b36ea28..1ca8bb618b 100644 --- a/compiler/dex/quick/mips/int_mips.cc +++ b/compiler/dex/quick/mips/int_mips.cc @@ -237,12 +237,12 @@ void MipsMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { // note the operands are swapped for the mtc1 and mthc1 instr. // Here if dest is fp reg and src is core reg. if (fpuIs32Bit_) { - NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg()); - NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg()); + NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg()); + NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg()); } else { - r_dest = Fp64ToSolo32(r_dest); - NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg()); - NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg()); + r_dest = Fp64ToSolo32(r_dest); + NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg()); + NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg()); } } } else { @@ -309,7 +309,13 @@ RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStor RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) { RegStorage t_reg = AllocTemp(); - NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit); + // lit is guarantee to be a 16-bit constant + if (IsUint<16>(lit)) { + NewLIR3(kMipsOri, t_reg.GetReg(), rZERO, lit); + } else { + // Addiu will sign extend the entire width (32 or 64) of the register. 
+ NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit); + } RegLocation rl_result = GenDivRem(rl_dest, reg1, t_reg, is_div); FreeTemp(t_reg); return rl_result; @@ -815,20 +821,20 @@ void MipsMir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, } OpKind op = kOpBkpt; switch (opcode) { - case Instruction::SHL_LONG: - case Instruction::SHL_LONG_2ADDR: - op = kOpLsl; - break; - case Instruction::SHR_LONG: - case Instruction::SHR_LONG_2ADDR: - op = kOpAsr; - break; - case Instruction::USHR_LONG: - case Instruction::USHR_LONG_2ADDR: - op = kOpLsr; - break; - default: - LOG(FATAL) << "Unexpected case: " << opcode; + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + op = kOpLsl; + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + op = kOpAsr; + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + op = kOpLsr; + break; + default: + LOG(FATAL) << "Unexpected case: " << opcode; } rl_shift = LoadValue(rl_shift, kCoreReg); rl_src1 = LoadValueWide(rl_src1, kCoreReg); diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index a94fad7534..4c0bd8378b 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -830,6 +830,10 @@ LIR* MipsMir2Lir::GenAtomic64Store(RegStorage r_base, int displacement, RegStora return OpReg(kOpBlx, r_tgt); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::MipsCore(num); +} + void MipsMir2Lir::SpillCoreRegs() { if (num_core_spills_ == 0) { return; @@ -839,11 +843,13 @@ void MipsMir2Lir::SpillCoreRegs() { int offset = num_core_spills_ * ptr_size; const RegStorage rs_sp = TargetPtrReg(kSp); OpRegImm(kOpSub, rs_sp, offset); + cfi_.AdjustCFAOffset(offset); for (int reg = 0; mask; mask >>= 1, reg++) { if (mask & 0x1) { offset -= ptr_size; StoreWordDisp(rs_sp, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg)); + cfi_.RelOffset(DwarfCoreReg(reg), offset); } } } @@ -861,9 +867,11 @@ void MipsMir2Lir::UnSpillCoreRegs() { offset -= ptr_size; LoadWordDisp(rs_sp, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg)); + cfi_.Restore(DwarfCoreReg(reg)); } } OpRegImm(kOpAdd, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(-frame_size_); } bool MipsMir2Lir::IsUnconditionalBranch(LIR* lir) { diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index bf0e0fc78b..8ab542270d 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -283,9 +283,9 @@ LIR* MipsMir2Lir::OpReg(OpKind op, RegStorage r_dest_src) { break; case kOpBx: return NewLIR2(kMipsJalr, rZERO, r_dest_src.GetReg()); - break; default: LOG(FATAL) << "Bad case in OpReg"; + UNREACHABLE(); } return NewLIR2(opcode, cu_->target64 ? 
rRAd : rRA, r_dest_src.GetReg()); } @@ -295,8 +295,8 @@ LIR* MipsMir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { return OpRegRegImm(op, r_dest_src1, r_dest_src1, value); } else { LOG(FATAL) << "Bad case in OpRegImm"; + UNREACHABLE(); } - UNREACHABLE(); } LIR* MipsMir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) { diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 0b480a09c6..961cd4f06b 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -1250,10 +1250,17 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { if (bb->block_type == kEntryBlock) { ResetRegPool(); int start_vreg = mir_graph_->GetFirstInVR(); + AppendLIR(NewLIR0(kPseudoPrologueBegin)); GenEntrySequence(&mir_graph_->reg_location_[start_vreg], mir_graph_->GetMethodLoc()); + AppendLIR(NewLIR0(kPseudoPrologueEnd)); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); } else if (bb->block_type == kExitBlock) { ResetRegPool(); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); + AppendLIR(NewLIR0(kPseudoEpilogueBegin)); GenExitSequence(); + AppendLIR(NewLIR0(kPseudoEpilogueEnd)); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); } for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index cca4e5a30a..db59714742 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -29,9 +29,11 @@ #include "dex/quick/resource_mask.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "invoke_type.h" +#include "lazy_debug_frame_opcode_writer.h" #include "leb128.h" #include "safe_map.h" #include "utils/array_ref.h" +#include "utils/dex_cache_arrays_layout.h" #include "utils/stack_checks.h" namespace art { @@ -134,6 +136,7 @@ class BasicBlock; class BitVector; struct CallInfo; struct CompilationUnit; +struct CompilerTemp; struct InlineMethod; class MIR; struct LIR; @@ -141,6 +144,7 @@ struct RegisterInfo; class DexFileMethodInliner; class MIRGraph; class MirMethodLoweringInfo; +class MirSFieldLoweringInfo; typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int, const MethodReference& target_method, @@ -632,7 +636,7 @@ class Mir2Lir { RegisterClass ShortyToRegClass(char shorty_type); RegisterClass LocToRegClass(RegLocation loc); int ComputeFrameSize(); - virtual void Materialize(); + void Materialize(); virtual CompiledMethod* GetCompiledMethod(); void MarkSafepointPC(LIR* inst); void MarkSafepointPCAfter(LIR* after); @@ -773,9 +777,10 @@ class Mir2Lir { */ virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update); - void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs); + virtual void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight); + virtual void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs); void DumpCounts(const RefCounts* arr, int size, const char* msg); - void DoPromotion(); + virtual void DoPromotion(); int VRegOffset(int v_reg); int SRegOffset(int s_reg); RegLocation GetReturnWide(RegisterClass reg_class); @@ -956,6 +961,7 @@ class Mir2Lir { // Shared by all targets - implemented in gen_loadstore.cc. RegLocation LoadCurrMethod(); void LoadCurrMethodDirect(RegStorage r_tgt); + RegStorage LoadCurrMethodWithHint(RegStorage r_hint); virtual LIR* LoadConstant(RegStorage r_dest, int value); // Natural word size. 
LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) { @@ -1093,6 +1099,18 @@ class Mir2Lir { virtual void LoadClassType(const DexFile& dex_file, uint32_t type_idx, SpecialTargetRegister symbolic_reg); + // TODO: Support PC-relative dex cache array loads on all platforms and + // replace CanUseOpPcRelDexCacheArrayLoad() with dex_cache_arrays_layout_.Valid(). + virtual bool CanUseOpPcRelDexCacheArrayLoad() const; + + /* + * @brief Load an element of one of the dex cache arrays. + * @param dex_file the dex file associated with the target dex cache. + * @param offset the offset of the element in the fixed dex cache arrays' layout. + * @param r_dest the register where to load the element. + */ + virtual void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest); + // Routines that work for the generic case, but may be overriden by target. /* * @brief Compare memory to immediate, and branch if condition true. @@ -1491,6 +1509,12 @@ class Mir2Lir { return 0; } + /** + * @brief Buffer of DWARF's Call Frame Information opcodes. + * @details It is used by debuggers and other tools to unwind the call stack. + */ + dwarf::LazyDebugFrameOpCodeWriter& cfi() { return cfi_; } + protected: Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -1556,11 +1580,6 @@ class Mir2Lir { bool can_assume_type_is_in_dex_cache, uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src); - /* - * @brief Generate the eh_frame FDE information if possible. - * @returns pointer to vector containg FDE information, or NULL. - */ - virtual std::vector<uint8_t>* ReturnFrameDescriptionEntry(); /** * @brief Used to insert marker that can be used to associate MIR with LIR. @@ -1596,7 +1615,6 @@ class Mir2Lir { */ virtual bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special); - protected: void ClobberBody(RegisterInfo* p); void SetCurrentDexPc(DexOffset dexpc) { current_dalvik_offset_ = dexpc; @@ -1669,6 +1687,23 @@ class Mir2Lir { */ bool GenSpecialIdentity(MIR* mir, const InlineMethod& special); + /** + * @brief Generate code to check if result is null and, if it is, call helper to load it. + * @param r_result the result register. + * @param trampoline the helper to call in slow path. + * @param imm the immediate passed to the helper. + * @param r_method the register with ArtMethod* if available, otherwise RegStorage::Invalid(). + */ + void GenIfNullUseHelperImmMethod( + RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method); + + /** + * @brief Generate code to retrieve Class* for another type to be used by SGET/SPUT. + * @param field_info information about the field to be accessed. + * @param opt_flags the optimization flags of the MIR. + */ + RegStorage GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info, int opt_flags); + void AddDivZeroCheckSlowPath(LIR* branch); // Copy arg0 and arg1 to kArg0 and kArg1 safely, possibly using @@ -1742,6 +1777,13 @@ class Mir2Lir { // Update references from prev_mir to mir. void UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references); + /** + * Returns true if the frame spills the given core register. + */ + bool CoreSpillMaskContains(int reg) { + return (core_spill_mask_ & (1u << reg)) != 0; + } + public: // TODO: add accessors for these. LIR* literal_list_; // Constants. @@ -1815,7 +1857,23 @@ class Mir2Lir { // Record the MIR that generated a given safepoint (nullptr for prologue safepoints). 
ArenaVector<std::pair<LIR*, MIR*>> safepoints_; - protected: + // The layout of the cu_->dex_file's dex cache arrays for PC-relative addressing. + const DexCacheArraysLayout dex_cache_arrays_layout_; + + // For architectures that don't have true PC-relative addressing, we can promote + // a PC of an instruction (or another PC-relative address such as a pointer to + // the dex cache arrays if supported) to a register. This is indicated to the + // register promotion by allocating a backend temp. + CompilerTemp* pc_rel_temp_; + + // For architectures that don't have true PC-relative addressing (see pc_rel_temp_ + // above) and also have a limited range of offsets for loads, it's be useful to + // know the minimum offset into the dex cache arrays, so we calculate that as well + // if pc_rel_temp_ isn't nullptr. + uint32_t dex_cache_arrays_min_offset_; + + dwarf::LazyDebugFrameOpCodeWriter cfi_; + // ABI support class ShortyArg { public: @@ -1875,6 +1933,8 @@ class Mir2Lir { private: static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type); + + friend class QuickCFITest; }; // Class Mir2Lir } // namespace art diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc new file mode 100644 index 0000000000..2e62166b7b --- /dev/null +++ b/compiler/dex/quick/quick_cfi_test.cc @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vector> +#include <memory> + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "cfi_test.h" +#include "dex/compiler_ir.h" +#include "dex/mir_graph.h" +#include "dex/pass_manager.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "dex/quick/quick_compiler.h" +#include "dex/quick/mir_to_lir.h" +#include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" +#include "gtest/gtest.h" + +#include "dex/quick/quick_cfi_test_expected.inc" + +namespace art { + +// Run the tests only on host. +#ifndef HAVE_ANDROID_OS + +class QuickCFITest : public CFITest { + public: + // Enable this flag to generate the expected outputs. + static constexpr bool kGenerateExpected = false; + + void TestImpl(InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + // Setup simple compiler context. + ArenaPool pool; + ArenaAllocator arena(&pool); + CompilerOptions compiler_options( + CompilerOptions::kDefaultCompilerFilter, + CompilerOptions::kDefaultHugeMethodThreshold, + CompilerOptions::kDefaultLargeMethodThreshold, + CompilerOptions::kDefaultSmallMethodThreshold, + CompilerOptions::kDefaultTinyMethodThreshold, + CompilerOptions::kDefaultNumDexMethodsThreshold, + true, // generate_gdb_information. + false, + CompilerOptions::kDefaultTopKProfileThreshold, + false, + true, // include_debug_symbols. 
+ false, + false, + false, + false, + nullptr, + new PassManagerOptions(), + nullptr, + false); + VerificationResults verification_results(&compiler_options); + DexFileToMethodInlinerMap method_inliner_map; + std::unique_ptr<const InstructionSetFeatures> isa_features; + std::string error; + isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); + CompilerDriver driver(&compiler_options, &verification_results, &method_inliner_map, + Compiler::kQuick, isa, isa_features.get(), + false, 0, 0, 0, false, false, "", 0, -1, ""); + ClassLinker* linker = nullptr; + CompilationUnit cu(&pool, isa, &driver, linker); + DexFile::CodeItem code_item { 0, 0, 0, 0, 0, 0, { 0 } }; // NOLINT + cu.mir_graph.reset(new MIRGraph(&cu, &arena)); + cu.mir_graph->current_code_item_ = &code_item; + + // Generate empty method with some spills. + std::unique_ptr<Mir2Lir> m2l(QuickCompiler::GetCodeGenerator(&cu, nullptr)); + m2l->frame_size_ = 64u; + m2l->CompilerInitializeRegAlloc(); + for (const auto& info : m2l->reg_pool_->core_regs_) { + if (m2l->num_core_spills_ < 2 && !info->IsTemp() && !info->InUse()) { + m2l->core_spill_mask_ |= 1 << info->GetReg().GetReg(); + m2l->num_core_spills_++; + } + } + for (const auto& info : m2l->reg_pool_->sp_regs_) { + if (m2l->num_fp_spills_ < 2 && !info->IsTemp() && !info->InUse()) { + m2l->fp_spill_mask_ |= 1 << info->GetReg().GetReg(); + m2l->num_fp_spills_++; + } + } + m2l->AdjustSpillMask(); + m2l->GenEntrySequence(NULL, m2l->LocCReturnRef()); + m2l->GenExitSequence(); + m2l->HandleSlowPaths(); + m2l->AssembleLIR(); + std::vector<uint8_t> actual_asm(m2l->code_buffer_.begin(), m2l->code_buffer_.end()); + auto const& cfi_data = m2l->cfi().Patch(actual_asm.size()); + std::vector<uint8_t> actual_cfi(cfi_data->begin(), cfi_data->end()); + EXPECT_EQ(m2l->cfi().GetCurrentPC(), static_cast<int>(actual_asm.size())); + + if (kGenerateExpected) { + GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + } else { + EXPECT_EQ(expected_asm, actual_asm); + EXPECT_EQ(expected_cfi, actual_cfi); + } + } +}; + +#define TEST_ISA(isa) \ + TEST_F(QuickCFITest, isa) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ + } + +TEST_ISA(kThumb2) +TEST_ISA(kArm64) +TEST_ISA(kX86) +TEST_ISA(kX86_64) +TEST_ISA(kMips) +TEST_ISA(kMips64) + +#endif // HAVE_ANDROID_OS + +} // namespace art diff --git a/compiler/dex/quick/quick_cfi_test_expected.inc b/compiler/dex/quick/quick_cfi_test_expected.inc new file mode 100644 index 0000000000..634fdeead0 --- /dev/null +++ b/compiler/dex/quick/quick_cfi_test_expected.inc @@ -0,0 +1,217 @@ +static constexpr uint8_t expected_asm_kThumb2[] = { + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0, + 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kThumb2[] = { + 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42, + 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x44, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: push {r5, r6, lr} +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: .cfi_offset: r6 at cfa-8 +// 0x00000002: .cfi_offset: r14 at cfa-4 +// 0x00000002: vpush.f32 {s16-s17} +// 0x00000006: 
.cfi_def_cfa_offset: 20 +// 0x00000006: .cfi_offset_extended: r80 at cfa-20 +// 0x00000006: .cfi_offset_extended: r81 at cfa-16 +// 0x00000006: sub sp, sp, #44 +// 0x00000008: .cfi_def_cfa_offset: 64 +// 0x00000008: str r0, [sp, #0] +// 0x0000000a: .cfi_remember_state +// 0x0000000a: add sp, sp, #44 +// 0x0000000c: .cfi_def_cfa_offset: 20 +// 0x0000000c: vpop.f32 {s16-s17} +// 0x00000010: .cfi_def_cfa_offset: 12 +// 0x00000010: .cfi_restore_extended: r80 +// 0x00000010: .cfi_restore_extended: r81 +// 0x00000010: pop {r5, r6, pc} +// 0x00000012: lsls r0, r0, #0 +// 0x00000014: .cfi_restore_state +// 0x00000014: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kArm64[] = { + 0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF4, 0xD7, 0x02, 0xA9, + 0xFE, 0x1F, 0x00, 0xF9, 0xE0, 0x03, 0x00, 0xB9, 0xE8, 0xA7, 0x41, 0x6D, + 0xF4, 0xD7, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, + 0xC0, 0x03, 0x5F, 0xD6, +}; +static constexpr uint8_t expected_cfi_kArm64[] = { + 0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x94, + 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06, + 0x49, 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: sub sp, sp, #0x40 (64) +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: stp d8, d9, [sp, #24] +// 0x00000008: .cfi_offset_extended: r72 at cfa-40 +// 0x00000008: .cfi_offset_extended: r73 at cfa-32 +// 0x00000008: stp x20, x21, [sp, #40] +// 0x0000000c: .cfi_offset: r20 at cfa-24 +// 0x0000000c: .cfi_offset: r21 at cfa-16 +// 0x0000000c: str lr, [sp, #56] +// 0x00000010: .cfi_offset: r30 at cfa-8 +// 0x00000010: str w0, [sp] +// 0x00000014: .cfi_remember_state +// 0x00000014: ldp d8, d9, [sp, #24] +// 0x00000018: .cfi_restore_extended: r72 +// 0x00000018: .cfi_restore_extended: r73 +// 0x00000018: ldp x20, x21, [sp, #40] +// 0x0000001c: .cfi_restore: r20 +// 0x0000001c: .cfi_restore: r21 +// 0x0000001c: ldr lr, [sp, #56] +// 0x00000020: .cfi_restore: r30 +// 0x00000020: add sp, sp, #0x40 (64) +// 0x00000024: .cfi_def_cfa_offset: 0 +// 0x00000024: ret +// 0x00000028: .cfi_restore_state +// 0x00000028: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86[] = { + 0x83, 0xEC, 0x3C, 0x89, 0x6C, 0x24, 0x34, 0x89, 0x74, 0x24, 0x38, 0x89, + 0x04, 0x24, 0x8B, 0x6C, 0x24, 0x34, 0x8B, 0x74, 0x24, 0x38, 0x83, 0xC4, + 0x3C, 0xC3, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kX86[] = { + 0x43, 0x0E, 0x40, 0x44, 0x85, 0x03, 0x44, 0x86, 0x02, 0x43, 0x0A, 0x44, + 0xC5, 0x44, 0xC6, 0x43, 0x0E, 0x04, 0x43, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: sub esp, 60 +// 0x00000003: .cfi_def_cfa_offset: 64 +// 0x00000003: mov [esp + 52], ebp +// 0x00000007: .cfi_offset: r5 at cfa-12 +// 0x00000007: mov [esp + 56], esi +// 0x0000000b: .cfi_offset: r6 at cfa-8 +// 0x0000000b: mov [esp], eax +// 0x0000000e: .cfi_remember_state +// 0x0000000e: mov ebp, [esp + 52] +// 0x00000012: .cfi_restore: r5 +// 0x00000012: mov esi, [esp + 56] +// 0x00000016: .cfi_restore: r6 +// 0x00000016: add esp, 60 +// 0x00000019: .cfi_def_cfa_offset: 4 +// 0x00000019: ret +// 0x0000001a: addb [eax], al +// 0x0000001c: .cfi_restore_state +// 0x0000001c: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86_64[] = { + 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0x5C, 0x24, 0x28, 0x48, 0x89, 0x6C, + 0x24, 0x30, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, + 0x11, 0x6C, 0x24, 0x20, 0x48, 0x8B, 0xC7, 0x89, 0x3C, 0x24, 0x48, 0x8B, + 0x5C, 0x24, 0x28, 0x48, 0x8B, 0x6C, 
0x24, 0x30, 0xF2, 0x44, 0x0F, 0x10, + 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, 0x20, 0x48, 0x83, + 0xC4, 0x38, 0xC3, 0x00, +}; +static constexpr uint8_t expected_cfi_kX86_64[] = { + 0x44, 0x0E, 0x40, 0x45, 0x83, 0x06, 0x45, 0x86, 0x04, 0x47, 0x9D, 0x0A, + 0x47, 0x9E, 0x08, 0x46, 0x0A, 0x45, 0xC3, 0x45, 0xC6, 0x47, 0xDD, 0x47, + 0xDE, 0x44, 0x0E, 0x08, 0x42, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: subq rsp, 56 +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: movq [rsp + 40], rbx +// 0x00000009: .cfi_offset: r3 at cfa-24 +// 0x00000009: movq [rsp + 48], rbp +// 0x0000000e: .cfi_offset: r6 at cfa-16 +// 0x0000000e: movsd [rsp + 24], xmm12 +// 0x00000015: .cfi_offset: r29 at cfa-40 +// 0x00000015: movsd [rsp + 32], xmm13 +// 0x0000001c: .cfi_offset: r30 at cfa-32 +// 0x0000001c: movq rax, rdi +// 0x0000001f: mov [rsp], edi +// 0x00000022: .cfi_remember_state +// 0x00000022: movq rbx, [rsp + 40] +// 0x00000027: .cfi_restore: r3 +// 0x00000027: movq rbp, [rsp + 48] +// 0x0000002c: .cfi_restore: r6 +// 0x0000002c: movsd xmm12, [rsp + 24] +// 0x00000033: .cfi_restore: r29 +// 0x00000033: movsd xmm13, [rsp + 32] +// 0x0000003a: .cfi_restore: r30 +// 0x0000003a: addq rsp, 56 +// 0x0000003e: .cfi_def_cfa_offset: 8 +// 0x0000003e: ret +// 0x0000003f: addb al, al +// 0x00000040: .cfi_restore_state +// 0x00000040: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips[] = { + 0xF4, 0xFF, 0xBD, 0x27, 0x08, 0x00, 0xB2, 0xAF, 0x04, 0x00, 0xB3, 0xAF, + 0x00, 0x00, 0xBF, 0xAF, 0xCC, 0xFF, 0xBD, 0x27, 0x25, 0x10, 0x80, 0x00, + 0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xB2, 0x8F, 0x38, 0x00, 0xB3, 0x8F, + 0x34, 0x00, 0xBF, 0x8F, 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, + 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips[] = { + 0x44, 0x0E, 0x0C, 0x44, 0x92, 0x01, 0x44, 0x93, 0x02, 0x44, 0x9F, 0x03, + 0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44, + 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: addiu r29, r29, -12 +// 0x00000004: .cfi_def_cfa_offset: 12 +// 0x00000004: sw r18, +8(r29) +// 0x00000008: .cfi_offset: r18 at cfa-4 +// 0x00000008: sw r19, +4(r29) +// 0x0000000c: .cfi_offset: r19 at cfa-8 +// 0x0000000c: sw r31, +0(r29) +// 0x00000010: .cfi_offset: r31 at cfa-12 +// 0x00000010: addiu r29, r29, -52 +// 0x00000014: .cfi_def_cfa_offset: 64 +// 0x00000014: or r2, r4, r0 +// 0x00000018: sw r4, +0(r29) +// 0x0000001c: .cfi_remember_state +// 0x0000001c: lw r18, +60(r29) +// 0x00000020: .cfi_restore: r18 +// 0x00000020: lw r19, +56(r29) +// 0x00000024: .cfi_restore: r19 +// 0x00000024: lw r31, +52(r29) +// 0x00000028: .cfi_restore: r31 +// 0x00000028: addiu r29, r29, 64 +// 0x0000002c: .cfi_def_cfa_offset: 0 +// 0x0000002c: jalr r0, r31 +// 0x00000030: nop +// 0x00000034: .cfi_restore_state +// 0x00000034: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64[] = { + 0xE8, 0xFF, 0xBD, 0x67, 0x10, 0x00, 0xB2, 0xFF, 0x08, 0x00, 0xB3, 0xFF, + 0x00, 0x00, 0xBF, 0xFF, 0xD8, 0xFF, 0xBD, 0x67, 0x25, 0x10, 0x80, 0x00, + 0x00, 0x00, 0xA4, 0xAF, 0x38, 0x00, 0xB2, 0xDF, 0x30, 0x00, 0xB3, 0xDF, + 0x28, 0x00, 0xBF, 0xDF, 0x40, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, + 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64[] = { + 0x44, 0x0E, 0x18, 0x44, 0x92, 0x02, 0x44, 0x93, 0x04, 0x44, 0x9F, 0x06, + 0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44, + 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: daddiu r29, r29, -24 +// 0x00000004: .cfi_def_cfa_offset: 
24 +// 0x00000004: sd r18, +16(r29) +// 0x00000008: .cfi_offset: r18 at cfa-8 +// 0x00000008: sd r19, +8(r29) +// 0x0000000c: .cfi_offset: r19 at cfa-16 +// 0x0000000c: sd r31, +0(r29) +// 0x00000010: .cfi_offset: r31 at cfa-24 +// 0x00000010: daddiu r29, r29, -40 +// 0x00000014: .cfi_def_cfa_offset: 64 +// 0x00000014: or r2, r4, r0 +// 0x00000018: sw r4, +0(r29) +// 0x0000001c: .cfi_remember_state +// 0x0000001c: ld r18, +56(r29) +// 0x00000020: .cfi_restore: r18 +// 0x00000020: ld r19, +48(r29) +// 0x00000024: .cfi_restore: r19 +// 0x00000024: ld r31, +40(r29) +// 0x00000028: .cfi_restore: r31 +// 0x00000028: daddiu r29, r29, 64 +// 0x0000002c: .cfi_def_cfa_offset: 0 +// 0x0000002c: jr r31 +// 0x00000030: nop +// 0x00000034: .cfi_restore_state +// 0x00000034: .cfi_def_cfa_offset: 64 + diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 6d289843e8..2c0bd47405 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -634,6 +634,12 @@ CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item, instruction_set = kThumb2; } CompilationUnit cu(runtime->GetArenaPool(), instruction_set, driver, class_linker); + cu.dex_file = &dex_file; + cu.class_def_idx = class_def_idx; + cu.method_idx = method_idx; + cu.access_flags = access_flags; + cu.invoke_type = invoke_type; + cu.shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx)); CHECK((cu.instruction_set == kThumb2) || (cu.instruction_set == kArm64) || @@ -792,11 +798,16 @@ bool QuickCompiler::WriteElf(art::File* file, const std::vector<const art::DexFile*>& dex_files, const std::string& android_root, bool is_host) const { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); + if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) { + return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } else { + return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } } -Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const { +Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) { UNUSED(compilation_unit); Mir2Lir* mir_to_lir = nullptr; switch (cu->instruction_set) { diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h index 5153a9e82e..09b08ace77 100644 --- a/compiler/dex/quick/quick_compiler.h +++ b/compiler/dex/quick/quick_compiler.h @@ -60,7 +60,7 @@ class QuickCompiler : public Compiler { OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const; + static Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit); void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 741657bc69..e779479780 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -19,9 +19,11 @@ #include "mir_to_lir-inl.h" #include "dex/compiler_ir.h" +#include "dex/dataflow_iterator-inl.h" #include "dex/mir_graph.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" +#include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -1128,6 +1130,152 @@ RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool 
update) { return loc; } +void Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) { + // NOTE: This should be in sync with functions that actually generate code for + // the opcodes below. However, if we get this wrong, the generated code will + // still be correct even if it may be sub-optimal. + int opcode = mir->dalvikInsn.opcode; + bool uses_method = false; + bool uses_pc_rel_load = false; + uint32_t dex_cache_array_offset = std::numeric_limits<uint32_t>::max(); + switch (opcode) { + case Instruction::CHECK_CAST: + case Instruction::INSTANCE_OF: { + if ((opcode == Instruction::CHECK_CAST) && + (mir->optimization_flags & MIR_IGNORE_CHECK_CAST) != 0) { + break; // No code generated. + } + uint32_t type_idx = + (opcode == Instruction::CHECK_CAST) ? mir->dalvikInsn.vB : mir->dalvikInsn.vC; + bool type_known_final, type_known_abstract, use_declaring_class; + bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks( + cu_->method_idx, *cu_->dex_file, type_idx, + &type_known_final, &type_known_abstract, &use_declaring_class); + if (opcode == Instruction::CHECK_CAST && !needs_access_check && + cu_->compiler_driver->IsSafeCast( + mir_graph_->GetCurrentDexCompilationUnit(), mir->offset)) { + break; // No code generated. + } + if (!needs_access_check && !use_declaring_class && CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + } else { + uses_method = true; + } + break; + } + + case Instruction::CONST_CLASS: + if (CanUseOpPcRelDexCacheArrayLoad() && + cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, + mir->dalvikInsn.vB)) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(mir->dalvikInsn.vB); + } else { + uses_method = true; + } + break; + + case Instruction::CONST_STRING: + case Instruction::CONST_STRING_JUMBO: + if (CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.StringOffset(mir->dalvikInsn.vB); + } else { + uses_method = true; + } + break; + + case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_SUPER: + case Instruction::INVOKE_DIRECT: + case Instruction::INVOKE_STATIC: + case Instruction::INVOKE_INTERFACE: + case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_SUPER_RANGE: + case Instruction::INVOKE_DIRECT_RANGE: + case Instruction::INVOKE_STATIC_RANGE: + case Instruction::INVOKE_INTERFACE_RANGE: + case Instruction::INVOKE_VIRTUAL_QUICK: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: { + const MirMethodLoweringInfo& info = mir_graph_->GetMethodLoweringInfo(mir); + InvokeType sharp_type = info.GetSharpType(); + if (info.IsIntrinsic()) { + // Nothing to do, if an intrinsic uses ArtMethod* it's in the slow-path - don't count it. + } else if (!info.FastPath() || (sharp_type != kStatic && sharp_type != kDirect)) { + // Nothing to do, the generated code or entrypoint uses method from the stack. + } else if (info.DirectCode() != 0 && info.DirectMethod() != 0) { + // Nothing to do, the generated code uses method from the stack. 
+ } else if (CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; + dex_cache_array_offset = dex_cache_arrays_layout_.MethodOffset(mir->dalvikInsn.vB); + } else { + uses_method = true; + } + break; + } + + case Instruction::NEW_INSTANCE: + case Instruction::NEW_ARRAY: + case Instruction::FILLED_NEW_ARRAY: + case Instruction::FILLED_NEW_ARRAY_RANGE: + uses_method = true; + break; + case Instruction::FILL_ARRAY_DATA: + // Nothing to do, the entrypoint uses method from the stack. + break; + case Instruction::THROW: + // Nothing to do, the entrypoint uses method from the stack. + break; + + case Instruction::SGET: + case Instruction::SGET_WIDE: + case Instruction::SGET_OBJECT: + case Instruction::SGET_BOOLEAN: + case Instruction::SGET_BYTE: + case Instruction::SGET_CHAR: + case Instruction::SGET_SHORT: + case Instruction::SPUT: + case Instruction::SPUT_WIDE: + case Instruction::SPUT_OBJECT: + case Instruction::SPUT_BOOLEAN: + case Instruction::SPUT_BYTE: + case Instruction::SPUT_CHAR: + case Instruction::SPUT_SHORT: { + const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir); + bool fast = IsInstructionSGet(static_cast<Instruction::Code>(opcode)) + ? field_info.FastGet() + : field_info.FastPut(); + if (fast && (cu_->enable_debug & (1 << kDebugSlowFieldPath)) == 0) { + if (!field_info.IsReferrersClass() && CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex()); + } else { + uses_method = true; + } + } else { + // Nothing to do, the entrypoint uses method from the stack. + } + break; + } + + default: + break; + } + if (uses_method) { + core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count += weight; + } + if (uses_pc_rel_load) { + if (pc_rel_temp_ != nullptr) { + core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight; + DCHECK_NE(dex_cache_array_offset, std::numeric_limits<uint32_t>::max()); + dex_cache_arrays_min_offset_ = std::min(dex_cache_arrays_min_offset_, dex_cache_array_offset); + } else { + // Nothing to do, using PC-relative addressing without promoting base PC to register. + } + } +} + /* USE SSA names to count references of base Dalvik v_regs. */ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) { @@ -1157,6 +1305,22 @@ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num } } } + + // Now analyze the ArtMethod* and pc_rel_temp_ uses. 
+ DCHECK_EQ(core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count, 0); + if (pc_rel_temp_ != nullptr) { + DCHECK_EQ(core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count, 0); + } + PreOrderDfsIterator iter(mir_graph_); + for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) { + if (bb->block_type == kDead) { + continue; + } + uint32_t weight = mir_graph_->GetUseCountWeight(bb); + for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { + AnalyzeMIR(core_counts, mir, weight); + } + } } /* qsort callback function, sort descending */ diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 118ab1d843..af19f5eaed 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -544,7 +544,6 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86CallI, kCall, IS_UNARY_OP | IS_BRANCH, { 0, 0, 0xE8, 0, 0, 0, 0, 4, false }, "CallI", "!0d" }, { kX86Ret, kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Ret", "" }, - { kX86StartOfMethod, kMacro, IS_UNARY_OP | REG_DEF0 | SETS_CCODES, { 0, 0, 0, 0, 0, 0, 0, 0, false }, "StartOfMethod", "!0r" }, { kX86PcRelLoadRA, kPcRel, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "PcRelLoadRA", "!0r,[!1r+!2r<<!3d+!4p]" }, { kX86PcRelAdr, kPcRel, IS_LOAD | IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4, false }, "PcRelAdr", "!0r,!1p" }, { kX86RepneScasw, kNullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0, false }, "RepNE ScasW", "" }, @@ -865,13 +864,6 @@ size_t X86Mir2Lir::GetInsnSize(LIR* lir) { DCHECK_EQ(entry->opcode, kX86PcRelAdr); return 5; // opcode with reg + 4 byte immediate } - case kMacro: // lir operands - 0: reg - DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); - return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ + - ComputeSize(&X86Mir2Lir::EncodingMap[cu_->target64 ? kX86Sub64RI : kX86Sub32RI], - lir->operands[0], NO_REG, NO_REG, 0) - - // Shorter ax encoding. - (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); case kUnimplemented: break; } @@ -1586,8 +1578,8 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t int32_t raw_index, int scale, int32_t table_or_disp) { int disp; if (entry->opcode == kX86PcRelLoadRA) { - const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(table_or_disp); - disp = tab_rec->offset; + const SwitchTable* tab_rec = UnwrapPointer<SwitchTable>(table_or_disp); + disp = tab_rec->offset - tab_rec->anchor->offset; } else { DCHECK(entry->opcode == kX86PcRelAdr); const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(raw_base_or_table); @@ -1621,23 +1613,6 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t DCHECK_EQ(0, entry->skeleton.ax_opcode); } -void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset) { - DCHECK_EQ(entry->opcode, kX86StartOfMethod) << entry->name; - DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, raw_reg, NO_REG, NO_REG); - code_buffer_.push_back(0xE8); // call +0 - code_buffer_.push_back(0); - code_buffer_.push_back(0); - code_buffer_.push_back(0); - code_buffer_.push_back(0); - - uint8_t low_reg = LowRegisterBits(raw_reg); - code_buffer_.push_back(0x58 + low_reg); // pop reg - - EmitRegImm(&X86Mir2Lir::EncodingMap[cu_->target64 ? 
kX86Sub64RI : kX86Sub32RI], - raw_reg, offset + 5 /* size of call +0 */); -} - void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) { UNIMPLEMENTED(WARNING) << "encoding kind for " << entry->name << " " << BuildInsnString(entry->fmt, lir, 0); @@ -1780,7 +1755,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { // Offset is relative to next instruction. lir->operands[2] = target - (lir->offset + lir->flags.size); } else { - lir->operands[2] = target; + const LIR* anchor = UnwrapPointer<LIR>(lir->operands[4]); + lir->operands[2] = target - anchor->offset; int newSize = GetInsnSize(lir); if (newSize != lir->flags.size) { lir->flags.size = newSize; @@ -1951,9 +1927,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitPcRel(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3], lir->operands[4]); break; - case kMacro: // lir operands - 0: reg - EmitMacro(entry, lir->operands[0], lir->offset); - break; case kNop: // TODO: these instruction kinds are missing implementations. case kThreadReg: case kRegArrayImm: @@ -2044,9 +2017,13 @@ void X86Mir2Lir::AssembleLIR() { cu_->NewTimingSplit("Assemble"); // We will remove the method address if we never ended up using it - if (store_method_addr_ && !store_method_addr_used_) { - setup_method_address_[0]->flags.is_nop = true; - setup_method_address_[1]->flags.is_nop = true; + if (pc_rel_base_reg_.Valid() && !pc_rel_base_reg_used_) { + if (kIsDebugBuild) { + LOG(WARNING) << "PC-relative addressing base promoted but unused in " + << PrettyMethod(cu_->method_idx, *cu_->dex_file); + } + setup_pc_rel_base_reg_->flags.is_nop = true; + NEXT_LIR(setup_pc_rel_base_reg_)->flags.is_nop = true; } AssignOffsets(); diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index abee87254b..d7a5eb04db 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -21,9 +21,11 @@ #include "base/logging.h" #include "dex/quick/mir_to_lir-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "gc/accounting/card_table.h" #include "mirror/art_method.h" #include "mirror/object_array-inl.h" +#include "utils/dex_cache_arrays_layout-inl.h" #include "x86_lir.h" namespace art { @@ -95,29 +97,23 @@ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocat // Add the offset from the table to the table base. OpRegReg(kOpAdd, addr_for_jump, table_base); + tab_rec->anchor = nullptr; // Unused for x86-64. } else { - // Materialize a pointer to the switch table. - RegStorage start_of_method_reg; - if (base_of_code_ != nullptr) { - // We can use the saved value. - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - rl_method = LoadValue(rl_method, kCoreReg); - start_of_method_reg = rl_method.reg; - store_method_addr_used_ = true; - } else { - start_of_method_reg = AllocTempRef(); - NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg()); - } + // Get the PC to a register and get the anchor. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor); + // Load the displacement from the switch table. addr_for_jump = AllocTemp(); - NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), + NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), r_pc.GetReg(), keyReg.GetReg(), 2, WrapPointer(tab_rec)); - // Add displacement to start of method. 
- OpRegReg(kOpAdd, addr_for_jump, start_of_method_reg); + // Add displacement and r_pc to get the address. + OpRegReg(kOpAdd, addr_for_jump, r_pc); + tab_rec->anchor = anchor; } // ..and go! - tab_rec->anchor = NewLIR1(kX86JmpR, addr_for_jump.GetReg()); + NewLIR1(kX86JmpR, addr_for_jump.GetReg()); /* branch_over target here */ LIR* target = NewLIR0(kPseudoTargetLabel); @@ -148,6 +144,10 @@ void X86Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { /* * On entry, rX86_ARG0, rX86_ARG1, rX86_ARG2 are live. Let the register @@ -182,10 +182,10 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } /* Build frame, return address already on stack */ - stack_decrement_ = OpRegImm(kOpSub, rs_rSP, frame_size_ - - GetInstructionSetPointerSize(cu_->instruction_set)); + cfi_.SetCurrentCFAOffset(GetInstructionSetPointerSize(cu_->instruction_set)); + OpRegImm(kOpSub, rs_rSP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set)); + cfi_.DefCFAOffset(frame_size_); - NewLIR0(kPseudoMethodEntry); /* Spill core callee saves */ SpillCoreRegs(); SpillFPRegs(); @@ -201,10 +201,12 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { GenerateTargetLabel(kPseudoThrowTarget); const RegStorage local_rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; m2l_->OpRegImm(kOpAdd, local_rs_rSP, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); // Assumes codegen and target are in thumb2 mode. m2l_->CallHelper(RegStorage::InvalidReg(), kQuickThrowStackOverflow, false /* MarkSafepointPC */, false /* UseLink */); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -235,14 +237,12 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { FlushIns(ArgLocs, rl_method); - if (base_of_code_ != nullptr) { - RegStorage method_start = TargetPtrReg(kArg0); - // We have been asked to save the address of the method start for later use. - setup_method_address_[0] = NewLIR1(kX86StartOfMethod, method_start.GetReg()); - int displacement = SRegOffset(base_of_code_->s_reg_low); - // Native pointer - must be natural word size. - setup_method_address_[1] = StoreBaseDisp(rs_rSP, displacement, method_start, - cu_->target64 ? k64 : k32, kNotVolatile); + // We can promote the PC of an anchor for PC-relative addressing to a register + // if it's used at least twice. Without investigating where we should lazily + // load the reference, we conveniently load it after flushing inputs. + if (pc_rel_base_reg_.Valid()) { + DCHECK(!cu_->target64); + setup_pc_rel_base_reg_ = OpLoadPc(pc_rel_base_reg_); } FreeTemp(arg0); @@ -251,6 +251,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } void X86Mir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, rX86_RET0/rX86_RET1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -258,14 +259,18 @@ void X86Mir2Lir::GenExitSequence() { LockTemp(rs_rX86_RET0); LockTemp(rs_rX86_RET1); - NewLIR0(kPseudoMethodExit); UnSpillCoreRegs(); UnSpillFPRegs(); /* Remove frame except for return address */ const RegStorage rs_rSP = cu_->target64 ? 
rs_rX86_SP_64 : rs_rX86_SP_32; - stack_increment_ = OpRegImm(kOpAdd, rs_rSP, - frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set)); + int adjust = frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set); + OpRegImm(kOpAdd, rs_rSP, adjust); + cfi_.AdjustCFAOffset(-adjust); + // There is only the return PC on the stack now. NewLIR0(kX86Ret); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void X86Mir2Lir::GenSpecialExitSequence() { @@ -276,6 +281,8 @@ void X86Mir2Lir::GenSpecialEntryForSuspend() { // Keep 16-byte stack alignment, there's already the return address, so // - for 32-bit push EAX, i.e. ArtMethod*, ESI, EDI, // - for 64-bit push RAX, i.e. ArtMethod*. + const int kRegSize = cu_->target64 ? 8 : 4; + cfi_.SetCurrentCFAOffset(kRegSize); // Return address. if (!cu_->target64) { DCHECK(!IsTemp(rs_rSI)); DCHECK(!IsTemp(rs_rDI)); @@ -293,17 +300,29 @@ void X86Mir2Lir::GenSpecialEntryForSuspend() { fp_vmap_table_.clear(); if (!cu_->target64) { NewLIR1(kX86Push32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()), 0); NewLIR1(kX86Push32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()), 0); } NewLIR1(kX86Push32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* + cfi_.AdjustCFAOffset(kRegSize); + // Do not generate CFI for scratch register. } void X86Mir2Lir::GenSpecialExitForSuspend() { + const int kRegSize = cu_->target64 ? 8 : 4; // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) NewLIR1(kX86Pop32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* + cfi_.AdjustCFAOffset(-kRegSize); if (!cu_->target64) { NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum())); NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum())); } } @@ -321,13 +340,13 @@ void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) { * Bit of a hack here - in the absence of a real scheduling pass, * emit the next instruction in static & direct invoke sequences. 
*/ -static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, - int state, const MethodReference& target_method, - uint32_t, - uintptr_t direct_code, uintptr_t direct_method, - InvokeType type) { +int X86Mir2Lir::X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) { UNUSED(info, direct_code); - Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); + X86Mir2Lir* cg = static_cast<X86Mir2Lir*>(cu->cg.get()); if (direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] @@ -345,6 +364,17 @@ static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, default: return -1; } + } else if (cg->CanUseOpPcRelDexCacheArrayLoad()) { + switch (state) { + case 0: { + CHECK_EQ(cu->dex_file, target_method.dex_file); + size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index); + cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, cg->TargetReg(kArg0, kRef)); + break; + } + default: + return -1; + } } else { RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 040a8c4bef..72580a3e39 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -28,7 +28,7 @@ namespace art { -class X86Mir2Lir : public Mir2Lir { +class X86Mir2Lir FINAL : public Mir2Lir { protected: class InToRegStorageX86_64Mapper : public InToRegStorageMapper { public: @@ -104,6 +104,9 @@ class X86Mir2Lir : public Mir2Lir { /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage) void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE; + bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE; + void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE; + void GenImplicitNullCheck(RegStorage reg, int opt_flags) OVERRIDE; // Required for target - register utilities. @@ -372,17 +375,15 @@ class X86Mir2Lir : public Mir2Lir { */ LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE; + void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) OVERRIDE; + void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE; + void DoPromotion() OVERRIDE; + /* * @brief Handle x86 specific literals */ void InstallLiteralPools() OVERRIDE; - /* - * @brief Generate the debug_frame FDE information. - * @returns pointer to vector containing CFE information - */ - std::vector<uint8_t>* ReturnFrameDescriptionEntry() OVERRIDE; - LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; protected: @@ -491,7 +492,6 @@ class X86Mir2Lir : public Mir2Lir { void EmitCallThread(const X86EncodingMap* entry, int32_t disp); void EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table, int32_t raw_index, int scale, int32_t table_or_disp); - void EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset); void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir); void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, ConditionCode ccode); @@ -862,12 +862,6 @@ class X86Mir2Lir : public Mir2Lir { void SpillFPRegs(); /* - * @brief Perform MIR analysis before compiling method. - * @note Invokes Mir2LiR::Materialize after analysis. 
- */ - void Materialize(); - - /* * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register * without regard to data type. In practice, this can result in UpdateLoc returning a * location record for a Dalvik float value in a core register, and vis-versa. For targets @@ -881,67 +875,39 @@ class X86Mir2Lir : public Mir2Lir { RegLocation UpdateLocWideTyped(RegLocation loc); /* - * @brief Analyze MIR before generating code, to prepare for the code generation. - */ - void AnalyzeMIR(); - - /* - * @brief Analyze one basic block. - * @param bb Basic block to analyze. - */ - void AnalyzeBB(BasicBlock* bb); - - /* - * @brief Analyze one extended MIR instruction - * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. - * @param mir Extended instruction to analyze. - */ - void AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir); - - /* - * @brief Analyze one MIR instruction - * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. - * @param mir Instruction to analyze. - */ - virtual void AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir); - - /* * @brief Analyze one MIR float/double instruction * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. * @param mir Instruction to analyze. + * @return true iff the instruction needs to load a literal using PC-relative addressing. */ - virtual void AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir); + bool AnalyzeFPInstruction(int opcode, MIR* mir); /* * @brief Analyze one use of a double operand. * @param rl_use Double RegLocation for the operand. + * @return true iff the instruction needs to load a literal using PC-relative addressing. */ - void AnalyzeDoubleUse(RegLocation rl_use); + bool AnalyzeDoubleUse(RegLocation rl_use); /* * @brief Analyze one invoke-static MIR instruction - * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. * @param mir Instruction to analyze. + * @return true iff the instruction needs to load a literal using PC-relative addressing. */ - void AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir); + bool AnalyzeInvokeStaticIntrinsic(MIR* mir); // Information derived from analysis of MIR - // The compiler temporary for the code address of the method. - CompilerTemp *base_of_code_; - - // Have we decided to compute a ptr to code and store in temporary VR? - bool store_method_addr_; + // The base register for PC-relative addressing if promoted (32-bit only). + RegStorage pc_rel_base_reg_; - // Have we used the stored method address? - bool store_method_addr_used_; + // Have we actually used the pc_rel_base_reg_? + bool pc_rel_base_reg_used_; - // Instructions to remove if we didn't use the stored method address. - LIR* setup_method_address_[2]; + // Pointer to the "call +0" insn that sets up the promoted register for PC-relative addressing. + // The anchor "pop" insn is NEXT_LIR(setup_pc_rel_base_reg_). The whole "call +0; pop <reg>" + // sequence will be removed in AssembleLIR() if we do not actually use PC-relative addressing. + LIR* setup_pc_rel_base_reg_; // There are 2 chained insns (no reordering allowed). // Instructions needing patching with Method* values. ArenaVector<LIR*> method_address_insns_; @@ -952,11 +918,8 @@ class X86Mir2Lir : public Mir2Lir { // Instructions needing patching with PC relative code addresses. ArenaVector<LIR*> call_method_insns_; - // Prologue decrement of stack pointer. 
- LIR* stack_decrement_; - - // Epilogue increment of stack pointer. - LIR* stack_increment_; + // Instructions needing patching with PC relative code addresses. + ArenaVector<LIR*> dex_cache_access_insns_; // The list of const vector literals. LIR* const_vectors_; @@ -992,6 +955,20 @@ class X86Mir2Lir : public Mir2Lir { void SwapBits(RegStorage result_reg, int shift, int32_t value); void SwapBits64(RegStorage result_reg, int shift, int64_t value); + static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type); + + LIR* OpLoadPc(RegStorage r_dest); + RegStorage GetPcAndAnchor(LIR** anchor, RegStorage r_tmp = RegStorage::InvalidReg()); + + // When we don't know the proper offset for the value, pick one that will force + // 4 byte offset. We will fix this up in the assembler or linker later to have + // the right value. + static constexpr int kDummy32BitOffset = 256; + static const X86EncodingMap EncodingMap[kX86Last]; friend std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs); diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index d8616a7bf3..cfe0480c54 100755 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -756,24 +756,6 @@ bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) branch_nan->target = NewLIR0(kPseudoTargetLabel); LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000)); - // The base_of_code_ compiler temp is non-null when it is reserved - // for being able to do data accesses relative to method start. - if (base_of_code_ != nullptr) { - // Loading from the constant pool may have used base of code register. - // However, the code here generates logic in diamond shape and not all - // paths load base of code register. Therefore, we ensure it is clobbered so - // that the temp caching system does not believe it is live at merge point. - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - rl_method = UpdateLocWide(rl_method); - } else { - rl_method = UpdateLoc(rl_method); - } - if (rl_method.location == kLocPhysReg) { - Clobber(rl_method.reg); - } - } - LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0); // Handle Min/Max. Copy greater/lesser value from src2. branch_cond1->target = NewLIR0(kPseudoTargetLabel); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 4eb626c14f..1043815e10 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -830,6 +830,10 @@ RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, return rl_result; } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); @@ -928,6 +932,7 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { // Do we have a free register for intermediate calculations? RegStorage tmp = AllocTemp(false); + const int kRegSize = cu_->target64 ? 8 : 4; if (tmp == RegStorage::InvalidReg()) { /* * No, will use 'edi'. 
@@ -946,6 +951,11 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { IsTemp(rl_result.reg.GetHigh())); tmp = rs_rDI; NewLIR1(kX86Push32R, tmp.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(tmp.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0); + } } // Now we are ready to do calculations. @@ -957,6 +967,10 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { // Let's put pop 'edi' here to break a bit the dependency chain. if (tmp == rs_rDI) { NewLIR1(kX86Pop32R, tmp.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(tmp.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg())); + } } else { FreeTemp(tmp); } @@ -1104,6 +1118,7 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // If is_long, high half is in info->args[5] RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object // If is_long, high half is in info->args[7] + const int kRegSize = cu_->target64 ? 8 : 4; if (is_long && cu_->target64) { // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX. @@ -1125,7 +1140,6 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { FreeTemp(rs_r0q); } else if (is_long) { // TODO: avoid unnecessary loads of SI and DI when the values are in registers. - // TODO: CFI support. FlushAllRegs(); LockCallTemps(); RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX); @@ -1148,11 +1162,21 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { NewLIR1(kX86Push32R, rs_rDI.GetReg()); MarkTemp(rs_rDI); LockTemp(rs_rDI); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0); + } } if (push_si) { NewLIR1(kX86Push32R, rs_rSI.GetReg()); MarkTemp(rs_rSI); LockTemp(rs_rSI); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(rs_rSI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0); + } } ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u); @@ -1183,11 +1207,19 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { FreeTemp(rs_rSI); UnmarkTemp(rs_rSI); NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(rs_rSI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum())); + } } if (push_di) { FreeTemp(rs_rDI); UnmarkTemp(rs_rDI); NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum())); + } } FreeCallTemps(); } else { @@ -1327,37 +1359,79 @@ bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { if (cu_->target64) { // We can do this directly using RIP addressing. - // We don't know the proper offset for the value, so pick one that will force - // 4 byte offset. We will fix this up in the assembler later to have the right - // value. 
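  // On x86-64 the load below uses RIP-relative addressing directly; the kDummy32BitOffset
  // placeholder is fixed up at assembly time once the literal's offset is known.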
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); - LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, 256); + LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, kDummy32BitOffset); res->target = target; res->flags.fixup = kFixupLoad; return; } - CHECK(base_of_code_ != nullptr); - - // Address the start of the method - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - LoadValueDirectWideFixed(rl_method, reg); - } else { - LoadValueDirectFixed(rl_method, reg); - } - store_method_addr_used_ = true; + // Get the PC to a register and get the anchor. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor); // Load the proper value from the literal area. - // We don't know the proper offset for the value, so pick one that will force - // 4 byte offset. We will fix this up in the assembler later to have the right - // value. ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); - LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256); + LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), r_pc.GetReg(), kDummy32BitOffset); + res->operands[4] = WrapPointer(anchor); res->target = target; res->flags.fixup = kFixupLoad; } +bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + return dex_cache_arrays_layout_.Valid(); +} + +LIR* X86Mir2Lir::OpLoadPc(RegStorage r_dest) { + DCHECK(!cu_->target64); + LIR* call = NewLIR1(kX86CallI, 0); + call->flags.fixup = kFixupLabel; + LIR* pop = NewLIR1(kX86Pop32R, r_dest.GetReg()); + pop->flags.fixup = kFixupLabel; + DCHECK(NEXT_LIR(call) == pop); + return call; +} + +RegStorage X86Mir2Lir::GetPcAndAnchor(LIR** anchor, RegStorage r_tmp) { + if (pc_rel_base_reg_.Valid()) { + DCHECK(setup_pc_rel_base_reg_ != nullptr); + *anchor = NEXT_LIR(setup_pc_rel_base_reg_); + DCHECK(*anchor != nullptr); + DCHECK_EQ((*anchor)->opcode, kX86Pop32R); + pc_rel_base_reg_used_ = true; + return pc_rel_base_reg_; + } else { + RegStorage r_pc = r_tmp.Valid() ? r_tmp : AllocTempRef(); + LIR* load_pc = OpLoadPc(r_pc); + *anchor = NEXT_LIR(load_pc); + DCHECK(*anchor != nullptr); + DCHECK_EQ((*anchor)->opcode, kX86Pop32R); + return r_pc; + } +} + +void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, + RegStorage r_dest) { + if (cu_->target64) { + LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), kRIPReg, kDummy32BitOffset); + mov->flags.fixup = kFixupLabel; + mov->operands[3] = WrapPointer(dex_file); + mov->operands[4] = offset; + mov->target = mov; // Used for pc_insn_offset (not used by x86-64 relative patcher). + dex_cache_access_insns_.push_back(mov); + } else { + // Get the PC to a register and get the anchor. Use r_dest for the temp if needed. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor, r_dest); + LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), r_pc.GetReg(), kDummy32BitOffset); + mov->flags.fixup = kFixupLabel; + mov->operands[3] = WrapPointer(dex_file); + mov->operands[4] = offset; + mov->target = anchor; // Used for pc_insn_offset. 
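    // The overall 32-bit pattern is roughly (sketch; reg_pc is whichever register holds the
    // anchor PC, emitted once in the prologue when the base is promoted, otherwise inline):
    //     call +0                               // pushes the address of the next instruction
    //   anchor:
    //     pop  reg_pc                           // reg_pc now holds the anchor PC
    //     mov  r_dest, [reg_pc + kDummy32BitOffset]
    // The dummy displacement is later patched to (dex cache array element - anchor).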
+ dex_cache_access_insns_.push_back(mov); + } +} + LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) { UNUSED(r_base, count); LOG(FATAL) << "Unexpected use of OpVldm for x86"; diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index f128eb78a3..a16e242d08 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -32,7 +32,6 @@ #include "mirror/string.h" #include "oat.h" #include "x86_lir.h" -#include "utils/dwarf_cfi.h" namespace art { @@ -725,6 +724,14 @@ int X86Mir2Lir::NumReservableVectorRegisters(bool long_or_fp) { return long_or_fp ? num_vector_temps - 2 : num_vector_temps - 1; } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + +static dwarf::Reg DwarfFpReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Fp(num) : dwarf::Reg::X86Fp(num); +} + void X86Mir2Lir::SpillCoreRegs() { if (num_core_spills_ == 0) { return; @@ -735,11 +742,11 @@ void X86Mir2Lir::SpillCoreRegs() { frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); OpSize size = cu_->target64 ? k64 : k32; const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - StoreBaseDisp(rs_rSP, offset, - cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg), - size, kNotVolatile); + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { + RegStorage r_src = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg); + StoreBaseDisp(rs_rSP, offset, r_src, size, kNotVolatile); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, reg), offset); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } @@ -754,10 +761,11 @@ void X86Mir2Lir::UnSpillCoreRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); OpSize size = cu_->target64 ? k64 : k32; const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - LoadBaseDisp(rs_rSP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg), - size, kNotVolatile); + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { + RegStorage r_dest = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg); + LoadBaseDisp(rs_rSP, offset, r_dest, size, kNotVolatile); + cfi_.Restore(DwarfCoreReg(cu_->target64, reg)); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } @@ -771,9 +779,10 @@ void X86Mir2Lir::SpillFPRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { StoreBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile); + cfi_.RelOffset(DwarfFpReg(cu_->target64, reg), offset); offset += sizeof(double); } } @@ -786,10 +795,11 @@ void X86Mir2Lir::UnSpillFPRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); const RegStorage rs_rSP = cu_->target64 ? 
rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { LoadBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile); + cfi_.Restore(DwarfFpReg(cu_->target64, reg)); offset += sizeof(double); } } @@ -825,21 +835,22 @@ RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_x86_64_mapper_(this), in_to_reg_storage_x86_mapper_(this), - base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false), + pc_rel_base_reg_(RegStorage::InvalidReg()), + pc_rel_base_reg_used_(false), + setup_pc_rel_base_reg_(nullptr), method_address_insns_(arena->Adapter()), class_type_address_insns_(arena->Adapter()), call_method_insns_(arena->Adapter()), - stack_decrement_(nullptr), stack_increment_(nullptr), + dex_cache_access_insns_(arena->Adapter()), const_vectors_(nullptr) { method_address_insns_.reserve(100); class_type_address_insns_.reserve(100); call_method_insns_.reserve(100); - store_method_addr_used_ = false; - for (int i = 0; i < kX86Last; i++) { - DCHECK_EQ(X86Mir2Lir::EncodingMap[i].opcode, i) - << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name - << " is wrong: expecting " << i << ", seeing " - << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode); + for (int i = 0; i < kX86Last; i++) { + DCHECK_EQ(X86Mir2Lir::EncodingMap[i].opcode, i) + << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name + << " is wrong: expecting " << i << ", seeing " + << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode); } } @@ -924,14 +935,6 @@ void X86Mir2Lir::DumpRegLocation(RegLocation loc) { << ", orig: " << loc.orig_sreg; } -void X86Mir2Lir::Materialize() { - // A good place to put the analysis before starting. - AnalyzeMIR(); - - // Now continue with regular code generation. - Mir2Lir::Materialize(); -} - void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type, SpecialTargetRegister symbolic_reg) { /* @@ -1058,6 +1061,9 @@ void X86Mir2Lir::InstallLiteralPools() { } } + patches_.reserve(method_address_insns_.size() + class_type_address_insns_.size() + + call_method_insns_.size() + dex_cache_access_insns_.size()); + // Handle the fixups for methods. for (LIR* p : method_address_insns_) { DCHECK_EQ(p->opcode, kX86Mov32RI); @@ -1084,7 +1090,6 @@ void X86Mir2Lir::InstallLiteralPools() { } // And now the PC-relative calls to methods. - patches_.reserve(call_method_insns_.size()); for (LIR* p : call_method_insns_) { DCHECK_EQ(p->opcode, kX86CallI); uint32_t target_method_idx = p->operands[1]; @@ -1096,6 +1101,18 @@ void X86Mir2Lir::InstallLiteralPools() { target_dex_file, target_method_idx)); } + // PC-relative references to dex cache arrays. + for (LIR* p : dex_cache_access_insns_) { + DCHECK(p->opcode == kX86Mov32RM); + const DexFile* dex_file = UnwrapPointer<DexFile>(p->operands[3]); + uint32_t offset = p->operands[4]; + // The offset to patch is the last 4 bytes of the instruction. + int patch_offset = p->offset + p->flags.size - 4; + DCHECK(!p->flags.is_nop); + patches_.push_back(LinkerPatch::DexCacheArrayPatch(patch_offset, dex_file, + p->target->offset, offset)); + } + // And do the normal processing. 
Mir2Lir::InstallLiteralPools(); } @@ -1303,6 +1320,11 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { if (!cu_->target64) { // EDI is promotable in 32-bit mode. NewLIR1(kX86Push32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(4); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0); + } } if (zero_based) { @@ -1398,8 +1420,13 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { // And join up at the end. all_done->target = NewLIR0(kPseudoTargetLabel); - if (!cu_->target64) + if (!cu_->target64) { NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-4); + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetReg())); + } + } // Out of line code returns here. if (slowpath_branch != nullptr) { @@ -1412,100 +1439,6 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { return true; } -static bool ARTRegIDToDWARFRegID(bool is_x86_64, int art_reg_id, int* dwarf_reg_id) { - if (is_x86_64) { - switch (art_reg_id) { - case 3 : *dwarf_reg_id = 3; return true; // %rbx - // This is the only discrepancy between ART & DWARF register numbering. - case 5 : *dwarf_reg_id = 6; return true; // %rbp - case 12: *dwarf_reg_id = 12; return true; // %r12 - case 13: *dwarf_reg_id = 13; return true; // %r13 - case 14: *dwarf_reg_id = 14; return true; // %r14 - case 15: *dwarf_reg_id = 15; return true; // %r15 - default: return false; // Should not get here - } - } else { - switch (art_reg_id) { - case 5: *dwarf_reg_id = 5; return true; // %ebp - case 6: *dwarf_reg_id = 6; return true; // %esi - case 7: *dwarf_reg_id = 7; return true; // %edi - default: return false; // Should not get here - } - } -} - -std::vector<uint8_t>* X86Mir2Lir::ReturnFrameDescriptionEntry() { - std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>; - - // Generate the FDE for the method. - DCHECK_NE(data_offset_, 0U); - - WriteFDEHeader(cfi_info, cu_->target64); - WriteFDEAddressRange(cfi_info, data_offset_, cu_->target64); - - // The instructions in the FDE. - if (stack_decrement_ != nullptr) { - // Advance LOC to just past the stack decrement. - uint32_t pc = NEXT_LIR(stack_decrement_)->offset; - DW_CFA_advance_loc(cfi_info, pc); - - // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size. - DW_CFA_def_cfa_offset(cfi_info, frame_size_); - - // Handle register spills - const uint32_t kSpillInstLen = (cu_->target64) ? 5 : 4; - const int kDataAlignmentFactor = (cu_->target64) ? -8 : -4; - uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum()); - int offset = -(GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - pc += kSpillInstLen; - - // Advance LOC to pass this instruction - DW_CFA_advance_loc(cfi_info, kSpillInstLen); - - int dwarf_reg_id; - if (ARTRegIDToDWARFRegID(cu_->target64, reg, &dwarf_reg_id)) { - // DW_CFA_offset_extended_sf reg offset - DW_CFA_offset_extended_sf(cfi_info, dwarf_reg_id, offset / kDataAlignmentFactor); - } - - offset += GetInstructionSetPointerSize(cu_->instruction_set); - } - } - - // We continue with that stack until the epilogue. 
- if (stack_increment_ != nullptr) { - uint32_t new_pc = NEXT_LIR(stack_increment_)->offset; - DW_CFA_advance_loc(cfi_info, new_pc - pc); - - // We probably have code snippets after the epilogue, so save the - // current state: DW_CFA_remember_state. - DW_CFA_remember_state(cfi_info); - - // We have now popped the stack: DW_CFA_def_cfa_offset 4/8. - // There is only the return PC on the stack now. - DW_CFA_def_cfa_offset(cfi_info, GetInstructionSetPointerSize(cu_->instruction_set)); - - // Everything after that is the same as before the epilogue. - // Stack bump was followed by RET instruction. - LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_)); - if (post_ret_insn != nullptr) { - pc = new_pc; - new_pc = post_ret_insn->offset; - DW_CFA_advance_loc(cfi_info, new_pc - pc); - // Restore the state: DW_CFA_restore_state. - DW_CFA_restore_state(cfi_info); - } - } - } - - PadCFI(cfi_info); - WriteCFILength(cfi_info, cu_->target64); - - return cfi_info; -} - void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) { switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) { case kMirOpReserveVectorRegisters: @@ -1642,20 +1575,17 @@ void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) { LIR* load; ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); if (cu_->target64) { - load = NewLIR3(opcode, reg, kRIPReg, 256 /* bogus */); + load = NewLIR3(opcode, reg, kRIPReg, kDummy32BitOffset); } else { - // Address the start of the method. - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - rl_method = LoadValueWide(rl_method, kCoreReg); - } else { - rl_method = LoadValue(rl_method, kCoreReg); + // Get the PC to a register and get the anchor. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor); + + load = NewLIR3(opcode, reg, r_pc.GetReg(), kDummy32BitOffset); + load->operands[4] = WrapPointer(anchor); + if (IsTemp(r_pc)) { + FreeTemp(r_pc); } - - load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */); - - // The literal pool needs position independent logic. - store_method_addr_used_ = true; } load->flags.fixup = kFixupLoad; load->target = data_target; diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index 893b98a49d..efcb9eefb5 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -17,6 +17,7 @@ #include "codegen_x86.h" #include "base/logging.h" +#include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "dex/dataflow_iterator-inl.h" #include "dex/quick/dex_file_method_inliner.h" @@ -574,7 +575,7 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { DCHECK(r_dest.IsDouble()); if (value == 0) { return NewLIR2(kX86XorpdRR, low_reg_val, low_reg_val); - } else if (base_of_code_ != nullptr || cu_->target64) { + } else if (pc_rel_base_reg_.Valid() || cu_->target64) { // We will load the value from the literal area. LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi); if (data_target == NULL) { @@ -589,17 +590,16 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { if (cu_->target64) { res = NewLIR3(kX86MovsdRM, low_reg_val, kRIPReg, 256 /* bogus */); } else { - // Address the start of the method. 
- RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - rl_method = LoadValueWide(rl_method, kCoreReg); - } else { - rl_method = LoadValue(rl_method, kCoreReg); - } + // Get the PC to a register and get the anchor. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor); - res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val), + res = LoadBaseDisp(r_pc, kDummy32BitOffset, RegStorage::FloatSolo64(low_reg_val), kDouble, kNotVolatile); - store_method_addr_used_ = true; + res->operands[4] = WrapPointer(anchor); + if (IsTemp(r_pc)) { + FreeTemp(r_pc); + } } res->target = data_target; res->flags.fixup = kFixupLoad; @@ -954,82 +954,14 @@ LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegS return branch; } -void X86Mir2Lir::AnalyzeMIR() { - // Assume we don't need a pointer to the base of the code. - cu_->NewTimingSplit("X86 MIR Analysis"); - store_method_addr_ = false; - - // Walk the MIR looking for interesting items. - PreOrderDfsIterator iter(mir_graph_); - BasicBlock* curr_bb = iter.Next(); - while (curr_bb != NULL) { - AnalyzeBB(curr_bb); - curr_bb = iter.Next(); - } - - // Did we need a pointer to the method code? Not in 64 bit mode. - base_of_code_ = nullptr; - - // store_method_addr_ must be false for x86_64, since RIP addressing is used. - CHECK(!(cu_->target64 && store_method_addr_)); - if (store_method_addr_) { - base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false); - DCHECK(base_of_code_ != nullptr); - } -} - -void X86Mir2Lir::AnalyzeBB(BasicBlock* bb) { - if (bb->block_type == kDead) { - // Ignore dead blocks +void X86Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) { + if (cu_->target64) { + Mir2Lir::AnalyzeMIR(core_counts, mir, weight); return; } - for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { - int opcode = mir->dalvikInsn.opcode; - if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { - AnalyzeExtendedMIR(opcode, bb, mir); - } else { - AnalyzeMIR(opcode, bb, mir); - } - } -} - - -void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) { - switch (opcode) { - // Instructions referencing doubles. - case kMirOpFusedCmplDouble: - case kMirOpFusedCmpgDouble: - AnalyzeFPInstruction(opcode, bb, mir); - break; - case kMirOpConstVector: - if (!cu_->target64) { - store_method_addr_ = true; - } - break; - case kMirOpPackedMultiply: - case kMirOpPackedShiftLeft: - case kMirOpPackedSignedShiftRight: - case kMirOpPackedUnsignedShiftRight: - if (!cu_->target64) { - // Byte emulation requires constants from the literal pool. - OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16); - if (opsize == kSignedByte || opsize == kUnsignedByte) { - store_method_addr_ = true; - } - } - break; - default: - // Ignore the rest. - break; - } -} - -void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) { - // Looking for - // - Do we need a pointer to the code (used for packed switches and double lits)? - // 64 bit uses RIP addressing instead. - + int opcode = mir->dalvikInsn.opcode; + bool uses_pc_rel_load = false; switch (opcode) { // Instructions referencing doubles. 
case Instruction::CMPL_DOUBLE: @@ -1045,34 +977,62 @@ void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) { case Instruction::MUL_DOUBLE_2ADDR: case Instruction::DIV_DOUBLE_2ADDR: case Instruction::REM_DOUBLE_2ADDR: - AnalyzeFPInstruction(opcode, bb, mir); + case kMirOpFusedCmplDouble: + case kMirOpFusedCmpgDouble: + uses_pc_rel_load = AnalyzeFPInstruction(opcode, mir); break; - // Packed switches and array fills need a pointer to the base of the method. - case Instruction::FILL_ARRAY_DATA: + // Packed switch needs the PC-relative pointer if it's large. case Instruction::PACKED_SWITCH: - if (!cu_->target64) { - store_method_addr_ = true; + if (mir_graph_->GetTable(mir, mir->dalvikInsn.vB)[1] > kSmallSwitchThreshold) { + uses_pc_rel_load = true; } break; + + case kMirOpConstVector: + uses_pc_rel_load = true; + break; + case kMirOpPackedMultiply: + case kMirOpPackedShiftLeft: + case kMirOpPackedSignedShiftRight: + case kMirOpPackedUnsignedShiftRight: + { + // Byte emulation requires constants from the literal pool. + OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16); + if (opsize == kSignedByte || opsize == kUnsignedByte) { + uses_pc_rel_load = true; + } + } + break; + case Instruction::INVOKE_STATIC: case Instruction::INVOKE_STATIC_RANGE: - AnalyzeInvokeStatic(opcode, bb, mir); - break; + if (mir_graph_->GetMethodLoweringInfo(mir).IsIntrinsic()) { + uses_pc_rel_load = AnalyzeInvokeStaticIntrinsic(mir); + break; + } + FALLTHROUGH_INTENDED; default: - // Other instructions are not interesting yet. + Mir2Lir::AnalyzeMIR(core_counts, mir, weight); break; } + + if (uses_pc_rel_load) { + DCHECK(pc_rel_temp_ != nullptr); + core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight; + } } -void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) { - UNUSED(bb); +bool X86Mir2Lir::AnalyzeFPInstruction(int opcode, MIR* mir) { + DCHECK(!cu_->target64); // Look at all the uses, and see if they are double constants. uint64_t attrs = MIRGraph::GetDataFlowAttributes(static_cast<Instruction::Code>(opcode)); int next_sreg = 0; if (attrs & DF_UA) { if (attrs & DF_A_WIDE) { - AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg)); + if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) { + return true; + } next_sreg += 2; } else { next_sreg++; @@ -1080,7 +1040,9 @@ void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) { } if (attrs & DF_UB) { if (attrs & DF_B_WIDE) { - AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg)); + if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) { + return true; + } next_sreg += 2; } else { next_sreg++; @@ -1088,15 +1050,39 @@ void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) { } if (attrs & DF_UC) { if (attrs & DF_C_WIDE) { - AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg)); + if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) { + return true; + } } } + return false; } -void X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) { +inline bool X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) { // If this is a double literal, we will want it in the literal pool on 32b platforms. - if (use.is_const && !cu_->target64) { - store_method_addr_ = true; + DCHECK(!cu_->target64); + return use.is_const; +} + +bool X86Mir2Lir::AnalyzeInvokeStaticIntrinsic(MIR* mir) { + // 64 bit RIP addressing doesn't need this analysis. + DCHECK(!cu_->target64); + + // Retrieve the type of the intrinsic. 
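  // Only the intrinsics that materialize double constants from the literal pool
  // (abs(double), min/max(double)) need the PC-relative base on 32-bit x86; the switch
  // below returns false for everything else.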
+ MethodReference method_ref = mir_graph_->GetMethodLoweringInfo(mir).GetTargetMethod(); + DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); + DexFileMethodInliner* method_inliner = + cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(method_ref.dex_file); + InlineMethod method; + bool is_intrinsic = method_inliner->IsIntrinsic(method_ref.dex_method_index, &method); + DCHECK(is_intrinsic); + + switch (method.opcode) { + case kIntrinsicAbsDouble: + case kIntrinsicMinMaxDouble: + return true; + default: + return false; } } @@ -1128,37 +1114,47 @@ RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc) { return loc; } -void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir) { - UNUSED(opcode, bb); - - // 64 bit RIP addressing doesn't need store_method_addr_ set. +LIR* X86Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) { + UNUSED(r_tgt); // Call to absolute memory location doesn't need a temporary target register. if (cu_->target64) { - return; + return OpThreadMem(op, GetThreadOffset<8>(trampoline)); + } else { + return OpThreadMem(op, GetThreadOffset<4>(trampoline)); } +} - uint32_t index = mir->dalvikInsn.vB; - DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); - DexFileMethodInliner* method_inliner = - cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file); - InlineMethod method; - if (method_inliner->IsIntrinsic(index, &method)) { - switch (method.opcode) { - case kIntrinsicAbsDouble: - case kIntrinsicMinMaxDouble: - store_method_addr_ = true; - break; - default: - break; +void X86Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { + // Start with the default counts. + Mir2Lir::CountRefs(core_counts, fp_counts, num_regs); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is used only once outside any loops (weight = 1), + // avoid the promotion, otherwise boost the weight by factor 2 because the full PC-relative + // load sequence is 3 instructions long and by promoting the PC base we save 2 instructions + // per use. + int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low); + if (core_counts[p_map_idx].count == 1) { + core_counts[p_map_idx].count = 0; + } else { + core_counts[p_map_idx].count *= 2; } } } -LIR* X86Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) { - UNUSED(r_tgt); // Call to absolute memory location doesn't need a temporary target register. - if (cu_->target64) { - return OpThreadMem(op, GetThreadOffset<8>(trampoline)); - } else { - return OpThreadMem(op, GetThreadOffset<4>(trampoline)); +void X86Mir2Lir::DoPromotion() { + if (!cu_->target64) { + pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false); + } + + Mir2Lir::DoPromotion(); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is promoted, remember the register but + // always remove the temp's stack location to avoid unnecessarily bloating the stack. 
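  // If the temp did not get a physical register, pc_rel_base_reg_ stays invalid and
  // GetPcAndAnchor() falls back to materializing the PC into a scratch register at each use.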
+ pc_rel_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg; + DCHECK(!pc_rel_base_reg_.Valid() || !pc_rel_base_reg_.IsFloat()); + mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_); + pc_rel_temp_ = nullptr; } } diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 7dea09a579..57db0158e4 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -635,8 +635,6 @@ enum X86OpCode { kX86CallT, // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp kX86CallI, // call <relative> - 0: disp; Used for core.oat linking only kX86Ret, // ret; no lir operands - kX86StartOfMethod, // call 0; pop reg; sub reg, # - generate start of method into reg - // lir operands - 0: reg kX86PcRelLoadRA, // mov reg, [base + index * scale + PC relative displacement] // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table kX86PcRelAdr, // mov reg, PC relative displacement; lir operands - 0: reg, 1: table @@ -670,7 +668,6 @@ enum X86EncodingKind { kRegMemCond, // RM instruction kind followed by a condition. kJmp, kJcc, kCall, // Branch instruction kinds. kPcRel, // Operation with displacement that is PC relative - kMacro, // An instruction composing multiple others kUnimplemented // Encoding used when an instruction isn't yet implemented. }; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 100d49a99e..c2b837512c 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -31,6 +31,7 @@ #include "base/timing_logger.h" #include "class_linker.h" #include "compiled_class.h" +#include "compiled_method.h" #include "compiler.h" #include "compiler_driver-inl.h" #include "dex_compilation_unit.h" @@ -62,6 +63,7 @@ #include "thread_pool.h" #include "trampolines/trampoline_compiler.h" #include "transaction.h" +#include "utils/dex_cache_arrays_layout-inl.h" #include "utils/swap_space.h" #include "verifier/method_verifier.h" #include "verifier/method_verifier-inl.h" @@ -348,6 +350,7 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, verification_results_(verification_results), method_inliner_map_(method_inliner_map), compiler_(Compiler::Create(this, compiler_kind)), + compiler_kind_(compiler_kind), instruction_set_(instruction_set), instruction_set_features_(instruction_set_features), freezing_constructor_lock_("freezing constructor lock"), @@ -1173,6 +1176,13 @@ uint32_t CompilerDriver::GetReferenceDisableFlagOffset() const { return klass->GetDisableIntrinsicFlagOffset().Uint32Value(); } +DexCacheArraysLayout CompilerDriver::GetDexCacheArraysLayout(const DexFile* dex_file) { + // Currently only image dex caches have fixed array layout. + return IsImage() && GetSupportBootImageFixup() + ? DexCacheArraysLayout(dex_file) + : DexCacheArraysLayout(); +} + void CompilerDriver::ProcessedInstanceField(bool resolved) { if (!resolved) { stats_->UnresolvedInstanceField(); @@ -2205,10 +2215,8 @@ void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_i InstructionSetHasGenericJniStub(instruction_set_)) { // Leaving this empty will trigger the generic JNI version } else { - if (instruction_set_ != kMips64) { // Use generic JNI for Mips64 (temporarily). 
- compiled_method = compiler_->JniCompile(access_flags, method_idx, dex_file); - CHECK(compiled_method != nullptr); - } + compiled_method = compiler_->JniCompile(access_flags, method_idx, dex_file); + CHECK(compiled_method != nullptr); } } else if ((access_flags & kAccAbstract) != 0) { // Abstract methods don't have code. @@ -2246,7 +2254,7 @@ void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_i // Count non-relative linker patches. size_t non_relative_linker_patch_count = 0u; for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.Type() != kLinkerPatchCallRelative) { + if (!patch.IsPcRelative()) { ++non_relative_linker_patch_count; } } @@ -2263,8 +2271,11 @@ void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_i DCHECK(GetCompiledMethod(method_ref) != nullptr) << PrettyMethod(method_idx, dex_file); } - // Done compiling, delete the verified method to reduce native memory usage. - verification_results_->RemoveVerifiedMethod(method_ref); + // Done compiling, delete the verified method to reduce native memory usage. Do not delete in + // optimizing compiler, which may need the verified method again for inlining. + if (compiler_kind_ != Compiler::kOptimizing) { + verification_results_->RemoveVerifiedMethod(method_ref); + } if (self->IsExceptionPending()) { ScopedObjectAccess soa(self); @@ -2359,44 +2370,6 @@ bool CompilerDriver::WriteElf(const std::string& android_root, SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host); } -void CompilerDriver::InstructionSetToLLVMTarget(InstructionSet instruction_set, - std::string* target_triple, - std::string* target_cpu, - std::string* target_attr) { - switch (instruction_set) { - case kThumb2: - *target_triple = "thumb-none-linux-gnueabi"; - *target_cpu = "cortex-a9"; - *target_attr = "+thumb2,+neon,+neonfp,+vfp3,+db"; - break; - - case kArm: - *target_triple = "armv7-none-linux-gnueabi"; - // TODO: Fix for Nexus S. - *target_cpu = "cortex-a9"; - // TODO: Fix for Xoom. 
- *target_attr = "+v7,+neon,+neonfp,+vfp3,+db"; - break; - - case kX86: - *target_triple = "i386-pc-linux-gnu"; - *target_attr = ""; - break; - - case kX86_64: - *target_triple = "x86_64-pc-linux-gnu"; - *target_attr = ""; - break; - - case kMips: - *target_triple = "mipsel-unknown-linux"; - *target_attr = "mips32r2"; - break; - - default: - LOG(FATAL) << "Unknown instruction set: " << instruction_set; - } - } bool CompilerDriver::SkipCompilation(const std::string& method_name) { if (!profile_present_) { @@ -2438,7 +2411,7 @@ std::string CompilerDriver::GetMemoryUsageString(bool extended) const { gc::Heap* const heap = runtime->GetHeap(); oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated()); oss << " java alloc=" << PrettySize(heap->GetBytesAllocated()); -#ifdef HAVE_MALLOC_H +#if defined(__BIONIC__) || defined(__GLIBC__) struct mallinfo info = mallinfo(); const size_t allocated_space = static_cast<size_t>(info.uordblks); const size_t free_space = static_cast<size_t>(info.fordblks); diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index b825293c33..a6ed5590dc 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -26,11 +26,8 @@ #include "base/mutex.h" #include "base/timing_logger.h" #include "class_reference.h" -#include "compiled_method.h" #include "compiler.h" #include "dex_file.h" -#include "dex/verified_method.h" -#include "driver/compiler_options.h" #include "invoke_type.h" #include "method_reference.h" #include "mirror/class.h" // For mirror::Class::Status. @@ -39,7 +36,9 @@ #include "runtime.h" #include "safe_map.h" #include "thread_pool.h" +#include "utils/array_ref.h" #include "utils/dedupe_set.h" +#include "utils/dex_cache_arrays_layout.h" #include "utils/swap_space.h" #include "utils.h" @@ -54,6 +53,7 @@ class MethodVerifier; } // namespace verifier class CompiledClass; +class CompiledMethod; class CompilerOptions; class DexCompilationUnit; class DexFileToMethodInlinerMap; @@ -62,6 +62,9 @@ class InstructionSetFeatures; class OatWriter; class ParallelCompilationManager; class ScopedObjectAccess; +template <class Allocator> class SrcMap; +class SrcMapElem; +using SwapSrcMap = SrcMap<SwapAllocator<SrcMapElem>>; template<class T> class Handle; class TimingLogger; class VerificationResults; @@ -318,6 +321,10 @@ class CompilerDriver { bool IsMethodsClassInitialized(mirror::Class* referrer_class, mirror::ArtMethod* resolved_method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // Get the layout of dex cache arrays for a dex file. Returns invalid layout if the + // dex cache arrays don't have a fixed layout. + DexCacheArraysLayout GetDexCacheArraysLayout(const DexFile* dex_file); + void ProcessedInstanceField(bool resolved); void ProcessedStaticField(bool resolved, bool local); void ProcessedInvoke(InvokeType invoke_type, int flags); @@ -378,12 +385,6 @@ class CompilerDriver { OatWriter* oat_writer, File* file); - // TODO: move to a common home for llvm helpers once quick/portable are merged. 
- static void InstructionSetToLLVMTarget(InstructionSet instruction_set, - std::string* target_triple, - std::string* target_cpu, - std::string* target_attr); - void SetCompilerContext(void* compiler_context) { compiler_context_ = compiler_context; } @@ -550,6 +551,7 @@ class CompilerDriver { DexFileToMethodInlinerMap* const method_inliner_map_; std::unique_ptr<Compiler> compiler_; + Compiler::Kind compiler_kind_; const InstructionSet instruction_set_; const InstructionSetFeatures* const instruction_set_features_; diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index e436f52db3..fc00c926b2 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -42,6 +42,11 @@ CompilerOptions::CompilerOptions() init_failure_output_(nullptr) { } +CompilerOptions::~CompilerOptions() { + // The destructor looks empty but it destroys a PassManagerOptions object. We keep it here + // because we don't want to include the PassManagerOptions definition from the header file. +} + CompilerOptions::CompilerOptions(CompilerFilter compiler_filter, size_t huge_method_threshold, size_t large_method_threshold, diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index d06ec278ab..f7ea385e19 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -53,6 +53,7 @@ class CompilerOptions FINAL { static const bool kDefaultIncludePatchInformation = false; CompilerOptions(); + ~CompilerOptions(); CompilerOptions(CompilerFilter compiler_filter, size_t huge_method_threshold, diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h new file mode 100644 index 0000000000..d0d182106f --- /dev/null +++ b/compiler/dwarf/debug_frame_opcode_writer.h @@ -0,0 +1,333 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_ +#define ART_COMPILER_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_ + +#include "dwarf.h" +#include "register.h" +#include "writer.h" +#include "utils.h" + +namespace art { +namespace dwarf { + +// Writer for .debug_frame opcodes (DWARF-3). +// See the DWARF specification for the precise meaning of the opcodes. +// The writer is very light-weight, however it will do the following for you: +// * Choose the most compact encoding of a given opcode. +// * Keep track of current state and convert absolute values to deltas. +// * Divide by header-defined factors as appropriate. +template<typename Allocator = std::allocator<uint8_t> > +class DebugFrameOpCodeWriter : private Writer<Allocator> { + public: + // To save space, DWARF divides most offsets by header-defined factors. + // They are used in integer divisions, so we make them constants. + // We usually subtract from stack base pointer, so making the factor + // negative makes the encoded values positive and thus easier to encode. 
+ static constexpr int kDataAlignmentFactor = -4; + static constexpr int kCodeAlignmentFactor = 1; + + // Explicitely advance the program counter to given location. + void ALWAYS_INLINE AdvancePC(int absolute_pc) { + DCHECK_GE(absolute_pc, current_pc_); + if (UNLIKELY(enabled_)) { + int delta = FactorCodeOffset(absolute_pc - current_pc_); + if (delta != 0) { + if (delta <= 0x3F) { + this->PushUint8(DW_CFA_advance_loc | delta); + } else if (delta <= UINT8_MAX) { + this->PushUint8(DW_CFA_advance_loc1); + this->PushUint8(delta); + } else if (delta <= UINT16_MAX) { + this->PushUint8(DW_CFA_advance_loc2); + this->PushUint16(delta); + } else { + this->PushUint8(DW_CFA_advance_loc4); + this->PushUint32(delta); + } + } + current_pc_ = absolute_pc; + } + } + + // Override this method to automatically advance the PC before each opcode. + virtual void ImplicitlyAdvancePC() { } + + // Common alias in assemblers - spill relative to current stack pointer. + void ALWAYS_INLINE RelOffset(Reg reg, int offset) { + Offset(reg, offset - current_cfa_offset_); + } + + // Common alias in assemblers - increase stack frame size. + void ALWAYS_INLINE AdjustCFAOffset(int delta) { + DefCFAOffset(current_cfa_offset_ + delta); + } + + // Custom alias - spill many registers based on bitmask. + void ALWAYS_INLINE RelOffsetForMany(Reg reg_base, int offset, + uint32_t reg_mask, int reg_size) { + DCHECK(reg_size == 4 || reg_size == 8); + if (UNLIKELY(enabled_)) { + for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) { + // Skip zero bits and go to the set bit. + int num_zeros = CTZ(reg_mask); + i += num_zeros; + reg_mask >>= num_zeros; + RelOffset(Reg(reg_base.num() + i), offset); + offset += reg_size; + } + } + } + + // Custom alias - unspill many registers based on bitmask. + void ALWAYS_INLINE RestoreMany(Reg reg_base, uint32_t reg_mask) { + if (UNLIKELY(enabled_)) { + for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) { + // Skip zero bits and go to the set bit. + int num_zeros = CTZ(reg_mask); + i += num_zeros; + reg_mask >>= num_zeros; + Restore(Reg(reg_base.num() + i)); + } + } + } + + void ALWAYS_INLINE Nop() { + if (UNLIKELY(enabled_)) { + this->PushUint8(DW_CFA_nop); + } + } + + void ALWAYS_INLINE Offset(Reg reg, int offset) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + int factored_offset = FactorDataOffset(offset); // May change sign. + if (factored_offset >= 0) { + if (0 <= reg.num() && reg.num() <= 0x3F) { + this->PushUint8(DW_CFA_offset | reg.num()); + this->PushUleb128(factored_offset); + } else { + this->PushUint8(DW_CFA_offset_extended); + this->PushUleb128(reg.num()); + this->PushUleb128(factored_offset); + } + } else { + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_offset_extended_sf); + this->PushUleb128(reg.num()); + this->PushSleb128(factored_offset); + } + } + } + + void ALWAYS_INLINE Restore(Reg reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + if (0 <= reg.num() && reg.num() <= 0x3F) { + this->PushUint8(DW_CFA_restore | reg.num()); + } else { + this->PushUint8(DW_CFA_restore_extended); + this->PushUleb128(reg.num()); + } + } + } + + void ALWAYS_INLINE Undefined(Reg reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_undefined); + this->PushUleb128(reg.num()); + } + } + + void ALWAYS_INLINE SameValue(Reg reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_same_value); + this->PushUleb128(reg.num()); + } + } + + // The previous value of "reg" is stored in register "new_reg". 
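As a usage sketch for the opcode writer above (illustrative only, not part of the patch; register numbers follow the DWARF x86 mapping exercised by the tests later in this change), a simple prologue that pushes two registers could be described as:

  #include <vector>
  #include "dwarf/debug_frame_opcode_writer.h"

  dwarf::DebugFrameOpCodeWriter<> cfi;
  cfi.DefCFA(dwarf::Reg(4), 4);      // On entry: CFA = ESP + 4 (return address on the stack).
  cfi.AdvancePC(1);                  // After the first instruction...
  cfi.AdjustCFAOffset(8);            // ...the frame grew by 8 bytes (two pushes).
  cfi.RelOffset(dwarf::Reg(5), 0);   // EBP spilled at the current stack pointer.
  cfi.RelOffset(dwarf::Reg(6), 4);   // ESI spilled 4 bytes above it.
  const std::vector<uint8_t>* encoded = cfi.data();  // Compact DW_CFA_* byte stream.

The writer picks the DW_CFA_advance_loc, DW_CFA_def_cfa_offset and DW_CFA_offset encodings automatically and converts the absolute PC and SP-relative offsets into the deltas DWARF expects.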
+ void ALWAYS_INLINE Register(Reg reg, Reg new_reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_register); + this->PushUleb128(reg.num()); + this->PushUleb128(new_reg.num()); + } + } + + void ALWAYS_INLINE RememberState() { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_remember_state); + } + } + + void ALWAYS_INLINE RestoreState() { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_restore_state); + } + } + + void ALWAYS_INLINE DefCFA(Reg reg, int offset) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + if (offset >= 0) { + this->PushUint8(DW_CFA_def_cfa); + this->PushUleb128(reg.num()); + this->PushUleb128(offset); // Non-factored. + } else { + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_def_cfa_sf); + this->PushUleb128(reg.num()); + this->PushSleb128(FactorDataOffset(offset)); + } + } + current_cfa_offset_ = offset; + } + + void ALWAYS_INLINE DefCFARegister(Reg reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_def_cfa_register); + this->PushUleb128(reg.num()); + } + } + + void ALWAYS_INLINE DefCFAOffset(int offset) { + if (UNLIKELY(enabled_)) { + if (current_cfa_offset_ != offset) { + ImplicitlyAdvancePC(); + if (offset >= 0) { + this->PushUint8(DW_CFA_def_cfa_offset); + this->PushUleb128(offset); // Non-factored. + } else { + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_def_cfa_offset_sf); + this->PushSleb128(FactorDataOffset(offset)); + } + } + } + // Uncoditional so that the user can still get and check the value. + current_cfa_offset_ = offset; + } + + void ALWAYS_INLINE ValOffset(Reg reg, int offset) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + uses_dwarf3_features_ = true; + int factored_offset = FactorDataOffset(offset); // May change sign. 
+ if (factored_offset >= 0) { + this->PushUint8(DW_CFA_val_offset); + this->PushUleb128(reg.num()); + this->PushUleb128(factored_offset); + } else { + this->PushUint8(DW_CFA_val_offset_sf); + this->PushUleb128(reg.num()); + this->PushSleb128(factored_offset); + } + } + } + + void ALWAYS_INLINE DefCFAExpression(void * expr, int expr_size) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_def_cfa_expression); + this->PushUleb128(expr_size); + this->PushData(expr, expr_size); + } + } + + void ALWAYS_INLINE Expression(Reg reg, void * expr, int expr_size) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_expression); + this->PushUleb128(reg.num()); + this->PushUleb128(expr_size); + this->PushData(expr, expr_size); + } + } + + void ALWAYS_INLINE ValExpression(Reg reg, void * expr, int expr_size) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_val_expression); + this->PushUleb128(reg.num()); + this->PushUleb128(expr_size); + this->PushData(expr, expr_size); + } + } + + bool IsEnabled() const { return enabled_; } + + void SetEnabled(bool value) { enabled_ = value; } + + int GetCurrentPC() const { return current_pc_; } + + int GetCurrentCFAOffset() const { return current_cfa_offset_; } + + void SetCurrentCFAOffset(int offset) { current_cfa_offset_ = offset; } + + using Writer<Allocator>::data; + + DebugFrameOpCodeWriter(bool enabled = true, + const Allocator& alloc = Allocator()) + : Writer<Allocator>(&opcodes_), + enabled_(enabled), + opcodes_(alloc), + current_cfa_offset_(0), + current_pc_(0), + uses_dwarf3_features_(false) { + if (enabled) { + // Best guess based on couple of observed outputs. + opcodes_.reserve(16); + } + } + + virtual ~DebugFrameOpCodeWriter() { } + + protected: + int FactorDataOffset(int offset) const { + DCHECK_EQ(offset % kDataAlignmentFactor, 0); + return offset / kDataAlignmentFactor; + } + + int FactorCodeOffset(int offset) const { + DCHECK_EQ(offset % kCodeAlignmentFactor, 0); + return offset / kCodeAlignmentFactor; + } + + bool enabled_; // If disabled all writes are no-ops. + std::vector<uint8_t, Allocator> opcodes_; + int current_cfa_offset_; + int current_pc_; + bool uses_dwarf3_features_; + + private: + DISALLOW_COPY_AND_ASSIGN(DebugFrameOpCodeWriter); +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_ diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h new file mode 100644 index 0000000000..c0350b6f8a --- /dev/null +++ b/compiler/dwarf/debug_info_entry_writer.h @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_ +#define ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_ + +#include <unordered_map> + +#include "dwarf.h" +#include "leb128.h" +#include "writer.h" + +namespace art { +namespace dwarf { + +// 32-bit FNV-1a hash function which we use to find duplicate abbreviations. +// See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function +template< typename Allocator > +struct FNVHash { + size_t operator()(const std::vector<uint8_t, Allocator>& v) const { + uint32_t hash = 2166136261u; + for (size_t i = 0; i < v.size(); i++) { + hash = (hash ^ v[i]) * 16777619u; + } + return hash; + } +}; + +/* + * Writer for debug information entries (DIE). + * It also handles generation of abbreviations. + * + * Usage: + * StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes); + * WriteStrp(DW_AT_producer, "Compiler name", debug_str); + * StartTag(DW_TAG_subprogram, DW_CHILDREN_no); + * WriteStrp(DW_AT_name, "Foo", debug_str); + * EndTag(); + * EndTag(); + */ +template< typename Allocator = std::allocator<uint8_t> > +class DebugInfoEntryWriter FINAL : private Writer<Allocator> { + public: + // Start debugging information entry. + void StartTag(Tag tag, Children children) { + DCHECK(has_children) << "This tag can not have nested tags"; + if (inside_entry_) { + // Write abbrev code for the previous entry. + this->UpdateUleb128(abbrev_code_offset_, EndAbbrev()); + inside_entry_ = false; + } + StartAbbrev(tag, children); + // Abbrev code placeholder of sufficient size. + abbrev_code_offset_ = this->data()->size(); + this->PushUleb128(NextAbbrevCode()); + depth_++; + inside_entry_ = true; + has_children = (children == DW_CHILDREN_yes); + } + + // End debugging information entry. + void EndTag() { + DCHECK_GT(depth_, 0); + if (inside_entry_) { + // Write abbrev code for this tag. + this->UpdateUleb128(abbrev_code_offset_, EndAbbrev()); + inside_entry_ = false; + } + if (has_children) { + this->PushUint8(0); // End of children. + } + depth_--; + has_children = true; // Parent tag obviously has children. + } + + void WriteAddr(Attribute attrib, uint64_t value) { + AddAbbrevAttribute(attrib, DW_FORM_addr); + if (is64bit_) { + this->PushUint64(value); + } else { + this->PushUint32(value); + } + } + + void WriteBlock(Attribute attrib, const void* ptr, int size) { + AddAbbrevAttribute(attrib, DW_FORM_block); + this->PushUleb128(size); + this->PushData(ptr, size); + } + + void WriteData1(Attribute attrib, uint8_t value) { + AddAbbrevAttribute(attrib, DW_FORM_data1); + this->PushUint8(value); + } + + void WriteData2(Attribute attrib, uint16_t value) { + AddAbbrevAttribute(attrib, DW_FORM_data2); + this->PushUint16(value); + } + + void WriteData4(Attribute attrib, uint32_t value) { + AddAbbrevAttribute(attrib, DW_FORM_data4); + this->PushUint32(value); + } + + void WriteData8(Attribute attrib, uint64_t value) { + AddAbbrevAttribute(attrib, DW_FORM_data8); + this->PushUint64(value); + } + + void WriteSdata(Attribute attrib, int value) { + AddAbbrevAttribute(attrib, DW_FORM_sdata); + this->PushSleb128(value); + } + + void WriteUdata(Attribute attrib, int value) { + AddAbbrevAttribute(attrib, DW_FORM_udata); + this->PushUleb128(value); + } + + void WriteUdata(Attribute attrib, uint32_t value) { + AddAbbrevAttribute(attrib, DW_FORM_udata); + this->PushUleb128(value); + } + + void WriteFlag(Attribute attrib, bool value) { + AddAbbrevAttribute(attrib, DW_FORM_flag); + this->PushUint8(value ? 
1 : 0); + } + + void WriteRef4(Attribute attrib, int cu_offset) { + AddAbbrevAttribute(attrib, DW_FORM_ref4); + this->PushUint32(cu_offset); + } + + void WriteRef(Attribute attrib, int cu_offset) { + AddAbbrevAttribute(attrib, DW_FORM_ref_udata); + this->PushUleb128(cu_offset); + } + + void WriteString(Attribute attrib, const char* value) { + AddAbbrevAttribute(attrib, DW_FORM_string); + this->PushString(value); + } + + void WriteStrp(Attribute attrib, int address) { + AddAbbrevAttribute(attrib, DW_FORM_strp); + this->PushUint32(address); + } + + void WriteStrp(Attribute attrib, const char* value, std::vector<uint8_t>* debug_str) { + AddAbbrevAttribute(attrib, DW_FORM_strp); + int address = debug_str->size(); + debug_str->insert(debug_str->end(), value, value + strlen(value) + 1); + this->PushUint32(address); + } + + bool is64bit() const { return is64bit_; } + + using Writer<Allocator>::data; + + DebugInfoEntryWriter(bool is64bitArch, + std::vector<uint8_t, Allocator>* debug_abbrev, + const Allocator& alloc = Allocator()) + : Writer<Allocator>(&entries_), + debug_abbrev_(debug_abbrev), + current_abbrev_(alloc), + abbrev_codes_(alloc), + entries_(alloc), + is64bit_(is64bitArch) { + debug_abbrev_.PushUint8(0); // Add abbrev table terminator. + } + + ~DebugInfoEntryWriter() { + DCHECK_EQ(depth_, 0); + } + + private: + // Start abbreviation declaration. + void StartAbbrev(Tag tag, Children children) { + DCHECK(!inside_entry_); + current_abbrev_.clear(); + EncodeUnsignedLeb128(¤t_abbrev_, tag); + current_abbrev_.push_back(children); + } + + // Add attribute specification. + void AddAbbrevAttribute(Attribute name, Form type) { + DCHECK(inside_entry_) << "Call StartTag before adding attributes."; + EncodeUnsignedLeb128(¤t_abbrev_, name); + EncodeUnsignedLeb128(¤t_abbrev_, type); + } + + int NextAbbrevCode() { + return 1 + abbrev_codes_.size(); + } + + // End abbreviation declaration and return its code. + int EndAbbrev() { + DCHECK(inside_entry_); + auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_), + NextAbbrevCode())); + int abbrev_code = it.first->second; + if (UNLIKELY(it.second)) { // Inserted new entry. + const std::vector<uint8_t, Allocator>& abbrev = it.first->first; + debug_abbrev_.Pop(); // Remove abbrev table terminator. + debug_abbrev_.PushUleb128(abbrev_code); + debug_abbrev_.PushData(abbrev.data(), abbrev.size()); + debug_abbrev_.PushUint8(0); // Attribute list end. + debug_abbrev_.PushUint8(0); // Attribute list end. + debug_abbrev_.PushUint8(0); // Add abbrev table terminator. + } + return abbrev_code; + } + + private: + // Fields for writing and deduplication of abbrevs. + Writer<Allocator> debug_abbrev_; + std::vector<uint8_t, Allocator> current_abbrev_; + std::unordered_map<std::vector<uint8_t, Allocator>, int, + FNVHash<Allocator> > abbrev_codes_; + + // Fields for writing of debugging information entries. + std::vector<uint8_t, Allocator> entries_; + bool is64bit_; + int depth_ = 0; + size_t abbrev_code_offset_ = 0; // Location to patch once we know the code. + bool inside_entry_ = false; // Entry ends at first child (if any). 
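One property of the abbreviation deduplication above worth spelling out (illustrative sketch, not from the patch): entries with an identical tag/attribute layout get the same abbreviation code, so a repeated DW_TAG_subprogram reuses the code of the first one instead of growing .debug_abbrev. This is what the DebugInfo test later in this change verifies against objdump.

  #include <vector>
  #include "dwarf/debug_info_entry_writer.h"

  std::vector<uint8_t> debug_abbrev;
  dwarf::DebugInfoEntryWriter<> info(/* is64bit */ false, &debug_abbrev);
  info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);  // Becomes abbrev code 1.
  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);     // Becomes abbrev code 2.
  info.EndTag();
  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);     // Reuses abbrev code 2.
  info.EndTag();
  info.EndTag();  // DW_TAG_compile_unit.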
+ bool has_children = true; +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_ diff --git a/compiler/dwarf/debug_line_opcode_writer.h b/compiler/dwarf/debug_line_opcode_writer.h new file mode 100644 index 0000000000..f34acee647 --- /dev/null +++ b/compiler/dwarf/debug_line_opcode_writer.h @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_DEBUG_LINE_OPCODE_WRITER_H_ +#define ART_COMPILER_DWARF_DEBUG_LINE_OPCODE_WRITER_H_ + +#include "dwarf.h" +#include "writer.h" + +namespace art { +namespace dwarf { + +// Writer for the .debug_line opcodes (DWARF-3). +// The writer is very light-weight, however it will do the following for you: +// * Choose the most compact encoding of a given opcode. +// * Keep track of current state and convert absolute values to deltas. +// * Divide by header-defined factors as appropriate. +template<typename Allocator = std::allocator<uint8_t>> +class DebugLineOpCodeWriter FINAL : private Writer<Allocator> { + public: + static constexpr int kOpcodeBase = 13; + static constexpr bool kDefaultIsStmt = true; + static constexpr int kLineBase = -5; + static constexpr int kLineRange = 14; + + void AddRow() { + this->PushUint8(DW_LNS_copy); + } + + void AdvancePC(uint64_t absolute_address) { + DCHECK_NE(current_address_, 0u); // Use SetAddress for the first advance. 
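  // Illustrative call sequence (assumed values, mirroring the DebugLine test below):
  //   opcodes.SetAddress(0x01000000);  // First location: long, relocatable form.
  //   opcodes.AdvancePC(0x01000100);   // Later locations: compact DW_LNS_advance_pc delta.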
+ DCHECK_GE(absolute_address, current_address_); + if (absolute_address != current_address_) { + uint64_t delta = FactorCodeOffset(absolute_address - current_address_); + if (delta <= INT32_MAX) { + this->PushUint8(DW_LNS_advance_pc); + this->PushUleb128(static_cast<int>(delta)); + current_address_ = absolute_address; + } else { + SetAddress(absolute_address); + } + } + } + + void AdvanceLine(int absolute_line) { + int delta = absolute_line - current_line_; + if (delta != 0) { + this->PushUint8(DW_LNS_advance_line); + this->PushSleb128(delta); + current_line_ = absolute_line; + } + } + + void SetFile(int file) { + if (current_file_ != file) { + this->PushUint8(DW_LNS_set_file); + this->PushUleb128(file); + current_file_ = file; + } + } + + void SetColumn(int column) { + this->PushUint8(DW_LNS_set_column); + this->PushUleb128(column); + } + + void NegateStmt() { + this->PushUint8(DW_LNS_negate_stmt); + } + + void SetBasicBlock() { + this->PushUint8(DW_LNS_set_basic_block); + } + + void SetPrologueEnd() { + uses_dwarf3_features_ = true; + this->PushUint8(DW_LNS_set_prologue_end); + } + + void SetEpilogueBegin() { + uses_dwarf3_features_ = true; + this->PushUint8(DW_LNS_set_epilogue_begin); + } + + void SetISA(int isa) { + uses_dwarf3_features_ = true; + this->PushUint8(DW_LNS_set_isa); + this->PushUleb128(isa); + } + + void EndSequence() { + this->PushUint8(0); + this->PushUleb128(1); + this->PushUint8(DW_LNE_end_sequence); + current_address_ = 0; + current_file_ = 1; + current_line_ = 1; + } + + // Uncoditionally set address using the long encoding. + // This gives the linker opportunity to relocate the address. + void SetAddress(uint64_t absolute_address) { + DCHECK_GE(absolute_address, current_address_); + FactorCodeOffset(absolute_address); // Check if it is factorable. + this->PushUint8(0); + if (use_64bit_address_) { + this->PushUleb128(1 + 8); + this->PushUint8(DW_LNE_set_address); + this->PushUint64(absolute_address); + } else { + this->PushUleb128(1 + 4); + this->PushUint8(DW_LNE_set_address); + this->PushUint32(absolute_address); + } + current_address_ = absolute_address; + } + + void DefineFile(const char* filename, + int directory_index, + int modification_time, + int file_size) { + int size = 1 + + strlen(filename) + 1 + + UnsignedLeb128Size(directory_index) + + UnsignedLeb128Size(modification_time) + + UnsignedLeb128Size(file_size); + this->PushUint8(0); + this->PushUleb128(size); + size_t start = data()->size(); + this->PushUint8(DW_LNE_define_file); + this->PushString(filename); + this->PushUleb128(directory_index); + this->PushUleb128(modification_time); + this->PushUleb128(file_size); + DCHECK_EQ(start + size, data()->size()); + } + + // Compact address and line opcode. + void AddRow(uint64_t absolute_address, int absolute_line) { + DCHECK_GE(absolute_address, current_address_); + + // If the address is definitely too far, use the long encoding. + uint64_t delta_address = FactorCodeOffset(absolute_address - current_address_); + if (delta_address > UINT8_MAX) { + AdvancePC(absolute_address); + delta_address = 0; + } + + // If the line is definitely too far, use the long encoding. + int delta_line = absolute_line - current_line_; + if (!(kLineBase <= delta_line && delta_line < kLineBase + kLineRange)) { + AdvanceLine(absolute_line); + delta_line = 0; + } + + // Both address and line should be reasonable now. Use the short encoding. 
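  // Worked example with the header constants above (kOpcodeBase 13, kLineBase -5,
  // kLineRange 14): a factored address delta of 2 and a line delta of +1 give
  // opcode = 13 + (1 - (-5)) + 2 * 14 = 47, i.e. a single special-opcode byte.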
+ int opcode = kOpcodeBase + (delta_line - kLineBase) + + (static_cast<int>(delta_address) * kLineRange); + if (opcode > UINT8_MAX) { + // If the address is still too far, try to increment it by const amount. + int const_advance = (0xFF - kOpcodeBase) / kLineRange; + opcode -= (kLineRange * const_advance); + if (opcode <= UINT8_MAX) { + this->PushUint8(DW_LNS_const_add_pc); + } else { + // Give up and use long encoding for address. + AdvancePC(absolute_address); + // Still use the opcode to do line advance and copy. + opcode = kOpcodeBase + (delta_line - kLineBase); + } + } + DCHECK(kOpcodeBase <= opcode && opcode <= 0xFF); + this->PushUint8(opcode); // Special opcode. + current_line_ = absolute_line; + current_address_ = absolute_address; + } + + int GetCodeFactorBits() const { + return code_factor_bits_; + } + + uint64_t CurrentAddress() const { + return current_address_; + } + + int CurrentFile() const { + return current_file_; + } + + int CurrentLine() const { + return current_line_; + } + + using Writer<Allocator>::data; + + DebugLineOpCodeWriter(bool use64bitAddress, + int codeFactorBits, + const Allocator& alloc = Allocator()) + : Writer<Allocator>(&opcodes_), + opcodes_(alloc), + uses_dwarf3_features_(false), + use_64bit_address_(use64bitAddress), + code_factor_bits_(codeFactorBits), + current_address_(0), + current_file_(1), + current_line_(1) { + } + + private: + uint64_t FactorCodeOffset(uint64_t offset) const { + DCHECK_GE(code_factor_bits_, 0); + DCHECK_EQ((offset >> code_factor_bits_) << code_factor_bits_, offset); + return offset >> code_factor_bits_; + } + + std::vector<uint8_t, Allocator> opcodes_; + bool uses_dwarf3_features_; + bool use_64bit_address_; + int code_factor_bits_; + uint64_t current_address_; + int current_file_; + int current_line_; + + DISALLOW_COPY_AND_ASSIGN(DebugLineOpCodeWriter); +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_DEBUG_LINE_OPCODE_WRITER_H_ diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc new file mode 100644 index 0000000000..ec18e96b4b --- /dev/null +++ b/compiler/dwarf/dwarf_test.cc @@ -0,0 +1,281 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dwarf_test.h" + +#include "dwarf/debug_frame_opcode_writer.h" +#include "dwarf/debug_info_entry_writer.h" +#include "dwarf/debug_line_opcode_writer.h" +#include "dwarf/headers.h" +#include "gtest/gtest.h" + +namespace art { +namespace dwarf { + +// Run the tests only on host since we need objdump. +#ifndef HAVE_ANDROID_OS + +TEST_F(DwarfTest, DebugFrame) { + const bool is64bit = false; + + // Pick offset value which would catch Uleb vs Sleb errors. + const int offset = 40000; + ASSERT_EQ(UnsignedLeb128Size(offset / 4), 2u); + ASSERT_EQ(SignedLeb128Size(offset / 4), 3u); + DW_CHECK("Data alignment factor: -4"); + const Reg reg(6); + + // Test the opcodes in the order mentioned in the spec. + // There are usually several encoding variations of each opcode. 
+ DebugFrameOpCodeWriter<> opcodes; + DW_CHECK("FDE"); + int pc = 0; + for (int i : {0, 1, 0x3F, 0x40, 0xFF, 0x100, 0xFFFF, 0x10000}) { + pc += i; + opcodes.AdvancePC(pc); + } + DW_CHECK_NEXT("DW_CFA_advance_loc: 1 to 01000001"); + DW_CHECK_NEXT("DW_CFA_advance_loc: 63 to 01000040"); + DW_CHECK_NEXT("DW_CFA_advance_loc1: 64 to 01000080"); + DW_CHECK_NEXT("DW_CFA_advance_loc1: 255 to 0100017f"); + DW_CHECK_NEXT("DW_CFA_advance_loc2: 256 to 0100027f"); + DW_CHECK_NEXT("DW_CFA_advance_loc2: 65535 to 0101027e"); + DW_CHECK_NEXT("DW_CFA_advance_loc4: 65536 to 0102027e"); + opcodes.DefCFA(reg, offset); + DW_CHECK_NEXT("DW_CFA_def_cfa: r6 (esi) ofs 40000"); + opcodes.DefCFA(reg, -offset); + DW_CHECK_NEXT("DW_CFA_def_cfa_sf: r6 (esi) ofs -40000"); + opcodes.DefCFARegister(reg); + DW_CHECK_NEXT("DW_CFA_def_cfa_register: r6 (esi)"); + opcodes.DefCFAOffset(offset); + DW_CHECK_NEXT("DW_CFA_def_cfa_offset: 40000"); + opcodes.DefCFAOffset(-offset); + DW_CHECK_NEXT("DW_CFA_def_cfa_offset_sf: -40000"); + uint8_t expr[] = { 0 }; + opcodes.DefCFAExpression(expr, arraysize(expr)); + DW_CHECK_NEXT("DW_CFA_def_cfa_expression"); + opcodes.Undefined(reg); + DW_CHECK_NEXT("DW_CFA_undefined: r6 (esi)"); + opcodes.SameValue(reg); + DW_CHECK_NEXT("DW_CFA_same_value: r6 (esi)"); + opcodes.Offset(Reg(0x3F), -offset); + // Bad register likely means that it does not exist on x86, + // but we want to test high register numbers anyway. + DW_CHECK_NEXT("DW_CFA_offset: bad register: r63 at cfa-40000"); + opcodes.Offset(Reg(0x40), -offset); + DW_CHECK_NEXT("DW_CFA_offset_extended: bad register: r64 at cfa-40000"); + opcodes.Offset(Reg(0x40), offset); + DW_CHECK_NEXT("DW_CFA_offset_extended_sf: bad register: r64 at cfa+40000"); + opcodes.ValOffset(reg, -offset); + DW_CHECK_NEXT("DW_CFA_val_offset: r6 (esi) at cfa-40000"); + opcodes.ValOffset(reg, offset); + DW_CHECK_NEXT("DW_CFA_val_offset_sf: r6 (esi) at cfa+40000"); + opcodes.Register(reg, Reg(1)); + DW_CHECK_NEXT("DW_CFA_register: r6 (esi) in r1 (ecx)"); + opcodes.Expression(reg, expr, arraysize(expr)); + DW_CHECK_NEXT("DW_CFA_expression: r6 (esi)"); + opcodes.ValExpression(reg, expr, arraysize(expr)); + DW_CHECK_NEXT("DW_CFA_val_expression: r6 (esi)"); + opcodes.Restore(Reg(0x3F)); + DW_CHECK_NEXT("DW_CFA_restore: bad register: r63"); + opcodes.Restore(Reg(0x40)); + DW_CHECK_NEXT("DW_CFA_restore_extended: bad register: r64"); + opcodes.Restore(reg); + DW_CHECK_NEXT("DW_CFA_restore: r6 (esi)"); + opcodes.RememberState(); + DW_CHECK_NEXT("DW_CFA_remember_state"); + opcodes.RestoreState(); + DW_CHECK_NEXT("DW_CFA_restore_state"); + opcodes.Nop(); + DW_CHECK_NEXT("DW_CFA_nop"); + + // Also test helpers. + opcodes.DefCFA(Reg(4), 100); // ESP + DW_CHECK_NEXT("DW_CFA_def_cfa: r4 (esp) ofs 100"); + opcodes.AdjustCFAOffset(8); + DW_CHECK_NEXT("DW_CFA_def_cfa_offset: 108"); + opcodes.RelOffset(Reg(0), 0); // push R0 + DW_CHECK_NEXT("DW_CFA_offset: r0 (eax) at cfa-108"); + opcodes.RelOffset(Reg(1), 4); // push R1 + DW_CHECK_NEXT("DW_CFA_offset: r1 (ecx) at cfa-104"); + opcodes.RelOffsetForMany(Reg(2), 8, 1 | (1 << 3), 4); // push R2 and R5 + DW_CHECK_NEXT("DW_CFA_offset: r2 (edx) at cfa-100"); + DW_CHECK_NEXT("DW_CFA_offset: r5 (ebp) at cfa-96"); + opcodes.RestoreMany(Reg(2), 1 | (1 << 3)); // pop R2 and R5 + DW_CHECK_NEXT("DW_CFA_restore: r2 (edx)"); + DW_CHECK_NEXT("DW_CFA_restore: r5 (ebp)"); + + DebugFrameOpCodeWriter<> initial_opcodes; + WriteEhFrameCIE(is64bit, Reg(is64bit ? 
16 : 8), initial_opcodes, &eh_frame_data_); + WriteEhFrameFDE(is64bit, 0, 0x01000000, 0x01000000, opcodes.data(), &eh_frame_data_); + CheckObjdumpOutput(is64bit, "-W"); +} + +// TODO: objdump seems to have trouble with 64bit CIE length. +TEST_F(DwarfTest, DISABLED_DebugFrame64) { + constexpr bool is64bit = true; + DebugFrameOpCodeWriter<> initial_opcodes; + WriteEhFrameCIE(is64bit, Reg(16), initial_opcodes, &eh_frame_data_); + DebugFrameOpCodeWriter<> opcodes; + WriteEhFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000, + opcodes.data(), &eh_frame_data_); + DW_CHECK("FDE cie=00000000 pc=100000000000000..300000000000000"); + CheckObjdumpOutput(is64bit, "-W"); +} + +TEST_F(DwarfTest, DebugLine) { + const bool is64bit = false; + const int code_factor_bits = 1; + DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits); + + std::vector<std::string> include_directories; + include_directories.push_back("/path/to/source"); + DW_CHECK("/path/to/source"); + + std::vector<FileEntry> files { + { "file0.c", 0, 1000, 2000 }, + { "file1.c", 1, 1000, 2000 }, + { "file2.c", 1, 1000, 2000 }, + }; + DW_CHECK("1\t0\t1000\t2000\tfile0.c"); + DW_CHECK_NEXT("2\t1\t1000\t2000\tfile1.c"); + DW_CHECK_NEXT("3\t1\t1000\t2000\tfile2.c"); + + DW_CHECK("Line Number Statements"); + opcodes.SetAddress(0x01000000); + DW_CHECK_NEXT("Extended opcode 2: set Address to 0x1000000"); + opcodes.AddRow(); + DW_CHECK_NEXT("Copy"); + opcodes.AdvancePC(0x01000100); + DW_CHECK_NEXT("Advance PC by 256 to 0x1000100"); + opcodes.SetFile(2); + DW_CHECK_NEXT("Set File Name to entry 2 in the File Name Table"); + opcodes.AdvanceLine(3); + DW_CHECK_NEXT("Advance Line by 2 to 3"); + opcodes.SetColumn(4); + DW_CHECK_NEXT("Set column to 4"); + opcodes.NegateStmt(); + DW_CHECK_NEXT("Set is_stmt to 0"); + opcodes.SetBasicBlock(); + DW_CHECK_NEXT("Set basic block"); + opcodes.SetPrologueEnd(); + DW_CHECK_NEXT("Set prologue_end to true"); + opcodes.SetEpilogueBegin(); + DW_CHECK_NEXT("Set epilogue_begin to true"); + opcodes.SetISA(5); + DW_CHECK_NEXT("Set ISA to 5"); + opcodes.EndSequence(); + DW_CHECK_NEXT("Extended opcode 1: End of Sequence"); + opcodes.DefineFile("file.c", 0, 1000, 2000); + DW_CHECK_NEXT("Extended opcode 3: define new File Table entry"); + DW_CHECK_NEXT("Entry\tDir\tTime\tSize\tName"); + DW_CHECK_NEXT("1\t0\t1000\t2000\tfile.c"); + + WriteDebugLineTable(include_directories, files, opcodes, &debug_line_data_); + CheckObjdumpOutput(is64bit, "-W"); +} + +// DWARF has special one byte codes which advance PC and line at the same time. 
+TEST_F(DwarfTest, DebugLineSpecialOpcodes) { + const bool is64bit = false; + const int code_factor_bits = 1; + uint32_t pc = 0x01000000; + int line = 1; + DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits); + opcodes.SetAddress(pc); + size_t num_rows = 0; + DW_CHECK("Line Number Statements:"); + DW_CHECK("Special opcode"); + DW_CHECK("Advance PC by constant"); + DW_CHECK("Decoded dump of debug contents of section .debug_line:"); + DW_CHECK("Line number Starting address"); + for (int addr_delta = 0; addr_delta < 80; addr_delta += 2) { + for (int line_delta = 16; line_delta >= -16; --line_delta) { + pc += addr_delta; + line += line_delta; + opcodes.AddRow(pc, line); + num_rows++; + ASSERT_EQ(opcodes.CurrentAddress(), pc); + ASSERT_EQ(opcodes.CurrentLine(), line); + char expected[1024]; + sprintf(expected, "%i 0x%x", line, pc); + DW_CHECK_NEXT(expected); + } + } + EXPECT_LT(opcodes.data()->size(), num_rows * 3); + + std::vector<std::string> directories; + std::vector<FileEntry> files { { "file.c", 0, 1000, 2000 } }; // NOLINT + WriteDebugLineTable(directories, files, opcodes, &debug_line_data_); + CheckObjdumpOutput(is64bit, "-W -WL"); +} + +TEST_F(DwarfTest, DebugInfo) { + constexpr bool is64bit = false; + DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_); + DW_CHECK("Contents of the .debug_info section:"); + info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes); + DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)"); + info.WriteStrp(dwarf::DW_AT_producer, "Compiler name", &debug_str_data_); + DW_CHECK_NEXT("DW_AT_producer : (indirect string, offset: 0x0): Compiler name"); + info.WriteAddr(dwarf::DW_AT_low_pc, 0x01000000); + DW_CHECK_NEXT("DW_AT_low_pc : 0x1000000"); + info.WriteAddr(dwarf::DW_AT_high_pc, 0x02000000); + DW_CHECK_NEXT("DW_AT_high_pc : 0x2000000"); + info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no); + DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)"); + info.WriteStrp(dwarf::DW_AT_name, "Foo", &debug_str_data_); + DW_CHECK_NEXT("DW_AT_name : (indirect string, offset: 0xe): Foo"); + info.WriteAddr(dwarf::DW_AT_low_pc, 0x01010000); + DW_CHECK_NEXT("DW_AT_low_pc : 0x1010000"); + info.WriteAddr(dwarf::DW_AT_high_pc, 0x01020000); + DW_CHECK_NEXT("DW_AT_high_pc : 0x1020000"); + info.EndTag(); // DW_TAG_subprogram + info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no); + DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)"); + info.WriteStrp(dwarf::DW_AT_name, "Bar", &debug_str_data_); + DW_CHECK_NEXT("DW_AT_name : (indirect string, offset: 0x12): Bar"); + info.WriteAddr(dwarf::DW_AT_low_pc, 0x01020000); + DW_CHECK_NEXT("DW_AT_low_pc : 0x1020000"); + info.WriteAddr(dwarf::DW_AT_high_pc, 0x01030000); + DW_CHECK_NEXT("DW_AT_high_pc : 0x1030000"); + info.EndTag(); // DW_TAG_subprogram + info.EndTag(); // DW_TAG_compile_unit + // Test that previous list was properly terminated and empty children. + info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes); + info.EndTag(); // DW_TAG_compile_unit + + // The abbrev table is just side product, but check it as well. 
+ DW_CHECK("Abbrev Number: 3 (DW_TAG_compile_unit)"); + DW_CHECK("Contents of the .debug_abbrev section:"); + DW_CHECK("1 DW_TAG_compile_unit [has children]"); + DW_CHECK_NEXT("DW_AT_producer DW_FORM_strp"); + DW_CHECK_NEXT("DW_AT_low_pc DW_FORM_addr"); + DW_CHECK_NEXT("DW_AT_high_pc DW_FORM_addr"); + DW_CHECK("2 DW_TAG_subprogram [no children]"); + DW_CHECK_NEXT("DW_AT_name DW_FORM_strp"); + DW_CHECK_NEXT("DW_AT_low_pc DW_FORM_addr"); + DW_CHECK_NEXT("DW_AT_high_pc DW_FORM_addr"); + DW_CHECK("3 DW_TAG_compile_unit [has children]"); + + dwarf::WriteDebugInfoCU(0 /* debug_abbrev_offset */, info, &debug_info_data_); + CheckObjdumpOutput(is64bit, "-W"); +} + +#endif // HAVE_ANDROID_OS + +} // namespace dwarf +} // namespace art diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h new file mode 100644 index 0000000000..dd5e0c286e --- /dev/null +++ b/compiler/dwarf/dwarf_test.h @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_DWARF_TEST_H_ +#define ART_COMPILER_DWARF_DWARF_TEST_H_ + +#include <cstring> +#include <dirent.h> +#include <memory> +#include <set> +#include <stdio.h> +#include <string> +#include <sys/types.h> + +#include "utils.h" +#include "base/unix_file/fd_file.h" +#include "common_runtime_test.h" +#include "elf_builder.h" +#include "gtest/gtest.h" +#include "os.h" + +namespace art { +namespace dwarf { + +#define DW_CHECK(substring) Check(substring, false, __FILE__, __LINE__) +#define DW_CHECK_NEXT(substring) Check(substring, true, __FILE__, __LINE__) + +class DwarfTest : public CommonRuntimeTest { + public: + static constexpr bool kPrintObjdumpOutput = false; // debugging. + + struct ExpectedLine { + std::string substring; + bool next; + const char* at_file; + int at_line; + }; + + // Check that the objdump output contains given output. + // If next is true, it must be the next line. Otherwise lines are skipped. + void Check(const char* substr, bool next, const char* at_file, int at_line) { + expected_lines_.push_back(ExpectedLine {substr, next, at_file, at_line}); + } + + static std::string GetObjdumpPath() { + const char* android_build_top = getenv("ANDROID_BUILD_TOP"); + if (android_build_top != nullptr) { + std::string host_prebuilts = std::string(android_build_top) + + "/prebuilts/gcc/linux-x86/host/"; + // Read the content of the directory. + std::set<std::string> entries; + DIR* dir = opendir(host_prebuilts.c_str()); + if (dir != nullptr) { + struct dirent* entry; + while ((entry = readdir(dir)) != nullptr) { + if (strstr(entry->d_name, "linux-glibc")) { + entries.insert(host_prebuilts + entry->d_name); + } + } + closedir(dir); + } + // Strings are sorted so the last one should be the most recent version. + if (!entries.empty()) { + std::string path = *entries.rbegin() + "/x86_64-linux/bin/objdump"; + struct stat st; + if (stat(path.c_str(), &st) == 0) { + return path; // File exists. 
+ } + } + } + ADD_FAILURE() << "Can not find prebuild objdump."; + return "objdump"; // Use the system objdump as fallback. + } + + // Pretty-print the generated DWARF data using objdump. + template<typename Elf_Word, typename Elf_Sword, typename Elf_Addr, typename Elf_Dyn, + typename Elf_Sym, typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr> + std::vector<std::string> Objdump(bool is64bit, const char* args) { + // Write simple elf file with just the DWARF sections. + class NoCode : public CodeOutput { + virtual void SetCodeOffset(size_t) { } + virtual bool Write(OutputStream*) { return true; } + } code; + ScratchFile file; + InstructionSet isa = is64bit ? kX86_64 : kX86; + ElfBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, + Elf_Sym, Elf_Ehdr, Elf_Phdr, Elf_Shdr> builder( + &code, file.GetFile(), isa, 0, 0, 0, 0, 0, 0, false, false); + typedef ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> Section; + if (!debug_info_data_.empty()) { + Section debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + debug_info.SetBuffer(debug_info_data_); + builder.RegisterRawSection(debug_info); + } + if (!debug_abbrev_data_.empty()) { + Section debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + debug_abbrev.SetBuffer(debug_abbrev_data_); + builder.RegisterRawSection(debug_abbrev); + } + if (!debug_str_data_.empty()) { + Section debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + debug_str.SetBuffer(debug_str_data_); + builder.RegisterRawSection(debug_str); + } + if (!debug_line_data_.empty()) { + Section debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + debug_line.SetBuffer(debug_line_data_); + builder.RegisterRawSection(debug_line); + } + if (!eh_frame_data_.empty()) { + Section eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0); + eh_frame.SetBuffer(eh_frame_data_); + builder.RegisterRawSection(eh_frame); + } + builder.Init(); + builder.Write(); + + // Read the elf file back using objdump. + std::vector<std::string> lines; + std::string cmd = GetObjdumpPath(); + cmd = cmd + " " + args + " " + file.GetFilename() + " 2>&1"; + FILE* output = popen(cmd.data(), "r"); + char buffer[1024]; + const char* line; + while ((line = fgets(buffer, sizeof(buffer), output)) != nullptr) { + if (kPrintObjdumpOutput) { + printf("%s", line); + } + if (line[0] != '\0' && line[0] != '\n') { + EXPECT_TRUE(strstr(line, "objdump: Error:") == nullptr) << line; + EXPECT_TRUE(strstr(line, "objdump: Warning:") == nullptr) << line; + std::string str(line); + if (str.back() == '\n') { + str.pop_back(); + } + lines.push_back(str); + } + } + pclose(output); + return lines; + } + + std::vector<std::string> Objdump(bool is64bit, const char* args) { + if (is64bit) { + return Objdump<Elf64_Word, Elf64_Sword, Elf64_Addr, Elf64_Dyn, + Elf64_Sym, Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr>(is64bit, args); + } else { + return Objdump<Elf32_Word, Elf32_Sword, Elf32_Addr, Elf32_Dyn, + Elf32_Sym, Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr>(is64bit, args); + } + } + + // Compare objdump output to the recorded checks. 
+ void CheckObjdumpOutput(bool is64bit, const char* args) { + std::vector<std::string> actual_lines = Objdump(is64bit, args); + auto actual_line = actual_lines.begin(); + for (const ExpectedLine& expected_line : expected_lines_) { + const std::string& substring = expected_line.substring; + if (actual_line == actual_lines.end()) { + ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) << + "Expected '" << substring << "'.\n" << + "Seen end of output."; + } else if (expected_line.next) { + if (actual_line->find(substring) == std::string::npos) { + ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) << + "Expected '" << substring << "'.\n" << + "Seen '" << actual_line->data() << "'."; + } else { + // printf("Found '%s' in '%s'.\n", substring.data(), actual_line->data()); + } + actual_line++; + } else { + bool found = false; + for (auto it = actual_line; it < actual_lines.end(); it++) { + if (it->find(substring) != std::string::npos) { + actual_line = it; + found = true; + break; + } + } + if (!found) { + ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) << + "Expected '" << substring << "'.\n" << + "Not found anywhere in the rest of the output."; + } else { + // printf("Found '%s' in '%s'.\n", substring.data(), actual_line->data()); + actual_line++; + } + } + } + } + + // Buffers which are going to assembled into ELF file and passed to objdump. + std::vector<uint8_t> eh_frame_data_; + std::vector<uint8_t> debug_info_data_; + std::vector<uint8_t> debug_abbrev_data_; + std::vector<uint8_t> debug_str_data_; + std::vector<uint8_t> debug_line_data_; + + // The expected output of objdump. + std::vector<ExpectedLine> expected_lines_; +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_DWARF_TEST_H_ diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h new file mode 100644 index 0000000000..d866b91ae7 --- /dev/null +++ b/compiler/dwarf/headers.h @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_HEADERS_H_ +#define ART_COMPILER_DWARF_HEADERS_H_ + +#include "debug_frame_opcode_writer.h" +#include "debug_info_entry_writer.h" +#include "debug_line_opcode_writer.h" +#include "register.h" +#include "writer.h" + +namespace art { +namespace dwarf { + +// Write common information entry (CIE) to .eh_frame section. +template<typename Allocator> +void WriteEhFrameCIE(bool is64bit, Reg return_address_register, + const DebugFrameOpCodeWriter<Allocator>& opcodes, + std::vector<uint8_t>* eh_frame) { + Writer<> writer(eh_frame); + size_t cie_header_start_ = writer.data()->size(); + if (is64bit) { + // TODO: This is not related to being 64bit. + writer.PushUint32(0xffffffff); + writer.PushUint64(0); // Length placeholder. + writer.PushUint64(0); // CIE id. + } else { + writer.PushUint32(0); // Length placeholder. + writer.PushUint32(0); // CIE id. + } + writer.PushUint8(1); // Version. 
+ writer.PushString("zR"); + writer.PushUleb128(DebugFrameOpCodeWriter<Allocator>::kCodeAlignmentFactor); + writer.PushSleb128(DebugFrameOpCodeWriter<Allocator>::kDataAlignmentFactor); + writer.PushUleb128(return_address_register.num()); // ubyte in DWARF2. + writer.PushUleb128(1); // z: Augmentation data size. + if (is64bit) { + writer.PushUint8(0x04); // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8). + } else { + writer.PushUint8(0x03); // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4). + } + writer.PushData(opcodes.data()); + writer.Pad(is64bit ? 8 : 4); + if (is64bit) { + writer.UpdateUint64(cie_header_start_ + 4, writer.data()->size() - cie_header_start_ - 12); + } else { + writer.UpdateUint32(cie_header_start_, writer.data()->size() - cie_header_start_ - 4); + } +} + +// Write frame description entry (FDE) to .eh_frame section. +template<typename Allocator> +void WriteEhFrameFDE(bool is64bit, size_t cie_offset, + uint64_t initial_address, uint64_t address_range, + const std::vector<uint8_t, Allocator>* opcodes, + std::vector<uint8_t>* eh_frame) { + Writer<> writer(eh_frame); + size_t fde_header_start = writer.data()->size(); + if (is64bit) { + // TODO: This is not related to being 64bit. + writer.PushUint32(0xffffffff); + writer.PushUint64(0); // Length placeholder. + uint64_t cie_pointer = writer.data()->size() - cie_offset; + writer.PushUint64(cie_pointer); + } else { + writer.PushUint32(0); // Length placeholder. + uint32_t cie_pointer = writer.data()->size() - cie_offset; + writer.PushUint32(cie_pointer); + } + if (is64bit) { + writer.PushUint64(initial_address); + writer.PushUint64(address_range); + } else { + writer.PushUint32(initial_address); + writer.PushUint32(address_range); + } + writer.PushUleb128(0); // Augmentation data size. + writer.PushData(opcodes); + writer.Pad(is64bit ? 8 : 4); + if (is64bit) { + writer.UpdateUint64(fde_header_start + 4, writer.data()->size() - fde_header_start - 12); + } else { + writer.UpdateUint32(fde_header_start, writer.data()->size() - fde_header_start - 4); + } +} + +// Write compilation unit (CU) to .debug_info section. +template<typename Allocator> +void WriteDebugInfoCU(uint32_t debug_abbrev_offset, + const DebugInfoEntryWriter<Allocator>& entries, + std::vector<uint8_t>* debug_info) { + Writer<> writer(debug_info); + size_t start = writer.data()->size(); + writer.PushUint32(0); // Length placeholder. + writer.PushUint16(3); // Version. + writer.PushUint32(debug_abbrev_offset); + writer.PushUint8(entries.is64bit() ? 8 : 4); + writer.PushData(entries.data()); + writer.UpdateUint32(start, writer.data()->size() - start - 4); +} + +struct FileEntry { + std::string file_name; + int directory_index; + int modification_time; + int file_size; +}; + +// Write line table to .debug_line section. +template<typename Allocator> +void WriteDebugLineTable(const std::vector<std::string>& include_directories, + const std::vector<FileEntry>& files, + const DebugLineOpCodeWriter<Allocator>& opcodes, + std::vector<uint8_t>* debug_line) { + Writer<> writer(debug_line); + size_t header_start = writer.data()->size(); + writer.PushUint32(0); // Section-length placeholder. + // Claim DWARF-2 version even though we use some DWARF-3 features. + // DWARF-2 consumers will ignore the unknown opcodes. + // This is what clang currently does. + writer.PushUint16(2); // .debug_line version. + size_t header_length_pos = writer.data()->size(); + writer.PushUint32(0); // Header-length placeholder. 
+ writer.PushUint8(1 << opcodes.GetCodeFactorBits()); + writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kDefaultIsStmt ? 1 : 0); + writer.PushInt8(DebugLineOpCodeWriter<Allocator>::kLineBase); + writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kLineRange); + writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kOpcodeBase); + static const int opcode_lengths[DebugLineOpCodeWriter<Allocator>::kOpcodeBase] = { + 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 }; + for (int i = 1; i < DebugLineOpCodeWriter<Allocator>::kOpcodeBase; i++) { + writer.PushUint8(opcode_lengths[i]); + } + for (const std::string& directory : include_directories) { + writer.PushData(directory.data(), directory.size() + 1); + } + writer.PushUint8(0); // Terminate include_directories list. + for (const FileEntry& file : files) { + writer.PushData(file.file_name.data(), file.file_name.size() + 1); + writer.PushUleb128(file.directory_index); + writer.PushUleb128(file.modification_time); + writer.PushUleb128(file.file_size); + } + writer.PushUint8(0); // Terminate file list. + writer.UpdateUint32(header_length_pos, writer.data()->size() - header_length_pos - 4); + writer.PushData(opcodes.data()->data(), opcodes.data()->size()); + writer.UpdateUint32(header_start, writer.data()->size() - header_start - 4); +} + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_HEADERS_H_ diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h new file mode 100644 index 0000000000..fa666dffa9 --- /dev/null +++ b/compiler/dwarf/register.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_REGISTER_H_ +#define ART_COMPILER_DWARF_REGISTER_H_ + +namespace art { +namespace dwarf { + +// Represents DWARF register. +class Reg { + public: + explicit Reg(int reg_num) : num_(reg_num) { } + int num() const { return num_; } + + // TODO: Arm S0–S31 register mapping is obsolescent. + // We should use VFP-v3/Neon D0-D31 mapping instead. + // However, D0 is aliased to pair of S0 and S1, so using that + // mapping we can not easily say S0 is spilled and S1 is not. + // There are ways around this in DWARF but they are complex. + // It would be much simpler to always spill whole D registers. + // Arm64 mapping is correct since we already do this there. + + static Reg ArmCore(int num) { return Reg(num); } + static Reg ArmFp(int num) { return Reg(64 + num); } // S0–S31. + static Reg Arm64Core(int num) { return Reg(num); } + static Reg Arm64Fp(int num) { return Reg(64 + num); } // V0-V31. + static Reg MipsCore(int num) { return Reg(num); } + static Reg Mips64Core(int num) { return Reg(num); } + static Reg X86Core(int num) { return Reg(num); } + static Reg X86Fp(int num) { return Reg(21 + num); } + static Reg X86_64Core(int num) { + static const int map[8] = {0, 2, 1, 3, 7, 6, 4, 5}; + return Reg(num < 8 ? 
map[num] : num); + } + static Reg X86_64Fp(int num) { return Reg(17 + num); } + + private: + int num_; +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_REGISTER_H_ diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h new file mode 100644 index 0000000000..3b9c55866a --- /dev/null +++ b/compiler/dwarf/writer.h @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_WRITER_H_ +#define ART_COMPILER_DWARF_WRITER_H_ + +#include <vector> +#include "leb128.h" +#include "base/logging.h" +#include "utils.h" + +namespace art { +namespace dwarf { + +// The base class for all DWARF writers. +template<typename Allocator = std::allocator<uint8_t>> +class Writer { + public: + void PushUint8(int value) { + DCHECK_GE(value, 0); + DCHECK_LE(value, UINT8_MAX); + data_->push_back(value & 0xff); + } + + void PushUint16(int value) { + DCHECK_GE(value, 0); + DCHECK_LE(value, UINT16_MAX); + data_->push_back((value >> 0) & 0xff); + data_->push_back((value >> 8) & 0xff); + } + + void PushUint32(uint32_t value) { + data_->push_back((value >> 0) & 0xff); + data_->push_back((value >> 8) & 0xff); + data_->push_back((value >> 16) & 0xff); + data_->push_back((value >> 24) & 0xff); + } + + void PushUint32(int value) { + DCHECK_GE(value, 0); + PushUint32(static_cast<uint32_t>(value)); + } + + void PushUint32(uint64_t value) { + DCHECK_LE(value, UINT32_MAX); + PushUint32(static_cast<uint32_t>(value)); + } + + void PushUint64(uint64_t value) { + data_->push_back((value >> 0) & 0xff); + data_->push_back((value >> 8) & 0xff); + data_->push_back((value >> 16) & 0xff); + data_->push_back((value >> 24) & 0xff); + data_->push_back((value >> 32) & 0xff); + data_->push_back((value >> 40) & 0xff); + data_->push_back((value >> 48) & 0xff); + data_->push_back((value >> 56) & 0xff); + } + + void PushInt8(int value) { + DCHECK_GE(value, INT8_MIN); + DCHECK_LE(value, INT8_MAX); + PushUint8(static_cast<uint8_t>(value)); + } + + void PushInt16(int value) { + DCHECK_GE(value, INT16_MIN); + DCHECK_LE(value, INT16_MAX); + PushUint16(static_cast<uint16_t>(value)); + } + + void PushInt32(int value) { + PushUint32(static_cast<uint32_t>(value)); + } + + void PushInt64(int64_t value) { + PushUint64(static_cast<uint64_t>(value)); + } + + // Variable-length encoders. + + void PushUleb128(uint32_t value) { + EncodeUnsignedLeb128(data_, value); + } + + void PushUleb128(int value) { + DCHECK_GE(value, 0); + EncodeUnsignedLeb128(data_, value); + } + + void PushSleb128(int value) { + EncodeSignedLeb128(data_, value); + } + + // Miscellaneous functions. 
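A small usage sketch of the writer (illustrative, not part of the patch; the byte values follow from the LEB128 definition and match the Uleb-vs-Sleb size check in the DebugFrame test above):

  #include <vector>
  #include "dwarf/writer.h"

  std::vector<uint8_t> buf;
  dwarf::Writer<> writer(&buf);
  writer.PushUleb128(10000u);  // 40000 / 4 as unsigned LEB128 -> 2 bytes: 0x90 0x4E.
  writer.PushSleb128(10000);   // Same value as signed LEB128 -> 3 bytes: 0x90 0xCE 0x00.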
+ + void PushString(const char* value) { + data_->insert(data_->end(), value, value + strlen(value) + 1); + } + + void PushData(const void* ptr, size_t size) { + const char* p = reinterpret_cast<const char*>(ptr); + data_->insert(data_->end(), p, p + size); + } + + template<typename Allocator2> + void PushData(const std::vector<uint8_t, Allocator2>* buffer) { + data_->insert(data_->end(), buffer->begin(), buffer->end()); + } + + void UpdateUint32(size_t offset, uint32_t value) { + DCHECK_LT(offset + 3, data_->size()); + (*data_)[offset + 0] = (value >> 0) & 0xFF; + (*data_)[offset + 1] = (value >> 8) & 0xFF; + (*data_)[offset + 2] = (value >> 16) & 0xFF; + (*data_)[offset + 3] = (value >> 24) & 0xFF; + } + + void UpdateUint64(size_t offset, uint64_t value) { + DCHECK_LT(offset + 7, data_->size()); + (*data_)[offset + 0] = (value >> 0) & 0xFF; + (*data_)[offset + 1] = (value >> 8) & 0xFF; + (*data_)[offset + 2] = (value >> 16) & 0xFF; + (*data_)[offset + 3] = (value >> 24) & 0xFF; + (*data_)[offset + 4] = (value >> 32) & 0xFF; + (*data_)[offset + 5] = (value >> 40) & 0xFF; + (*data_)[offset + 6] = (value >> 48) & 0xFF; + (*data_)[offset + 7] = (value >> 56) & 0xFF; + } + + void UpdateUleb128(size_t offset, uint32_t value) { + DCHECK_LE(offset + UnsignedLeb128Size(value), data_->size()); + UpdateUnsignedLeb128(data_->data() + offset, value); + } + + void Pop() { + return data_->pop_back(); + } + + void Pad(int alignment) { + DCHECK_NE(alignment, 0); + data_->resize(RoundUp(data_->size(), alignment), 0); + } + + const std::vector<uint8_t, Allocator>* data() const { + return data_; + } + + explicit Writer(std::vector<uint8_t, Allocator>* buffer) : data_(buffer) { } + + private: + std::vector<uint8_t, Allocator>* data_; + + DISALLOW_COPY_AND_ASSIGN(Writer); +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_WRITER_H_ diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h index 9ab3602606..124ed03c21 100644 --- a/compiler/elf_builder.h +++ b/compiler/elf_builder.h @@ -40,6 +40,7 @@ class ElfSectionBuilder : public ValueObject { section_.sh_addralign = align; section_.sh_entsize = entsize; } + ElfSectionBuilder(const ElfSectionBuilder&) = default; ~ElfSectionBuilder() {} @@ -144,6 +145,7 @@ class ElfRawSectionBuilder FINAL : public ElfSectionBuilder<Elf_Word, Elf_Sword, : ElfSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr>(sec_name, type, flags, link, info, align, entsize) { } + ElfRawSectionBuilder(const ElfRawSectionBuilder&) = default; ~ElfRawSectionBuilder() {} diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc new file mode 100644 index 0000000000..5e8e24b035 --- /dev/null +++ b/compiler/elf_writer_debug.cc @@ -0,0 +1,360 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "elf_writer_debug.h" + +#include "compiled_method.h" +#include "driver/compiler_driver.h" +#include "dex_file-inl.h" +#include "dwarf/headers.h" +#include "dwarf/register.h" +#include "oat_writer.h" + +namespace art { +namespace dwarf { + +static void WriteEhFrameCIE(InstructionSet isa, std::vector<uint8_t>* eh_frame) { + // Scratch registers should be marked as undefined. This tells the + // debugger that its value in the previous frame is not recoverable. + bool is64bit = Is64BitInstructionSet(isa); + switch (isa) { + case kArm: + case kThumb2: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::ArmCore(13), 0); // R13(SP). + // core registers. + for (int reg = 0; reg < 13; reg++) { + if (reg < 4 || reg == 12) { + opcodes.Undefined(Reg::ArmCore(reg)); + } else { + opcodes.SameValue(Reg::ArmCore(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 32; reg++) { + if (reg < 16) { + opcodes.Undefined(Reg::ArmFp(reg)); + } else { + opcodes.SameValue(Reg::ArmFp(reg)); + } + } + auto return_address_reg = Reg::ArmCore(14); // R14(LR). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kArm64: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::Arm64Core(31), 0); // R31(SP). + // core registers. + for (int reg = 0; reg < 30; reg++) { + if (reg < 8 || reg == 16 || reg == 17) { + opcodes.Undefined(Reg::Arm64Core(reg)); + } else { + opcodes.SameValue(Reg::Arm64Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 32; reg++) { + if (reg < 8 || reg >= 16) { + opcodes.Undefined(Reg::Arm64Fp(reg)); + } else { + opcodes.SameValue(Reg::Arm64Fp(reg)); + } + } + auto return_address_reg = Reg::Arm64Core(30); // R30(LR). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kMips: + case kMips64: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::MipsCore(29), 0); // R29(SP). + // core registers. + for (int reg = 1; reg < 26; reg++) { + if (reg < 16 || reg == 24 || reg == 25) { // AT, V*, A*, T*. + opcodes.Undefined(Reg::MipsCore(reg)); + } else { + opcodes.SameValue(Reg::MipsCore(reg)); + } + } + auto return_address_reg = Reg::MipsCore(31); // R31(RA). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kX86: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::X86Core(4), 4); // R4(ESP). + opcodes.Offset(Reg::X86Core(8), -4); // R8(EIP). + // core registers. + for (int reg = 0; reg < 8; reg++) { + if (reg <= 3) { + opcodes.Undefined(Reg::X86Core(reg)); + } else if (reg == 4) { + // Stack pointer. + } else { + opcodes.SameValue(Reg::X86Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 8; reg++) { + opcodes.Undefined(Reg::X86Fp(reg)); + } + auto return_address_reg = Reg::X86Core(8); // R8(EIP). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kX86_64: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::X86_64Core(4), 8); // R4(RSP). + opcodes.Offset(Reg::X86_64Core(16), -8); // R16(RIP). + // core registers. + for (int reg = 0; reg < 16; reg++) { + if (reg == 4) { + // Stack pointer. + } else if (reg < 12 && reg != 3 && reg != 5) { // except EBX and EBP. + opcodes.Undefined(Reg::X86_64Core(reg)); + } else { + opcodes.SameValue(Reg::X86_64Core(reg)); + } + } + // fp registers. 
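As an aside, the DefCFA and Offset calls in the x86-64 case above boil down to the same raw DWARF CFI bytes that the hand-written x86-64 CIE removed from elf_writer_quick.cc (further down in this diff) pushed directly. A standalone sketch, assuming the standard x86-64 DWARF register numbers (RSP = 7, RIP = 16) and the CIE data alignment factor of -8:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint8_t> cfi;
  // DW_CFA_def_cfa: opcode 0x0c, then ULEB128 register and ULEB128 offset.
  cfi.push_back(0x0c);
  cfi.push_back(7);          // RSP.
  cfi.push_back(8);          // CFA = RSP + 8.
  // DW_CFA_offset: 0x80 | register, then a ULEB128 factored offset.
  // 1 * data_alignment(-8) = -8, i.e. the return address (RIP) at CFA-8.
  cfi.push_back(0x80 | 16);  // 0x90: RIP.
  cfi.push_back(1);
  assert((cfi == std::vector<uint8_t>{0x0c, 0x07, 0x08, 0x90, 0x01}));
  return 0;
}

Keeping this byte-level detail inside the opcode writer is the main reason the hand-rolled CIE/FDE code elsewhere in this patch can be deleted.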
+ for (int reg = 0; reg < 16; reg++) { + if (reg < 12) { + opcodes.Undefined(Reg::X86_64Fp(reg)); + } else { + opcodes.SameValue(Reg::X86_64Fp(reg)); + } + } + auto return_address_reg = Reg::X86_64Core(16); // R16(RIP). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kNone: + break; + } + LOG(FATAL) << "Can not write CIE frame for ISA " << isa; + UNREACHABLE(); +} + +/* + * @brief Generate the DWARF sections. + * @param oat_writer The Oat file Writer. + * @param eh_frame Call Frame Information. + * @param debug_info Compilation unit information. + * @param debug_abbrev Abbreviations used to generate dbg_info. + * @param debug_str Debug strings. + * @param debug_line Line number table. + */ +void WriteDebugSections(const CompilerDriver* compiler, + const OatWriter* oat_writer, + uint32_t text_section_offset, + std::vector<uint8_t>* eh_frame, + std::vector<uint8_t>* debug_info, + std::vector<uint8_t>* debug_abbrev, + std::vector<uint8_t>* debug_str, + std::vector<uint8_t>* debug_line) { + const std::vector<OatWriter::DebugInfo>& method_infos = oat_writer->GetMethodDebugInfo(); + const InstructionSet isa = compiler->GetInstructionSet(); + uint32_t cunit_low_pc = static_cast<uint32_t>(-1); + uint32_t cunit_high_pc = 0; + for (auto method_info : method_infos) { + cunit_low_pc = std::min(cunit_low_pc, method_info.low_pc_); + cunit_high_pc = std::max(cunit_high_pc, method_info.high_pc_); + } + + // Write .eh_frame section. + size_t cie_offset = eh_frame->size(); + WriteEhFrameCIE(isa, eh_frame); + for (const OatWriter::DebugInfo& mi : method_infos) { + const SwapVector<uint8_t>* opcodes = mi.compiled_method_->GetCFIInfo(); + if (opcodes != nullptr) { + WriteEhFrameFDE(Is64BitInstructionSet(isa), cie_offset, + text_section_offset + mi.low_pc_, mi.high_pc_ - mi.low_pc_, + opcodes, eh_frame); + } + } + + // Write .debug_info section. + size_t debug_abbrev_offset = debug_abbrev->size(); + DebugInfoEntryWriter<> info(false /* 32 bit */, debug_abbrev); + info.StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes); + info.WriteStrp(DW_AT_producer, "Android dex2oat", debug_str); + info.WriteData1(DW_AT_language, DW_LANG_Java); + info.WriteAddr(DW_AT_low_pc, cunit_low_pc + text_section_offset); + info.WriteAddr(DW_AT_high_pc, cunit_high_pc + text_section_offset); + info.WriteData4(DW_AT_stmt_list, debug_line->size()); + for (auto method_info : method_infos) { + std::string method_name = PrettyMethod(method_info.dex_method_index_, + *method_info.dex_file_, true); + if (method_info.deduped_) { + // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol + // so that it will show up in a debuggerd crash report. + method_name += " [ DEDUPED ]"; + } + info.StartTag(DW_TAG_subprogram, DW_CHILDREN_no); + info.WriteStrp(DW_AT_name, method_name.data(), debug_str); + info.WriteAddr(DW_AT_low_pc, method_info.low_pc_ + text_section_offset); + info.WriteAddr(DW_AT_high_pc, method_info.high_pc_ + text_section_offset); + info.EndTag(); // DW_TAG_subprogram + } + info.EndTag(); // DW_TAG_compile_unit + WriteDebugInfoCU(debug_abbrev_offset, info, debug_info); + + // TODO: in gdb info functions <regexp> - reports Java functions, but + // source file is <unknown> because .debug_line is formed as one + // compilation unit. To fix this it is possible to generate + // a separate compilation unit for every distinct Java source. + // Each of the these compilation units can have several non-adjacent + // method ranges. + + // Write .debug_line section. 
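Before the line-table code below, it may help to see how a DWARF line program packs an address advance and a line delta into a single "special opcode" byte. The sketch uses the header constants the old hand-rolled generator in this patch used (line_base -5, line_range 14, opcode_base 13); the new DebugLineOpCodeWriter carries its own kLineBase/kLineRange/kOpcodeBase constants for the same purpose.

#include <cstdio>

// Returns the one-byte special opcode for (address advance, line delta),
// or -1 if the pair does not fit and standard opcodes must be used.
static int SpecialOpcode(int addr_advance, int line_delta) {
  constexpr int kLineBase = -5;
  constexpr int kLineRange = 14;
  constexpr int kOpcodeBase = 13;
  if (line_delta < kLineBase || line_delta >= kLineBase + kLineRange) {
    return -1;  // Needs DW_LNS_advance_line.
  }
  int opcode = (line_delta - kLineBase) + kLineRange * addr_advance + kOpcodeBase;
  return opcode <= 255 ? opcode : -1;  // Too far: needs DW_LNS_advance_pc.
}

int main() {
  // Advance the (code-factored) address by 2 and the line by +1.
  printf("special opcode = %d\n", SpecialOpcode(2, 1));  // Prints 47.
  return 0;
}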
+ std::vector<FileEntry> files; + std::unordered_map<std::string, size_t> files_map; + std::vector<std::string> directories; + std::unordered_map<std::string, size_t> directories_map; + int code_factor_bits_ = 0; + int dwarf_isa = -1; + switch (isa) { + case kArm: // arm actually means thumb2. + case kThumb2: + code_factor_bits_ = 1; // 16-bit instuctions + dwarf_isa = 1; // DW_ISA_ARM_thumb. + break; + case kArm64: + case kMips: + case kMips64: + code_factor_bits_ = 2; // 32-bit instructions + break; + case kNone: + case kX86: + case kX86_64: + break; + } + DebugLineOpCodeWriter<> opcodes(false /* 32bit */, code_factor_bits_); + opcodes.SetAddress(text_section_offset + cunit_low_pc); + if (dwarf_isa != -1) { + opcodes.SetISA(dwarf_isa); + } + for (const OatWriter::DebugInfo& mi : method_infos) { + // Addresses in the line table should be unique and increasing. + if (mi.deduped_) { + continue; + } + + struct DebugInfoCallbacks { + static bool NewPosition(void* ctx, uint32_t address, uint32_t line) { + auto* context = reinterpret_cast<DebugInfoCallbacks*>(ctx); + context->dex2line_.push_back({address, static_cast<int32_t>(line)}); + return false; + } + DefaultSrcMap dex2line_; + } debug_info_callbacks; + + const DexFile* dex = mi.dex_file_; + if (mi.code_item_ != nullptr) { + dex->DecodeDebugInfo(mi.code_item_, + (mi.access_flags_ & kAccStatic) != 0, + mi.dex_method_index_, + DebugInfoCallbacks::NewPosition, + nullptr, + &debug_info_callbacks); + } + + // Get and deduplicate directory and filename. + int file_index = 0; // 0 - primary source file of the compilation. + auto& dex_class_def = dex->GetClassDef(mi.class_def_index_); + const char* source_file = dex->GetSourceFile(dex_class_def); + if (source_file != nullptr) { + std::string file_name(source_file); + size_t file_name_slash = file_name.find_last_of('/'); + std::string class_name(dex->GetClassDescriptor(dex_class_def)); + size_t class_name_slash = class_name.find_last_of('/'); + std::string full_path(file_name); + + // Guess directory from package name. + int directory_index = 0; // 0 - current directory of the compilation. + if (file_name_slash == std::string::npos && // Just filename. + class_name.front() == 'L' && // Type descriptor for a class. + class_name_slash != std::string::npos) { // Has package name. + std::string package_name = class_name.substr(1, class_name_slash - 1); + auto it = directories_map.find(package_name); + if (it == directories_map.end()) { + directory_index = 1 + directories.size(); + directories_map.emplace(package_name, directory_index); + directories.push_back(package_name); + } else { + directory_index = it->second; + } + full_path = package_name + "/" + file_name; + } + + // Add file entry. + auto it2 = files_map.find(full_path); + if (it2 == files_map.end()) { + file_index = 1 + files.size(); + files_map.emplace(full_path, file_index); + files.push_back(FileEntry { + file_name, + directory_index, + 0, // Modification time - NA. + 0, // File size - NA. + }); + } else { + file_index = it2->second; + } + } + opcodes.SetFile(file_index); + + // Generate mapping opcodes from PC to Java lines. 
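The loop that follows composes two mappings: the compiled method's pc to dex_pc source map and the dex debug info's dex_pc to line table, skipping rows whose line number did not change. A standalone sketch of that composition (names and values are illustrative, not the ART types):

#include <cstdint>
#include <cstdio>
#include <map>
#include <utility>
#include <vector>

int main() {
  // pc -> dex_pc, as GetSrcMappingTable() provides per compiled method.
  std::vector<std::pair<uint32_t, uint32_t>> pc2dex = {{0, 0}, {4, 1}, {8, 3}};
  // dex_pc -> Java line, as decoded from the dex debug info.
  std::map<uint32_t, int> dex2line = {{0, 10}, {1, 10}, {3, 12}};
  int last_line = -1;
  for (const auto& entry : pc2dex) {
    auto it = dex2line.find(entry.second);
    if (it == dex2line.end()) {
      continue;  // No line information for this dex pc.
    }
    if (it->second != last_line) {  // Same rule as the CurrentLine() check below.
      printf("row: pc=0x%x line=%d\n", static_cast<unsigned>(entry.first), it->second);
      last_line = it->second;
    }
  }
  return 0;
}

The real code additionally emits a prologue row and flips the is_stmt flag around it so that debuggers do not place breakpoints in the prologue.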
+ const DefaultSrcMap& dex2line_map = debug_info_callbacks.dex2line_; + uint32_t low_pc = text_section_offset + mi.low_pc_; + if (file_index != 0 && !dex2line_map.empty()) { + bool first = true; + for (SrcMapElem pc2dex : mi.compiled_method_->GetSrcMappingTable()) { + uint32_t pc = pc2dex.from_; + int dex_pc = pc2dex.to_; + auto dex2line = dex2line_map.Find(static_cast<uint32_t>(dex_pc)); + if (dex2line.first) { + int line = dex2line.second; + if (first) { + first = false; + if (pc > 0) { + // Assume that any preceding code is prologue. + int first_line = dex2line_map.front().to_; + // Prologue is not a sensible place for a breakpoint. + opcodes.NegateStmt(); + opcodes.AddRow(low_pc, first_line); + opcodes.NegateStmt(); + opcodes.SetPrologueEnd(); + } + opcodes.AddRow(low_pc + pc, line); + } else if (line != opcodes.CurrentLine()) { + opcodes.AddRow(low_pc + pc, line); + } + } + } + } else { + // line 0 - instruction cannot be attributed to any source line. + opcodes.AddRow(low_pc, 0); + } + } + opcodes.AdvancePC(text_section_offset + cunit_high_pc); + opcodes.EndSequence(); + WriteDebugLineTable(directories, files, opcodes, debug_line); +} + +} // namespace dwarf +} // namespace art diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h new file mode 100644 index 0000000000..39a99d6d38 --- /dev/null +++ b/compiler/elf_writer_debug.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_ +#define ART_COMPILER_ELF_WRITER_DEBUG_H_ + +#include <vector> + +#include "oat_writer.h" + +namespace art { +namespace dwarf { + +void WriteDebugSections(const CompilerDriver* compiler, + const OatWriter* oat_writer, + uint32_t text_section_offset, + std::vector<uint8_t>* eh_frame_data, + std::vector<uint8_t>* debug_info_data, + std::vector<uint8_t>* debug_abbrev_data, + std::vector<uint8_t>* debug_str_data, + std::vector<uint8_t>* debug_line_data); + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_ELF_WRITER_DEBUG_H_ diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index a822b24cde..e9af25f293 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -21,11 +21,14 @@ #include "base/logging.h" #include "base/unix_file/fd_file.h" #include "buffered_output_stream.h" +#include "compiled_method.h" +#include "dex_file-inl.h" #include "driver/compiler_driver.h" -#include "dwarf.h" +#include "driver/compiler_options.h" #include "elf_builder.h" #include "elf_file.h" #include "elf_utils.h" +#include "elf_writer_debug.h" #include "file_output_stream.h" #include "globals.h" #include "leb128.h" @@ -35,42 +38,6 @@ namespace art { -static void PushByte(std::vector<uint8_t>* buf, int data) { - buf->push_back(data & 0xff); -} - -static uint32_t PushStr(std::vector<uint8_t>* buf, const char* str, const char* def = nullptr) { - if (str == nullptr) { - str = def; - } - - uint32_t offset = buf->size(); - for (size_t i = 0; str[i] != '\0'; ++i) { - buf->push_back(str[i]); - } - buf->push_back('\0'); - return offset; -} - -static uint32_t PushStr(std::vector<uint8_t>* buf, const std::string &str) { - uint32_t offset = buf->size(); - buf->insert(buf->end(), str.begin(), str.end()); - buf->push_back('\0'); - return offset; -} - -static void UpdateWord(std::vector<uint8_t>* buf, int offset, int data) { - (*buf)[offset+0] = data; - (*buf)[offset+1] = data >> 8; - (*buf)[offset+2] = data >> 16; - (*buf)[offset+3] = data >> 24; -} - -static void PushHalf(std::vector<uint8_t>* buf, int data) { - buf->push_back(data & 0xff); - buf->push_back((data >> 8) & 0xff); -} - template <typename Elf_Word, typename Elf_Sword, typename Elf_Addr, typename Elf_Dyn, typename Elf_Sym, typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr> @@ -85,116 +52,6 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, return elf_writer.Write(oat_writer, dex_files, android_root, is_host); } -std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) { - std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>; - - // Length (will be filled in later in this routine). - if (is_x86_64) { - Push32(cfi_info, 0xffffffff); // Indicates 64bit - Push32(cfi_info, 0); - Push32(cfi_info, 0); - } else { - Push32(cfi_info, 0); - } - - // CIE id: always 0. - if (is_x86_64) { - Push32(cfi_info, 0); - Push32(cfi_info, 0); - } else { - Push32(cfi_info, 0); - } - - // Version: always 1. - cfi_info->push_back(0x01); - - // Augmentation: 'zR\0' - cfi_info->push_back(0x7a); - cfi_info->push_back(0x52); - cfi_info->push_back(0x0); - - // Code alignment: 1. - EncodeUnsignedLeb128(1, cfi_info); - - // Data alignment. - if (is_x86_64) { - EncodeSignedLeb128(-8, cfi_info); - } else { - EncodeSignedLeb128(-4, cfi_info); - } - - // Return address register. - if (is_x86_64) { - // R16(RIP) - cfi_info->push_back(0x10); - } else { - // R8(EIP) - cfi_info->push_back(0x08); - } - - // Augmentation length: 1. 
- cfi_info->push_back(1); - - // Augmentation data. - if (is_x86_64) { - // 0x04 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8). - cfi_info->push_back(0x04); - } else { - // 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4). - cfi_info->push_back(0x03); - } - - // Initial instructions. - if (is_x86_64) { - // DW_CFA_def_cfa R7(RSP) 8. - cfi_info->push_back(0x0c); - cfi_info->push_back(0x07); - cfi_info->push_back(0x08); - - // DW_CFA_offset R16(RIP) 1 (* -8). - cfi_info->push_back(0x90); - cfi_info->push_back(0x01); - } else { - // DW_CFA_def_cfa R4(ESP) 4. - cfi_info->push_back(0x0c); - cfi_info->push_back(0x04); - cfi_info->push_back(0x04); - - // DW_CFA_offset R8(EIP) 1 (* -4). - cfi_info->push_back(0x88); - cfi_info->push_back(0x01); - } - - // Padding to a multiple of 4 - while ((cfi_info->size() & 3) != 0) { - // DW_CFA_nop is encoded as 0. - cfi_info->push_back(0); - } - - // Set the length of the CIE inside the generated bytes. - if (is_x86_64) { - uint32_t length = cfi_info->size() - 12; - UpdateWord(cfi_info, 4, length); - } else { - uint32_t length = cfi_info->size() - 4; - UpdateWord(cfi_info, 0, length); - } - return cfi_info; -} - -std::vector<uint8_t>* ConstructCIEFrame(InstructionSet isa) { - switch (isa) { - case kX86: - return ConstructCIEFrameX86(false); - case kX86_64: - return ConstructCIEFrameX86(true); - - default: - // Not implemented. - return nullptr; - } -} - class OatWriterWrapper FINAL : public CodeOutput { public: explicit OatWriterWrapper(OatWriter* oat_writer) : oat_writer_(oat_writer) {} @@ -253,7 +110,8 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, return false; } - if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) { + if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols() && + !oat_writer->GetMethodDebugInfo().empty()) { WriteDebugSymbols(compiler_driver_, builder.get(), oat_writer); } @@ -273,402 +131,6 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, return builder->Write(); } -class LineTableGenerator FINAL : public Leb128Encoder { - public: - LineTableGenerator(int line_base, int line_range, int opcode_base, - std::vector<uint8_t>* data, uintptr_t current_address, - size_t current_line) - : Leb128Encoder(data), line_base_(line_base), line_range_(line_range), - opcode_base_(opcode_base), current_address_(current_address), - current_line_(current_line), current_file_index_(0) {} - - void PutDelta(unsigned delta_addr, int delta_line) { - current_line_ += delta_line; - current_address_ += delta_addr; - - if (delta_line >= line_base_ && delta_line < line_base_ + line_range_) { - unsigned special_opcode = (delta_line - line_base_) + - (line_range_ * delta_addr) + opcode_base_; - if (special_opcode <= 255) { - PushByte(data_, special_opcode); - return; - } - } - - // generate standart opcode for address advance - if (delta_addr != 0) { - PushByte(data_, DW_LNS_advance_pc); - PushBackUnsigned(delta_addr); - } - - // generate standart opcode for line delta - if (delta_line != 0) { - PushByte(data_, DW_LNS_advance_line); - PushBackSigned(delta_line); - } - - // generate standart opcode for new LTN entry - PushByte(data_, DW_LNS_copy); - } - - void SetAddr(uintptr_t addr) { - if (current_address_ == addr) { - return; - } - - current_address_ = addr; - - PushByte(data_, 0); // extended opcode: - PushByte(data_, 1 + 4); // length: opcode_size + address_size - PushByte(data_, DW_LNE_set_address); - Push32(data_, addr); - } - - void SetLine(unsigned line) { - int delta_line = line - current_line_; - if 
(delta_line) { - current_line_ = line; - PushByte(data_, DW_LNS_advance_line); - PushBackSigned(delta_line); - } - } - - void SetFile(unsigned file_index) { - if (current_file_index_ != file_index) { - current_file_index_ = file_index; - PushByte(data_, DW_LNS_set_file); - PushBackUnsigned(file_index); - } - } - - void EndSequence() { - // End of Line Table Program - // 0(=ext), 1(len), DW_LNE_end_sequence - PushByte(data_, 0); - PushByte(data_, 1); - PushByte(data_, DW_LNE_end_sequence); - } - - private: - const int line_base_; - const int line_range_; - const int opcode_base_; - uintptr_t current_address_; - size_t current_line_; - unsigned current_file_index_; - - DISALLOW_COPY_AND_ASSIGN(LineTableGenerator); -}; - -// TODO: rewriting it using DexFile::DecodeDebugInfo needs unneeded stuff. -static void GetLineInfoForJava(const uint8_t* dbgstream, const SwapSrcMap& pc2dex, - DefaultSrcMap* result, uint32_t start_pc = 0) { - if (dbgstream == nullptr) { - return; - } - - int adjopcode; - uint32_t dex_offset = 0; - uint32_t java_line = DecodeUnsignedLeb128(&dbgstream); - - // skip parameters - for (uint32_t param_count = DecodeUnsignedLeb128(&dbgstream); param_count != 0; --param_count) { - DecodeUnsignedLeb128(&dbgstream); - } - - for (bool is_end = false; is_end == false; ) { - uint8_t opcode = *dbgstream; - dbgstream++; - switch (opcode) { - case DexFile::DBG_END_SEQUENCE: - is_end = true; - break; - - case DexFile::DBG_ADVANCE_PC: - dex_offset += DecodeUnsignedLeb128(&dbgstream); - break; - - case DexFile::DBG_ADVANCE_LINE: - java_line += DecodeSignedLeb128(&dbgstream); - break; - - case DexFile::DBG_START_LOCAL: - case DexFile::DBG_START_LOCAL_EXTENDED: - DecodeUnsignedLeb128(&dbgstream); - DecodeUnsignedLeb128(&dbgstream); - DecodeUnsignedLeb128(&dbgstream); - - if (opcode == DexFile::DBG_START_LOCAL_EXTENDED) { - DecodeUnsignedLeb128(&dbgstream); - } - break; - - case DexFile::DBG_END_LOCAL: - case DexFile::DBG_RESTART_LOCAL: - DecodeUnsignedLeb128(&dbgstream); - break; - - case DexFile::DBG_SET_PROLOGUE_END: - case DexFile::DBG_SET_EPILOGUE_BEGIN: - case DexFile::DBG_SET_FILE: - break; - - default: - adjopcode = opcode - DexFile::DBG_FIRST_SPECIAL; - dex_offset += adjopcode / DexFile::DBG_LINE_RANGE; - java_line += DexFile::DBG_LINE_BASE + (adjopcode % DexFile::DBG_LINE_RANGE); - - for (SwapSrcMap::const_iterator found = pc2dex.FindByTo(dex_offset); - found != pc2dex.end() && found->to_ == static_cast<int32_t>(dex_offset); - found++) { - result->push_back({found->from_ + start_pc, static_cast<int32_t>(java_line)}); - } - break; - } - } -} - -/* - * @brief Generate the DWARF debug_info and debug_abbrev sections - * @param oat_writer The Oat file Writer. - * @param dbg_info Compilation unit information. - * @param dbg_abbrev Abbreviations used to generate dbg_info. - * @param dbg_str Debug strings. - */ -static void FillInCFIInformation(OatWriter* oat_writer, - std::vector<uint8_t>* dbg_info, - std::vector<uint8_t>* dbg_abbrev, - std::vector<uint8_t>* dbg_str, - std::vector<uint8_t>* dbg_line, - uint32_t text_section_offset) { - const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo(); - - uint32_t producer_str_offset = PushStr(dbg_str, "Android dex2oat"); - - // Create the debug_abbrev section with boilerplate information. - // We only care about low_pc and high_pc right now for the compilation - // unit and methods. - - // Tag 1: Compilation unit: DW_TAG_compile_unit. 
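For readers decoding the abbreviation bytes pushed by hand below (or the equivalent table the new DebugInfoEntryWriter derives automatically), here is the same compilation-unit abbreviation spelled out numerically; the constants are the standard DWARF values, all small enough to fit single-byte ULEB128s:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint8_t> abbrev = {
    1,           // Abbreviation code referenced from .debug_info.
    0x11,        // DW_TAG_compile_unit.
    0x01,        // DW_CHILDREN_yes.
    0x25, 0x0e,  // DW_AT_producer, DW_FORM_strp.
    0x13, 0x0b,  // DW_AT_language, DW_FORM_data1.
    0x11, 0x01,  // DW_AT_low_pc, DW_FORM_addr.
    0x12, 0x01,  // DW_AT_high_pc, DW_FORM_addr.
    0x10, 0x17,  // DW_AT_stmt_list, DW_FORM_sec_offset.
    0, 0,        // End of the attribute list.
  };
  printf("%zu bytes describe the compile-unit DIE layout\n", abbrev.size());
  return 0;
}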
- PushByte(dbg_abbrev, 1); - PushByte(dbg_abbrev, DW_TAG_compile_unit); - - // There are children (the methods). - PushByte(dbg_abbrev, DW_CHILDREN_yes); - - // DW_AT_producer DW_FORM_data1. - // REVIEW: we can get rid of dbg_str section if - // DW_FORM_string (immediate string) was used everywhere instead of - // DW_FORM_strp (ref to string from .debug_str section). - // DW_FORM_strp makes sense only if we reuse the strings. - PushByte(dbg_abbrev, DW_AT_producer); - PushByte(dbg_abbrev, DW_FORM_strp); - - // DW_LANG_Java DW_FORM_data1. - PushByte(dbg_abbrev, DW_AT_language); - PushByte(dbg_abbrev, DW_FORM_data1); - - // DW_AT_low_pc DW_FORM_addr. - PushByte(dbg_abbrev, DW_AT_low_pc); - PushByte(dbg_abbrev, DW_FORM_addr); - - // DW_AT_high_pc DW_FORM_addr. - PushByte(dbg_abbrev, DW_AT_high_pc); - PushByte(dbg_abbrev, DW_FORM_addr); - - if (dbg_line != nullptr) { - // DW_AT_stmt_list DW_FORM_sec_offset. - PushByte(dbg_abbrev, DW_AT_stmt_list); - PushByte(dbg_abbrev, DW_FORM_sec_offset); - } - - // End of DW_TAG_compile_unit. - PushHalf(dbg_abbrev, 0); - - // Tag 2: Compilation unit: DW_TAG_subprogram. - PushByte(dbg_abbrev, 2); - PushByte(dbg_abbrev, DW_TAG_subprogram); - - // There are no children. - PushByte(dbg_abbrev, DW_CHILDREN_no); - - // Name of the method. - PushByte(dbg_abbrev, DW_AT_name); - PushByte(dbg_abbrev, DW_FORM_strp); - - // DW_AT_low_pc DW_FORM_addr. - PushByte(dbg_abbrev, DW_AT_low_pc); - PushByte(dbg_abbrev, DW_FORM_addr); - - // DW_AT_high_pc DW_FORM_addr. - PushByte(dbg_abbrev, DW_AT_high_pc); - PushByte(dbg_abbrev, DW_FORM_addr); - - // End of DW_TAG_subprogram. - PushHalf(dbg_abbrev, 0); - - // Start the debug_info section with the header information - // 'unit_length' will be filled in later. - int cunit_length = dbg_info->size(); - Push32(dbg_info, 0); - - // 'version' - 3. - PushHalf(dbg_info, 3); - - // Offset into .debug_abbrev section (always 0). - Push32(dbg_info, 0); - - // Address size: 4. - PushByte(dbg_info, 4); - - // Start the description for the compilation unit. - // This uses tag 1. - PushByte(dbg_info, 1); - - // The producer is Android dex2oat. - Push32(dbg_info, producer_str_offset); - - // The language is Java. - PushByte(dbg_info, DW_LANG_Java); - - // low_pc and high_pc. - uint32_t cunit_low_pc = 0 - 1; - uint32_t cunit_high_pc = 0; - int cunit_low_pc_pos = dbg_info->size(); - Push32(dbg_info, 0); - Push32(dbg_info, 0); - - if (dbg_line == nullptr) { - for (size_t i = 0; i < method_info.size(); ++i) { - const OatWriter::DebugInfo &dbg = method_info[i]; - - cunit_low_pc = std::min(cunit_low_pc, dbg.low_pc_); - cunit_high_pc = std::max(cunit_high_pc, dbg.high_pc_); - - // Start a new TAG: subroutine (2). - PushByte(dbg_info, 2); - - // Enter name, low_pc, high_pc. - Push32(dbg_info, PushStr(dbg_str, dbg.method_name_)); - Push32(dbg_info, dbg.low_pc_ + text_section_offset); - Push32(dbg_info, dbg.high_pc_ + text_section_offset); - } - } else { - // TODO: in gdb info functions <regexp> - reports Java functions, but - // source file is <unknown> because .debug_line is formed as one - // compilation unit. To fix this it is possible to generate - // a separate compilation unit for every distinct Java source. - // Each of the these compilation units can have several non-adjacent - // method ranges. 
- - // Line number table offset - Push32(dbg_info, dbg_line->size()); - - size_t lnt_length = dbg_line->size(); - Push32(dbg_line, 0); - - PushHalf(dbg_line, 4); // LNT Version DWARF v4 => 4 - - size_t lnt_hdr_length = dbg_line->size(); - Push32(dbg_line, 0); // TODO: 64-bit uses 8-byte here - - PushByte(dbg_line, 1); // minimum_instruction_length (ubyte) - PushByte(dbg_line, 1); // maximum_operations_per_instruction (ubyte) = always 1 - PushByte(dbg_line, 1); // default_is_stmt (ubyte) - - const int8_t LINE_BASE = -5; - PushByte(dbg_line, LINE_BASE); // line_base (sbyte) - - const uint8_t LINE_RANGE = 14; - PushByte(dbg_line, LINE_RANGE); // line_range (ubyte) - - const uint8_t OPCODE_BASE = 13; - PushByte(dbg_line, OPCODE_BASE); // opcode_base (ubyte) - - // Standard_opcode_lengths (array of ubyte). - PushByte(dbg_line, 0); PushByte(dbg_line, 1); PushByte(dbg_line, 1); - PushByte(dbg_line, 1); PushByte(dbg_line, 1); PushByte(dbg_line, 0); - PushByte(dbg_line, 0); PushByte(dbg_line, 0); PushByte(dbg_line, 1); - PushByte(dbg_line, 0); PushByte(dbg_line, 0); PushByte(dbg_line, 1); - - PushByte(dbg_line, 0); // include_directories (sequence of path names) = EMPTY - - // File_names (sequence of file entries). - std::unordered_map<const char*, size_t> files; - for (size_t i = 0; i < method_info.size(); ++i) { - const OatWriter::DebugInfo &dbg = method_info[i]; - // TODO: add package directory to the file name - const char* file_name = dbg.src_file_name_ == nullptr ? "null" : dbg.src_file_name_; - auto found = files.find(file_name); - if (found == files.end()) { - size_t file_index = 1 + files.size(); - files[file_name] = file_index; - PushStr(dbg_line, file_name); - PushByte(dbg_line, 0); // include directory index = LEB128(0) - no directory - PushByte(dbg_line, 0); // modification time = LEB128(0) - NA - PushByte(dbg_line, 0); // file length = LEB128(0) - NA - } - } - PushByte(dbg_line, 0); // End of file_names. - - // Set lnt header length. - UpdateWord(dbg_line, lnt_hdr_length, dbg_line->size() - lnt_hdr_length - 4); - - // Generate Line Number Program code, one long program for all methods. - LineTableGenerator line_table_generator(LINE_BASE, LINE_RANGE, OPCODE_BASE, - dbg_line, 0, 1); - - DefaultSrcMap pc2java_map; - for (size_t i = 0; i < method_info.size(); ++i) { - const OatWriter::DebugInfo &dbg = method_info[i]; - const char* file_name = (dbg.src_file_name_ == nullptr) ? "null" : dbg.src_file_name_; - size_t file_index = files[file_name]; - DCHECK_NE(file_index, 0U) << file_name; - - cunit_low_pc = std::min(cunit_low_pc, dbg.low_pc_); - cunit_high_pc = std::max(cunit_high_pc, dbg.high_pc_); - - // Start a new TAG: subroutine (2). - PushByte(dbg_info, 2); - - // Enter name, low_pc, high_pc. - Push32(dbg_info, PushStr(dbg_str, dbg.method_name_)); - Push32(dbg_info, dbg.low_pc_ + text_section_offset); - Push32(dbg_info, dbg.high_pc_ + text_section_offset); - - GetLineInfoForJava(dbg.dbgstream_, dbg.compiled_method_->GetSrcMappingTable(), - &pc2java_map, dbg.low_pc_); - pc2java_map.DeltaFormat({dbg.low_pc_, 1}, dbg.high_pc_); - if (!pc2java_map.empty()) { - line_table_generator.SetFile(file_index); - line_table_generator.SetAddr(dbg.low_pc_ + text_section_offset); - line_table_generator.SetLine(1); - for (auto& src_map_elem : pc2java_map) { - line_table_generator.PutDelta(src_map_elem.from_, src_map_elem.to_); - } - pc2java_map.clear(); - } - } - - // End Sequence should have the highest address set. 
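The SetAddr and EndSequence calls of the removed generator (and SetAddress/EndSequence on the new DebugLineOpCodeWriter) emit DWARF extended line-number opcodes, which are introduced by a zero byte and a length. A standalone sketch of the two byte sequences, assuming a 4-byte address:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint8_t> line;
  uint32_t addr = 0x1000;
  line.push_back(0);      // Extended opcode marker.
  line.push_back(1 + 4);  // Length: sub-opcode plus 4 address bytes.
  line.push_back(0x02);   // DW_LNE_set_address.
  for (int shift = 0; shift < 32; shift += 8) {
    line.push_back((addr >> shift) & 0xff);
  }
  line.push_back(0);      // Extended opcode marker.
  line.push_back(1);      // Length: just the sub-opcode.
  line.push_back(0x01);   // DW_LNE_end_sequence.
  assert(line.size() == 10);
  return 0;
}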
- line_table_generator.SetAddr(cunit_high_pc + text_section_offset); - line_table_generator.EndSequence(); - - // set lnt length - UpdateWord(dbg_line, lnt_length, dbg_line->size() - lnt_length - 4); - } - - // One byte terminator - PushByte(dbg_info, 0); - - // Fill in cunit's low_pc and high_pc. - UpdateWord(dbg_info, cunit_low_pc_pos, cunit_low_pc + text_section_offset); - UpdateWord(dbg_info, cunit_low_pc_pos + 4, cunit_high_pc + text_section_offset); - - // We have now walked all the methods. Fill in lengths. - UpdateWord(dbg_info, cunit_length, dbg_info->size() - cunit_length - 4); -} - template <typename Elf_Word, typename Elf_Sword, typename Elf_Addr, typename Elf_Dyn, typename Elf_Sym, typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr> @@ -678,18 +140,23 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, ElfBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, Elf_Sym, Elf_Ehdr, Elf_Phdr, Elf_Shdr>* builder, OatWriter* oat_writer) { - std::unique_ptr<std::vector<uint8_t>> cfi_info( - ConstructCIEFrame(compiler_driver->GetInstructionSet())); - - Elf_Addr text_section_address = builder->GetTextBuilder().GetSection()->sh_addr; - // Iterate over the compiled methods. - const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo(); + const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo(); ElfSymtabBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Shdr>* symtab = builder->GetSymtabBuilder(); for (auto it = method_info.begin(); it != method_info.end(); ++it) { - symtab->AddSymbol(it->method_name_, &builder->GetTextBuilder(), it->low_pc_, true, - it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC); + std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true); + if (it->deduped_) { + // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol + // so that it will show up in a debuggerd crash report. + name += " [ DEDUPED ]"; + } + + uint32_t low_pc = it->low_pc_; + // Add in code delta, e.g., thumb bit 0 for Thumb2 code. + low_pc += it->compiled_method_->CodeDelta(); + symtab->AddSymbol(name, &builder->GetTextBuilder(), low_pc, + true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC); // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2 // instructions, so that disassembler tools can correctly disassemble. @@ -697,109 +164,29 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, symtab->AddSymbol("$t", &builder->GetTextBuilder(), it->low_pc_ & ~1, true, 0, STB_LOCAL, STT_NOTYPE); } - - // Include CFI for compiled method, if possible. - if (cfi_info.get() != nullptr) { - DCHECK(it->compiled_method_ != nullptr); - - // Copy in the FDE, if present - const SwapVector<uint8_t>* fde = it->compiled_method_->GetCFIInfo(); - if (fde != nullptr) { - // Copy the information into cfi_info and then fix the address in the new copy. - int cur_offset = cfi_info->size(); - cfi_info->insert(cfi_info->end(), fde->begin(), fde->end()); - - bool is_64bit = *(reinterpret_cast<const uint32_t*>(fde->data())) == 0xffffffff; - - // Set the 'CIE_pointer' field. - uint64_t CIE_pointer = cur_offset + (is_64bit ? 
12 : 4); - uint64_t offset_to_update = CIE_pointer; - if (is_64bit) { - (*cfi_info)[offset_to_update+0] = CIE_pointer; - (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8; - (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16; - (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24; - (*cfi_info)[offset_to_update+4] = CIE_pointer >> 32; - (*cfi_info)[offset_to_update+5] = CIE_pointer >> 40; - (*cfi_info)[offset_to_update+6] = CIE_pointer >> 48; - (*cfi_info)[offset_to_update+7] = CIE_pointer >> 56; - } else { - (*cfi_info)[offset_to_update+0] = CIE_pointer; - (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8; - (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16; - (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24; - } - - // Set the 'initial_location' field. - offset_to_update += is_64bit ? 8 : 4; - if (is_64bit) { - const uint64_t quick_code_start = it->low_pc_ + text_section_address; - (*cfi_info)[offset_to_update+0] = quick_code_start; - (*cfi_info)[offset_to_update+1] = quick_code_start >> 8; - (*cfi_info)[offset_to_update+2] = quick_code_start >> 16; - (*cfi_info)[offset_to_update+3] = quick_code_start >> 24; - (*cfi_info)[offset_to_update+4] = quick_code_start >> 32; - (*cfi_info)[offset_to_update+5] = quick_code_start >> 40; - (*cfi_info)[offset_to_update+6] = quick_code_start >> 48; - (*cfi_info)[offset_to_update+7] = quick_code_start >> 56; - } else { - const uint32_t quick_code_start = it->low_pc_ + text_section_address; - (*cfi_info)[offset_to_update+0] = quick_code_start; - (*cfi_info)[offset_to_update+1] = quick_code_start >> 8; - (*cfi_info)[offset_to_update+2] = quick_code_start >> 16; - (*cfi_info)[offset_to_update+3] = quick_code_start >> 24; - } - } - } - } - - bool hasCFI = (cfi_info.get() != nullptr); - bool hasLineInfo = false; - for (auto& dbg_info : oat_writer->GetCFIMethodInfo()) { - if (dbg_info.dbgstream_ != nullptr && - !dbg_info.compiled_method_->GetSrcMappingTable().empty()) { - hasLineInfo = true; - break; - } } - if (hasLineInfo || hasCFI) { - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_info(".debug_info", - SHT_PROGBITS, - 0, nullptr, 0, 1, 0); - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_abbrev(".debug_abbrev", - SHT_PROGBITS, - 0, nullptr, 0, 1, 0); - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_str(".debug_str", - SHT_PROGBITS, - 0, nullptr, 0, 1, 0); - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_line(".debug_line", - SHT_PROGBITS, - 0, nullptr, 0, 1, 0); - - FillInCFIInformation(oat_writer, debug_info.GetBuffer(), - debug_abbrev.GetBuffer(), debug_str.GetBuffer(), - hasLineInfo ? 
debug_line.GetBuffer() : nullptr, - text_section_address); - - builder->RegisterRawSection(debug_info); - builder->RegisterRawSection(debug_abbrev); - - if (hasCFI) { - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> eh_frame(".eh_frame", - SHT_PROGBITS, - SHF_ALLOC, - nullptr, 0, 4, 0); - eh_frame.SetBuffer(std::move(*cfi_info.get())); - builder->RegisterRawSection(eh_frame); - } - - if (hasLineInfo) { - builder->RegisterRawSection(debug_line); - } - - builder->RegisterRawSection(debug_str); - } + typedef ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> Section; + Section eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0); + Section debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + Section debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + Section debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + Section debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + + dwarf::WriteDebugSections(compiler_driver, + oat_writer, + builder->GetTextBuilder().GetSection()->sh_addr, + eh_frame.GetBuffer(), + debug_info.GetBuffer(), + debug_abbrev.GetBuffer(), + debug_str.GetBuffer(), + debug_line.GetBuffer()); + + builder->RegisterRawSection(eh_frame); + builder->RegisterRawSection(debug_info); + builder->RegisterRawSection(debug_abbrev); + builder->RegisterRawSection(debug_str); + builder->RegisterRawSection(debug_line); } // Explicit instantiations diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index c1555aa523..1ede228c4f 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -19,6 +19,7 @@ #include <sys/stat.h> #include <memory> +#include <numeric> #include <vector> #include "base/logging.h" @@ -54,8 +55,7 @@ #include "runtime.h" #include "scoped_thread_state_change.h" #include "handle_scope-inl.h" - -#include <numeric> +#include "utils/dex_cache_arrays_layout-inl.h" using ::art::mirror::ArtField; using ::art::mirror::ArtMethod; @@ -238,7 +238,7 @@ void ImageWriter::AssignImageOffset(mirror::Object* object, ImageWriter::BinSlot DCHECK(object != nullptr); DCHECK_NE(image_objects_offset_begin_, 0u); - size_t previous_bin_sizes = GetBinSizeSum(bin_slot.GetBin()); // sum sizes in [0..bin#) + size_t previous_bin_sizes = bin_slot_previous_sizes_[bin_slot.GetBin()]; size_t new_offset = image_objects_offset_begin_ + previous_bin_sizes + bin_slot.GetIndex(); DCHECK_ALIGNED(new_offset, kObjectAlignment); @@ -293,6 +293,28 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) { DCHECK(IsImageBinSlotAssigned(object)); } +void ImageWriter::PrepareDexCacheArraySlots() { + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock()); + size_t dex_cache_count = class_linker->GetDexCacheCount(); + uint32_t size = 0u; + for (size_t idx = 0; idx < dex_cache_count; ++idx) { + DexCache* dex_cache = class_linker->GetDexCache(idx); + const DexFile* dex_file = dex_cache->GetDexFile(); + dex_cache_array_starts_.Put(dex_file, size); + DexCacheArraysLayout layout(dex_file); + DCHECK(layout.Valid()); + dex_cache_array_indexes_.Put(dex_cache->GetResolvedTypes(), size + layout.TypesOffset()); + dex_cache_array_indexes_.Put(dex_cache->GetResolvedMethods(), size + layout.MethodsOffset()); + dex_cache_array_indexes_.Put(dex_cache->GetResolvedFields(), size + layout.FieldsOffset()); + dex_cache_array_indexes_.Put(dex_cache->GetStrings(), size + layout.StringsOffset()); + size += layout.Size(); + } + // Set the slot size early to avoid 
DCHECK() failures in IsImageBinSlotAssigned() + // when AssignImageBinSlot() assigns their indexes out or order. + bin_slot_sizes_[kBinDexCacheArray] = size; +} + void ImageWriter::AssignImageBinSlot(mirror::Object* object) { DCHECK(object != nullptr); size_t object_size = object->SizeOf(); @@ -307,6 +329,7 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { // This means more pages will stay either clean or shared dirty (with zygote) and // the app will use less of its own (private) memory. Bin bin = kBinRegular; + size_t current_offset = 0u; if (kBinObjects) { // @@ -316,6 +339,12 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { // Memory analysis has determined that the following types of objects get dirtied // the most: // + // * Dex cache arrays are stored in a special bin. The arrays for each dex cache have + // a fixed layout which helps improve generated code (using PC-relative addressing), + // so we pre-calculate their offsets separately in PrepareDexCacheArraySlots(). + // Since these arrays are huge, most pages do not overlap other objects and it's not + // really important where they are for the clean/dirty separation. Due to their + // special PC-relative addressing, we arbitrarily keep them at the beginning. // * Class'es which are verified [their clinit runs only at runtime] // - classes in general [because their static fields get overwritten] // - initialized classes with all-final statics are unlikely to be ever dirty, @@ -376,13 +405,21 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { } } else if (object->GetClass<kVerifyNone>()->IsStringClass()) { bin = kBinString; // Strings are almost always immutable (except for object header). + } else if (object->IsObjectArray()) { + auto it = dex_cache_array_indexes_.find(object); + if (it != dex_cache_array_indexes_.end()) { + bin = kBinDexCacheArray; + current_offset = it->second; // Use prepared offset defined by the DexCacheLayout. + } // else bin = kBinRegular } // else bin = kBinRegular } - size_t current_offset = bin_slot_sizes_[bin]; // How many bytes the current bin is at (aligned). - // Move the current bin size up to accomodate the object we just assigned a bin slot. size_t offset_delta = RoundUp(object_size, kObjectAlignment); // 64-bit alignment - bin_slot_sizes_[bin] += offset_delta; + if (bin != kBinDexCacheArray) { + current_offset = bin_slot_sizes_[bin]; // How many bytes the current bin is at (aligned). + // Move the current bin size up to accomodate the object we just assigned a bin slot. + bin_slot_sizes_[bin] += offset_delta; + } BinSlot new_bin_slot(bin, current_offset); SetImageBinSlot(object, new_bin_slot); @@ -887,8 +924,17 @@ void ImageWriter::CalculateNewObjectOffsets() { // TODO: Image spaces only? DCHECK_LT(image_end_, image_->Size()); image_objects_offset_begin_ = image_end_; + // Prepare bin slots for dex cache arrays. + PrepareDexCacheArraySlots(); // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots. heap->VisitObjects(WalkFieldsCallback, this); + // Calculate cumulative bin slot sizes. + size_t previous_sizes = 0u; + for (size_t i = 0; i != kBinSize; ++i) { + bin_slot_previous_sizes_[i] = previous_sizes; + previous_sizes += bin_slot_sizes_[i]; + } + DCHECK_EQ(previous_sizes, GetBinSizeSum()); // Transform each object's bin slot into an offset which will be used to do the final copy. 
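The bin_slot_previous_sizes_ prefix sums computed above let AssignImageOffset() turn a (bin, offset-in-bin) slot into a final image offset with a single addition per lookup. A standalone sketch of that arithmetic, with made-up sizes:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  // Per-bin byte totals after all objects have been assigned slots
  // (values here are invented for the example).
  std::vector<size_t> bin_sizes = {4096, 512, 1024};
  std::vector<size_t> previous(bin_sizes.size(), 0);
  size_t running = 0;
  for (size_t i = 0; i < bin_sizes.size(); ++i) {
    previous[i] = running;  // Sum of all earlier bins.
    running += bin_sizes[i];
  }
  // offset = start of image objects + earlier bins + offset inside own bin.
  size_t image_objects_offset_begin = 0x2000;
  size_t bin = 2;
  size_t offset_in_bin = 64;
  size_t offset = image_objects_offset_begin + previous[bin] + offset_in_bin;
  assert(offset == 0x2000 + 4096 + 512 + 64);
  return 0;
}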
heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this); DCHECK(saved_hashes_map_.empty()); // All binslot hashes should've been put into vector by now. @@ -1187,8 +1233,8 @@ size_t ImageWriter::GetBinSizeSum(ImageWriter::Bin up_to) const { ImageWriter::BinSlot::BinSlot(uint32_t lockword) : lockword_(lockword) { // These values may need to get updated if more bins are added to the enum Bin - static_assert(kBinBits == 3, "wrong number of bin bits"); - static_assert(kBinShift == 29, "wrong number of shift"); + static_assert(kBinBits == 4, "wrong number of bin bits"); + static_assert(kBinShift == 28, "wrong number of shift"); static_assert(sizeof(BinSlot) == sizeof(LockWord), "BinSlot/LockWord must have equal sizes"); DCHECK_LT(GetBin(), kBinSize); diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 53f5ce4545..71044f7b6e 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -52,7 +52,8 @@ class ImageWriter FINAL { quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0), quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic), target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), - bin_slot_sizes_(), bin_slot_count_() { + bin_slot_sizes_(), bin_slot_previous_sizes_(), bin_slot_count_(), + string_data_array_(nullptr) { CHECK_NE(image_begin, 0U); } @@ -80,6 +81,14 @@ class ImageWriter FINAL { return reinterpret_cast<mirror::Object*>(image_begin_ + GetImageOffset(object)); } + mirror::HeapReference<mirror::Object>* GetDexCacheArrayElementImageAddress( + const DexFile* dex_file, uint32_t offset) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + auto it = dex_cache_array_starts_.find(dex_file); + DCHECK(it != dex_cache_array_starts_.end()); + return reinterpret_cast<mirror::HeapReference<mirror::Object>*>( + image_begin_ + RoundUp(sizeof(ImageHeader), kObjectAlignment) + it->second + offset); + } + uint8_t* GetOatFileBegin() const { return image_begin_ + RoundUp(image_end_, kPageSize); } @@ -101,6 +110,10 @@ class ImageWriter FINAL { // Classify different kinds of bins that objects end up getting packed into during image writing. enum Bin { + // Dex cache arrays have a special slot for PC-relative addressing. Since they are + // huge, and as such their dirtiness is not important for the clean/dirty separation, + // we arbitrarily keep them at the beginning. + kBinDexCacheArray, // Object arrays belonging to dex cache. // Likely-clean: kBinString, // [String] Almost always immutable (except for obj header). kBinArtMethodsManagedInitialized, // [ArtMethod] Not-native, and initialized. Unlikely to dirty @@ -113,7 +126,6 @@ class ImageWriter FINAL { kBinClassVerified, // Class verified, but initializers haven't been run kBinArtMethodNative, // Art method that is actually native kBinArtMethodNotInitialized, // Art method with a declaring class that wasn't initialized - // Don't care about other art methods since they don't dirty // Add more bins here if we add more segregation code. 
kBinSize, }; @@ -157,6 +169,7 @@ class ImageWriter FINAL { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); size_t GetImageOffset(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void PrepareDexCacheArraySlots() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void AssignImageBinSlot(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -282,6 +295,12 @@ class ImageWriter FINAL { // Memory mapped for generating the image. std::unique_ptr<MemMap> image_; + // Indexes for dex cache arrays (objects are inside of the image so that they don't move). + SafeMap<mirror::Object*, size_t> dex_cache_array_indexes_; + + // The start offsets of the dex cache arrays. + SafeMap<const DexFile*, size_t> dex_cache_array_starts_; + // Saved hashes (objects are inside of the image so that they don't move). std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_; @@ -309,6 +328,7 @@ class ImageWriter FINAL { // Bin slot tracking for dirty object packing size_t bin_slot_sizes_[kBinSize]; // Number of bytes in a bin + size_t bin_slot_previous_sizes_[kBinSize]; // Number of bytes in previous bins. size_t bin_slot_count_[kBinSize]; // Number of objects in a bin void* string_data_array_; // The backing for the interned strings. diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc new file mode 100644 index 0000000000..3a0d520e47 --- /dev/null +++ b/compiler/jni/jni_cfi_test.cc @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <memory> +#include <vector> + +#include "arch/instruction_set.h" +#include "cfi_test.h" +#include "gtest/gtest.h" +#include "jni/quick/calling_convention.h" +#include "utils/assembler.h" + +#include "jni/jni_cfi_test_expected.inc" + +namespace art { + +// Run the tests only on host. +#ifndef HAVE_ANDROID_OS + +class JNICFITest : public CFITest { + public: + // Enable this flag to generate the expected outputs. + static constexpr bool kGenerateExpected = false; + + void TestImpl(InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + // Description of simple method. + const bool is_static = true; + const bool is_synchronized = false; + const char* shorty = "IIFII"; + std::unique_ptr<JniCallingConvention> jni_conv( + JniCallingConvention::Create(is_static, is_synchronized, shorty, isa)); + std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv( + ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, isa)); + const int frame_size(jni_conv->FrameSize()); + const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters(); + + // Assemble the method. 
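The expected_cfi_* arrays further down are raw DWARF CFI programs and can be read by hand. A small sketch decoding the first few kThumb2 bytes (0x44 0x0E 0x1C 0x85 0x07), assuming, as holds here, that the operands are single-byte ULEB128s:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint8_t> cfi = {0x44, 0x0E, 0x1C, 0x85, 0x07};
  for (size_t i = 0; i < cfi.size();) {
    uint8_t op = cfi[i];
    if ((op & 0xC0) == 0x40) {         // DW_CFA_advance_loc: delta in low 6 bits.
      printf("advance_loc %d\n", op & 0x3F);
      i += 1;
    } else if ((op & 0xC0) == 0x80) {  // DW_CFA_offset: register in low 6 bits.
      printf("offset r%d at factored offset %d\n", op & 0x3F, cfi[i + 1]);
      i += 2;
    } else if (op == 0x0E) {           // DW_CFA_def_cfa_offset.
      printf("def_cfa_offset %d\n", cfi[i + 1]);
      i += 2;
    } else {
      break;  // Other opcodes are not handled in this sketch.
    }
  }
  return 0;
}

With the Thumb2 CIE's data alignment factor of -4, the factored offset 7 means register 5 is saved at CFA-28, matching the commented disassembly next to the array.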
+ std::unique_ptr<Assembler> jni_asm(Assembler::Create(isa)); + jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(), + callee_save_regs, mr_conv->EntrySpills()); + jni_asm->IncreaseFrameSize(32); + jni_asm->DecreaseFrameSize(32); + jni_asm->RemoveFrame(frame_size, callee_save_regs); + jni_asm->EmitSlowPaths(); + std::vector<uint8_t> actual_asm(jni_asm->CodeSize()); + MemoryRegion code(&actual_asm[0], actual_asm.size()); + jni_asm->FinalizeInstructions(code); + ASSERT_EQ(jni_asm->cfi().GetCurrentCFAOffset(), frame_size); + const std::vector<uint8_t>& actual_cfi = *(jni_asm->cfi().data()); + + if (kGenerateExpected) { + GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + } else { + EXPECT_EQ(expected_asm, actual_asm); + EXPECT_EQ(expected_cfi, actual_cfi); + } + } +}; + +#define TEST_ISA(isa) \ + TEST_F(JNICFITest, isa) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ + } + +TEST_ISA(kThumb2) +TEST_ISA(kArm64) +TEST_ISA(kX86) +TEST_ISA(kX86_64) +TEST_ISA(kMips) +TEST_ISA(kMips64) + +#endif // HAVE_ANDROID_OS + +} // namespace art diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc new file mode 100644 index 0000000000..47e6f106ca --- /dev/null +++ b/compiler/jni/jni_cfi_test_expected.inc @@ -0,0 +1,505 @@ +static constexpr uint8_t expected_asm_kThumb2[] = { + 0x2D, 0xE9, 0xE0, 0x4D, 0x2D, 0xED, 0x10, 0x8A, 0x89, 0xB0, 0x00, 0x90, + 0xCD, 0xF8, 0x84, 0x10, 0x8D, 0xED, 0x22, 0x0A, 0xCD, 0xF8, 0x8C, 0x20, + 0xCD, 0xF8, 0x90, 0x30, 0x88, 0xB0, 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC, + 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D, +}; +static constexpr uint8_t expected_cfi_kThumb2[] = { + 0x44, 0x0E, 0x1C, 0x85, 0x07, 0x86, 0x06, 0x87, 0x05, 0x88, 0x04, 0x8A, + 0x03, 0x8B, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x5C, 0x05, 0x50, 0x17, 0x05, + 0x51, 0x16, 0x05, 0x52, 0x15, 0x05, 0x53, 0x14, 0x05, 0x54, 0x13, 0x05, + 0x55, 0x12, 0x05, 0x56, 0x11, 0x05, 0x57, 0x10, 0x05, 0x58, 0x0F, 0x05, + 0x59, 0x0E, 0x05, 0x5A, 0x0D, 0x05, 0x5B, 0x0C, 0x05, 0x5C, 0x0B, 0x05, + 0x5D, 0x0A, 0x05, 0x5E, 0x09, 0x05, 0x5F, 0x08, 0x42, 0x0E, 0x80, 0x01, + 0x54, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C, + 0x44, 0x0E, 0x1C, 0x06, 0x50, 0x06, 0x51, 0x06, 0x52, 0x06, 0x53, 0x06, + 0x54, 0x06, 0x55, 0x06, 0x56, 0x06, 0x57, 0x06, 0x58, 0x06, 0x59, 0x06, + 0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x44, + 0x0B, 0x0E, 0x80, 0x01, +}; +// 0x00000000: push {r5, r6, r7, r8, r10, r11, lr} +// 0x00000004: .cfi_def_cfa_offset: 28 +// 0x00000004: .cfi_offset: r5 at cfa-28 +// 0x00000004: .cfi_offset: r6 at cfa-24 +// 0x00000004: .cfi_offset: r7 at cfa-20 +// 0x00000004: .cfi_offset: r8 at cfa-16 +// 0x00000004: .cfi_offset: r10 at cfa-12 +// 0x00000004: .cfi_offset: r11 at cfa-8 +// 0x00000004: .cfi_offset: r14 at cfa-4 +// 0x00000004: vpush.f32 {s16-s31} +// 0x00000008: .cfi_def_cfa_offset: 92 +// 0x00000008: .cfi_offset_extended: r80 at cfa-92 +// 0x00000008: .cfi_offset_extended: r81 at cfa-88 +// 0x00000008: .cfi_offset_extended: r82 at cfa-84 +// 0x00000008: .cfi_offset_extended: r83 at cfa-80 +// 0x00000008: .cfi_offset_extended: r84 at cfa-76 +// 0x00000008: .cfi_offset_extended: r85 at cfa-72 +// 0x00000008: .cfi_offset_extended: r86 at cfa-68 +// 0x00000008: .cfi_offset_extended: r87 at cfa-64 +// 
0x00000008: .cfi_offset_extended: r88 at cfa-60 +// 0x00000008: .cfi_offset_extended: r89 at cfa-56 +// 0x00000008: .cfi_offset_extended: r90 at cfa-52 +// 0x00000008: .cfi_offset_extended: r91 at cfa-48 +// 0x00000008: .cfi_offset_extended: r92 at cfa-44 +// 0x00000008: .cfi_offset_extended: r93 at cfa-40 +// 0x00000008: .cfi_offset_extended: r94 at cfa-36 +// 0x00000008: .cfi_offset_extended: r95 at cfa-32 +// 0x00000008: sub sp, sp, #36 +// 0x0000000a: .cfi_def_cfa_offset: 128 +// 0x0000000a: str r0, [sp, #0] +// 0x0000000c: str.w r1, [sp, #132] +// 0x00000010: vstr.f32 s0, [sp, #136] +// 0x00000014: str.w r2, [sp, #140] +// 0x00000018: str.w r3, [sp, #144] +// 0x0000001c: sub sp, sp, #32 +// 0x0000001e: .cfi_def_cfa_offset: 160 +// 0x0000001e: add sp, sp, #32 +// 0x00000020: .cfi_def_cfa_offset: 128 +// 0x00000020: .cfi_remember_state +// 0x00000020: add sp, sp, #36 +// 0x00000022: .cfi_def_cfa_offset: 92 +// 0x00000022: vpop.f32 {s16-s31} +// 0x00000026: .cfi_def_cfa_offset: 28 +// 0x00000026: .cfi_restore_extended: r80 +// 0x00000026: .cfi_restore_extended: r81 +// 0x00000026: .cfi_restore_extended: r82 +// 0x00000026: .cfi_restore_extended: r83 +// 0x00000026: .cfi_restore_extended: r84 +// 0x00000026: .cfi_restore_extended: r85 +// 0x00000026: .cfi_restore_extended: r86 +// 0x00000026: .cfi_restore_extended: r87 +// 0x00000026: .cfi_restore_extended: r88 +// 0x00000026: .cfi_restore_extended: r89 +// 0x00000026: .cfi_restore_extended: r90 +// 0x00000026: .cfi_restore_extended: r91 +// 0x00000026: .cfi_restore_extended: r92 +// 0x00000026: .cfi_restore_extended: r93 +// 0x00000026: .cfi_restore_extended: r94 +// 0x00000026: .cfi_restore_extended: r95 +// 0x00000026: pop {r5, r6, r7, r8, r10, r11, pc} +// 0x0000002a: .cfi_restore_state +// 0x0000002a: .cfi_def_cfa_offset: 128 + +static constexpr uint8_t expected_asm_kArm64[] = { + 0xFF, 0x03, 0x03, 0xD1, 0xFE, 0x5F, 0x00, 0xF9, 0xFD, 0x5B, 0x00, 0xF9, + 0xFC, 0x57, 0x00, 0xF9, 0xFB, 0x53, 0x00, 0xF9, 0xFA, 0x4F, 0x00, 0xF9, + 0xF9, 0x4B, 0x00, 0xF9, 0xF8, 0x47, 0x00, 0xF9, 0xF7, 0x43, 0x00, 0xF9, + 0xF6, 0x3F, 0x00, 0xF9, 0xF5, 0x3B, 0x00, 0xF9, 0xF4, 0x37, 0x00, 0xF9, + 0xEF, 0x33, 0x00, 0xFD, 0xEE, 0x2F, 0x00, 0xFD, 0xED, 0x2B, 0x00, 0xFD, + 0xEC, 0x27, 0x00, 0xFD, 0xEB, 0x23, 0x00, 0xFD, 0xEA, 0x1F, 0x00, 0xFD, + 0xE9, 0x1B, 0x00, 0xFD, 0xE8, 0x17, 0x00, 0xFD, 0xF5, 0x03, 0x12, 0xAA, + 0xE0, 0x03, 0x00, 0xB9, 0xE1, 0xC7, 0x00, 0xB9, 0xE0, 0xCB, 0x00, 0xBD, + 0xE2, 0xCF, 0x00, 0xB9, 0xE3, 0xD3, 0x00, 0xB9, 0xFF, 0x83, 0x00, 0xD1, + 0xFF, 0x83, 0x00, 0x91, 0xF2, 0x03, 0x15, 0xAA, 0xFE, 0x5F, 0x40, 0xF9, + 0xFD, 0x5B, 0x40, 0xF9, 0xFC, 0x57, 0x40, 0xF9, 0xFB, 0x53, 0x40, 0xF9, + 0xFA, 0x4F, 0x40, 0xF9, 0xF9, 0x4B, 0x40, 0xF9, 0xF8, 0x47, 0x40, 0xF9, + 0xF7, 0x43, 0x40, 0xF9, 0xF6, 0x3F, 0x40, 0xF9, 0xF5, 0x3B, 0x40, 0xF9, + 0xF4, 0x37, 0x40, 0xF9, 0xEF, 0x33, 0x40, 0xFD, 0xEE, 0x2F, 0x40, 0xFD, + 0xED, 0x2B, 0x40, 0xFD, 0xEC, 0x27, 0x40, 0xFD, 0xEB, 0x23, 0x40, 0xFD, + 0xEA, 0x1F, 0x40, 0xFD, 0xE9, 0x1B, 0x40, 0xFD, 0xE8, 0x17, 0x40, 0xFD, + 0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6, +}; +static constexpr uint8_t expected_cfi_kArm64[] = { + 0x44, 0x0E, 0xC0, 0x01, 0x44, 0x9E, 0x02, 0x44, 0x9D, 0x04, 0x44, 0x9C, + 0x06, 0x44, 0x9B, 0x08, 0x44, 0x9A, 0x0A, 0x44, 0x99, 0x0C, 0x44, 0x98, + 0x0E, 0x44, 0x97, 0x10, 0x44, 0x96, 0x12, 0x44, 0x95, 0x14, 0x44, 0x94, + 0x16, 0x44, 0x05, 0x4F, 0x18, 0x44, 0x05, 0x4E, 0x1A, 0x44, 0x05, 0x4D, + 0x1C, 0x44, 0x05, 0x4C, 0x1E, 0x44, 0x05, 0x4B, 0x20, 0x44, 0x05, 0x4A, + 0x22, 0x44, 0x05, 0x49, 
0x24, 0x44, 0x05, 0x48, 0x26, 0x5C, 0x0E, 0xE0, + 0x01, 0x44, 0x0E, 0xC0, 0x01, 0x0A, 0x48, 0xDE, 0x44, 0xDD, 0x44, 0xDC, + 0x44, 0xDB, 0x44, 0xDA, 0x44, 0xD9, 0x44, 0xD8, 0x44, 0xD7, 0x44, 0xD6, + 0x44, 0xD5, 0x44, 0xD4, 0x44, 0x06, 0x4F, 0x44, 0x06, 0x4E, 0x44, 0x06, + 0x4D, 0x44, 0x06, 0x4C, 0x44, 0x06, 0x4B, 0x44, 0x06, 0x4A, 0x44, 0x06, + 0x49, 0x44, 0x06, 0x48, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01, +}; +// 0x00000000: sub sp, sp, #0xc0 (192) +// 0x00000004: .cfi_def_cfa_offset: 192 +// 0x00000004: str lr, [sp, #184] +// 0x00000008: .cfi_offset: r30 at cfa-8 +// 0x00000008: str x29, [sp, #176] +// 0x0000000c: .cfi_offset: r29 at cfa-16 +// 0x0000000c: str x28, [sp, #168] +// 0x00000010: .cfi_offset: r28 at cfa-24 +// 0x00000010: str x27, [sp, #160] +// 0x00000014: .cfi_offset: r27 at cfa-32 +// 0x00000014: str x26, [sp, #152] +// 0x00000018: .cfi_offset: r26 at cfa-40 +// 0x00000018: str x25, [sp, #144] +// 0x0000001c: .cfi_offset: r25 at cfa-48 +// 0x0000001c: str x24, [sp, #136] +// 0x00000020: .cfi_offset: r24 at cfa-56 +// 0x00000020: str x23, [sp, #128] +// 0x00000024: .cfi_offset: r23 at cfa-64 +// 0x00000024: str x22, [sp, #120] +// 0x00000028: .cfi_offset: r22 at cfa-72 +// 0x00000028: str x21, [sp, #112] +// 0x0000002c: .cfi_offset: r21 at cfa-80 +// 0x0000002c: str x20, [sp, #104] +// 0x00000030: .cfi_offset: r20 at cfa-88 +// 0x00000030: str d15, [sp, #96] +// 0x00000034: .cfi_offset_extended: r79 at cfa-96 +// 0x00000034: str d14, [sp, #88] +// 0x00000038: .cfi_offset_extended: r78 at cfa-104 +// 0x00000038: str d13, [sp, #80] +// 0x0000003c: .cfi_offset_extended: r77 at cfa-112 +// 0x0000003c: str d12, [sp, #72] +// 0x00000040: .cfi_offset_extended: r76 at cfa-120 +// 0x00000040: str d11, [sp, #64] +// 0x00000044: .cfi_offset_extended: r75 at cfa-128 +// 0x00000044: str d10, [sp, #56] +// 0x00000048: .cfi_offset_extended: r74 at cfa-136 +// 0x00000048: str d9, [sp, #48] +// 0x0000004c: .cfi_offset_extended: r73 at cfa-144 +// 0x0000004c: str d8, [sp, #40] +// 0x00000050: .cfi_offset_extended: r72 at cfa-152 +// 0x00000050: mov x21, tr +// 0x00000054: str w0, [sp] +// 0x00000058: str w1, [sp, #196] +// 0x0000005c: str s0, [sp, #200] +// 0x00000060: str w2, [sp, #204] +// 0x00000064: str w3, [sp, #208] +// 0x00000068: sub sp, sp, #0x20 (32) +// 0x0000006c: .cfi_def_cfa_offset: 224 +// 0x0000006c: add sp, sp, #0x20 (32) +// 0x00000070: .cfi_def_cfa_offset: 192 +// 0x00000070: .cfi_remember_state +// 0x00000070: mov tr, x21 +// 0x00000074: ldr lr, [sp, #184] +// 0x00000078: .cfi_restore: r30 +// 0x00000078: ldr x29, [sp, #176] +// 0x0000007c: .cfi_restore: r29 +// 0x0000007c: ldr x28, [sp, #168] +// 0x00000080: .cfi_restore: r28 +// 0x00000080: ldr x27, [sp, #160] +// 0x00000084: .cfi_restore: r27 +// 0x00000084: ldr x26, [sp, #152] +// 0x00000088: .cfi_restore: r26 +// 0x00000088: ldr x25, [sp, #144] +// 0x0000008c: .cfi_restore: r25 +// 0x0000008c: ldr x24, [sp, #136] +// 0x00000090: .cfi_restore: r24 +// 0x00000090: ldr x23, [sp, #128] +// 0x00000094: .cfi_restore: r23 +// 0x00000094: ldr x22, [sp, #120] +// 0x00000098: .cfi_restore: r22 +// 0x00000098: ldr x21, [sp, #112] +// 0x0000009c: .cfi_restore: r21 +// 0x0000009c: ldr x20, [sp, #104] +// 0x000000a0: .cfi_restore: r20 +// 0x000000a0: ldr d15, [sp, #96] +// 0x000000a4: .cfi_restore_extended: r79 +// 0x000000a4: ldr d14, [sp, #88] +// 0x000000a8: .cfi_restore_extended: r78 +// 0x000000a8: ldr d13, [sp, #80] +// 0x000000ac: .cfi_restore_extended: r77 +// 0x000000ac: ldr d12, [sp, #72] +// 0x000000b0: 
.cfi_restore_extended: r76 +// 0x000000b0: ldr d11, [sp, #64] +// 0x000000b4: .cfi_restore_extended: r75 +// 0x000000b4: ldr d10, [sp, #56] +// 0x000000b8: .cfi_restore_extended: r74 +// 0x000000b8: ldr d9, [sp, #48] +// 0x000000bc: .cfi_restore_extended: r73 +// 0x000000bc: ldr d8, [sp, #40] +// 0x000000c0: .cfi_restore_extended: r72 +// 0x000000c0: add sp, sp, #0xc0 (192) +// 0x000000c4: .cfi_def_cfa_offset: 0 +// 0x000000c4: ret +// 0x000000c8: .cfi_restore_state +// 0x000000c8: .cfi_def_cfa_offset: 192 + +static constexpr uint8_t expected_asm_kX86[] = { + 0x57, 0x56, 0x55, 0x83, 0xC4, 0xE4, 0x50, 0x89, 0x4C, 0x24, 0x34, 0xF3, + 0x0F, 0x11, 0x44, 0x24, 0x38, 0x89, 0x54, 0x24, 0x3C, 0x89, 0x5C, 0x24, + 0x40, 0x83, 0xC4, 0xE0, 0x83, 0xC4, 0x20, 0x83, 0xC4, 0x20, 0x5D, 0x5E, + 0x5F, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86[] = { + 0x41, 0x0E, 0x08, 0x87, 0x02, 0x41, 0x0E, 0x0C, 0x86, 0x03, 0x41, 0x0E, + 0x10, 0x85, 0x04, 0x43, 0x0E, 0x2C, 0x41, 0x0E, 0x30, 0x55, 0x0E, 0x50, + 0x43, 0x0E, 0x30, 0x0A, 0x43, 0x0E, 0x10, 0x41, 0x0E, 0x0C, 0xC5, 0x41, + 0x0E, 0x08, 0xC6, 0x41, 0x0E, 0x04, 0xC7, 0x41, 0x0B, 0x0E, 0x30, +}; +// 0x00000000: push edi +// 0x00000001: .cfi_def_cfa_offset: 8 +// 0x00000001: .cfi_offset: r7 at cfa-8 +// 0x00000001: push esi +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r6 at cfa-12 +// 0x00000002: push ebp +// 0x00000003: .cfi_def_cfa_offset: 16 +// 0x00000003: .cfi_offset: r5 at cfa-16 +// 0x00000003: add esp, -28 +// 0x00000006: .cfi_def_cfa_offset: 44 +// 0x00000006: push eax +// 0x00000007: .cfi_def_cfa_offset: 48 +// 0x00000007: mov [esp + 52], ecx +// 0x0000000b: movss [esp + 56], xmm0 +// 0x00000011: mov [esp + 60], edx +// 0x00000015: mov [esp + 64], ebx +// 0x00000019: add esp, -32 +// 0x0000001c: .cfi_def_cfa_offset: 80 +// 0x0000001c: add esp, 32 +// 0x0000001f: .cfi_def_cfa_offset: 48 +// 0x0000001f: .cfi_remember_state +// 0x0000001f: add esp, 32 +// 0x00000022: .cfi_def_cfa_offset: 16 +// 0x00000022: pop ebp +// 0x00000023: .cfi_def_cfa_offset: 12 +// 0x00000023: .cfi_restore: r5 +// 0x00000023: pop esi +// 0x00000024: .cfi_def_cfa_offset: 8 +// 0x00000024: .cfi_restore: r6 +// 0x00000024: pop edi +// 0x00000025: .cfi_def_cfa_offset: 4 +// 0x00000025: .cfi_restore: r7 +// 0x00000025: ret +// 0x00000026: .cfi_restore_state +// 0x00000026: .cfi_def_cfa_offset: 48 + +static constexpr uint8_t expected_asm_kX86_64[] = { + 0x41, 0x57, 0x41, 0x56, 0x41, 0x55, 0x41, 0x54, 0x55, 0x53, 0x48, 0x83, + 0xEC, 0x48, 0xF2, 0x44, 0x0F, 0x11, 0x7C, 0x24, 0x40, 0xF2, 0x44, 0x0F, + 0x11, 0x74, 0x24, 0x38, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24, 0x30, 0xF2, + 0x44, 0x0F, 0x11, 0x64, 0x24, 0x28, 0x89, 0x3C, 0x24, 0x89, 0xB4, 0x24, + 0x84, 0x00, 0x00, 0x00, 0xF3, 0x0F, 0x11, 0x84, 0x24, 0x88, 0x00, 0x00, + 0x00, 0x89, 0x94, 0x24, 0x8C, 0x00, 0x00, 0x00, 0x89, 0x8C, 0x24, 0x90, + 0x00, 0x00, 0x00, 0x48, 0x83, 0xC4, 0xE0, 0x48, 0x83, 0xC4, 0x20, 0xF2, + 0x44, 0x0F, 0x10, 0x64, 0x24, 0x28, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, + 0x30, 0xF2, 0x44, 0x0F, 0x10, 0x74, 0x24, 0x38, 0xF2, 0x44, 0x0F, 0x10, + 0x7C, 0x24, 0x40, 0x48, 0x83, 0xC4, 0x48, 0x5B, 0x5D, 0x41, 0x5C, 0x41, + 0x5D, 0x41, 0x5E, 0x41, 0x5F, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86_64[] = { + 0x42, 0x0E, 0x10, 0x8F, 0x04, 0x42, 0x0E, 0x18, 0x8E, 0x06, 0x42, 0x0E, + 0x20, 0x8D, 0x08, 0x42, 0x0E, 0x28, 0x8C, 0x0A, 0x41, 0x0E, 0x30, 0x86, + 0x0C, 0x41, 0x0E, 0x38, 0x83, 0x0E, 0x44, 0x0E, 0x80, 0x01, 0x47, 0xA0, + 0x10, 0x47, 0x9F, 0x12, 0x47, 0x9E, 0x14, 0x47, 0x9D, 0x16, 0x65, 
0x0E, + 0xA0, 0x01, 0x44, 0x0E, 0x80, 0x01, 0x0A, 0x47, 0xDD, 0x47, 0xDE, 0x47, + 0xDF, 0x47, 0xE0, 0x44, 0x0E, 0x38, 0x41, 0x0E, 0x30, 0xC3, 0x41, 0x0E, + 0x28, 0xC6, 0x42, 0x0E, 0x20, 0xCC, 0x42, 0x0E, 0x18, 0xCD, 0x42, 0x0E, + 0x10, 0xCE, 0x42, 0x0E, 0x08, 0xCF, 0x41, 0x0B, 0x0E, 0x80, 0x01, +}; +// 0x00000000: push r15 +// 0x00000002: .cfi_def_cfa_offset: 16 +// 0x00000002: .cfi_offset: r15 at cfa-16 +// 0x00000002: push r14 +// 0x00000004: .cfi_def_cfa_offset: 24 +// 0x00000004: .cfi_offset: r14 at cfa-24 +// 0x00000004: push r13 +// 0x00000006: .cfi_def_cfa_offset: 32 +// 0x00000006: .cfi_offset: r13 at cfa-32 +// 0x00000006: push r12 +// 0x00000008: .cfi_def_cfa_offset: 40 +// 0x00000008: .cfi_offset: r12 at cfa-40 +// 0x00000008: push rbp +// 0x00000009: .cfi_def_cfa_offset: 48 +// 0x00000009: .cfi_offset: r6 at cfa-48 +// 0x00000009: push rbx +// 0x0000000a: .cfi_def_cfa_offset: 56 +// 0x0000000a: .cfi_offset: r3 at cfa-56 +// 0x0000000a: subq rsp, 72 +// 0x0000000e: .cfi_def_cfa_offset: 128 +// 0x0000000e: movsd [rsp + 64], xmm15 +// 0x00000015: .cfi_offset: r32 at cfa-64 +// 0x00000015: movsd [rsp + 56], xmm14 +// 0x0000001c: .cfi_offset: r31 at cfa-72 +// 0x0000001c: movsd [rsp + 48], xmm13 +// 0x00000023: .cfi_offset: r30 at cfa-80 +// 0x00000023: movsd [rsp + 40], xmm12 +// 0x0000002a: .cfi_offset: r29 at cfa-88 +// 0x0000002a: mov [rsp], edi +// 0x0000002d: mov [rsp + 132], esi +// 0x00000034: movss [rsp + 136], xmm0 +// 0x0000003d: mov [rsp + 140], edx +// 0x00000044: mov [rsp + 144], ecx +// 0x0000004b: addq rsp, -32 +// 0x0000004f: .cfi_def_cfa_offset: 160 +// 0x0000004f: addq rsp, 32 +// 0x00000053: .cfi_def_cfa_offset: 128 +// 0x00000053: .cfi_remember_state +// 0x00000053: movsd xmm12, [rsp + 40] +// 0x0000005a: .cfi_restore: r29 +// 0x0000005a: movsd xmm13, [rsp + 48] +// 0x00000061: .cfi_restore: r30 +// 0x00000061: movsd xmm14, [rsp + 56] +// 0x00000068: .cfi_restore: r31 +// 0x00000068: movsd xmm15, [rsp + 64] +// 0x0000006f: .cfi_restore: r32 +// 0x0000006f: addq rsp, 72 +// 0x00000073: .cfi_def_cfa_offset: 56 +// 0x00000073: pop rbx +// 0x00000074: .cfi_def_cfa_offset: 48 +// 0x00000074: .cfi_restore: r3 +// 0x00000074: pop rbp +// 0x00000075: .cfi_def_cfa_offset: 40 +// 0x00000075: .cfi_restore: r6 +// 0x00000075: pop r12 +// 0x00000077: .cfi_def_cfa_offset: 32 +// 0x00000077: .cfi_restore: r12 +// 0x00000077: pop r13 +// 0x00000079: .cfi_def_cfa_offset: 24 +// 0x00000079: .cfi_restore: r13 +// 0x00000079: pop r14 +// 0x0000007b: .cfi_def_cfa_offset: 16 +// 0x0000007b: .cfi_restore: r14 +// 0x0000007b: pop r15 +// 0x0000007d: .cfi_def_cfa_offset: 8 +// 0x0000007d: .cfi_restore: r15 +// 0x0000007d: ret +// 0x0000007e: .cfi_restore_state +// 0x0000007e: .cfi_def_cfa_offset: 128 + +static constexpr uint8_t expected_asm_kMips[] = { + 0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB8, 0xAF, + 0x34, 0x00, 0xAF, 0xAF, 0x30, 0x00, 0xAE, 0xAF, 0x2C, 0x00, 0xAD, 0xAF, + 0x28, 0x00, 0xAC, 0xAF, 0x24, 0x00, 0xAB, 0xAF, 0x20, 0x00, 0xAA, 0xAF, + 0x1C, 0x00, 0xA9, 0xAF, 0x18, 0x00, 0xA8, 0xAF, 0x00, 0x00, 0xA4, 0xAF, + 0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xA6, 0xAF, 0x4C, 0x00, 0xA7, 0xAF, + 0xE0, 0xFF, 0xBD, 0x27, 0x20, 0x00, 0xBD, 0x27, 0x18, 0x00, 0xA8, 0x8F, + 0x1C, 0x00, 0xA9, 0x8F, 0x20, 0x00, 0xAA, 0x8F, 0x24, 0x00, 0xAB, 0x8F, + 0x28, 0x00, 0xAC, 0x8F, 0x2C, 0x00, 0xAD, 0x8F, 0x30, 0x00, 0xAE, 0x8F, + 0x34, 0x00, 0xAF, 0x8F, 0x38, 0x00, 0xB8, 0x8F, 0x3C, 0x00, 0xBF, 0x8F, + 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; 
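The expected_cfi_* arrays in this file are raw DWARF call-frame instruction streams; the interleaved // comments are their pretty-printed form. As a rough illustration only (not part of the generated expectations), the sketch below decodes the opcodes that dominate these tables, assuming single-byte ULEB128 operands and a code alignment factor of 1, which holds for this data; the pretty-printer additionally scales register save offsets by the CIE's data alignment factor.

// Illustrative decoder (not part of the patch); handles only the opcodes used here.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // First bytes of expected_cfi_kMips below.
  const std::vector<uint8_t> cfi = {0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x98, 0x02};
  for (size_t i = 0; i < cfi.size();) {
    const uint8_t op = cfi[i++];
    if ((op & 0xC0) == 0x40) {
      std::printf("DW_CFA_advance_loc %d\n", op & 0x3F);           // Advance by N code units.
    } else if ((op & 0xC0) == 0x80) {
      std::printf("DW_CFA_offset r%d %d\n", op & 0x3F, cfi[i++]);  // Register saved at factored offset.
    } else if ((op & 0xC0) == 0xC0) {
      std::printf("DW_CFA_restore r%d\n", op & 0x3F);
    } else if (op == 0x0E) {
      std::printf("DW_CFA_def_cfa_offset %d\n", cfi[i++]);
    } else if (op == 0x0A) {
      std::printf("DW_CFA_remember_state\n");
    } else if (op == 0x0B) {
      std::printf("DW_CFA_restore_state\n");
    } else {
      std::printf("(unhandled opcode 0x%02X)\n", static_cast<unsigned>(op));
      break;
    }
  }
  return 0;
}

Fed the first bytes of expected_cfi_kMips below, it prints advance_loc 4, def_cfa_offset 64, advance_loc 4, offset r31 1, advance_loc 4, offset r24 2, which corresponds to the ".cfi_def_cfa_offset: 64", "r31 at cfa-4" and "r24 at cfa-8" comments that follow the array.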
+static constexpr uint8_t expected_cfi_kMips[] = { + 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x98, 0x02, 0x44, 0x8F, 0x03, + 0x44, 0x8E, 0x04, 0x44, 0x8D, 0x05, 0x44, 0x8C, 0x06, 0x44, 0x8B, 0x07, + 0x44, 0x8A, 0x08, 0x44, 0x89, 0x09, 0x44, 0x88, 0x0A, 0x54, 0x0E, 0x60, + 0x44, 0x0E, 0x40, 0x0A, 0x44, 0xC8, 0x44, 0xC9, 0x44, 0xCA, 0x44, 0xCB, + 0x44, 0xCC, 0x44, 0xCD, 0x44, 0xCE, 0x44, 0xCF, 0x44, 0xD8, 0x44, 0xDF, + 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: addiu r29, r29, -64 +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: sw r31, +60(r29) +// 0x00000008: .cfi_offset: r31 at cfa-4 +// 0x00000008: sw r24, +56(r29) +// 0x0000000c: .cfi_offset: r24 at cfa-8 +// 0x0000000c: sw r15, +52(r29) +// 0x00000010: .cfi_offset: r15 at cfa-12 +// 0x00000010: sw r14, +48(r29) +// 0x00000014: .cfi_offset: r14 at cfa-16 +// 0x00000014: sw r13, +44(r29) +// 0x00000018: .cfi_offset: r13 at cfa-20 +// 0x00000018: sw r12, +40(r29) +// 0x0000001c: .cfi_offset: r12 at cfa-24 +// 0x0000001c: sw r11, +36(r29) +// 0x00000020: .cfi_offset: r11 at cfa-28 +// 0x00000020: sw r10, +32(r29) +// 0x00000024: .cfi_offset: r10 at cfa-32 +// 0x00000024: sw r9, +28(r29) +// 0x00000028: .cfi_offset: r9 at cfa-36 +// 0x00000028: sw r8, +24(r29) +// 0x0000002c: .cfi_offset: r8 at cfa-40 +// 0x0000002c: sw r4, +0(r29) +// 0x00000030: sw r5, +68(r29) +// 0x00000034: sw r6, +72(r29) +// 0x00000038: sw r7, +76(r29) +// 0x0000003c: addiu r29, r29, -32 +// 0x00000040: .cfi_def_cfa_offset: 96 +// 0x00000040: addiu r29, r29, 32 +// 0x00000044: .cfi_def_cfa_offset: 64 +// 0x00000044: .cfi_remember_state +// 0x00000044: lw r8, +24(r29) +// 0x00000048: .cfi_restore: r8 +// 0x00000048: lw r9, +28(r29) +// 0x0000004c: .cfi_restore: r9 +// 0x0000004c: lw r10, +32(r29) +// 0x00000050: .cfi_restore: r10 +// 0x00000050: lw r11, +36(r29) +// 0x00000054: .cfi_restore: r11 +// 0x00000054: lw r12, +40(r29) +// 0x00000058: .cfi_restore: r12 +// 0x00000058: lw r13, +44(r29) +// 0x0000005c: .cfi_restore: r13 +// 0x0000005c: lw r14, +48(r29) +// 0x00000060: .cfi_restore: r14 +// 0x00000060: lw r15, +52(r29) +// 0x00000064: .cfi_restore: r15 +// 0x00000064: lw r24, +56(r29) +// 0x00000068: .cfi_restore: r24 +// 0x00000068: lw r31, +60(r29) +// 0x0000006c: .cfi_restore: r31 +// 0x0000006c: addiu r29, r29, 64 +// 0x00000070: .cfi_def_cfa_offset: 0 +// 0x00000070: jalr r0, r31 +// 0x00000074: nop +// 0x00000078: .cfi_restore_state +// 0x00000078: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64[] = { + 0xA0, 0xFF, 0xBD, 0x67, 0x58, 0x00, 0xBF, 0xFF, 0x50, 0x00, 0xBE, 0xFF, + 0x48, 0x00, 0xBC, 0xFF, 0x40, 0x00, 0xB7, 0xFF, 0x38, 0x00, 0xB6, 0xFF, + 0x30, 0x00, 0xB5, 0xFF, 0x28, 0x00, 0xB4, 0xFF, 0x20, 0x00, 0xB3, 0xFF, + 0x18, 0x00, 0xB2, 0xFF, 0x00, 0x00, 0xA4, 0xAF, 0x64, 0x00, 0xA5, 0xAF, + 0x68, 0x00, 0xAE, 0xE7, 0x6C, 0x00, 0xA7, 0xAF, 0x70, 0x00, 0xA8, 0xAF, + 0xE0, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBD, 0x67, 0x18, 0x00, 0xB2, 0xDF, + 0x20, 0x00, 0xB3, 0xDF, 0x28, 0x00, 0xB4, 0xDF, 0x30, 0x00, 0xB5, 0xDF, + 0x38, 0x00, 0xB6, 0xDF, 0x40, 0x00, 0xB7, 0xDF, 0x48, 0x00, 0xBC, 0xDF, + 0x50, 0x00, 0xBE, 0xDF, 0x58, 0x00, 0xBF, 0xDF, 0x60, 0x00, 0xBD, 0x67, + 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64[] = { + 0x44, 0x0E, 0x60, 0x44, 0x9F, 0x02, 0x44, 0x9E, 0x04, 0x44, 0x9C, 0x06, + 0x44, 0x97, 0x08, 0x44, 0x96, 0x0A, 0x44, 0x95, 0x0C, 0x44, 0x94, 0x0E, + 0x44, 0x93, 0x10, 0x44, 0x92, 0x12, 0x58, 0x0E, 0x80, 0x01, 0x44, 0x0E, + 0x60, 0x0A, 0x44, 0xD2, 
0x44, 0xD3, 0x44, 0xD4, 0x44, 0xD5, 0x44, 0xD6, + 0x44, 0xD7, 0x44, 0xDC, 0x44, 0xDE, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, + 0x0B, 0x0E, 0x60, +}; +// 0x00000000: daddiu r29, r29, -96 +// 0x00000004: .cfi_def_cfa_offset: 96 +// 0x00000004: sd r31, +88(r29) +// 0x00000008: .cfi_offset: r31 at cfa-8 +// 0x00000008: sd r30, +80(r29) +// 0x0000000c: .cfi_offset: r30 at cfa-16 +// 0x0000000c: sd r28, +72(r29) +// 0x00000010: .cfi_offset: r28 at cfa-24 +// 0x00000010: sd r23, +64(r29) +// 0x00000014: .cfi_offset: r23 at cfa-32 +// 0x00000014: sd r22, +56(r29) +// 0x00000018: .cfi_offset: r22 at cfa-40 +// 0x00000018: sd r21, +48(r29) +// 0x0000001c: .cfi_offset: r21 at cfa-48 +// 0x0000001c: sd r20, +40(r29) +// 0x00000020: .cfi_offset: r20 at cfa-56 +// 0x00000020: sd r19, +32(r29) +// 0x00000024: .cfi_offset: r19 at cfa-64 +// 0x00000024: sd r18, +24(r29) +// 0x00000028: .cfi_offset: r18 at cfa-72 +// 0x00000028: sw r4, +0(r29) +// 0x0000002c: sw r5, +100(r29) +// 0x00000030: swc1 f14, +104(r29) +// 0x00000034: sw r7, +108(r29) +// 0x00000038: sw r8, +112(r29) +// 0x0000003c: daddiu r29, r29, -32 +// 0x00000040: .cfi_def_cfa_offset: 128 +// 0x00000040: daddiu r29, r29, 32 +// 0x00000044: .cfi_def_cfa_offset: 96 +// 0x00000044: .cfi_remember_state +// 0x00000044: ld r18, +24(r29) +// 0x00000048: .cfi_restore: r18 +// 0x00000048: ld r19, +32(r29) +// 0x0000004c: .cfi_restore: r19 +// 0x0000004c: ld r20, +40(r29) +// 0x00000050: .cfi_restore: r20 +// 0x00000050: ld r21, +48(r29) +// 0x00000054: .cfi_restore: r21 +// 0x00000054: ld r22, +56(r29) +// 0x00000058: .cfi_restore: r22 +// 0x00000058: ld r23, +64(r29) +// 0x0000005c: .cfi_restore: r23 +// 0x0000005c: ld r28, +72(r29) +// 0x00000060: .cfi_restore: r28 +// 0x00000060: ld r30, +80(r29) +// 0x00000064: .cfi_restore: r30 +// 0x00000064: ld r31, +88(r29) +// 0x00000068: .cfi_restore: r31 +// 0x00000068: daddiu r29, r29, 96 +// 0x0000006c: .cfi_def_cfa_offset: 0 +// 0x0000006c: jr r31 +// 0x00000070: nop +// 0x00000074: .cfi_restore_state +// 0x00000074: .cfi_def_cfa_offset: 96 + diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 2d9e03a718..8a14038074 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -28,6 +28,7 @@ #include "compiled_method.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "jni_env_ext.h" #include "mirror/art_method.h" @@ -93,7 +94,7 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, // Assembler that holds generated instructions std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set)); - jni_asm->InitializeFrameDescriptionEntry(); + jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GetIncludeDebugSymbols()); // Offsets into data structures // TODO: if cross compiling these offsets are for the host not the target @@ -105,6 +106,7 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, const size_t frame_size(main_jni_conv->FrameSize()); const std::vector<ManagedRegister>& callee_save_regs = main_jni_conv->CalleeSaveRegisters(); __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills()); + DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size)); // 2. Set up the HandleScope mr_conv->ResetIterator(FrameOffset(frame_size)); @@ -424,7 +426,9 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, // 16. 
Remove activation - need to restore callee save registers since the GC may have changed // them. + DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size)); __ RemoveFrame(frame_size, callee_save_regs); + DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size)); // 17. Finalize code generation __ EmitSlowPaths(); @@ -432,19 +436,19 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); __ FinalizeInstructions(code); - jni_asm->FinalizeFrameDescriptionEntry(); - std::vector<uint8_t>* fde(jni_asm->GetFrameDescriptionEntry()); - ArrayRef<const uint8_t> cfi_ref; - if (fde != nullptr) { - cfi_ref = ArrayRef<const uint8_t>(*fde); - } - return CompiledMethod::SwapAllocCompiledMethodCFI(driver, - instruction_set, - ArrayRef<const uint8_t>(managed_code), - frame_size, - main_jni_conv->CoreSpillMask(), - main_jni_conv->FpSpillMask(), - cfi_ref); + + return CompiledMethod::SwapAllocCompiledMethod(driver, + instruction_set, + ArrayRef<const uint8_t>(managed_code), + frame_size, + main_jni_conv->CoreSpillMask(), + main_jni_conv->FpSpillMask(), + nullptr, // src_mapping_table. + ArrayRef<const uint8_t>(), // mapping_table. + ArrayRef<const uint8_t>(), // vmap_table. + ArrayRef<const uint8_t>(), // native_gc_map. + ArrayRef<const uint8_t>(*jni_asm->cfi().data()), + ArrayRef<const LinkerPatch>()); } // Copy a single parameter from the managed to the JNI calling convention diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc index 17325d6d49..d446867d32 100644 --- a/compiler/jni/quick/mips64/calling_convention_mips64.cc +++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc @@ -126,25 +126,20 @@ const ManagedRegisterEntrySpills& Mips64ManagedRuntimeCallingConvention::EntrySp Mips64JniCallingConvention::Mips64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) { - callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S0)); - callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S1)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S2)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S3)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S4)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S5)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S6)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S7)); - callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(GP)); - callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(SP)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S8)); } uint32_t Mips64JniCallingConvention::CoreSpillMask() const { // Compute spill mask to agree with callee saves initialized in the constructor uint32_t result = 0; - result = 1 << S0 | 1 << S1 | 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | - 1 << S7 | 1 << GP | 1 << SP | 1 << S8; + result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << GP | 1 << S8 | 1 << RA; return result; } diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc new file mode 100644 index 0000000000..ceace824ea --- /dev/null +++ 
b/compiler/linker/arm/relative_patcher_arm_base.cc @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/arm/relative_patcher_arm_base.h" + +#include "compiled_method.h" +#include "oat.h" +#include "output_stream.h" + +namespace art { +namespace linker { + +uint32_t ArmBaseRelativePatcher::ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref) { + return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u); +} + +uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { + // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it + // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk + // of code. To avoid any alignment discrepancies for the final chunk, we always align the + // offset after reserving of writing any chunk. + uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); + bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset, MethodReference(nullptr, 0u), + aligned_offset); + if (needs_thunk) { + thunk_locations_.push_back(aligned_offset); + offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_); + } + return offset; +} + +uint32_t ArmBaseRelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { + if (current_thunk_to_write_ == thunk_locations_.size()) { + return offset; + } + uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); + if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) { + ++current_thunk_to_write_; + uint32_t aligned_code_delta = aligned_offset - offset; + if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { + return 0u; + } + if (UNLIKELY(!WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk_code_)))) { + return 0u; + } + uint32_t thunk_end_offset = aligned_offset + thunk_code_.size(); + // Align after writing chunk, see the ReserveSpace() above. 
+ offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_); + aligned_code_delta = offset - thunk_end_offset; + if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { + return 0u; + } + } + return offset; +} + +ArmBaseRelativePatcher::ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, + InstructionSet instruction_set, + std::vector<uint8_t> thunk_code, + uint32_t max_positive_displacement, + uint32_t max_negative_displacement) + : provider_(provider), instruction_set_(instruction_set), thunk_code_(thunk_code), + max_positive_displacement_(max_positive_displacement), + max_negative_displacement_(max_negative_displacement), + thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() { +} + +uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref, + uint32_t max_extra_space) { + DCHECK(compiled_method->GetQuickCode() != nullptr); + uint32_t quick_code_size = compiled_method->GetQuickCode()->size(); + uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader); + uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size); + // Adjust for extra space required by the subclass. + next_aligned_offset = compiled_method->AlignCode(next_aligned_offset + max_extra_space); + // TODO: ignore unprocessed patches targeting this method if they can reach quick_code_offset. + // We need the MethodReference for that. + if (!unprocessed_patches_.empty() && + next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) { + bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset, method_ref, + next_aligned_offset); + if (needs_thunk) { + // A single thunk will cover all pending patches. + unprocessed_patches_.clear(); + uint32_t thunk_location = compiled_method->AlignCode(offset); + thunk_locations_.push_back(thunk_location); + offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_); + } + } + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchCallRelative) { + unprocessed_patches_.emplace_back(patch.TargetMethod(), + quick_code_offset + patch.LiteralOffset()); + } + } + return offset; +} + +uint32_t ArmBaseRelativePatcher::CalculateDisplacement(uint32_t patch_offset, + uint32_t target_offset) { + // Unsigned arithmetic with its well-defined overflow behavior is just fine here. + uint32_t displacement = target_offset - patch_offset; + // NOTE: With unsigned arithmetic we do mean to use && rather than || below. + if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) { + // Unwritten thunks have higher offsets, check if it's within range. + DCHECK(current_thunk_to_write_ == thunk_locations_.size() || + thunk_locations_[current_thunk_to_write_] > patch_offset); + if (current_thunk_to_write_ != thunk_locations_.size() && + thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) { + displacement = thunk_locations_[current_thunk_to_write_] - patch_offset; + } else { + // We must have a previous thunk then. 
+ DCHECK_NE(current_thunk_to_write_, 0u); + DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset); + displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset; + DCHECK(displacement >= -max_negative_displacement_); + } + } + return displacement; +} + +bool ArmBaseRelativePatcher::ReserveSpaceProcessPatches(uint32_t quick_code_offset, + MethodReference method_ref, + uint32_t next_aligned_offset) { + // Process as many patches as possible, stop only on unresolved targets or calls too far back. + while (!unprocessed_patches_.empty()) { + MethodReference patch_ref = unprocessed_patches_.front().first; + uint32_t patch_offset = unprocessed_patches_.front().second; + DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset); + if (patch_ref.dex_file == method_ref.dex_file && + patch_ref.dex_method_index == method_ref.dex_method_index) { + DCHECK_GT(quick_code_offset, patch_offset); + if (quick_code_offset - patch_offset > max_positive_displacement_) { + return true; + } + } else { + auto result = provider_->FindMethodOffset(patch_ref); + if (!result.first) { + // If still unresolved, check if we have a thunk within range. + if (thunk_locations_.empty() || + patch_offset - thunk_locations_.back() > max_negative_displacement_) { + return next_aligned_offset - patch_offset > max_positive_displacement_; + } + } else { + uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_); + if (target_offset >= patch_offset) { + DCHECK_LE(target_offset - patch_offset, max_positive_displacement_); + } else { + // When calling back, check if we have a thunk that's closer than the actual target. + if (!thunk_locations_.empty()) { + target_offset = std::max(target_offset, thunk_locations_.back()); + } + if (patch_offset - target_offset > max_negative_displacement_) { + return true; + } + } + } + } + unprocessed_patches_.pop_front(); + } + return false; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h new file mode 100644 index 0000000000..f80dd962ce --- /dev/null +++ b/compiler/linker/arm/relative_patcher_arm_base.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ +#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ + +#include <deque> + +#include "linker/relative_patcher.h" +#include "method_reference.h" + +namespace art { +namespace linker { + +class ArmBaseRelativePatcher : public RelativePatcher { + public: + uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, + MethodReference method_ref) OVERRIDE; + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; + + protected: + ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, + InstructionSet instruction_set, std::vector<uint8_t> thunk_code, + uint32_t max_positive_displacement, uint32_t max_negative_displacement); + + uint32_t ReserveSpaceInternal(uint32_t offset, const CompiledMethod* compiled_method, + MethodReference method_ref, uint32_t max_extra_space); + uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset); + + private: + bool ReserveSpaceProcessPatches(uint32_t quick_code_offset, MethodReference method_ref, + uint32_t next_aligned_offset); + + RelativePatcherTargetProvider* const provider_; + const InstructionSet instruction_set_; + const std::vector<uint8_t> thunk_code_; + const uint32_t max_positive_displacement_; + const uint32_t max_negative_displacement_; + std::vector<uint32_t> thunk_locations_; + size_t current_thunk_to_write_; + + // ReserveSpace() tracks unprocessed patches. + typedef std::pair<MethodReference, uint32_t> UnprocessedPatch; + std::deque<UnprocessedPatch> unprocessed_patches_; + + friend class Arm64RelativePatcherTest; + friend class Thumb2RelativePatcherTest; + + DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc new file mode 100644 index 0000000000..b17cbca2d2 --- /dev/null +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/arm/relative_patcher_thumb2.h" + +#include "compiled_method.h" +#include "mirror/art_method.h" +#include "utils/arm/assembler_thumb2.h" + +namespace art { +namespace linker { + +Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider) + : ArmBaseRelativePatcher(provider, kThumb2, CompileThunkCode(), + kMaxPositiveDisplacement, kMaxNegativeDisplacement) { +} + +void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) { + DCHECK_LE(literal_offset + 4u, code->size()); + DCHECK_EQ(literal_offset & 1u, 0u); + DCHECK_EQ(patch_offset & 1u, 0u); + DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit. 
+ uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); + displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. + DCHECK_EQ(displacement & 1u, 0u); + DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed. + uint32_t signbit = (displacement >> 31) & 0x1; + uint32_t i1 = (displacement >> 23) & 0x1; + uint32_t i2 = (displacement >> 22) & 0x1; + uint32_t imm10 = (displacement >> 12) & 0x03ff; + uint32_t imm11 = (displacement >> 1) & 0x07ff; + uint32_t j1 = i1 ^ (signbit ^ 1); + uint32_t j2 = i2 ^ (signbit ^ 1); + uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11; + value |= 0xf000d000; // BL + + // Check that we're just overwriting an existing BL. + DCHECK_EQ(GetInsn32(code, literal_offset) & 0xf800d000, 0xf000d000); + // Write the new BL. + SetInsn32(code, literal_offset, value); +} + +void Thumb2RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + uint32_t literal_offset = patch.LiteralOffset(); + uint32_t pc_literal_offset = patch.PcInsnOffset(); + uint32_t pc_base = patch_offset + (pc_literal_offset - literal_offset) + 4u /* PC adjustment */; + uint32_t diff = target_offset - pc_base; + + uint32_t insn = GetInsn32(code, literal_offset); + DCHECK_EQ(insn & 0xff7ff0ffu, 0xf2400000u); // MOVW/MOVT, unpatched (imm16 == 0). + uint32_t diff16 = ((insn & 0x00800000u) != 0u) ? (diff >> 16) : (diff & 0xffffu); + uint32_t imm4 = (diff16 >> 12) & 0xfu; + uint32_t imm = (diff16 >> 11) & 0x1u; + uint32_t imm3 = (diff16 >> 8) & 0x7u; + uint32_t imm8 = diff16 & 0xffu; + insn = (insn & 0xfbf08f00u) | (imm << 26) | (imm4 << 16) | (imm3 << 12) | imm8; + SetInsn32(code, literal_offset, insn); +} + +std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() { + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. 
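+ // Concretely, the thunk is two instructions: a load of the quick entry point
+ // straight into the PC from the ArtMethod in R0 (the managed calling convention
+ // passes the called method there), followed by a BKPT that is never reached:
+ //   ldr pc, [r0, #<entry point offset>]
+ //   bkpt #0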
+ arm::Thumb2Assembler assembler; + assembler.LoadFromOffset( + arm::kLoadWord, arm::PC, arm::R0, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + assembler.bkpt(0); + std::vector<uint8_t> thunk_code(assembler.CodeSize()); + MemoryRegion code(thunk_code.data(), thunk_code.size()); + assembler.FinalizeInstructions(code); + return thunk_code; +} + +void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { + DCHECK_LE(offset + 4u, code->size()); + DCHECK_EQ(offset & 1u, 0u); + uint8_t* addr = &(*code)[offset]; + addr[0] = (value >> 16) & 0xff; + addr[1] = (value >> 24) & 0xff; + addr[2] = (value >> 0) & 0xff; + addr[3] = (value >> 8) & 0xff; +} + +uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) { + DCHECK_LE(offset + 4u, code.size()); + DCHECK_EQ(offset & 1u, 0u); + const uint8_t* addr = &code[offset]; + return + (static_cast<uint32_t>(addr[0]) << 16) + + (static_cast<uint32_t>(addr[1]) << 24) + + (static_cast<uint32_t>(addr[2]) << 0)+ + (static_cast<uint32_t>(addr[3]) << 8); +} + +template <typename Alloc> +uint32_t Thumb2RelativePatcher::GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset) { + return GetInsn32(ArrayRef<const uint8_t>(*code), offset); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h new file mode 100644 index 0000000000..2d474c2db0 --- /dev/null +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ +#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ + +#include "linker/arm/relative_patcher_arm_base.h" + +namespace art { +namespace linker { + +class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { + public: + explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); + + void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + + private: + static std::vector<uint8_t> CompileThunkCode(); + + void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); + static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset); + + template <typename Alloc> + static uint32_t GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset); + + // PC displacement from patch location; Thumb2 PC is always at instruction address + 4. + static constexpr int32_t kPcDisplacement = 4; + + // Maximum positive and negative displacement measured from the patch location. 
+ // (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from + // the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.) + static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; + static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement; + + DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc new file mode 100644 index 0000000000..a057a4cf16 --- /dev/null +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -0,0 +1,351 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher_test.h" +#include "linker/arm/relative_patcher_thumb2.h" + +namespace art { +namespace linker { + +class Thumb2RelativePatcherTest : public RelativePatcherTest { + public: + Thumb2RelativePatcherTest() : RelativePatcherTest(kThumb2, "default") { } + + protected: + static const uint8_t kCallRawCode[]; + static const ArrayRef<const uint8_t> kCallCode; + static const uint8_t kNopRawCode[]; + static const ArrayRef<const uint8_t> kNopCode; + + // Branches within range [-256, 256) can be created from these by adding the low 8 bits. + static constexpr uint32_t kBlPlus0 = 0xf000f800; + static constexpr uint32_t kBlMinus256 = 0xf7ffff00; + + // Special BL values. + static constexpr uint32_t kBlPlusMax = 0xf3ffd7ff; + static constexpr uint32_t kBlMinusMax = 0xf400d000; + + bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, + const ArrayRef<const LinkerPatch>& method1_patches, + const ArrayRef<const uint8_t>& method3_code, + const ArrayRef<const LinkerPatch>& method3_patches, + uint32_t distance_without_thunks) { + CHECK_EQ(distance_without_thunks % kArmAlignment, 0u); + const uint32_t method1_offset = + CompiledCode::AlignCode(kTrampolineSize, kThumb2) + sizeof(OatQuickMethodHeader); + AddCompiledMethod(MethodRef(1u), method1_code, method1_patches); + + // We want to put the method3 at a very precise offset. + const uint32_t method3_offset = method1_offset + distance_without_thunks; + CHECK(IsAligned<kArmAlignment>(method3_offset - sizeof(OatQuickMethodHeader))); + + // Calculate size of method2 so that we put method3 at the correct place. 
+ const uint32_t method2_offset = + CompiledCode::AlignCode(method1_offset + method1_code.size(), kThumb2) + + sizeof(OatQuickMethodHeader); + const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset); + std::vector<uint8_t> method2_raw_code(method2_size); + ArrayRef<const uint8_t> method2_code(method2_raw_code); + AddCompiledMethod(MethodRef(2u), method2_code, ArrayRef<const LinkerPatch>()); + + AddCompiledMethod(MethodRef(3u), method3_code, method3_patches); + + Link(); + + // Check assumptions. + CHECK_EQ(GetMethodOffset(1), method1_offset); + CHECK_EQ(GetMethodOffset(2), method2_offset); + auto result3 = method_offset_map_.FindMethodOffset(MethodRef(3)); + CHECK(result3.first); + // There may be a thunk before method2. + if (result3.second == method3_offset + 1 /* thumb mode */) { + return false; // No thunk. + } else { + uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kThumb2); + CHECK_EQ(result3.second, method3_offset + aligned_thunk_size + 1 /* thumb mode */); + return true; // Thunk present. + } + } + + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + CHECK_NE(result.second & 1u, 0u); + return result.second - 1 /* thumb mode */; + } + + uint32_t ThunkSize() { + return static_cast<Thumb2RelativePatcher*>(patcher_.get())->thunk_code_.size(); + } + + bool CheckThunk(uint32_t thunk_offset) { + Thumb2RelativePatcher* patcher = static_cast<Thumb2RelativePatcher*>(patcher_.get()); + ArrayRef<const uint8_t> expected_code(patcher->thunk_code_); + if (output_.size() < thunk_offset + expected_code.size()) { + LOG(ERROR) << "output_.size() == " << output_.size() << " < " + << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); + return false; + } + ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); + if (linked_code == expected_code) { + return true; + } + // Log failure info. 
+ DumpDiff(expected_code, linked_code); + return false; + } + + std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { + std::vector<uint8_t> result; + result.reserve(num_nops * 2u + 4u); + for (size_t i = 0; i != num_nops; ++i) { + result.push_back(0x00); + result.push_back(0xbf); + } + result.push_back(static_cast<uint8_t>(bl >> 16)); + result.push_back(static_cast<uint8_t>(bl >> 24)); + result.push_back(static_cast<uint8_t>(bl)); + result.push_back(static_cast<uint8_t>(bl >> 8)); + return result; + } + + void TestDexCachereference(uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + dex_cache_arrays_begin_ = dex_cache_arrays_begin; + static const uint8_t raw_code[] = { + 0x40, 0xf2, 0x00, 0x00, // MOVW r0, #0 (placeholder) + 0xc0, 0xf2, 0x00, 0x00, // MOVT r0, #0 (placeholder) + 0x78, 0x44, // ADD r0, pc + }; + constexpr uint32_t pc_insn_offset = 8u; + const ArrayRef<const uint8_t> code(raw_code); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(0u, nullptr, pc_insn_offset, element_offset), + LinkerPatch::DexCacheArrayPatch(4u, nullptr, pc_insn_offset, element_offset), + }; + AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t pc_base_offset = method1_offset + pc_insn_offset + 4u /* PC adjustment */; + uint32_t diff = dex_cache_arrays_begin_ + element_offset - pc_base_offset; + // Distribute the bits of the diff between the MOVW and MOVT: + uint32_t diffw = diff & 0xffffu; + uint32_t difft = diff >> 16; + uint32_t movw = 0xf2400000u | // MOVW r0, #0 (placeholder), + ((diffw & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, + ((diffw & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, + ((diffw & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, + ((diffw & 0x00ffu)); // keep imm8 at bits 0-7. + uint32_t movt = 0xf2c00000u | // MOVT r0, #0 (placeholder), + ((difft & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, + ((difft & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, + ((difft & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, + ((difft & 0x00ffu)); // keep imm8 at bits 0-7. 
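+ // For example, diff == 0x12345678 gives diffw == 0x5678 and difft == 0x1234; each
+ // half is scattered into the imm4/imm/imm3/imm8 fields of the MOVW/MOVT encodings
+ // above, mirroring what Thumb2RelativePatcher::PatchDexCacheReference does when it
+ // patches the placeholder instructions.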
+ const uint8_t expected_code[] = { + static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24), + static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8), + static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24), + static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8), + 0x78, 0x44, + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + } +}; + +const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { + 0x00, 0xf0, 0x00, 0xf8 +}; + +const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kCallCode(kCallRawCode); + +const uint8_t Thumb2RelativePatcherTest::kNopRawCode[] = { + 0x00, 0xbf +}; + +const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kNopCode(kNopRawCode); + +TEST_F(Thumb2RelativePatcherTest, CallSelf) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + static const uint8_t expected_code[] = { + 0xff, 0xf7, 0xfe, 0xff + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, CallOther) { + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); + LinkerPatch method2_patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method2_offset = GetMethodOffset(2u); + uint32_t diff_after = method2_offset - (method1_offset + 4u /* PC adjustment */); + ASSERT_EQ(diff_after & 1u, 0u); + ASSERT_LT(diff_after >> 1, 1u << 8); // Simple encoding, (diff_after >> 1) fits into 8 bits. + static const uint8_t method1_expected_code[] = { + 0x00, 0xf0, static_cast<uint8_t>(diff_after >> 1), 0xf8 + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); + uint32_t diff_before = method1_offset - (method2_offset + 4u /* PC adjustment */); + ASSERT_EQ(diff_before & 1u, 0u); + ASSERT_GE(diff_before, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0. + auto method2_expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff_before >> 1) & 0xffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, CallTrampoline) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t diff = kTrampolineOffset - (method1_offset + 4u); + ASSERT_EQ(diff & 1u, 0u); + ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned). + auto expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff >> 1) & 0xffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) { + auto method1_raw_code = GenNopsAndBl(3u, kBlPlus0); + constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs. 
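+ // The displacement constants in the tests below follow from the header: a Thumb2 BL
+ // reaches at most 16 * MB - 2 + 4 bytes forward and 16 * MB - 4 bytes backward from
+ // the patch location (kMaxPositiveDisplacement / kMaxNegativeDisplacement), because
+ // the PC used as the branch base is 4 bytes past the patched instruction.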
+ ArrayRef<const uint8_t> method1_code(method1_raw_code); + ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), + }; + + constexpr uint32_t max_positive_disp = 16 * MB - 2u + 4u /* PC adjustment */; + bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches, + kNopCode, ArrayRef<const LinkerPatch>(), + bl_offset_in_method1 + max_positive_disp); + ASSERT_FALSE(thunk_in_gap); // There should be no thunk. + + // Check linked code. + auto expected_code = GenNopsAndBl(3u, kBlPlusMax); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) { + auto method3_raw_code = GenNopsAndBl(2u, kBlPlus0); + constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs. + ArrayRef<const uint8_t> method3_code(method3_raw_code); + ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); + LinkerPatch method3_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), + }; + + constexpr uint32_t just_over_max_negative_disp = 16 * MB - 4u /* PC adjustment */; + bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(), + method3_code, method3_patches, + just_over_max_negative_disp - bl_offset_in_method3); + ASSERT_FALSE(thunk_in_gap); // There should be no thunk. + + // Check linked code. + auto expected_code = GenNopsAndBl(2u, kBlMinusMax); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { + auto method1_raw_code = GenNopsAndBl(2u, kBlPlus0); + constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs. + ArrayRef<const uint8_t> method1_code(method1_raw_code); + ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), + }; + + constexpr uint32_t just_over_max_positive_disp = 16 * MB + 4u /* PC adjustment */; + bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches, + kNopCode, ArrayRef<const LinkerPatch>(), + bl_offset_in_method1 + just_over_max_positive_disp); + ASSERT_TRUE(thunk_in_gap); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method3_offset = GetMethodOffset(3u); + uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader); + ASSERT_TRUE(IsAligned<kArmAlignment>(method3_header_offset)); + uint32_t thunk_offset = method3_header_offset - CompiledCode::AlignCode(ThunkSize(), kThumb2); + ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset)); + uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */); + ASSERT_EQ(diff & 1u, 0u); + ASSERT_GE(diff, 16 * MB - (1u << 9)); // Simple encoding, unknown bits fit into the low 8 bits. + auto expected_code = GenNopsAndBl(2u, 0xf3ffd700 | ((diff >> 1) & 0xffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + CheckThunk(thunk_offset); +} + +TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { + auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0); + constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. 
+ ArrayRef<const uint8_t> method3_code(method3_raw_code); + ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); + LinkerPatch method3_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), + }; + + constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */; + bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(), + method3_code, method3_patches, + just_over_max_negative_disp - bl_offset_in_method3); + ASSERT_FALSE(thunk_in_gap); // There should be a thunk but it should be after the method2. + + // Check linked code. + uint32_t method3_offset = GetMethodOffset(3u); + uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), kThumb2); + uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */); + ASSERT_EQ(diff & 1u, 0u); + ASSERT_LT(diff >> 1, 1u << 8); // Simple encoding, (diff >> 1) fits into 8 bits. + auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); + EXPECT_TRUE(CheckThunk(thunk_offset)); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm8) { + TestDexCachereference(0x00ff0000u, 0x00fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm3) { + TestDexCachereference(0x02ff0000u, 0x05fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm) { + TestDexCachereference(0x08ff0000u, 0x08fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceimm4) { + TestDexCachereference(0xd0ff0000u, 0x60fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc new file mode 100644 index 0000000000..72ddf07089 --- /dev/null +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -0,0 +1,322 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "linker/arm64/relative_patcher_arm64.h" + +#include "arch/arm64/instruction_set_features_arm64.h" +#include "compiled_method.h" +#include "driver/compiler_driver.h" +#include "mirror/art_method.h" +#include "utils/arm64/assembler_arm64.h" +#include "oat.h" +#include "output_stream.h" + +namespace art { +namespace linker { + +Arm64RelativePatcher::Arm64RelativePatcher(RelativePatcherTargetProvider* provider, + const Arm64InstructionSetFeatures* features) + : ArmBaseRelativePatcher(provider, kArm64, CompileThunkCode(), + kMaxPositiveDisplacement, kMaxNegativeDisplacement), + fix_cortex_a53_843419_(features->NeedFixCortexA53_843419()), + reserved_adrp_thunks_(0u), + processed_adrp_thunks_(0u) { + if (fix_cortex_a53_843419_) { + adrp_thunk_locations_.reserve(16u); + current_method_thunks_.reserve(16u * kAdrpThunkSize); + } +} + +uint32_t Arm64RelativePatcher::ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref) { + if (!fix_cortex_a53_843419_) { + DCHECK(adrp_thunk_locations_.empty()); + return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u); + } + + // Add thunks for previous method if any. + if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) { + size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_; + offset = CompiledMethod::AlignCode(offset, kArm64) + kAdrpThunkSize * num_adrp_thunks; + reserved_adrp_thunks_ = adrp_thunk_locations_.size(); + } + + // Count the number of ADRP insns as the upper bound on the number of thunks needed + // and use it to reserve space for other linker patches. + size_t num_adrp = 0u; + DCHECK(compiled_method != nullptr); + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchDexCacheArray && + patch.LiteralOffset() == patch.PcInsnOffset()) { // ADRP patch + ++num_adrp; + } + } + offset = ReserveSpaceInternal(offset, compiled_method, method_ref, kAdrpThunkSize * num_adrp); + if (num_adrp == 0u) { + return offset; + } + + // Now that we have the actual offset where the code will be placed, locate the ADRP insns + // that actually require the thunk. + uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader); + ArrayRef<const uint8_t> code(*compiled_method->GetQuickCode()); + uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size()); + DCHECK(compiled_method != nullptr); + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchDexCacheArray && + patch.LiteralOffset() == patch.PcInsnOffset()) { // ADRP patch + uint32_t patch_offset = quick_code_offset + patch.LiteralOffset(); + if (NeedsErratum843419Thunk(code, patch.LiteralOffset(), patch_offset)) { + adrp_thunk_locations_.emplace_back(patch_offset, thunk_offset); + thunk_offset += kAdrpThunkSize; + } + } + } + return offset; +} + +uint32_t Arm64RelativePatcher::ReserveSpaceEnd(uint32_t offset) { + if (!fix_cortex_a53_843419_) { + DCHECK(adrp_thunk_locations_.empty()); + } else { + // Add thunks for the last method if any. 
+ if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) { + size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_; + offset = CompiledMethod::AlignCode(offset, kArm64) + kAdrpThunkSize * num_adrp_thunks; + reserved_adrp_thunks_ = adrp_thunk_locations_.size(); + } + } + return ArmBaseRelativePatcher::ReserveSpaceEnd(offset); +} + +uint32_t Arm64RelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { + if (fix_cortex_a53_843419_) { + if (!current_method_thunks_.empty()) { + uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kArm64); + if (kIsDebugBuild) { + CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size())); + size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize; + CHECK_LE(num_thunks, processed_adrp_thunks_); + for (size_t i = 0u; i != num_thunks; ++i) { + const auto& entry = adrp_thunk_locations_[processed_adrp_thunks_ - num_thunks + i]; + CHECK_EQ(entry.second, aligned_offset + i * kAdrpThunkSize); + } + } + uint32_t aligned_code_delta = aligned_offset - offset; + if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { + return 0u; + } + if (!WriteMiscThunk(out, ArrayRef<const uint8_t>(current_method_thunks_))) { + return 0u; + } + offset = aligned_offset + current_method_thunks_.size(); + current_method_thunks_.clear(); + } + } + return ArmBaseRelativePatcher::WriteThunks(out, offset); +} + +void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) { + DCHECK_LE(literal_offset + 4u, code->size()); + DCHECK_EQ(literal_offset & 3u, 0u); + DCHECK_EQ(patch_offset & 3u, 0u); + DCHECK_EQ(target_offset & 3u, 0u); + uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); + DCHECK_EQ(displacement & 3u, 0u); + DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed. + uint32_t insn = (displacement & 0x0fffffffu) >> 2; + insn |= 0x94000000; // BL + + // Check that we're just overwriting an existing BL. + DCHECK_EQ(GetInsn(code, literal_offset) & 0xfc000000u, 0x94000000u); + // Write the new BL. + SetInsn(code, literal_offset, insn); +} + +void Arm64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + DCHECK_EQ(patch_offset & 3u, 0u); + DCHECK_EQ(target_offset & 3u, 0u); + uint32_t literal_offset = patch.LiteralOffset(); + uint32_t insn = GetInsn(code, literal_offset); + uint32_t pc_insn_offset = patch.PcInsnOffset(); + uint32_t disp = target_offset - ((patch_offset - literal_offset + pc_insn_offset) & ~0xfffu); + if (literal_offset == pc_insn_offset) { + // Check it's an ADRP with imm == 0 (unset). + DCHECK_EQ((insn & 0xffffffe0u), 0x90000000u) + << literal_offset << ", " << pc_insn_offset << ", 0x" << std::hex << insn; + if (fix_cortex_a53_843419_ && processed_adrp_thunks_ != adrp_thunk_locations_.size() && + adrp_thunk_locations_[processed_adrp_thunks_].first == patch_offset) { + DCHECK(NeedsErratum843419Thunk(ArrayRef<const uint8_t>(*code), + literal_offset, patch_offset)); + uint32_t thunk_offset = adrp_thunk_locations_[processed_adrp_thunks_].second; + uint32_t adrp_disp = target_offset - (thunk_offset & ~0xfffu); + uint32_t adrp = PatchAdrp(insn, adrp_disp); + + uint32_t out_disp = thunk_offset - patch_offset; + DCHECK_EQ(out_disp & 3u, 0u); + DCHECK((out_disp >> 27) == 0u || (out_disp >> 27) == 31u); // 28-bit signed. 
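The branch encodings being built here, like the BL rewritten in PatchCall() above, all fold a 28-bit signed byte displacement into the low 26 bits of an A64 B/BL instruction. A minimal standalone sketch of that arithmetic, using the same 0x94000000 (BL) and 0x14000000 (B) opcode constants; the helper names are invented for this illustration and are not part of the patch:

    #include <cassert>
    #include <cstdint>

    // Fold a word-aligned byte displacement into an A64 B/BL and recover it again.
    uint32_t EncodeBranch(uint32_t opcode, uint32_t disp) {  // opcode: 0x94000000u (BL) or 0x14000000u (B).
      assert((disp & 3u) == 0u);                             // Branch offsets are 4-byte aligned.
      assert((disp >> 27) == 0u || (disp >> 27) == 31u);     // Must fit in signed 28 bits.
      return opcode | ((disp & 0x0fffffffu) >> 2);           // imm26 occupies bits 0-25.
    }

    int32_t DecodeBranchDisp(uint32_t insn) {
      // Drop the 6 opcode bits, sign-extend the 26-bit immediate, scale words back to bytes.
      return (static_cast<int32_t>(insn << 6) >> 6) * 4;
    }

For example, EncodeBranch(0x94000000u, 8u) yields 0x94000002u, and DecodeBranchDisp(0x94000002u) returns 8.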
+ insn = (out_disp & 0x0fffffffu) >> 2;
+ insn |= 0x14000000; // B <thunk>
+
+ uint32_t back_disp = -out_disp;
+ DCHECK_EQ(back_disp & 3u, 0u);
+ DCHECK((back_disp >> 27) == 0u || (back_disp >> 27) == 31u); // 28-bit signed.
+ uint32_t b_back = (back_disp & 0x0fffffffu) >> 2;
+ b_back |= 0x14000000; // B <back>
+ size_t thunks_code_offset = current_method_thunks_.size();
+ current_method_thunks_.resize(thunks_code_offset + kAdrpThunkSize);
+ SetInsn(&current_method_thunks_, thunks_code_offset, adrp);
+ SetInsn(&current_method_thunks_, thunks_code_offset + 4u, b_back);
+ static_assert(kAdrpThunkSize == 2 * 4u, "thunk has 2 instructions");
+
+ processed_adrp_thunks_ += 1u;
+ } else {
+ insn = PatchAdrp(insn, disp);
+ }
+ // Write the new ADRP (or B to the erratum 843419 thunk).
+ SetInsn(code, literal_offset, insn);
+ } else {
+ DCHECK_EQ(insn & 0xfffffc00, 0xb9400000); // LDR 32-bit with imm12 == 0 (unset).
+ if (kIsDebugBuild) {
+ uint32_t adrp = GetInsn(code, pc_insn_offset);
+ if ((adrp & 0x9f000000u) != 0x90000000u) {
+ CHECK(fix_cortex_a53_843419_);
+ CHECK_EQ(adrp & 0xfc000000u, 0x14000000u); // B <thunk>
+ CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size()));
+ size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize;
+ CHECK_LE(num_thunks, processed_adrp_thunks_);
+ uint32_t b_offset = patch_offset - literal_offset + pc_insn_offset;
+ for (size_t i = processed_adrp_thunks_ - num_thunks; ; ++i) {
+ CHECK_NE(i, processed_adrp_thunks_);
+ if (adrp_thunk_locations_[i].first == b_offset) {
+ size_t idx = num_thunks - (processed_adrp_thunks_ - i);
+ adrp = GetInsn(&current_method_thunks_, idx * kAdrpThunkSize);
+ break;
+ }
+ }
+ }
+ CHECK_EQ(adrp & 0x9f00001fu, // Check that pc_insn_offset points
+ 0x90000000 | ((insn >> 5) & 0x1fu)); // to ADRP with matching register.
+ }
+ uint32_t imm12 = (disp & 0xfffu) >> 2;
+ insn = (insn & ~(0xfffu << 10)) | (imm12 << 10);
+ SetInsn(code, literal_offset, insn);
+ }
+}
+
+std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() {
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ arm64::Arm64Assembler assembler;
+ Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArm64PointerSize).Int32Value());
+ assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+ // Ensure we emit the literal pool.
+ assembler.EmitSlowPaths();
+ std::vector<uint8_t> thunk_code(assembler.CodeSize());
+ MemoryRegion code(thunk_code.data(), thunk_code.size());
+ assembler.FinalizeInstructions(code);
+ return thunk_code;
+}
+
+uint32_t Arm64RelativePatcher::PatchAdrp(uint32_t adrp, uint32_t disp) {
+ return (adrp & 0x9f00001fu) | // Clear offset bits, keep ADRP with destination reg.
+ // Bottom 12 bits are ignored, the next 2 lowest bits are encoded in bits 29-30.
+ ((disp & 0x00003000u) << (29 - 12)) |
+ // The next 16 bits are encoded in bits 5-22.
+ ((disp & 0xffffc000u) >> (12 + 2 - 5)) |
+ // Since the target_offset is based on the beginning of the oat file and the
+ // image space precedes the oat file, the target_offset into image space will
+ // be negative yet passed as uint32_t. Therefore we limit the displacement
+ // to +-2GiB (rather than the maximum +-4GiB) and determine the sign bit from
+ // the highest bit of the displacement. This is encoded in bit 23.
+ ((disp & 0x80000000u) >> (31 - 23)); +} + +bool Arm64RelativePatcher::NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, + uint32_t literal_offset, + uint32_t patch_offset) { + DCHECK_EQ(patch_offset & 0x3u, 0u); + if ((patch_offset & 0xff8) == 0xff8) { // ...ff8 or ...ffc + uint32_t adrp = GetInsn(code, literal_offset); + DCHECK_EQ(adrp & 0xff000000, 0x90000000); + uint32_t next_offset = patch_offset + 4u; + uint32_t next_insn = GetInsn(code, literal_offset + 4u); + + // Below we avoid patching sequences where the adrp is followed by a load which can easily + // be proved to be aligned. + + // First check if the next insn is the LDR using the result of the ADRP. + // LDR <Wt>, [<Xn>, #pimm], where <Xn> == ADRP destination reg. + if ((next_insn & 0xffc00000) == 0xb9400000 && + (((next_insn >> 5) ^ adrp) & 0x1f) == 0) { + return false; + } + + // LDR <Wt>, <label> is always aligned and thus it doesn't cause boundary crossing. + if ((next_insn & 0xff000000) == 0x18000000) { + return false; + } + + // LDR <Xt>, <label> is aligned iff the pc + displacement is a multiple of 8. + if ((next_insn & 0xff000000) == 0x58000000) { + bool is_aligned_load = (((next_offset >> 2) ^ (next_insn >> 5)) & 1) == 0; + return !is_aligned_load; + } + + // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned loads, as SP is + // guaranteed to be 128-bits aligned and <pimm> is multiple of the load size. + if ((next_insn & 0xbfc003e0) == 0xb94003e0) { + return false; + } + return true; + } + return false; +} + +void Arm64RelativePatcher::SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { + DCHECK_LE(offset + 4u, code->size()); + DCHECK_EQ(offset & 3u, 0u); + uint8_t* addr = &(*code)[offset]; + addr[0] = (value >> 0) & 0xff; + addr[1] = (value >> 8) & 0xff; + addr[2] = (value >> 16) & 0xff; + addr[3] = (value >> 24) & 0xff; +} + +uint32_t Arm64RelativePatcher::GetInsn(ArrayRef<const uint8_t> code, uint32_t offset) { + DCHECK_LE(offset + 4u, code.size()); + DCHECK_EQ(offset & 3u, 0u); + const uint8_t* addr = &code[offset]; + return + (static_cast<uint32_t>(addr[0]) << 0) + + (static_cast<uint32_t>(addr[1]) << 8) + + (static_cast<uint32_t>(addr[2]) << 16)+ + (static_cast<uint32_t>(addr[3]) << 24); +} + +template <typename Alloc> +uint32_t Arm64RelativePatcher::GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset) { + return GetInsn(ArrayRef<const uint8_t>(*code), offset); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h new file mode 100644 index 0000000000..2d07e75c85 --- /dev/null +++ b/compiler/linker/arm64/relative_patcher_arm64.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ +#define ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ + +#include "linker/arm/relative_patcher_arm_base.h" +#include "utils/array_ref.h" + +namespace art { +namespace linker { + +class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { + public: + Arm64RelativePatcher(RelativePatcherTargetProvider* provider, + const Arm64InstructionSetFeatures* features); + + uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, + MethodReference method_ref) OVERRIDE; + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; + void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + + private: + static std::vector<uint8_t> CompileThunkCode(); + static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp); + + static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset, + uint32_t patch_offset); + void SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); + static uint32_t GetInsn(ArrayRef<const uint8_t> code, uint32_t offset); + + template <typename Alloc> + static uint32_t GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset); + + // Maximum positive and negative displacement measured from the patch location. + // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from + // the ARM64 PC pointing to the BL.) + static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u; + static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27); + + // The ADRP thunk for erratum 843419 is 2 instructions, i.e. 8 bytes. + static constexpr uint32_t kAdrpThunkSize = 8u; + + const bool fix_cortex_a53_843419_; + // Map original patch_offset to thunk offset. + std::vector<std::pair<uint32_t, uint32_t>> adrp_thunk_locations_; + size_t reserved_adrp_thunks_; + size_t processed_adrp_thunks_; + std::vector<uint8_t> current_method_thunks_; + + DISALLOW_COPY_AND_ASSIGN(Arm64RelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc new file mode 100644 index 0000000000..21f93672ad --- /dev/null +++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc @@ -0,0 +1,582 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "linker/relative_patcher_test.h" +#include "linker/arm64/relative_patcher_arm64.h" + +namespace art { +namespace linker { + +class Arm64RelativePatcherTest : public RelativePatcherTest { + public: + explicit Arm64RelativePatcherTest(const std::string& variant) + : RelativePatcherTest(kArm64, variant) { } + + protected: + static const uint8_t kCallRawCode[]; + static const ArrayRef<const uint8_t> kCallCode; + static const uint8_t kNopRawCode[]; + static const ArrayRef<const uint8_t> kNopCode; + + // All branches can be created from kBlPlus0 or kBPlus0 by adding the low 26 bits. + static constexpr uint32_t kBlPlus0 = 0x94000000u; + static constexpr uint32_t kBPlus0 = 0x14000000u; + + // Special BL values. + static constexpr uint32_t kBlPlusMax = 0x95ffffffu; + static constexpr uint32_t kBlMinusMax = 0x96000000u; + + // LDUR x2, [sp, #4], i.e. unaligned load crossing 64-bit boundary (assuming aligned sp). + static constexpr uint32_t kLdurInsn = 0xf840405fu; + + // LDR w12, <label> and LDR x12, <label>. Bits 5-23 contain label displacement in 4-byte units. + static constexpr uint32_t kLdrWPcRelInsn = 0x1800000cu; + static constexpr uint32_t kLdrXPcRelInsn = 0x5800000cu; + + // LDR w13, [SP, #<pimm>] and LDR x13, [SP, #<pimm>]. Bits 10-21 contain displacement from SP + // in units of 4-bytes (for 32-bit load) or 8-bytes (for 64-bit load). + static constexpr uint32_t kLdrWSpRelInsn = 0xb94003edu; + static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu; + + uint32_t Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, + const ArrayRef<const LinkerPatch>& method1_patches, + const ArrayRef<const uint8_t>& last_method_code, + const ArrayRef<const LinkerPatch>& last_method_patches, + uint32_t distance_without_thunks) { + CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u); + const uint32_t method1_offset = + CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader); + AddCompiledMethod(MethodRef(1u), method1_code, method1_patches); + const uint32_t gap_start = + CompiledCode::AlignCode(method1_offset + method1_code.size(), kArm64); + + // We want to put the method3 at a very precise offset. + const uint32_t last_method_offset = method1_offset + distance_without_thunks; + const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader); + CHECK(IsAligned<kArm64Alignment>(gap_end)); + + // Fill the gap with intermediate methods in chunks of 2MiB and the last in [2MiB, 4MiB). + // (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB + // offsets by this test.) + uint32_t method_idx = 2u; + constexpr uint32_t kSmallChunkSize = 2 * MB; + std::vector<uint8_t> gap_code; + size_t gap_size = gap_end - gap_start; + for (; gap_size >= 2u * kSmallChunkSize; gap_size -= kSmallChunkSize) { + uint32_t chunk_code_size = kSmallChunkSize - sizeof(OatQuickMethodHeader); + gap_code.resize(chunk_code_size, 0u); + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code), + ArrayRef<const LinkerPatch>()); + method_idx += 1u; + } + uint32_t chunk_code_size = gap_size - sizeof(OatQuickMethodHeader); + gap_code.resize(chunk_code_size, 0u); + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code), + ArrayRef<const LinkerPatch>()); + method_idx += 1u; + + // Add the last method and link + AddCompiledMethod(MethodRef(method_idx), last_method_code, last_method_patches); + Link(); + + // Check assumptions. 
+ CHECK_EQ(GetMethodOffset(1), method1_offset); + auto last_result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(last_result.first); + // There may be a thunk before method2. + if (last_result.second != last_method_offset) { + // Thunk present. Check that there's only one. + uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kArm64); + CHECK_EQ(last_result.second, last_method_offset + aligned_thunk_size); + } + return method_idx; + } + + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + CHECK_EQ(result.second & 3u, 0u); + return result.second; + } + + uint32_t ThunkSize() { + return static_cast<Arm64RelativePatcher*>(patcher_.get())->thunk_code_.size(); + } + + bool CheckThunk(uint32_t thunk_offset) { + Arm64RelativePatcher* patcher = static_cast<Arm64RelativePatcher*>(patcher_.get()); + ArrayRef<const uint8_t> expected_code(patcher->thunk_code_); + if (output_.size() < thunk_offset + expected_code.size()) { + LOG(ERROR) << "output_.size() == " << output_.size() << " < " + << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); + return false; + } + ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); + if (linked_code == expected_code) { + return true; + } + // Log failure info. + DumpDiff(expected_code, linked_code); + return false; + } + + std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { + std::vector<uint8_t> result; + result.reserve(num_nops * 4u + 4u); + for (size_t i = 0; i != num_nops; ++i) { + result.insert(result.end(), kNopCode.begin(), kNopCode.end()); + } + result.push_back(static_cast<uint8_t>(bl)); + result.push_back(static_cast<uint8_t>(bl >> 8)); + result.push_back(static_cast<uint8_t>(bl >> 16)); + result.push_back(static_cast<uint8_t>(bl >> 24)); + return result; + } + + std::vector<uint8_t> GenNopsAndAdrpLdr(size_t num_nops, + uint32_t method_offset, uint32_t target_offset) { + std::vector<uint8_t> result; + result.reserve(num_nops * 4u + 8u); + for (size_t i = 0; i != num_nops; ++i) { + result.insert(result.end(), kNopCode.begin(), kNopCode.end()); + } + DCHECK_EQ(method_offset & 3u, 0u); + DCHECK_EQ(target_offset & 3u, 0u); + uint32_t adrp_offset = method_offset + num_nops * 4u; + uint32_t disp = target_offset - (adrp_offset & ~0xfffu); + DCHECK_EQ(disp & 3u, 0u); + uint32_t ldr = 0xb9400001 | // LDR w1, [x0, #(imm12 * 2)] + ((disp & 0xfffu) << (10 - 2)); // imm12 = ((disp & 0xfffu) >> 2) is at bit 10. + uint32_t adrp = 0x90000000 | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64) + ((disp & 0x3000u) << (29 - 12)) | // immlo = ((disp & 0x3000u) >> 12) is at bit 29, + ((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5, + // We take the sign bit from the disp, limiting disp to +- 2GiB. + ((disp & 0x80000000) >> (31 - 23)); // sign bit in immhi is at bit 23. 
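The adrp value assembled just above, like PatchAdrp() earlier, follows the A64 ADRP layout: immlo sits in bits 29-30, immhi in bits 5-23, and the displacement is treated as a signed 32-bit page offset (±2GiB). A small illustrative helper, not part of the patch, that performs the same packing:

    #include <cstdint>

    // Pack a page displacement (target page - PC page) into ADRP <Xrd>, taking the
    // sign from bit 31 of disp, which limits the reach to +/-2GiB as noted above.
    uint32_t MakeAdrp(uint32_t rd, uint32_t disp) {
      return 0x90000000u | (rd & 0x1fu)           // ADRP opcode bits + destination register.
          | ((disp & 0x00003000u) << (29 - 12))   // disp bits 12-13 -> immlo (bits 29-30).
          | ((disp & 0xffffc000u) >> (14 - 5))    // disp bits 14-31 -> bits 5-22 of immhi.
          | ((disp & 0x80000000u) >> (31 - 23));  // Sign bit replicated into bit 23.
    }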
+ result.push_back(static_cast<uint8_t>(adrp)); + result.push_back(static_cast<uint8_t>(adrp >> 8)); + result.push_back(static_cast<uint8_t>(adrp >> 16)); + result.push_back(static_cast<uint8_t>(adrp >> 24)); + result.push_back(static_cast<uint8_t>(ldr)); + result.push_back(static_cast<uint8_t>(ldr >> 8)); + result.push_back(static_cast<uint8_t>(ldr >> 16)); + result.push_back(static_cast<uint8_t>(ldr >> 24)); + return result; + } + + void TestNopsAdrpLdr(size_t num_nops, uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + dex_cache_arrays_begin_ = dex_cache_arrays_begin; + auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset), + LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, element_offset), + }; + AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code), + ArrayRef<const LinkerPatch>(patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t target_offset = dex_cache_arrays_begin_ + element_offset; + auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + } + + void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { + CHECK_LE(pos, code->size()); + const uint8_t insn_code[] = { + static_cast<uint8_t>(insn), static_cast<uint8_t>(insn >> 8), + static_cast<uint8_t>(insn >> 16), static_cast<uint8_t>(insn >> 24), + }; + static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); + code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); + } + + void PrepareNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + dex_cache_arrays_begin_ = dex_cache_arrays_begin; + auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. 
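Each dex cache array reference is patched in two places, as the paired DexCacheArrayPatch entries above show: the ADRP receives the page displacement and the dependent LDR receives the low 12 bits. A toy restatement of the LDR half, mirroring the imm12 update in PatchDexCacheReference(); the function name is made up for this sketch:

    #include <cstdint>

    // Plug the low 12 bits of the displacement into a 32-bit "LDR <Wt>, [<Xn>, #imm]"
    // whose offset field is still zero; the byte offset is scaled by the 4-byte access size.
    uint32_t PatchLdrImm12(uint32_t ldr, uint32_t disp) {
      uint32_t imm12 = (disp & 0xfffu) >> 2;           // Page offset in 4-byte units.
      return (ldr & ~(0xfffu << 10)) | (imm12 << 10);  // imm12 lives in bits 10-21.
    }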
+ InsertInsn(&code, num_nops * 4u + 4u, insn2); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset), + LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, element_offset), + }; + AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code), + ArrayRef<const LinkerPatch>(patches)); + Link(); + } + + void TestNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t target_offset = dex_cache_arrays_begin_ + element_offset; + auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); + InsertInsn(&expected_code, num_nops * 4u + 4u, insn2); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + } + + void TestNopsAdrpInsn2LdrHasThunk(size_t num_nops, uint32_t insn2, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset); + + uint32_t method1_offset = GetMethodOffset(1u); + CHECK(!compiled_method_refs_.empty()); + CHECK_EQ(compiled_method_refs_[0].dex_method_index, 1u); + CHECK_EQ(compiled_method_refs_.size(), compiled_methods_.size()); + uint32_t method1_size = compiled_methods_[0]->GetQuickCode()->size(); + uint32_t thunk_offset = CompiledCode::AlignCode(method1_offset + method1_size, kArm64); + uint32_t b_diff = thunk_offset - (method1_offset + num_nops * 4u); + ASSERT_EQ(b_diff & 3u, 0u); + ASSERT_LT(b_diff, 128 * MB); + uint32_t b_out = kBPlus0 + ((b_diff >> 2) & 0x03ffffffu); + uint32_t b_in = kBPlus0 + ((-b_diff >> 2) & 0x03ffffffu); + + uint32_t target_offset = dex_cache_arrays_begin_ + element_offset; + auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); + InsertInsn(&expected_code, num_nops * 4u + 4u, insn2); + // Replace adrp with bl. 
+ expected_code.erase(expected_code.begin() + num_nops * 4u, + expected_code.begin() + num_nops * 4u + 4u); + InsertInsn(&expected_code, num_nops * 4u, b_out); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + + auto expected_thunk_code = GenNopsAndAdrpLdr(0u, thunk_offset, target_offset); + ASSERT_EQ(expected_thunk_code.size(), 8u); + expected_thunk_code.erase(expected_thunk_code.begin() + 4u, expected_thunk_code.begin() + 8u); + InsertInsn(&expected_thunk_code, 4u, b_in); + ASSERT_EQ(expected_thunk_code.size(), 8u); + + uint32_t thunk_size = ThunkSize(); + ASSERT_EQ(thunk_offset + thunk_size, output_.size()); + ASSERT_EQ(thunk_size, expected_thunk_code.size()); + ArrayRef<const uint8_t> thunk_code(&output_[thunk_offset], thunk_size); + if (ArrayRef<const uint8_t>(expected_thunk_code) != thunk_code) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk_code), thunk_code); + FAIL(); + } + } + + void TestAdrpInsn2Ldr(uint32_t insn2, uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + uint32_t method1_offset = + CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader); + ASSERT_LT(method1_offset, adrp_offset); + ASSERT_EQ(adrp_offset & 3u, 0u); + uint32_t num_nops = (adrp_offset - method1_offset) / 4u; + if (has_thunk) { + TestNopsAdrpInsn2LdrHasThunk(num_nops, insn2, dex_cache_arrays_begin, element_offset); + } else { + TestNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset); + } + ASSERT_EQ(method1_offset, GetMethodOffset(1u)); // If this fails, num_nops is wrong. + } + + void TestAdrpLdurLdr(uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset); + } + + void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn, int32_t pcrel_disp, + uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + ASSERT_LT(pcrel_disp, 0x100000); + ASSERT_GE(pcrel_disp, -0x100000); + ASSERT_EQ(pcrel_disp & 0x3, 0); + uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5); + TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset); + } + + void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn, uint32_t sprel_disp_in_load_units, + uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + ASSERT_LT(sprel_disp_in_load_units, 0x1000u); + uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10); + TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset); + } +}; + +const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = { + 0x00, 0x00, 0x00, 0x94 +}; + +const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kCallCode(kCallRawCode); + +const uint8_t Arm64RelativePatcherTest::kNopRawCode[] = { + 0x1f, 0x20, 0x03, 0xd5 +}; + +const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kNopCode(kNopRawCode); + +class Arm64RelativePatcherTestDefault : public Arm64RelativePatcherTest { + public: + Arm64RelativePatcherTestDefault() : Arm64RelativePatcherTest("default") { } +}; + +class Arm64RelativePatcherTestDenver64 : public Arm64RelativePatcherTest { + public: + Arm64RelativePatcherTestDenver64() : Arm64RelativePatcherTest("denver64") { } +}; + +TEST_F(Arm64RelativePatcherTestDefault, CallSelf) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(0u, 
nullptr, 1u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + static const uint8_t expected_code[] = { + 0x00, 0x00, 0x00, 0x94 + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOther) { + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); + LinkerPatch method2_patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method2_offset = GetMethodOffset(2u); + uint32_t diff_after = method2_offset - method1_offset; + ASSERT_EQ(diff_after & 3u, 0u); + ASSERT_LT(diff_after >> 2, 1u << 8); // Simple encoding, (diff_after >> 2) fits into 8 bits. + static const uint8_t method1_expected_code[] = { + static_cast<uint8_t>(diff_after >> 2), 0x00, 0x00, 0x94 + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); + uint32_t diff_before = method1_offset - method2_offset; + ASSERT_EQ(diff_before & 3u, 0u); + ASSERT_GE(diff_before, -1u << 27); + auto method2_expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff_before >> 2) & 0x03ffffffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallTrampoline) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t diff = kTrampolineOffset - method1_offset; + ASSERT_EQ(diff & 1u, 0u); + ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned). + auto expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff >> 2) & 0x03ffffffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) { + auto method1_raw_code = GenNopsAndBl(1u, kBlPlus0); + constexpr uint32_t bl_offset_in_method1 = 1u * 4u; // After NOPs. + ArrayRef<const uint8_t> method1_code(method1_raw_code); + ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); + uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), + }; + + constexpr uint32_t max_positive_disp = 128 * MB - 4u; + uint32_t last_method_idx = Create2MethodsWithGap(method1_code, method1_patches, + kNopCode, ArrayRef<const LinkerPatch>(), + bl_offset_in_method1 + max_positive_disp); + ASSERT_EQ(expected_last_method_idx, last_method_idx); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(last_method_idx); + ASSERT_EQ(method1_offset + bl_offset_in_method1 + max_positive_disp, last_method_offset); + + // Check linked code. 
+ auto expected_code = GenNopsAndBl(1u, kBlPlusMax); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarBefore) { + auto last_method_raw_code = GenNopsAndBl(0u, kBlPlus0); + constexpr uint32_t bl_offset_in_last_method = 0u * 4u; // After NOPs. + ArrayRef<const uint8_t> last_method_code(last_method_raw_code); + ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); + LinkerPatch last_method_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), + }; + + constexpr uint32_t max_negative_disp = 128 * MB; + uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(), + last_method_code, last_method_patches, + max_negative_disp - bl_offset_in_last_method); + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(last_method_idx); + ASSERT_EQ(method1_offset, last_method_offset + bl_offset_in_last_method - max_negative_disp); + + // Check linked code. + auto expected_code = GenNopsAndBl(0u, kBlMinusMax); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), + ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarAfter) { + auto method1_raw_code = GenNopsAndBl(0u, kBlPlus0); + constexpr uint32_t bl_offset_in_method1 = 0u * 4u; // After NOPs. + ArrayRef<const uint8_t> method1_code(method1_raw_code); + ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); + uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), + }; + + constexpr uint32_t just_over_max_positive_disp = 128 * MB; + uint32_t last_method_idx = Create2MethodsWithGap( + method1_code, method1_patches, kNopCode, ArrayRef<const LinkerPatch>(), + bl_offset_in_method1 + just_over_max_positive_disp); + ASSERT_EQ(expected_last_method_idx, last_method_idx); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(last_method_idx); + uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader); + ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_header_offset)); + uint32_t thunk_offset = last_method_header_offset - CompiledCode::AlignCode(ThunkSize(), kArm64); + ASSERT_TRUE(IsAligned<kArm64Alignment>(thunk_offset)); + uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1); + ASSERT_EQ(diff & 3u, 0u); + ASSERT_LT(diff, 128 * MB); + auto expected_code = GenNopsAndBl(0u, kBlPlus0 | (diff >> 2)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + CheckThunk(thunk_offset); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarBefore) { + auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0); + constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs. 
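The AlmostTooFar/JustTooFar cases above (and the one continuing below) pin down the exact BL reach the patcher assumes: kMaxPositiveDisplacement = (1 << 27) - 4 and kMaxNegativeDisplacement = 1 << 27 from relative_patcher_arm64.h. A condensed sketch of the resulting range check, a simplification that ignores unresolved targets and thunk bookkeeping:

    #include <cstdint>

    // Can a direct BL at 'patch_offset' reach 'target_offset', or is a thunk needed?
    bool NeedsCallThunk(uint32_t patch_offset, uint32_t target_offset) {
      constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u;  // 128 MiB - 4.
      constexpr uint32_t kMaxNegativeDisplacement = (1u << 27);       // 128 MiB.
      if (target_offset >= patch_offset) {
        return target_offset - patch_offset > kMaxPositiveDisplacement;
      }
      return patch_offset - target_offset > kMaxNegativeDisplacement;
    }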
+ ArrayRef<const uint8_t> last_method_code(last_method_raw_code); + ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); + LinkerPatch last_method_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), + }; + + constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4; + uint32_t last_method_idx = Create2MethodsWithGap( + kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, last_method_patches, + just_over_max_negative_disp - bl_offset_in_last_method); + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(last_method_idx); + ASSERT_EQ(method1_offset, + last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp); + + // Check linked code. + uint32_t thunk_offset = + CompiledCode::AlignCode(last_method_offset + last_method_code.size(), kArm64); + uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method); + ASSERT_EQ(diff & 3u, 0u); + ASSERT_LT(diff, 128 * MB); + auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), + ArrayRef<const uint8_t>(expected_code))); + EXPECT_TRUE(CheckThunk(thunk_offset)); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference1) { + TestNopsAdrpLdr(0u, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference2) { + TestNopsAdrpLdr(0u, -0x12345678u, 0x4444u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference3) { + TestNopsAdrpLdr(0u, 0x12345000u, 0x3ffcu); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference4) { + TestNopsAdrpLdr(0u, 0x12345000u, 0x4000u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff4) { + TestAdrpLdurLdr(0xff4u, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff8) { + TestAdrpLdurLdr(0xff8u, true, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xffc) { + TestAdrpLdurLdr(0xffcu, true, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0x1000) { + TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xff4) { + TestAdrpLdurLdr(0xff4u, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xff8) { + TestAdrpLdurLdr(0xff8u, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xffc) { + TestAdrpLdurLdr(0xffcu, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0x1000) { + TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u); +} + +#define TEST_FOR_OFFSETS(test, disp1, disp2) \ + test(0xff4u, disp1) test(0xff8u, disp1) test(0xffcu, disp1) test(0x1000u, disp1) \ + test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2) + +// LDR <Wt>, <label> is always aligned. We should never have to use a fixup. +#define LDRW_PCREL_TEST(adrp_offset, disp) \ + TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WPcRel ## disp) { \ + TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \ + } + +TEST_FOR_OFFSETS(LDRW_PCREL_TEST, 0x1234, 0x1238) + +// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8. 
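The LDRX_PCREL_TEST macro that follows encodes exactly that condition. Spelled out as a plain function, the thunk trigger is roughly the following; this is a condensed restatement of what NeedsErratum843419Thunk() and the macro below check, written only for illustration:

    #include <cstdint>

    // The Cortex-A53 erratum 843419 thunk is only expected when the ADRP sits in the
    // last two words of a 4KiB page and the following LDR <Xt>, <label> is not
    // 8-byte aligned.
    bool ExpectErratumThunk(uint32_t adrp_offset, int32_t ldr_disp) {
      bool adrp_at_page_end = (adrp_offset & 0xff8u) == 0xff8u;  // ...ff8 or ...ffc.
      bool unaligned_load =
          ((adrp_offset + 4u + static_cast<uint32_t>(ldr_disp)) & 7u) != 0u;
      return adrp_at_page_end && unaligned_load;
    }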
+#define LDRX_PCREL_TEST(adrp_offset, disp) \ + TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XPcRel ## disp) { \ + bool unaligned = ((adrp_offset + 4u + static_cast<uint32_t>(disp)) & 7u) != 0; \ + bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu) && unaligned; \ + TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \ + } + +TEST_FOR_OFFSETS(LDRX_PCREL_TEST, 0x1234, 0x1238) + +// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed. +#define LDRW_SPREL_TEST(adrp_offset, disp) \ + TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WSpRel ## disp) { \ + TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, disp >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \ + } + +TEST_FOR_OFFSETS(LDRW_SPREL_TEST, 0, 4) + +#define LDRX_SPREL_TEST(adrp_offset, disp) \ + TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XSpRel ## disp) { \ + TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, disp >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \ + } + +TEST_FOR_OFFSETS(LDRX_SPREL_TEST, 0, 8) + +} // namespace linker +} // namespace art diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc new file mode 100644 index 0000000000..89aed956aa --- /dev/null +++ b/compiler/linker/relative_patcher.cc @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher.h" + +#include "linker/arm/relative_patcher_thumb2.h" +#include "linker/arm64/relative_patcher_arm64.h" +#include "linker/x86/relative_patcher_x86.h" +#include "linker/x86_64/relative_patcher_x86_64.h" +#include "output_stream.h" + +namespace art { +namespace linker { + +std::unique_ptr<RelativePatcher> RelativePatcher::Create( + InstructionSet instruction_set, const InstructionSetFeatures* features, + RelativePatcherTargetProvider* provider) { + class RelativePatcherNone FINAL : public RelativePatcher { + public: + RelativePatcherNone() { } + + uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, + MethodReference method_ref ATTRIBUTE_UNUSED) OVERRIDE { + return offset; // No space reserved; no patches expected. + } + + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE { + return offset; // No space reserved; no patches expected. + } + + uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE { + return offset; // No thunks added; no patches expected. 
+ } + + void PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + uint32_t literal_offset ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED, + uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE { + LOG(FATAL) << "Unexpected relative call patch."; + } + + virtual void PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED, + uint32_t target_offset ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unexpected relative dex cache array patch."; + } + + private: + DISALLOW_COPY_AND_ASSIGN(RelativePatcherNone); + }; + + switch (instruction_set) { + case kX86: + return std::unique_ptr<RelativePatcher>(new X86RelativePatcher()); + case kX86_64: + return std::unique_ptr<RelativePatcher>(new X86_64RelativePatcher()); + case kArm: + // Fall through: we generate Thumb2 code for "arm". + case kThumb2: + return std::unique_ptr<RelativePatcher>(new Thumb2RelativePatcher(provider)); + case kArm64: + return std::unique_ptr<RelativePatcher>( + new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures())); + default: + return std::unique_ptr<RelativePatcher>(new RelativePatcherNone); + } +} + +bool RelativePatcher::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) { + static const uint8_t kPadding[] = { + 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u + }; + DCHECK_LE(aligned_code_delta, sizeof(kPadding)); + if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) { + return false; + } + size_code_alignment_ += aligned_code_delta; + return true; +} + +bool RelativePatcher::WriteRelCallThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { + if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { + return false; + } + size_relative_call_thunks_ += thunk.size(); + return true; +} + +bool RelativePatcher::WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { + if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { + return false; + } + size_misc_thunks_ += thunk.size(); + return true; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h new file mode 100644 index 0000000000..8a9f3f8364 --- /dev/null +++ b/compiler/linker/relative_patcher.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ +#define ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ + +#include <vector> + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "base/macros.h" +#include "method_reference.h" +#include "utils/array_ref.h" + +namespace art { + +class CompiledMethod; +class LinkerPatch; +class OutputStream; + +namespace linker { + +/** + * @class RelativePatcherTargetProvider + * @brief Interface for providing method offsets for relative call targets. 
+ */ +class RelativePatcherTargetProvider { + public: + /** + * Find the offset of the target method of a relative call if known. + * + * The process of assigning target method offsets includes calls to the relative patcher's + * ReserveSpace() which in turn can use FindMethodOffset() to determine if a method already + * has an offset assigned and, if so, what's that offset. If the offset has not yet been + * assigned or if it's too far for the particular architecture's relative call, + * ReserveSpace() may need to allocate space for a special dispatch thunk. + * + * @param ref the target method of the relative call. + * @return true in the first element of the pair if the method was found, false otherwise; + * if found, the second element specifies the offset. + */ + virtual std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) = 0; + + protected: + virtual ~RelativePatcherTargetProvider() { } +}; + +/** + * @class RelativePatcher + * @brief Interface for architecture-specific link-time patching of PC-relative references. + */ +class RelativePatcher { + public: + static std::unique_ptr<RelativePatcher> Create( + InstructionSet instruction_set, const InstructionSetFeatures* features, + RelativePatcherTargetProvider* provider); + + virtual ~RelativePatcher() { } + + uint32_t CodeAlignmentSize() const { + return size_code_alignment_; + } + + uint32_t RelativeCallThunksSize() const { + return size_relative_call_thunks_; + } + + uint32_t MiscThunksSize() const { + return size_misc_thunks_; + } + + // Reserve space for thunks if needed before a method, return adjusted offset. + virtual uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, + MethodReference method_ref) = 0; + + // Reserve space for thunks if needed after the last method, return adjusted offset. + virtual uint32_t ReserveSpaceEnd(uint32_t offset) = 0; + + // Write relative call thunks if needed, return adjusted offset. + virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0; + + // Patch method code. The input displacement is relative to the patched location, + // the patcher may need to adjust it if the correct base is different. + virtual void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) = 0; + + // Patch a reference to a dex cache location. 
+ virtual void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) = 0; + + protected: + RelativePatcher() + : size_code_alignment_(0u), + size_relative_call_thunks_(0u), + size_misc_thunks_(0u) { + } + + bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta); + bool WriteRelCallThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); + bool WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); + + private: + uint32_t size_code_alignment_; + uint32_t size_relative_call_thunks_; + uint32_t size_misc_thunks_; + + DISALLOW_COPY_AND_ASSIGN(RelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h new file mode 100644 index 0000000000..70630f366f --- /dev/null +++ b/compiler/linker/relative_patcher_test.h @@ -0,0 +1,255 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ +#define ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "base/macros.h" +#include "compiled_method.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" +#include "globals.h" +#include "gtest/gtest.h" +#include "linker/relative_patcher.h" +#include "method_reference.h" +#include "oat.h" +#include "utils/array_ref.h" +#include "vector_output_stream.h" + +namespace art { +namespace linker { + +// Base class providing infrastructure for architecture-specific tests. 
+class RelativePatcherTest : public testing::Test { + protected: + RelativePatcherTest(InstructionSet instruction_set, const std::string& variant) + : compiler_options_(), + verification_results_(&compiler_options_), + inliner_map_(), + driver_(&compiler_options_, &verification_results_, &inliner_map_, + Compiler::kQuick, instruction_set, nullptr, + false, nullptr, nullptr, 1u, + false, false, "", nullptr, -1, ""), + error_msg_(), + instruction_set_(instruction_set), + features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)), + method_offset_map_(), + patcher_(RelativePatcher::Create(instruction_set, features_.get(), &method_offset_map_)), + dex_cache_arrays_begin_(0u), + compiled_method_refs_(), + compiled_methods_(), + patched_code_(), + output_(), + out_("test output stream", &output_) { + CHECK(error_msg_.empty()) << instruction_set << "/" << variant; + patched_code_.reserve(16 * KB); + } + + MethodReference MethodRef(uint32_t method_idx) { + CHECK_NE(method_idx, 0u); + return MethodReference(nullptr, method_idx); + } + + void AddCompiledMethod(MethodReference method_ref, + const ArrayRef<const uint8_t>& code, + const ArrayRef<const LinkerPatch>& patches) { + compiled_method_refs_.push_back(method_ref); + compiled_methods_.emplace_back(new CompiledMethod( + &driver_, instruction_set_, code, + 0u, 0u, 0u, nullptr, ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), + ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), + patches)); + } + + void Link() { + // Reserve space. + static_assert(kTrampolineOffset == 0u, "Unexpected trampoline offset."); + uint32_t offset = kTrampolineSize; + size_t idx = 0u; + for (auto& compiled_method : compiled_methods_) { + offset = patcher_->ReserveSpace(offset, compiled_method.get(), compiled_method_refs_[idx]); + + uint32_t aligned_offset = compiled_method->AlignCode(offset); + uint32_t aligned_code_delta = aligned_offset - offset; + offset += aligned_code_delta; + + offset += sizeof(OatQuickMethodHeader); + uint32_t quick_code_offset = offset + compiled_method->CodeDelta(); + const auto& code = *compiled_method->GetQuickCode(); + offset += code.size(); + + method_offset_map_.map.Put(compiled_method_refs_[idx], quick_code_offset); + ++idx; + } + offset = patcher_->ReserveSpaceEnd(offset); + uint32_t output_size = offset; + output_.reserve(output_size); + + // Write data. 
+ DCHECK(output_.empty()); + uint8_t dummy_trampoline[kTrampolineSize]; + memset(dummy_trampoline, 0, sizeof(dummy_trampoline)); + out_.WriteFully(dummy_trampoline, kTrampolineSize); + offset = kTrampolineSize; + static const uint8_t kPadding[] = { + 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u + }; + uint8_t dummy_header[sizeof(OatQuickMethodHeader)]; + memset(dummy_header, 0, sizeof(dummy_header)); + for (auto& compiled_method : compiled_methods_) { + offset = patcher_->WriteThunks(&out_, offset); + + uint32_t aligned_offset = compiled_method->AlignCode(offset); + uint32_t aligned_code_delta = aligned_offset - offset; + CHECK_LE(aligned_code_delta, sizeof(kPadding)); + out_.WriteFully(kPadding, aligned_code_delta); + offset += aligned_code_delta; + + out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader)); + offset += sizeof(OatQuickMethodHeader); + ArrayRef<const uint8_t> code(*compiled_method->GetQuickCode()); + if (!compiled_method->GetPatches().empty()) { + patched_code_.assign(code.begin(), code.end()); + code = ArrayRef<const uint8_t>(patched_code_); + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchCallRelative) { + auto result = method_offset_map_.FindMethodOffset(patch.TargetMethod()); + uint32_t target_offset = + result.first ? result.second : kTrampolineOffset + compiled_method->CodeDelta(); + patcher_->PatchCall(&patched_code_, patch.LiteralOffset(), + offset + patch.LiteralOffset(), target_offset); + } else if (patch.Type() == kLinkerPatchDexCacheArray) { + uint32_t target_offset = dex_cache_arrays_begin_ + patch.TargetDexCacheElementOffset(); + patcher_->PatchDexCacheReference(&patched_code_, patch, + offset + patch.LiteralOffset(), target_offset); + } else { + LOG(FATAL) << "Bad patch type."; + } + } + } + out_.WriteFully(&code[0], code.size()); + offset += code.size(); + } + offset = patcher_->WriteThunks(&out_, offset); + CHECK_EQ(offset, output_size); + CHECK_EQ(output_.size(), output_size); + } + + bool CheckLinkedMethod(MethodReference method_ref, const ArrayRef<const uint8_t>& expected_code) { + // Sanity check: original code size must match linked_code.size(). + size_t idx = 0u; + for (auto ref : compiled_method_refs_) { + if (ref.dex_file == method_ref.dex_file && + ref.dex_method_index == method_ref.dex_method_index) { + break; + } + ++idx; + } + CHECK_NE(idx, compiled_method_refs_.size()); + CHECK_EQ(compiled_methods_[idx]->GetQuickCode()->size(), expected_code.size()); + + auto result = method_offset_map_.FindMethodOffset(method_ref); + CHECK(result.first); // Must have been linked. + size_t offset = result.second - compiled_methods_[idx]->CodeDelta(); + CHECK_LT(offset, output_.size()); + CHECK_LE(offset + expected_code.size(), output_.size()); + ArrayRef<const uint8_t> linked_code(&output_[offset], expected_code.size()); + if (linked_code == expected_code) { + return true; + } + // Log failure info. 
+ DumpDiff(expected_code, linked_code);
+ return false;
+ }
+
+ void DumpDiff(const ArrayRef<const uint8_t>& expected_code,
+ const ArrayRef<const uint8_t>& linked_code) {
+ std::ostringstream expected_hex;
+ std::ostringstream linked_hex;
+ std::ostringstream diff_indicator;
+ static const char digits[] = "0123456789abcdef";
+ bool found_diff = false;
+ for (size_t i = 0; i != expected_code.size(); ++i) {
+ expected_hex << " " << digits[expected_code[i] >> 4] << digits[expected_code[i] & 0xf];
+ linked_hex << " " << digits[linked_code[i] >> 4] << digits[linked_code[i] & 0xf];
+ if (!found_diff) {
+ found_diff = (expected_code[i] != linked_code[i]);
+ diff_indicator << (found_diff ? " ^^" : " ");
+ }
+ }
+ CHECK(found_diff);
+ std::string expected_hex_str = expected_hex.str();
+ std::string linked_hex_str = linked_hex.str();
+ std::string diff_indicator_str = diff_indicator.str();
+ if (diff_indicator_str.length() > 60) {
+ CHECK_EQ(diff_indicator_str.length() % 3u, 0u);
+ size_t remove = diff_indicator_str.length() / 3 - 5;
+ std::ostringstream oss;
+ oss << "[stripped " << remove << "]";
+ std::string replacement = oss.str();
+ expected_hex_str.replace(0u, remove * 3u, replacement);
+ linked_hex_str.replace(0u, remove * 3u, replacement);
+ diff_indicator_str.replace(0u, remove * 3u, replacement);
+ }
+ LOG(ERROR) << "diff expected_code linked_code";
+ LOG(ERROR) << "<" << expected_hex_str;
+ LOG(ERROR) << ">" << linked_hex_str;
+ LOG(ERROR) << " " << diff_indicator_str;
+ }
+
+ // Map method reference to assigned offset.
+ // Wrap the map in a class implementing linker::RelativePatcherTargetProvider.
+ class MethodOffsetMap FINAL : public linker::RelativePatcherTargetProvider {
+ public:
+ std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE {
+ auto it = map.find(ref);
+ if (it == map.end()) {
+ return std::pair<bool, uint32_t>(false, 0u);
+ } else {
+ return std::pair<bool, uint32_t>(true, it->second);
+ }
+ }
+ SafeMap<MethodReference, uint32_t, MethodReferenceComparator> map;
+ };
+
+ static const uint32_t kTrampolineSize = 4u;
+ static const uint32_t kTrampolineOffset = 0u;
+
+ CompilerOptions compiler_options_;
+ VerificationResults verification_results_;
+ DexFileToMethodInlinerMap inliner_map_;
+ CompilerDriver driver_; // Needed for constructing CompiledMethod.
+ std::string error_msg_;
+ InstructionSet instruction_set_;
+ std::unique_ptr<const InstructionSetFeatures> features_;
+ MethodOffsetMap method_offset_map_;
+ std::unique_ptr<RelativePatcher> patcher_;
+ uint32_t dex_cache_arrays_begin_;
+ std::vector<MethodReference> compiled_method_refs_;
+ std::vector<std::unique_ptr<CompiledMethod>> compiled_methods_;
+ std::vector<uint8_t> patched_code_;
+ std::vector<uint8_t> output_;
+ VectorOutputStream out_;
+};
+
+} // namespace linker
+} // namespace art
+
+#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_
diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc
new file mode 100644
index 0000000000..315585d9e7
--- /dev/null
+++ b/compiler/linker/x86/relative_patcher_x86.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/x86/relative_patcher_x86.h" + +#include "compiled_method.h" + +namespace art { +namespace linker { + +void X86RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + uint32_t anchor_literal_offset = patch.PcInsnOffset(); + uint32_t literal_offset = patch.LiteralOffset(); + + // Check that the anchor points to pop in a "call +0; pop <reg>" sequence. + DCHECK_GE(anchor_literal_offset, 5u); + DCHECK_LT(anchor_literal_offset, code->size()); + DCHECK_EQ((*code)[anchor_literal_offset - 5u], 0xe8u); + DCHECK_EQ((*code)[anchor_literal_offset - 4u], 0x00u); + DCHECK_EQ((*code)[anchor_literal_offset - 3u], 0x00u); + DCHECK_EQ((*code)[anchor_literal_offset - 2u], 0x00u); + DCHECK_EQ((*code)[anchor_literal_offset - 1u], 0x00u); + DCHECK_EQ((*code)[anchor_literal_offset] & 0xf8u, 0x58u); + + // Check that the patched data contains kDummy32BitOffset. + constexpr int kDummy32BitOffset = 256; // Must match X86Mir2Lir::kDummy32BitOffset. + DCHECK_LE(literal_offset, code->size()); + DCHECK_EQ((*code)[literal_offset + 0u], static_cast<uint8_t>(kDummy32BitOffset >> 0)); + DCHECK_EQ((*code)[literal_offset + 1u], static_cast<uint8_t>(kDummy32BitOffset >> 8)); + DCHECK_EQ((*code)[literal_offset + 2u], static_cast<uint8_t>(kDummy32BitOffset >> 16)); + DCHECK_EQ((*code)[literal_offset + 3u], static_cast<uint8_t>(kDummy32BitOffset >> 24)); + + // Apply patch. + uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; + uint32_t diff = target_offset - anchor_offset; + (*code)[literal_offset + 0u] = static_cast<uint8_t>(diff >> 0); + (*code)[literal_offset + 1u] = static_cast<uint8_t>(diff >> 8); + (*code)[literal_offset + 2u] = static_cast<uint8_t>(diff >> 16); + (*code)[literal_offset + 3u] = static_cast<uint8_t>(diff >> 24); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86.h b/compiler/linker/x86/relative_patcher_x86.h new file mode 100644 index 0000000000..0c881f00ba --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ +#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ + +#include "linker/x86/relative_patcher_x86_base.h" + +namespace art { +namespace linker { + +class X86RelativePatcher FINAL : public X86BaseRelativePatcher { + public: + X86RelativePatcher() { } + + void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ diff --git a/compiler/linker/x86/relative_patcher_x86_base.cc b/compiler/linker/x86/relative_patcher_x86_base.cc new file mode 100644 index 0000000000..bc285a7849 --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86_base.cc @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/x86/relative_patcher_x86_base.h" + +namespace art { +namespace linker { + +uint32_t X86BaseRelativePatcher::ReserveSpace( + uint32_t offset, + const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, + MethodReference method_ref ATTRIBUTE_UNUSED) { + return offset; // No space reserved; no limit on relative call distance. +} + +uint32_t X86BaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { + return offset; // No space reserved; no limit on relative call distance. +} + +uint32_t X86BaseRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { + return offset; // No thunks added; no limit on relative call distance. +} + +void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) { + DCHECK_LE(literal_offset + 4u, code->size()); + // Unsigned arithmetic with its well-defined overflow behavior is just fine here. + uint32_t displacement = target_offset - patch_offset; + displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. + + typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; + reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86_base.h b/compiler/linker/x86/relative_patcher_x86_base.h new file mode 100644 index 0000000000..9200709398 --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86_base.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ +#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ + +#include "linker/relative_patcher.h" + +namespace art { +namespace linker { + +class X86BaseRelativePatcher : public RelativePatcher { + public: + uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref) OVERRIDE; + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; + void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + + protected: + X86BaseRelativePatcher() { } + + // PC displacement from patch location; the base address of x86/x86-64 relative + // calls and x86-64 RIP-relative addressing is the PC of the next instruction and + // the patch location is 4 bytes earlier. + static constexpr int32_t kPcDisplacement = 4; + + private: + DISALLOW_COPY_AND_ASSIGN(X86BaseRelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc new file mode 100644 index 0000000000..7acc33004a --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86_test.cc @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "linker/relative_patcher_test.h" +#include "linker/x86/relative_patcher_x86.h" + +namespace art { +namespace linker { + +class X86RelativePatcherTest : public RelativePatcherTest { + public: + X86RelativePatcherTest() : RelativePatcherTest(kX86, "default") { } + + protected: + static const uint8_t kCallRawCode[]; + static const ArrayRef<const uint8_t> kCallCode; + + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + return result.second; + } +}; + +const uint8_t X86RelativePatcherTest::kCallRawCode[] = { + 0xe8, 0x00, 0x01, 0x00, 0x00 +}; + +const ArrayRef<const uint8_t> X86RelativePatcherTest::kCallCode(kCallRawCode); + +TEST_F(X86RelativePatcherTest, CallSelf) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + static const uint8_t expected_code[] = { + 0xe8, 0xfb, 0xff, 0xff, 0xff + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(X86RelativePatcherTest, CallOther) { + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); + LinkerPatch method2_patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method2_offset = GetMethodOffset(2u); + uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */); + static const uint8_t method1_expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff_after), static_cast<uint8_t>(diff_after >> 8), + static_cast<uint8_t>(diff_after >> 16), static_cast<uint8_t>(diff_after >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); + uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */); + static const uint8_t method2_expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff_before), static_cast<uint8_t>(diff_before >> 8), + static_cast<uint8_t>(diff_before >> 16), static_cast<uint8_t>(diff_before >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); +} + +TEST_F(X86RelativePatcherTest, CallTrampoline) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1)); + ASSERT_TRUE(result.first); + uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size()); + static const uint8_t expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(X86RelativePatcherTest, DexCacheReference) { + dex_cache_arrays_begin_ = 0x12345678; + constexpr size_t kElementOffset = 0x1234; + static const uint8_t raw_code[] = { + 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 + 0x5b, // pop ebx + 0x8b, 0x83, 0x00, 0x01, 0x00, 0x00, // mov eax, 
[ebx + 256 (kDummy32BitValue)] + }; + constexpr uint32_t anchor_offset = 5u; // After call +0. + ArrayRef<const uint8_t> code(raw_code); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(code.size() - 4u, nullptr, anchor_offset, kElementOffset), + }; + AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = + dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset); + static const uint8_t expected_code[] = { + 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 + 0x5b, // pop ebx + 0x8b, 0x83, // mov eax, [ebx + diff] + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc new file mode 100644 index 0000000000..598f3ac4a8 --- /dev/null +++ b/compiler/linker/x86_64/relative_patcher_x86_64.cc @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/x86_64/relative_patcher_x86_64.h" + +#include "compiled_method.h" + +namespace art { +namespace linker { + +void X86_64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) { + DCHECK_LE(patch.LiteralOffset() + 4u, code->size()); + // Unsigned arithmetic with its well-defined overflow behavior is just fine here. + uint32_t displacement = target_offset - patch_offset; + displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. + + typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; + reinterpret_cast<unaligned_int32_t*>(&(*code)[patch.LiteralOffset()])[0] = displacement; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.h b/compiler/linker/x86_64/relative_patcher_x86_64.h new file mode 100644 index 0000000000..af687b4a2f --- /dev/null +++ b/compiler/linker/x86_64/relative_patcher_x86_64.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ +#define ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ + +#include "linker/x86/relative_patcher_x86_base.h" + +namespace art { +namespace linker { + +class X86_64RelativePatcher FINAL : public X86BaseRelativePatcher { + public: + X86_64RelativePatcher() { } + + void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc new file mode 100644 index 0000000000..36e0f01a50 --- /dev/null +++ b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher_test.h" +#include "linker/x86_64/relative_patcher_x86_64.h" + +namespace art { +namespace linker { + +class X86_64RelativePatcherTest : public RelativePatcherTest { + public: + X86_64RelativePatcherTest() : RelativePatcherTest(kX86_64, "default") { } + + protected: + static const uint8_t kCallRawCode[]; + static const ArrayRef<const uint8_t> kCallCode; + static const uint8_t kDexCacheLoadRawCode[]; + static const ArrayRef<const uint8_t> kDexCacheLoadCode; + + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + return result.second; + } +}; + +const uint8_t X86_64RelativePatcherTest::kCallRawCode[] = { + 0xe8, 0x00, 0x01, 0x00, 0x00 +}; + +const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kCallCode(kCallRawCode); + +const uint8_t X86_64RelativePatcherTest::kDexCacheLoadRawCode[] = { + 0x8b, 0x05, // mov eax, [rip + <offset>] + 0x00, 0x01, 0x00, 0x00 +}; + +const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kDexCacheLoadCode( + kDexCacheLoadRawCode); + +TEST_F(X86_64RelativePatcherTest, CallSelf) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + static const uint8_t expected_code[] = { + 0xe8, 0xfb, 0xff, 0xff, 0xff + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(X86_64RelativePatcherTest, CallOther) { + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); + LinkerPatch method2_patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t 
method2_offset = GetMethodOffset(2u); + uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */); + static const uint8_t method1_expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff_after), static_cast<uint8_t>(diff_after >> 8), + static_cast<uint8_t>(diff_after >> 16), static_cast<uint8_t>(diff_after >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); + uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */); + static const uint8_t method2_expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff_before), static_cast<uint8_t>(diff_before >> 8), + static_cast<uint8_t>(diff_before >> 16), static_cast<uint8_t>(diff_before >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); +} + +TEST_F(X86_64RelativePatcherTest, CallTrampoline) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size()); + static const uint8_t expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(X86_64RelativePatcherTest, DexCacheReference) { + dex_cache_arrays_begin_ = 0x12345678; + constexpr size_t kElementOffset = 0x1234; + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kElementOffset), + }; + AddCompiledMethod(MethodRef(1u), kDexCacheLoadCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = + dex_cache_arrays_begin_ + kElementOffset - (result.second + kDexCacheLoadCode.size()); + static const uint8_t expected_code[] = { + 0x8b, 0x05, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +} // namespace linker +} // namespace art diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index afd39e8874..989b04fa36 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -17,11 +17,14 @@ #include "arch/instruction_set_features.h" #include "class_linker.h" #include "common_compiler_test.h" +#include "compiled_method.h" #include "compiler.h" #include "dex/pass_manager.h" #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/quick_compiler_callbacks.h" #include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/art_method-inl.h" #include "mirror/class-inl.h" @@ -173,7 +176,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(72U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(28U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(91 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); + EXPECT_EQ(92 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); } TEST_F(OatTest, 
OatHeaderIsValid) { diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index b3bb438bac..5b4cc54858 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -18,16 +18,21 @@ #include <zlib.h> +#include "arch/arm64/instruction_set_features_arm64.h" #include "base/allocator.h" #include "base/bit_vector.h" #include "base/stl_util.h" #include "base/unix_file/fd_file.h" #include "class_linker.h" #include "compiled_class.h" +#include "compiled_method.h" #include "dex_file-inl.h" #include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "gc/space/space.h" #include "image_writer.h" +#include "linker/relative_patcher.h" #include "mirror/art_method-inl.h" #include "mirror/array.h" #include "mirror/class_loader.h" @@ -37,352 +42,10 @@ #include "safe_map.h" #include "scoped_thread_state_change.h" #include "handle_scope-inl.h" -#include "utils/arm/assembler_thumb2.h" -#include "utils/arm64/assembler_arm64.h" #include "verifier/method_verifier.h" namespace art { -class OatWriter::RelativeCallPatcher { - public: - virtual ~RelativeCallPatcher() { } - - // Reserve space for relative call thunks if needed, return adjusted offset. - // After all methods have been processed it's call one last time with compiled_method == nullptr. - virtual uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) = 0; - - // Write relative call thunks if needed, return adjusted offset. - virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0; - - // Patch method code. The input displacement is relative to the patched location, - // the patcher may need to adjust it if the correct base is different. - virtual void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, - uint32_t target_offset) = 0; - - protected: - RelativeCallPatcher() { } - - private: - DISALLOW_COPY_AND_ASSIGN(RelativeCallPatcher); -}; - -class OatWriter::NoRelativeCallPatcher FINAL : public RelativeCallPatcher { - public: - NoRelativeCallPatcher() { } - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED) OVERRIDE { - return offset; // No space reserved; no patches expected. - } - - uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE { - return offset; // No thunks added; no patches expected. - } - - void Patch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, uint32_t literal_offset ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE { - LOG(FATAL) << "Unexpected relative patch."; - } - - private: - DISALLOW_COPY_AND_ASSIGN(NoRelativeCallPatcher); -}; - -class OatWriter::X86RelativeCallPatcher FINAL : public RelativeCallPatcher { - public: - X86RelativeCallPatcher() { } - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED) OVERRIDE { - return offset; // No space reserved; no limit on relative call distance. - } - - uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE { - return offset; // No thunks added; no limit on relative call distance. - } - - void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, - uint32_t target_offset) OVERRIDE { - DCHECK_LE(literal_offset + 4u, code->size()); - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. 
- uint32_t displacement = target_offset - patch_offset; - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - - typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; - reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement; - } - - private: - // PC displacement from patch location; x86 PC for relative calls points to the next - // instruction and the patch location is 4 bytes earlier. - static constexpr int32_t kPcDisplacement = 4; - - DISALLOW_COPY_AND_ASSIGN(X86RelativeCallPatcher); -}; - -class OatWriter::ArmBaseRelativeCallPatcher : public RelativeCallPatcher { - public: - ArmBaseRelativeCallPatcher(OatWriter* writer, - InstructionSet instruction_set, std::vector<uint8_t> thunk_code, - uint32_t max_positive_displacement, uint32_t max_negative_displacement) - : writer_(writer), instruction_set_(instruction_set), thunk_code_(thunk_code), - max_positive_displacement_(max_positive_displacement), - max_negative_displacement_(max_negative_displacement), - thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() { - } - - uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) OVERRIDE { - // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it - // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk - // of code. To avoid any alignment discrepancies for the final chunk, we always align the - // offset after reserving of writing any chunk. - if (UNLIKELY(compiled_method == nullptr)) { - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset); - if (needs_thunk) { - thunk_locations_.push_back(aligned_offset); - offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_); - } - return offset; - } - DCHECK(compiled_method->GetQuickCode() != nullptr); - uint32_t quick_code_size = compiled_method->GetQuickCode()->size(); - uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader); - uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size); - if (!unprocessed_patches_.empty() && - next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) { - bool needs_thunk = ReserveSpaceProcessPatches(next_aligned_offset); - if (needs_thunk) { - // A single thunk will cover all pending patches. 
- unprocessed_patches_.clear(); - uint32_t thunk_location = compiled_method->AlignCode(offset); - thunk_locations_.push_back(thunk_location); - offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_); - } - } - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.Type() == kLinkerPatchCallRelative) { - unprocessed_patches_.emplace_back(patch.TargetMethod(), - quick_code_offset + patch.LiteralOffset()); - } - } - return offset; - } - - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE { - if (current_thunk_to_write_ == thunk_locations_.size()) { - return offset; - } - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) { - ++current_thunk_to_write_; - uint32_t aligned_code_delta = aligned_offset - offset; - if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) { - return 0u; - } - if (!out->WriteFully(thunk_code_.data(), thunk_code_.size())) { - return 0u; - } - writer_->size_relative_call_thunks_ += thunk_code_.size(); - uint32_t thunk_end_offset = aligned_offset + thunk_code_.size(); - // Align after writing chunk, see the ReserveSpace() above. - offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_); - aligned_code_delta = offset - thunk_end_offset; - if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) { - return 0u; - } - } - return offset; - } - - protected: - uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset) { - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. - uint32_t displacement = target_offset - patch_offset; - // NOTE: With unsigned arithmetic we do mean to use && rather than || below. - if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) { - // Unwritten thunks have higher offsets, check if it's within range. - DCHECK(current_thunk_to_write_ == thunk_locations_.size() || - thunk_locations_[current_thunk_to_write_] > patch_offset); - if (current_thunk_to_write_ != thunk_locations_.size() && - thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) { - displacement = thunk_locations_[current_thunk_to_write_] - patch_offset; - } else { - // We must have a previous thunk then. - DCHECK_NE(current_thunk_to_write_, 0u); - DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset); - displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset; - DCHECK(displacement >= -max_negative_displacement_); - } - } - return displacement; - } - - private: - bool ReserveSpaceProcessPatches(uint32_t next_aligned_offset) { - // Process as many patches as possible, stop only on unresolved targets or calls too far back. - while (!unprocessed_patches_.empty()) { - uint32_t patch_offset = unprocessed_patches_.front().second; - auto it = writer_->method_offset_map_.find(unprocessed_patches_.front().first); - if (it == writer_->method_offset_map_.end()) { - // If still unresolved, check if we have a thunk within range. 
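The removed CalculateDisplacement() above (this logic now lives in the dedicated linker/ patchers) falls back to a previously reserved thunk when the real callee is outside the branch range. A simplified, self-contained sketch of that decision; the names and the linear search are illustrative only, the real code tracks the single relevant thunk incrementally.

#include <cstdint>
#include <vector>

// If the callee is outside the branch range, route the call through a reachable thunk.
// `thunk_locations` is sorted by offset; `max_pos`/`max_neg` bound the branch range.
uint32_t ChooseBranchTarget(uint32_t patch_offset, uint32_t target_offset,
                            const std::vector<uint32_t>& thunk_locations,
                            uint32_t max_pos, uint32_t max_neg) {
  uint32_t displacement = target_offset - patch_offset;  // Unsigned wrap-around is intended.
  if (displacement <= max_pos || displacement >= 0u - max_neg) {
    return target_offset;  // The direct branch fits.
  }
  for (uint32_t thunk : thunk_locations) {
    uint32_t thunk_displacement = thunk - patch_offset;
    if (thunk_displacement <= max_pos || thunk_displacement >= 0u - max_neg) {
      return thunk;  // First reachable thunk; ReserveSpace() is what guarantees one exists.
    }
  }
  return target_offset;  // Unreachable if thunks were reserved correctly.
}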
- DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset); - if (thunk_locations_.empty() || - patch_offset - thunk_locations_.back() > max_negative_displacement_) { - return next_aligned_offset - patch_offset > max_positive_displacement_; - } - } else if (it->second >= patch_offset) { - DCHECK_LE(it->second - patch_offset, max_positive_displacement_); - } else { - // When calling back, check if we have a thunk that's closer than the actual target. - uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back()) - ? it->second - : thunk_locations_.back(); - DCHECK_GT(patch_offset, target_offset); - if (patch_offset - target_offset > max_negative_displacement_) { - return true; - } - } - unprocessed_patches_.pop_front(); - } - return false; - } - - OatWriter* const writer_; - const InstructionSet instruction_set_; - const std::vector<uint8_t> thunk_code_; - const uint32_t max_positive_displacement_; - const uint32_t max_negative_displacement_; - std::vector<uint32_t> thunk_locations_; - size_t current_thunk_to_write_; - - // ReserveSpace() tracks unprocessed patches. - typedef std::pair<MethodReference, uint32_t> UnprocessedPatch; - std::deque<UnprocessedPatch> unprocessed_patches_; - - DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativeCallPatcher); -}; - -class OatWriter::Thumb2RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher { - public: - explicit Thumb2RelativeCallPatcher(OatWriter* writer) - : ArmBaseRelativeCallPatcher(writer, kThumb2, CompileThunkCode(), - kMaxPositiveDisplacement, kMaxNegativeDisplacement) { - } - - void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, - uint32_t target_offset) OVERRIDE { - DCHECK_LE(literal_offset + 4u, code->size()); - DCHECK_EQ(literal_offset & 1u, 0u); - DCHECK_EQ(patch_offset & 1u, 0u); - DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit. - uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - DCHECK_EQ(displacement & 1u, 0u); - DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed. - uint32_t signbit = (displacement >> 31) & 0x1; - uint32_t i1 = (displacement >> 23) & 0x1; - uint32_t i2 = (displacement >> 22) & 0x1; - uint32_t imm10 = (displacement >> 12) & 0x03ff; - uint32_t imm11 = (displacement >> 1) & 0x07ff; - uint32_t j1 = i1 ^ (signbit ^ 1); - uint32_t j2 = i2 ^ (signbit ^ 1); - uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11; - value |= 0xf000d000; // BL - - uint8_t* addr = &(*code)[literal_offset]; - // Check that we're just overwriting an existing BL. - DCHECK_EQ(addr[1] & 0xf8, 0xf0); - DCHECK_EQ(addr[3] & 0xd0, 0xd0); - // Write the new BL. - addr[0] = (value >> 16) & 0xff; - addr[1] = (value >> 24) & 0xff; - addr[2] = (value >> 0) & 0xff; - addr[3] = (value >> 8) & 0xff; - } - - private: - static std::vector<uint8_t> CompileThunkCode() { - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. 
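The bit-splitting above packs a 25-bit, 2-byte-aligned displacement into the S, imm10, J1, J2 and imm11 fields of the 32-bit Thumb2 BL encoding. A small helper that inverts it can make the J1/J2 trick easier to follow; this is illustrative only and operates on the assembled 32-bit value (opcode bits included) before it is split into the two little-endian half-words.

#include <cstdint>

// Recover the signed displacement from a 32-bit Thumb2 BL value laid out as above:
// S at bit 26, imm10 at bits 16..25, J1 at bit 13, J2 at bit 11, imm11 at bits 0..10.
int32_t DecodeThumb2BlDisplacement(uint32_t value) {
  uint32_t signbit = (value >> 26) & 1u;
  uint32_t j1 = (value >> 13) & 1u;
  uint32_t j2 = (value >> 11) & 1u;
  uint32_t i1 = (j1 ^ signbit) ^ 1u;  // Inverse of j1 = i1 ^ (signbit ^ 1).
  uint32_t i2 = (j2 ^ signbit) ^ 1u;
  uint32_t imm10 = (value >> 16) & 0x3ffu;
  uint32_t imm11 = value & 0x7ffu;
  uint32_t imm = (signbit << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
  if ((imm & (1u << 24)) != 0u) {
    imm |= ~((1u << 25) - 1u);  // Sign-extend the 25-bit result.
  }
  return static_cast<int32_t>(imm);
}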
- arm::Thumb2Assembler assembler; - assembler.LoadFromOffset( - arm::kLoadWord, arm::PC, arm::R0, - mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); - assembler.bkpt(0); - std::vector<uint8_t> thunk_code(assembler.CodeSize()); - MemoryRegion code(thunk_code.data(), thunk_code.size()); - assembler.FinalizeInstructions(code); - return thunk_code; - } - - // PC displacement from patch location; Thumb2 PC is always at instruction address + 4. - static constexpr int32_t kPcDisplacement = 4; - - // Maximum positive and negative displacement measured from the patch location. - // (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from - // the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.) - static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; - static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement; - - DISALLOW_COPY_AND_ASSIGN(Thumb2RelativeCallPatcher); -}; - -class OatWriter::Arm64RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher { - public: - explicit Arm64RelativeCallPatcher(OatWriter* writer) - : ArmBaseRelativeCallPatcher(writer, kArm64, CompileThunkCode(), - kMaxPositiveDisplacement, kMaxNegativeDisplacement) { - } - - void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, - uint32_t target_offset) OVERRIDE { - DCHECK_LE(literal_offset + 4u, code->size()); - DCHECK_EQ(literal_offset & 3u, 0u); - DCHECK_EQ(patch_offset & 3u, 0u); - DCHECK_EQ(target_offset & 3u, 0u); - uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); - DCHECK_EQ(displacement & 3u, 0u); - DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed. - uint32_t value = (displacement & 0x0fffffffu) >> 2; - value |= 0x94000000; // BL - - uint8_t* addr = &(*code)[literal_offset]; - // Check that we're just overwriting an existing BL. - DCHECK_EQ(addr[3] & 0xfc, 0x94); - // Write the new BL. - addr[0] = (value >> 0) & 0xff; - addr[1] = (value >> 8) & 0xff; - addr[2] = (value >> 16) & 0xff; - addr[3] = (value >> 24) & 0xff; - } - - private: - static std::vector<uint8_t> CompileThunkCode() { - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. - arm64::Arm64Assembler assembler; - Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArm64PointerSize).Int32Value()); - assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); - // Ensure we emit the literal pool. - assembler.EmitSlowPaths(); - std::vector<uint8_t> thunk_code(assembler.CodeSize()); - MemoryRegion code(thunk_code.data(), thunk_code.size()); - assembler.FinalizeInstructions(code); - return thunk_code; - } - - // Maximum positive and negative displacement measured from the patch location. - // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from - // the ARM64 PC pointing to the BL.) 
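For ARM64 the same patch is much simpler: BL takes a 26-bit word-count immediate, so the 28-bit byte displacement is shifted right by two and OR-ed into opcode 0x94000000. A compact equivalent of the arithmetic above, with illustrative names only:

#include <cassert>
#include <cstdint>

// Encode an ARM64 BL with the given byte displacement from the branch instruction.
// The displacement must be 4-byte aligned and within the signed 28-bit range.
uint32_t EncodeArm64Bl(int32_t displacement) {
  assert((displacement & 3) == 0);
  assert(displacement >= -(1 << 27) && displacement < (1 << 27));
  return 0x94000000u | ((static_cast<uint32_t>(displacement) >> 2) & 0x03ffffffu);
}

The removed DCHECKs above express the same range constraint by requiring the top five bits of the displacement to be all zeros or all ones.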
- static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u; - static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27); - - DISALLOW_COPY_AND_ASSIGN(Arm64RelativeCallPatcher); -}; - #define DCHECK_OFFSET() \ DCHECK_EQ(static_cast<off_t>(file_offset + relative_offset), out->Seek(0, kSeekCurrent)) \ << "file_offset=" << file_offset << " relative_offset=" << relative_offset @@ -427,6 +90,7 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, size_code_(0), size_code_alignment_(0), size_relative_call_thunks_(0), + size_misc_thunks_(0), size_mapping_table_(0), size_vmap_table_(0), size_gc_map_(0), @@ -442,23 +106,10 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, method_offset_map_() { CHECK(key_value_store != nullptr); - switch (compiler_driver_->GetInstructionSet()) { - case kX86: - case kX86_64: - relative_call_patcher_.reset(new X86RelativeCallPatcher); - break; - case kArm: - // Fall through: we generate Thumb2 code for "arm". - case kThumb2: - relative_call_patcher_.reset(new Thumb2RelativeCallPatcher(this)); - break; - case kArm64: - relative_call_patcher_.reset(new Arm64RelativeCallPatcher(this)); - break; - default: - relative_call_patcher_.reset(new NoRelativeCallPatcher); - break; - } + InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); + const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures(); + relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features, + &method_offset_map_); size_t offset; { @@ -706,7 +357,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { bool EndClass() { OatDexMethodVisitor::EndClass(); if (oat_class_index_ == writer_->oat_classes_.size()) { - offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, nullptr); + offset_ = writer_->relative_patcher_->ReserveSpaceEnd(offset_); } return true; } @@ -722,36 +373,36 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode(); CHECK(quick_code != nullptr); - offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, compiled_method); - offset_ = compiled_method->AlignCode(offset_); - DCHECK_ALIGNED_PARAM(offset_, - GetInstructionSetAlignment(compiled_method->GetInstructionSet())); uint32_t code_size = quick_code->size() * sizeof(uint8_t); CHECK_NE(code_size, 0U); uint32_t thumb_offset = compiled_method->CodeDelta(); - quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset; - - bool deduped = false; // Deduplicate code arrays. 
+ bool deduped = false; auto lb = dedupe_map_.lower_bound(compiled_method); if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(compiled_method, lb->first)) { quick_code_offset = lb->second; deduped = true; } else { + offset_ = writer_->relative_patcher_->ReserveSpace( + offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex())); + offset_ = compiled_method->AlignCode(offset_); + DCHECK_ALIGNED_PARAM(offset_, + GetInstructionSetAlignment(compiled_method->GetInstructionSet())); + quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset; dedupe_map_.PutBefore(lb, compiled_method, quick_code_offset); } MethodReference method_ref(dex_file_, it.GetMemberIndex()); - auto method_lb = writer_->method_offset_map_.lower_bound(method_ref); - if (method_lb != writer_->method_offset_map_.end() && - !writer_->method_offset_map_.key_comp()(method_ref, method_lb->first)) { + auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref); + if (method_lb != writer_->method_offset_map_.map.end() && + !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) { // TODO: Should this be a hard failure? LOG(WARNING) << "Multiple definitions of " << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file) << ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : ""); } else { - writer_->method_offset_map_.PutBefore(method_lb, method_ref, quick_code_offset); + writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset); } // Update quick method header. @@ -790,7 +441,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { if (!compiled_method->GetPatches().empty()) { uintptr_t base_loc = offset_ - code_size - writer_->oat_header_->GetExecutableOffset(); for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.Type() != kLinkerPatchCallRelative) { + if (!patch.IsPcRelative()) { writer_->absolute_patch_locations_.push_back(base_loc + patch.LiteralOffset()); } } @@ -799,22 +450,18 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { if (writer_->compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) { // Record debug information for this function if we are doing that. - - std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, true); - if (deduped) { - // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol - // so that it will show up in a debuggerd crash report. - name += " [ DEDUPED ]"; - } - const uint32_t quick_code_start = quick_code_offset - - writer_->oat_header_->GetExecutableOffset(); - const DexFile::CodeItem *code_item = it.GetMethodCodeItem(); - writer_->method_info_.push_back(DebugInfo(name, - dex_file_->GetSourceFile(dex_file_->GetClassDef(class_def_index_)), - quick_code_start, quick_code_start + code_size, - code_item == nullptr ? 
nullptr : dex_file_->GetDebugInfoStream(code_item), - compiled_method)); + writer_->oat_header_->GetExecutableOffset() - thumb_offset; + writer_->method_info_.push_back(DebugInfo { + dex_file_, + class_def_index_, + it.GetMemberIndex(), + it.GetMethodAccessFlags(), + it.GetMethodCodeItem(), + deduped, + quick_code_start, + quick_code_start + code_size, + compiled_method}); } if (kIsDebugBuild) { @@ -851,6 +498,37 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { } private: + struct CodeOffsetsKeyComparator { + bool operator()(const CompiledMethod* lhs, const CompiledMethod* rhs) const { + if (lhs->GetQuickCode() != rhs->GetQuickCode()) { + return lhs->GetQuickCode() < rhs->GetQuickCode(); + } + // If the code is the same, all other fields are likely to be the same as well. + if (UNLIKELY(lhs->GetMappingTable() != rhs->GetMappingTable())) { + return lhs->GetMappingTable() < rhs->GetMappingTable(); + } + if (UNLIKELY(lhs->GetVmapTable() != rhs->GetVmapTable())) { + return lhs->GetVmapTable() < rhs->GetVmapTable(); + } + if (UNLIKELY(lhs->GetGcMap() != rhs->GetGcMap())) { + return lhs->GetGcMap() < rhs->GetGcMap(); + } + const auto& lhs_patches = lhs->GetPatches(); + const auto& rhs_patches = rhs->GetPatches(); + if (UNLIKELY(lhs_patches.size() != rhs_patches.size())) { + return lhs_patches.size() < rhs_patches.size(); + } + auto rit = rhs_patches.begin(); + for (const LinkerPatch& lpatch : lhs_patches) { + if (UNLIKELY(!(lpatch == *rit))) { + return lpatch < *rit; + } + ++rit; + } + return false; + } + }; + // Deduplication is already done on a pointer basis by the compiler driver, // so we can simply compare the pointers to find out if things are duplicated. SafeMap<const CompiledMethod*, uint32_t, CodeOffsetsKeyComparator> dedupe_map_; @@ -978,7 +656,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { bool result = OatDexMethodVisitor::EndClass(); if (oat_class_index_ == writer_->oat_classes_.size()) { DCHECK(result); // OatDexMethodVisitor::EndClass() never fails. - offset_ = writer_->relative_call_patcher_->WriteThunks(out_, offset_); + offset_ = writer_->relative_patcher_->WriteThunks(out_, offset_); if (UNLIKELY(offset_ == 0u)) { PLOG(ERROR) << "Failed to write final relative call thunks"; result = false; @@ -1000,33 +678,32 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { if (quick_code != nullptr) { // Need a wrapper if we create a copy for patching. ArrayRef<const uint8_t> wrapped(*quick_code); - - offset_ = writer_->relative_call_patcher_->WriteThunks(out, offset_); - if (offset_ == 0u) { - ReportWriteFailure("relative call thunk", it); - return false; - } - uint32_t aligned_offset = compiled_method->AlignCode(offset_); - uint32_t aligned_code_delta = aligned_offset - offset_; - if (aligned_code_delta != 0) { - if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) { - ReportWriteFailure("code alignment padding", it); - return false; - } - offset_ += aligned_code_delta; - DCHECK_OFFSET_(); - } - DCHECK_ALIGNED_PARAM(offset_, - GetInstructionSetAlignment(compiled_method->GetInstructionSet())); uint32_t code_size = quick_code->size() * sizeof(uint8_t); CHECK_NE(code_size, 0U); // Deduplicate code arrays. 
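The CodeOffsetsKeyComparator that moved into InitCodeMethodVisitor above orders CompiledMethods by the identity of their data arrays rather than by their contents, which is sound because, as its comment notes, the compiler driver has already deduplicated the underlying storage. A stripped-down sketch of the same lower_bound/key_comp pattern, with placeholder types standing in for CompiledMethod:

#include <cstdint>
#include <map>
#include <vector>

// Pointer-identity deduplication: two entries sharing the same code array get the
// same assigned offset. `Method` and `AssignOffset` are placeholders for this sketch.
struct Method { const std::vector<uint8_t>* code; };

struct ByCodePointer {
  bool operator()(const Method* lhs, const Method* rhs) const {
    return lhs->code < rhs->code;  // Compare storage identity, not contents.
  }
};

uint32_t AssignOffset(std::map<const Method*, uint32_t, ByCodePointer>* dedupe_map,
                      const Method* m, uint32_t* next_offset) {
  auto lb = dedupe_map->lower_bound(m);
  if (lb != dedupe_map->end() && !dedupe_map->key_comp()(m, lb->first)) {
    return lb->second;                    // Duplicate: reuse the earlier offset.
  }
  uint32_t offset = *next_offset;
  *next_offset += static_cast<uint32_t>(m->code->size());
  dedupe_map->insert(lb, {m, offset});    // The hint reuses the lower_bound work.
  return offset;
}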
const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_]; - DCHECK(method_offsets.code_offset_ < offset_ || method_offsets.code_offset_ == - offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta()) - << PrettyMethod(it.GetMemberIndex(), *dex_file_); if (method_offsets.code_offset_ >= offset_) { + offset_ = writer_->relative_patcher_->WriteThunks(out, offset_); + if (offset_ == 0u) { + ReportWriteFailure("relative call thunk", it); + return false; + } + uint32_t aligned_offset = compiled_method->AlignCode(offset_); + uint32_t aligned_code_delta = aligned_offset - offset_; + if (aligned_code_delta != 0) { + if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) { + ReportWriteFailure("code alignment padding", it); + return false; + } + offset_ += aligned_code_delta; + DCHECK_OFFSET_(); + } + DCHECK_ALIGNED_PARAM(offset_, + GetInstructionSetAlignment(compiled_method->GetInstructionSet())); + DCHECK_EQ(method_offsets.code_offset_, + offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta()) + << PrettyMethod(it.GetMemberIndex(), *dex_file_); const OatQuickMethodHeader& method_header = oat_class->method_headers_[method_offsets_index_]; writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header)); @@ -1039,15 +716,21 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { DCHECK_OFFSET_(); if (!compiled_method->GetPatches().empty()) { - patched_code_ = std::vector<uint8_t>(quick_code->begin(), quick_code->end()); + patched_code_.assign(quick_code->begin(), quick_code->end()); wrapped = ArrayRef<const uint8_t>(patched_code_); for (const LinkerPatch& patch : compiled_method->GetPatches()) { if (patch.Type() == kLinkerPatchCallRelative) { // NOTE: Relative calls across oat files are not supported. uint32_t target_offset = GetTargetOffset(patch); uint32_t literal_offset = patch.LiteralOffset(); - writer_->relative_call_patcher_->Patch(&patched_code_, literal_offset, + writer_->relative_patcher_->PatchCall(&patched_code_, literal_offset, offset_ + literal_offset, target_offset); + } else if (patch.Type() == kLinkerPatchDexCacheArray) { + uint32_t target_offset = GetDexCacheOffset(patch); + uint32_t literal_offset = patch.LiteralOffset(); + writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, patch, + offset_ + literal_offset, + target_offset); } else if (patch.Type() == kLinkerPatchCall) { uint32_t target_offset = GetTargetOffset(patch); PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset); @@ -1102,9 +785,9 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } uint32_t GetTargetOffset(const LinkerPatch& patch) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - auto target_it = writer_->method_offset_map_.find(patch.TargetMethod()); + auto target_it = writer_->method_offset_map_.map.find(patch.TargetMethod()); uint32_t target_offset = - (target_it != writer_->method_offset_map_.end()) ? target_it->second : 0u; + (target_it != writer_->method_offset_map_.map.end()) ? target_it->second : 0u; // If there's no compiled code, point to the correct trampoline. 
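The new kLinkerPatchDexCacheArray branch above dispatches to the per-architecture PatchDexCacheReference() implementations added earlier in this patch. On 32-bit x86 there is no RIP-relative addressing, so the compiled code materializes a base register with a "call +0; pop <reg>" pair and the patcher rewrites the 32-bit displacement of the following memory operand against that anchor. A self-contained sketch of the arithmetic (names are illustrative; a little-endian host is assumed):

#include <cstdint>
#include <cstring>
#include <vector>

// Rewrite the 32-bit displacement of an [anchor-register + imm32] operand so that
// anchor + imm32 == target. `anchor_literal_offset` is the in-method offset of the
// 'pop' that materializes the anchor (the return address of the preceding 'call +0').
void PatchX86DexCacheReference(std::vector<uint8_t>* code,
                               uint32_t literal_offset,         // Offset of the imm32 in `code`.
                               uint32_t anchor_literal_offset,  // Offset of the anchor in `code`.
                               uint32_t patch_offset,           // File offset of the imm32.
                               uint32_t target_offset) {        // File offset of the dex cache element.
  uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset;
  uint32_t diff = target_offset - anchor_offset;
  std::memcpy(code->data() + literal_offset, &diff, 4u);
}

This is the same computation the X86RelativePatcher performs earlier in this patch, and it is what the X86 DexCacheReference test checks with its "mov eax, [ebx + diff]" expectation.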
if (UNLIKELY(target_offset == 0)) { mirror::ArtMethod* target = GetTargetMethod(patch); @@ -1134,6 +817,18 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { return type; } + uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (writer_->image_writer_ != nullptr) { + auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress( + patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset()); + const uint8_t* oat_data = writer_->image_writer_->GetOatFileBegin() + file_offset_; + return reinterpret_cast<const uint8_t*>(element) - oat_data; + } else { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); + } + } + void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { // NOTE: Direct method pointers across oat files don't use linker patches. However, direct @@ -1472,6 +1167,7 @@ bool OatWriter::Write(OutputStream* out) { DO_STAT(size_code_); DO_STAT(size_code_alignment_); DO_STAT(size_relative_call_thunks_); + DO_STAT(size_misc_thunks_); DO_STAT(size_mapping_table_); DO_STAT(size_vmap_table_); DO_STAT(size_gc_map_); @@ -1630,6 +1326,10 @@ size_t OatWriter::WriteCodeDexFiles(OutputStream* out, #undef VISIT + size_code_alignment_ += relative_patcher_->CodeAlignmentSize(); + size_relative_call_thunks_ += relative_patcher_->RelativeCallThunksSize(); + size_misc_thunks_ += relative_patcher_->MiscThunksSize(); + return relative_offset; } @@ -1645,6 +1345,15 @@ bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delt return true; } +std::pair<bool, uint32_t> OatWriter::MethodOffsetMap::FindMethodOffset(MethodReference ref) { + auto it = map.find(ref); + if (it == map.end()) { + return std::pair<bool, uint32_t>(false, 0u); + } else { + return std::pair<bool, uint32_t>(true, it->second); + } +} + OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) { offset_ = offset; const std::string& location(dex_file.GetLocation()); diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index fd2ccae4a5..51bc9b4483 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -21,7 +21,7 @@ #include <cstddef> #include <memory> -#include "driver/compiler_driver.h" +#include "linker/relative_patcher.h" // For linker::RelativePatcherTargetProvider. #include "mem_map.h" #include "method_reference.h" #include "oat.h" @@ -32,8 +32,10 @@ namespace art { class BitVector; class CompiledMethod; +class CompilerDriver; class ImageWriter; class OutputStream; +class TimingLogger; // OatHeader variable length with count of D OatDexFiles // @@ -113,25 +115,25 @@ class OatWriter { ~OatWriter(); struct DebugInfo { - DebugInfo(const std::string& method_name, const char* src_file_name, - uint32_t low_pc, uint32_t high_pc, const uint8_t* dbgstream, - CompiledMethod* compiled_method) - : method_name_(method_name), src_file_name_(src_file_name), - low_pc_(low_pc), high_pc_(high_pc), dbgstream_(dbgstream), - compiled_method_(compiled_method) { - } - std::string method_name_; // Note: this name is a pretty-printed name. 
- const char* src_file_name_; - uint32_t low_pc_; - uint32_t high_pc_; - const uint8_t* dbgstream_; + const DexFile* dex_file_; + size_t class_def_index_; + uint32_t dex_method_index_; + uint32_t access_flags_; + const DexFile::CodeItem *code_item_; + bool deduped_; + uint32_t low_pc_; + uint32_t high_pc_; CompiledMethod* compiled_method_; }; - const std::vector<DebugInfo>& GetCFIMethodInfo() const { + const std::vector<DebugInfo>& GetMethodDebugInfo() const { return method_info_; } + const CompilerDriver* GetCompilerDriver() { + return compiler_driver_; + } + private: // The DataAccess classes are helper classes that provide access to members related to // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away @@ -312,6 +314,7 @@ class OatWriter { uint32_t size_code_; uint32_t size_code_alignment_; uint32_t size_relative_call_thunks_; + uint32_t size_misc_thunks_; uint32_t size_mapping_table_; uint32_t size_vmap_table_; uint32_t size_gc_map_; @@ -325,50 +328,19 @@ class OatWriter { uint32_t size_oat_class_method_bitmaps_; uint32_t size_oat_class_method_offsets_; - class RelativeCallPatcher; - class NoRelativeCallPatcher; - class X86RelativeCallPatcher; - class ArmBaseRelativeCallPatcher; - class Thumb2RelativeCallPatcher; - class Arm64RelativeCallPatcher; - - std::unique_ptr<RelativeCallPatcher> relative_call_patcher_; + std::unique_ptr<linker::RelativePatcher> relative_patcher_; // The locations of absolute patches relative to the start of the executable section. std::vector<uintptr_t> absolute_patch_locations_; - SafeMap<MethodReference, uint32_t, MethodReferenceComparator> method_offset_map_; - - struct CodeOffsetsKeyComparator { - bool operator()(const CompiledMethod* lhs, const CompiledMethod* rhs) const { - if (lhs->GetQuickCode() != rhs->GetQuickCode()) { - return lhs->GetQuickCode() < rhs->GetQuickCode(); - } - // If the code is the same, all other fields are likely to be the same as well. - if (UNLIKELY(lhs->GetMappingTable() != rhs->GetMappingTable())) { - return lhs->GetMappingTable() < rhs->GetMappingTable(); - } - if (UNLIKELY(lhs->GetVmapTable() != rhs->GetVmapTable())) { - return lhs->GetVmapTable() < rhs->GetVmapTable(); - } - if (UNLIKELY(lhs->GetGcMap() != rhs->GetGcMap())) { - return lhs->GetGcMap() < rhs->GetGcMap(); - } - const auto& lhs_patches = lhs->GetPatches(); - const auto& rhs_patches = rhs->GetPatches(); - if (UNLIKELY(lhs_patches.size() != rhs_patches.size())) { - return lhs_patches.size() < rhs_patches.size(); - } - auto rit = rhs_patches.begin(); - for (const LinkerPatch& lpatch : lhs_patches) { - if (UNLIKELY(!(lpatch == *rit))) { - return lpatch < *rit; - } - ++rit; - } - return false; - } + // Map method reference to assigned offset. + // Wrap the map in a class implementing linker::RelativePatcherTargetProvider. 
+ class MethodOffsetMap FINAL : public linker::RelativePatcherTargetProvider { + public: + std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE; + SafeMap<MethodReference, uint32_t, MethodReferenceComparator> map; }; + MethodOffsetMap method_offset_map_; DISALLOW_COPY_AND_ASSIGN(OatWriter); }; diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index ab77505b6f..be432c5a20 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -59,7 +59,8 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) { return new (allocator) HGreaterThan(lhs, rhs); } else if (cond->IsGreaterThan()) { return new (allocator) HLessThanOrEqual(lhs, rhs); - } else if (cond->IsGreaterThanOrEqual()) { + } else { + DCHECK(cond->IsGreaterThanOrEqual()); return new (allocator) HLessThan(lhs, rhs); } } else if (cond->IsIntConstant()) { @@ -70,10 +71,11 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) { DCHECK(int_const->IsOne()); return graph->GetIntConstant(0); } + } else { + // General case when 'cond' is another instruction of type boolean. + // Negate with 'cond == 0'. + return new (allocator) HEqual(cond, graph->GetIntConstant(0)); } - - // TODO: b/19992954 - return nullptr; } void HBooleanSimplifier::Run() { @@ -105,10 +107,6 @@ void HBooleanSimplifier::Run() { HInstruction* replacement; if (NegatesCondition(true_value, false_value)) { replacement = GetOppositeCondition(if_condition); - if (replacement == nullptr) { - // Something we could not handle. - continue; - } if (replacement->GetBlock() == nullptr) { block->InsertInstructionBefore(replacement, if_instruction); } diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 1d167949f4..6511120794 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -239,7 +239,6 @@ class ValueBound : public ValueObject { *underflow = true; return Min(); } - return ValueBound(instruction_, new_constant); } private: @@ -443,9 +442,31 @@ class MonotonicValueRange : public ValueRange { class BCEVisitor : public HGraphVisitor { public: + // The least number of bounds checks that should be eliminated by triggering + // the deoptimization technique. + static constexpr size_t kThresholdForAddingDeoptimize = 2; + + // Very large constant index is considered as an anomaly. This is a threshold + // beyond which we don't bother to apply the deoptimization technique since + // it's likely some AIOOBE will be thrown. + static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024; + explicit BCEVisitor(HGraph* graph) : HGraphVisitor(graph), - maps_(graph->GetBlocks().Size()) {} + maps_(graph->GetBlocks().Size()), + need_to_revisit_block_(false) {} + + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + first_constant_index_bounds_check_map_.clear(); + HGraphVisitor::VisitBasicBlock(block); + if (need_to_revisit_block_) { + AddComparesWithDeoptimization(block); + need_to_revisit_block_ = false; + first_constant_index_bounds_check_map_.clear(); + GetValueRangeMap(block)->clear(); + HGraphVisitor::VisitBasicBlock(block); + } + } private: // Return the map of proven value ranges at the beginning of a basic block. 
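The boolean_simplifier change above removes the "could not handle" path: when the value being negated is a general boolean rather than a comparison, its negation is now materialized as an equality with zero, so GetOppositeCondition() always returns an instruction. A plain C++ analogue of the overall rewrite, illustrative only (the real pass transforms HInstructions, not source):

#include <cassert>

// An if/else that only selects the constants true/false collapses into one boolean
// expression; for a general boolean condition the negation is written as (cond == 0),
// which is the new fallback case above.
bool BeforeRewrite(int cond) { if (cond != 0) { return false; } else { return true; } }
bool AfterRewrite(int cond)  { return cond == 0; }

int main() {
  for (int cond : {0, 1, 42}) {
    assert(BeforeRewrite(cond) == AfterRewrite(cond));
  }
  return 0;
}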
@@ -701,9 +722,26 @@ class BCEVisitor : public HGraphVisitor { } } + if (first_constant_index_bounds_check_map_.find(array_length->GetId()) == + first_constant_index_bounds_check_map_.end()) { + // Remember the first bounds check against array_length of a constant index. + // That bounds check instruction has an associated HEnvironment where we + // may add an HDeoptimize to eliminate bounds checks of constant indices + // against array_length. + first_constant_index_bounds_check_map_.Put(array_length->GetId(), bounds_check); + } else { + // We've seen it at least twice. It's beneficial to introduce a compare with + // deoptimization fallback to eliminate the bounds checks. + need_to_revisit_block_ = true; + } + // Once we have an array access like 'array[5] = 1', we record array.length >= 6. // We currently don't do it for non-constant index since a valid array[i] can't prove // a valid array[i-1] yet due to the lower bound side. + if (constant == INT_MAX) { + // INT_MAX as an index will definitely throw AIOOBE. + return; + } ValueBound lower = ValueBound(nullptr, constant + 1); ValueBound upper = ValueBound::Max(); ValueRange* range = new (GetGraph()->GetArena()) @@ -938,8 +976,90 @@ class BCEVisitor : public HGraphVisitor { } } + void VisitDeoptimize(HDeoptimize* deoptimize) { + // Right now it's only HLessThanOrEqual. + DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual()); + HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual(); + HInstruction* instruction = less_than_or_equal->InputAt(0); + if (instruction->IsArrayLength()) { + HInstruction* constant = less_than_or_equal->InputAt(1); + DCHECK(constant->IsIntConstant()); + DCHECK(constant->AsIntConstant()->GetValue() <= kMaxConstantForAddingDeoptimize); + ValueBound lower = ValueBound(nullptr, constant->AsIntConstant()->GetValue() + 1); + ValueRange* range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, ValueBound::Max()); + GetValueRangeMap(deoptimize->GetBlock())->Overwrite(instruction->GetId(), range); + } + } + + void AddCompareWithDeoptimization(HInstruction* array_length, + HIntConstant* const_instr, + HBasicBlock* block) { + DCHECK(array_length->IsArrayLength()); + ValueRange* range = LookupValueRange(array_length, block); + ValueBound lower_bound = range->GetLower(); + DCHECK(lower_bound.IsConstant()); + DCHECK(const_instr->GetValue() <= kMaxConstantForAddingDeoptimize); + DCHECK_EQ(lower_bound.GetConstant(), const_instr->GetValue() + 1); + + // If array_length is less than lower_const, deoptimize. 
+ HBoundsCheck* bounds_check = first_constant_index_bounds_check_map_.Get( + array_length->GetId())->AsBoundsCheck(); + HCondition* cond = new (GetGraph()->GetArena()) HLessThanOrEqual(array_length, const_instr); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) + HDeoptimize(cond, bounds_check->GetDexPc()); + block->InsertInstructionBefore(cond, bounds_check); + block->InsertInstructionBefore(deoptimize, bounds_check); + deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment()); + } + + void AddComparesWithDeoptimization(HBasicBlock* block) { + for (ArenaSafeMap<int, HBoundsCheck*>::iterator it = + first_constant_index_bounds_check_map_.begin(); + it != first_constant_index_bounds_check_map_.end(); + ++it) { + HBoundsCheck* bounds_check = it->second; + HArrayLength* array_length = bounds_check->InputAt(1)->AsArrayLength(); + HIntConstant* lower_bound_const_instr = nullptr; + int32_t lower_bound_const = INT_MIN; + size_t counter = 0; + // Count the constant indexing for which bounds checks haven't + // been removed yet. + for (HUseIterator<HInstruction*> it2(array_length->GetUses()); + !it2.Done(); + it2.Advance()) { + HInstruction* user = it2.Current()->GetUser(); + if (user->GetBlock() == block && + user->IsBoundsCheck() && + user->AsBoundsCheck()->InputAt(0)->IsIntConstant()) { + DCHECK_EQ(array_length, user->AsBoundsCheck()->InputAt(1)); + HIntConstant* const_instr = user->AsBoundsCheck()->InputAt(0)->AsIntConstant(); + if (const_instr->GetValue() > lower_bound_const) { + lower_bound_const = const_instr->GetValue(); + lower_bound_const_instr = const_instr; + } + counter++; + } + } + if (counter >= kThresholdForAddingDeoptimize && + lower_bound_const_instr->GetValue() <= kMaxConstantForAddingDeoptimize) { + AddCompareWithDeoptimization(array_length, lower_bound_const_instr, block); + } + } + } + std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_; + // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in + // a block that checks a constant index against that HArrayLength. + SafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_; + + // For the block, there is at least one HArrayLength instruction for which there + // is more than one bounds check instruction with constant indexing. And it's + // beneficial to add a compare instruction that has deoptimization fallback and + // eliminate those bounds checks. + bool need_to_revisit_block_; + DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index b3653fe903..75cf1cf063 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -284,9 +284,9 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { ASSERT_FALSE(IsRemoved(bounds_check)); } -// array[5] = 1; // Can't eliminate. -// array[4] = 1; // Can eliminate. // array[6] = 1; // Can't eliminate. +// array[5] = 1; // Can eliminate. +// array[4] = 1; // Can eliminate. 
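A minimal standalone example (hypothetical numbers; GuardPasses is an illustrative helper, not ART code) of the property the compare-with-deoptimization relies on: once a single array_length <= max_constant test has been passed without deoptimizing, every bounds check with a constant index no larger than max_constant is provably redundant.

#include <cassert>
#include <cstdint>

// Models the HLessThanOrEqual + HDeoptimize pair: returns true when execution
// may continue, i.e. when deoptimization is NOT taken.
bool GuardPasses(int32_t array_length, int32_t max_constant) {
  return !(array_length <= max_constant);
}

int main() {
  const int32_t length = 10;        // array.length at run time
  const int32_t max_constant = 6;   // largest constant index seen in the block
  if (GuardPasses(length, max_constant)) {
    for (int32_t c = 0; c <= max_constant; ++c) {
      assert(c < length);  // each constant-index bounds check is now redundant
    }
  }
  return 0;
}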
TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -311,35 +311,35 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0); HArrayLength* array_length = new (&allocator) HArrayLength(null_check); - HBoundsCheck* bounds_check5 = new (&allocator) - HBoundsCheck(constant_5, array_length, 0); + HBoundsCheck* bounds_check6 = new (&allocator) + HBoundsCheck(constant_6, array_length, 0); HInstruction* array_set = new (&allocator) HArraySet( - null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0); + null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); - block->AddInstruction(bounds_check5); + block->AddInstruction(bounds_check6); block->AddInstruction(array_set); null_check = new (&allocator) HNullCheck(parameter, 0); array_length = new (&allocator) HArrayLength(null_check); - HBoundsCheck* bounds_check4 = new (&allocator) - HBoundsCheck(constant_4, array_length, 0); + HBoundsCheck* bounds_check5 = new (&allocator) + HBoundsCheck(constant_5, array_length, 0); array_set = new (&allocator) HArraySet( - null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0); + null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); - block->AddInstruction(bounds_check4); + block->AddInstruction(bounds_check5); block->AddInstruction(array_set); null_check = new (&allocator) HNullCheck(parameter, 0); array_length = new (&allocator) HArrayLength(null_check); - HBoundsCheck* bounds_check6 = new (&allocator) - HBoundsCheck(constant_6, array_length, 0); + HBoundsCheck* bounds_check4 = new (&allocator) + HBoundsCheck(constant_4, array_length, 0); array_set = new (&allocator) HArraySet( - null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0); + null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); - block->AddInstruction(bounds_check6); + block->AddInstruction(bounds_check4); block->AddInstruction(array_set); block->AddInstruction(new (&allocator) HGoto()); @@ -353,9 +353,9 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check5)); - ASSERT_TRUE(IsRemoved(bounds_check4)); ASSERT_FALSE(IsRemoved(bounds_check6)); + ASSERT_TRUE(IsRemoved(bounds_check5)); + ASSERT_TRUE(IsRemoved(bounds_check4)); } // for (int i=initial; i<array.length; i+=increment) { array[i] = 10; } diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 2cdd5af9f3..a912d4ccc4 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -23,6 +23,7 @@ #include "dex_instruction.h" #include "dex_instruction-inl.h" #include "driver/compiler_driver-inl.h" +#include "driver/compiler_options.h" #include "mirror/art_field.h" #include "mirror/art_field-inl.h" #include "mirror/class_loader.h" @@ -230,8 +231,7 @@ void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) { } } -bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions, - size_t number_of_blocks ATTRIBUTE_UNUSED, +bool HGraphBuilder::SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches) { const CompilerOptions& 
compiler_options = compiler_driver_->GetCompilerOptions(); CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter(); @@ -239,19 +239,20 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions, return false; } - if (compiler_options.IsHugeMethod(number_of_dex_instructions)) { + if (compiler_options.IsHugeMethod(code_item.insns_size_in_code_units_)) { VLOG(compiler) << "Skip compilation of huge method " << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) - << ": " << number_of_dex_instructions << " dex instructions"; + << ": " << code_item.insns_size_in_code_units_ << " code units"; MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod); return true; } // If it's large and contains no branches, it's likely to be machine generated initialization. - if (compiler_options.IsLargeMethod(number_of_dex_instructions) && (number_of_branches == 0)) { + if (compiler_options.IsLargeMethod(code_item.insns_size_in_code_units_) + && (number_of_branches == 0)) { VLOG(compiler) << "Skip compilation of large method with no branch " << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) - << ": " << number_of_dex_instructions << " dex instructions"; + << ": " << code_item.insns_size_in_code_units_ << " code units"; MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches); return true; } @@ -278,18 +279,14 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // Compute the number of dex instructions, blocks, and branches. We will // check these values against limits given to the compiler. - size_t number_of_dex_instructions = 0; - size_t number_of_blocks = 0; size_t number_of_branches = 0; // To avoid splitting blocks, we compute ahead of time the instructions that // start a new block, and create these blocks. - ComputeBranchTargets( - code_ptr, code_end, &number_of_dex_instructions, &number_of_blocks, &number_of_branches); + ComputeBranchTargets(code_ptr, code_end, &number_of_branches); // Note that the compiler driver is null when unit testing. - if ((compiler_driver_ != nullptr) - && SkipCompilation(number_of_dex_instructions, number_of_blocks, number_of_branches)) { + if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) { return false; } @@ -355,8 +352,6 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) { void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end, - size_t* number_of_dex_instructions, - size_t* number_of_blocks, size_t* number_of_branches) { branch_targets_.SetSize(code_end - code_ptr); @@ -369,7 +364,6 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, // the locations these instructions branch to. 
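A minimal sketch of the filtering rule above; the threshold constants here are assumptions for the example, the real values come from CompilerOptions::IsHugeMethod and IsLargeMethod. A method is skipped when it is huge, or when it is merely large but contains no branches at all, a shape typical of machine-generated initializers.

#include <cstddef>

// Assumed example thresholds, in dex code units; not the real defaults.
constexpr size_t kHugeMethodCodeUnits = 10000;
constexpr size_t kLargeMethodCodeUnits = 600;

bool ShouldSkipCompilation(size_t insns_size_in_code_units, size_t number_of_branches) {
  if (insns_size_in_code_units > kHugeMethodCodeUnits) {
    return true;  // huge method
  }
  if (insns_size_in_code_units > kLargeMethodCodeUnits && number_of_branches == 0) {
    return true;  // large method with no branches
  }
  return false;
}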
uint32_t dex_pc = 0; while (code_ptr < code_end) { - (*number_of_dex_instructions)++; const Instruction& instruction = *Instruction::At(code_ptr); if (instruction.IsBranch()) { (*number_of_branches)++; @@ -378,14 +372,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, if (FindBlockStartingAt(target) == nullptr) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(target, block); - (*number_of_blocks)++; } dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); - (*number_of_blocks)++; } } else if (instruction.IsSwitch()) { SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH); @@ -403,14 +395,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, if (FindBlockStartingAt(target) == nullptr) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(target, block); - (*number_of_blocks)++; } // The next case gets its own block. if (i < num_entries) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(table.GetDexPcForIndex(i), block); - (*number_of_blocks)++; } } @@ -420,7 +410,6 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); - (*number_of_blocks)++; } } else { code_ptr += instruction.SizeInCodeUnits(); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 6a0738a7b9..dc6d97eb0c 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -90,8 +90,6 @@ class HGraphBuilder : public ValueObject { // branches. void ComputeBranchTargets(const uint16_t* start, const uint16_t* end, - size_t* number_of_dex_instructions, - size_t* number_of_block, size_t* number_of_branches); void MaybeUpdateCurrentBlock(size_t index); HBasicBlock* FindBlockStartingAt(int32_t index) const; @@ -217,9 +215,7 @@ class HGraphBuilder : public ValueObject { HInstruction* value, int32_t case_value_int, int32_t target_offset, uint32_t dex_pc); - bool SkipCompilation(size_t number_of_dex_instructions, - size_t number_of_blocks, - size_t number_of_branches); + bool SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches); void MaybeRecordStat(MethodCompilationStat compilation_stat); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index bd6e943bf0..8736374306 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -82,6 +82,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); GenerateFrameEntry(); + DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_)); for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) { HBasicBlock* block = block_order_->Get(current_block_index_); // Don't generate code for an empty block. 
Its predecessors will branch to its successor @@ -132,7 +133,6 @@ size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) { } LOG(FATAL) << "Could not find a register in baseline register allocator"; UNREACHABLE(); - return -1; } size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) { @@ -145,7 +145,6 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l } LOG(FATAL) << "Could not find a register in baseline register allocator"; UNREACHABLE(); - return -1; } void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, @@ -378,10 +377,14 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph, case kMips: return nullptr; case kX86: { - return new x86::CodeGeneratorX86(graph, compiler_options); + return new x86::CodeGeneratorX86(graph, + *isa_features.AsX86InstructionSetFeatures(), + compiler_options); } case kX86_64: { - return new x86_64::CodeGeneratorX86_64(graph, compiler_options); + return new x86_64::CodeGeneratorX86_64(graph, + *isa_features.AsX86_64InstructionSetFeatures(), + compiler_options); } default: return nullptr; @@ -413,7 +416,16 @@ void CodeGenerator::BuildNativeGCMap( } } -void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* src_map) const { +void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const { + for (size_t i = 0; i < pc_infos_.Size(); i++) { + struct PcInfo pc_info = pc_infos_.Get(i); + uint32_t pc2dex_offset = pc_info.native_pc; + int32_t pc2dex_dalvik_offset = pc_info.dex_pc; + src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset})); + } +} + +void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { uint32_t pc2dex_data_size = 0u; uint32_t pc2dex_entries = pc_infos_.Size(); uint32_t pc2dex_offset = 0u; @@ -423,19 +435,12 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* uint32_t dex2pc_offset = 0u; int32_t dex2pc_dalvik_offset = 0; - if (src_map != nullptr) { - src_map->reserve(pc2dex_entries); - } - for (size_t i = 0; i < pc2dex_entries; i++) { struct PcInfo pc_info = pc_infos_.Get(i); pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset); pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset); pc2dex_offset = pc_info.native_pc; pc2dex_dalvik_offset = pc_info.dex_pc; - if (src_map != nullptr) { - src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset})); - } } // Walk over the blocks and find which ones correspond to catch block entries. 
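A minimal sketch (plain structs instead of ART's PcInfo/SrcMapElem) of what the new BuildSourceMap does on its own: it emits one (native pc, dex pc) pair per recorded PcInfo, leaving the LEB128-encoded mapping table entirely to BuildMappingTable.

#include <cstdint>
#include <vector>

struct PcInfoSketch { uint32_t native_pc; uint32_t dex_pc; };
struct SrcMapEntry { uint32_t from_native_pc; int32_t to_dex_pc; };

std::vector<SrcMapEntry> BuildSourceMapSketch(const std::vector<PcInfoSketch>& pc_infos) {
  std::vector<SrcMapEntry> src_map;
  src_map.reserve(pc_infos.size());
  for (const PcInfoSketch& info : pc_infos) {
    src_map.push_back({info.native_pc, static_cast<int32_t>(info.dex_pc)});
  }
  return src_map;
}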
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 07ca6b1ccf..b888aca264 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -205,7 +205,8 @@ class CodeGenerator { slow_paths_.Add(slow_path); } - void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const; + void BuildSourceMap(DefaultSrcMap* src_map) const; + void BuildMappingTable(std::vector<uint8_t>* vector) const; void BuildVMapTable(std::vector<uint8_t>* vector) const; void BuildNativeGCMap( std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; @@ -425,6 +426,8 @@ class CodeGenerator { StackMapStream stack_map_stream_; + friend class OptimizingCFITest; + DISALLOW_COPY_AND_ASSIGN(CodeGenerator); }; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 1f95041a92..a799a519c0 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -287,6 +287,26 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM); }; +class DeoptimizationSlowPathARM : public SlowPathCodeARM { + public: + explicit DeoptimizationSlowPathARM(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); +}; + #undef __ #undef __ @@ -493,6 +513,14 @@ void CodeGeneratorARM::ComputeSpillMask() { } } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::ArmCore(static_cast<int>(reg)); +} + +static dwarf::Reg DWARFReg(SRegister reg) { + return dwarf::Reg::ArmFp(static_cast<int>(reg)); +} + void CodeGeneratorARM::GenerateFrameEntry() { bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); @@ -511,12 +539,19 @@ void CodeGeneratorARM::GenerateFrameEntry() { // PC is in the list of callee-save to mimic Quick, but we need to push // LR at entry instead. 
- __ PushList((core_spill_mask_ & (~(1 << PC))) | 1 << LR); + uint32_t push_mask = (core_spill_mask_ & (~(1 << PC))) | 1 << LR; + __ PushList(push_mask); + __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(push_mask)); + __ cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, push_mask, kArmWordSize); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); __ vpushs(start_register, POPCOUNT(fpu_spill_mask_)); + __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_)); + __ cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fpu_spill_mask_, kArmWordSize); } - __ AddConstant(SP, -(GetFrameSize() - FrameEntrySpillSize())); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ AddConstant(SP, -adjust); + __ cfi().AdjustCFAOffset(adjust); __ StoreToOffset(kStoreWord, R0, SP, 0); } @@ -525,10 +560,14 @@ void CodeGeneratorARM::GenerateFrameExit() { __ bx(LR); return; } - __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize()); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ AddConstant(SP, adjust); + __ cfi().AdjustCFAOffset(-adjust); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); __ vpops(start_register, POPCOUNT(fpu_spill_mask_)); + __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_)); + __ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_); } __ PopList(core_spill_mask_); } @@ -542,7 +581,6 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimLong: case Primitive::kPrimDouble: return Location::DoubleStackSlot(GetStackSlot(load->GetLocal())); - break; case Primitive::kPrimInt: case Primitive::kPrimNot: @@ -555,10 +593,11 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimShort: case Primitive::kPrimVoid: LOG(FATAL) << "Unexpected type " << load->GetType(); + UNREACHABLE(); } LOG(FATAL) << "Unreachable"; - return Location(); + UNREACHABLE(); } Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -663,7 +702,6 @@ Location InvokeDexCallingConventionVisitor::GetReturnLocation(Primitive::Type ty return Location(); } UNREACHABLE(); - return Location(); } void CodeGeneratorARM::Move32(Location destination, Location source) { @@ -887,24 +925,17 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit) { UNUSED(exit); } -void LocationsBuilderARM::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. 
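A minimal model (CfiSketch is a hypothetical class with only the two calls used here) of the bookkeeping the cfi() calls above perform for the ARM prologue: every stack adjustment moves the CFA offset, and every saved register records the slot it was pushed to, so that by the end of GenerateFrameEntry the tracked CFA offset equals the frame size, which is exactly what the DCHECK added in CompileInternal verifies.

#include <cassert>
#include <utility>
#include <vector>

class CfiSketch {
 public:
  void AdjustCFAOffset(int delta) { cfa_offset_ += delta; }
  // Records that DWARF register `reg` was saved `offset` bytes above the
  // current stack pointer, i.e. at CFA - cfa_offset_ + offset.
  void RelOffset(int reg, int offset) { saves_.push_back({reg, offset - cfa_offset_}); }
  int GetCurrentCFAOffset() const { return cfa_offset_; }

 private:
  int cfa_offset_ = 0;
  std::vector<std::pair<int, int>> saves_;  // (reg, offset relative to CFA)
};

int main() {
  const int kWord = 4;
  const int frame_size = 64;  // assumed example frame size
  CfiSketch cfi;
  // push {r4-r11, lr}: nine words, lowest-numbered register at the lowest address.
  const int pushed[] = {4, 5, 6, 7, 8, 9, 10, 11, 14};
  cfi.AdjustCFAOffset(9 * kWord);
  for (int i = 0; i < 9; ++i) cfi.RelOffset(pushed[i], i * kWord);
  // sub sp, sp, #adjust for spill slots and outgoing arguments.
  cfi.AdjustCFAOffset(frame_size - 9 * kWord);
  assert(cfi.GetCurrentCFAOffset() == frame_size);
  return 0;
}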
int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (always_true_target != nullptr) { + __ b(always_true_target); } return; } else { @@ -913,10 +944,10 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { } else { if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { // Condition has been materialized, compare the output to 0 - DCHECK(if_instr->GetLocations()->InAt(0).IsRegister()); - __ cmp(if_instr->GetLocations()->InAt(0).AsRegister<Register>(), + DCHECK(instruction->GetLocations()->InAt(0).IsRegister()); + __ cmp(instruction->GetLocations()->InAt(0).AsRegister<Register>(), ShifterOperand(0)); - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), NE); + __ b(true_target, NE); } else { // Condition has not been materialized, use its inputs as the // comparison and its condition as the branch condition. @@ -938,16 +969,55 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { __ cmp(left, ShifterOperand(temp)); } } - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), - ARMCondition(cond->AsCondition()->GetCondition())); + __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition())); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (false_target != nullptr) { + __ b(false_target); + } +} + +void LocationsBuilderARM::VisitIf(HIf* if_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); } } +void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathARM(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} void LocationsBuilderARM::VisitCondition(HCondition* comp) { LocationSummary* locations = @@ -1139,7 +1209,10 @@ void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) { void InstructionCodeGeneratorARM::VisitReturnVoid(HReturnVoid* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); + __ 
cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM::VisitReturn(HReturn* ret) { @@ -1150,7 +1223,10 @@ void LocationsBuilderARM::VisitReturn(HReturn* ret) { void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index bcdea7a639..06f425ea21 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -188,6 +188,10 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); ArmAssembler* const assembler_; CodeGeneratorARM* const codegen_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 32ada3837e..5fe8adc86a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -352,6 +352,26 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64); }; +class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { + public: + explicit DeoptimizationSlowPathARM64(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); +}; + #undef __ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -445,18 +465,65 @@ void CodeGeneratorARM64::GenerateFrameEntry() { // ... : reserved frame space. // sp[0] : current method. 
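A minimal model of why each Return/ReturnVoid above brackets the frame teardown with RememberState()/RestoreState() and then re-establishes the frame's CFA offset: unwind info is a state machine over the instruction stream, so code laid out after an early return must still be described as sitting inside a full frame. The class below is a sketch of the DW_CFA_remember_state/DW_CFA_restore_state push/pop behaviour, not the ART writer.

#include <cassert>
#include <vector>

class CfiStateSketch {
 public:
  void DefCFAOffset(int offset) { cfa_offset_ = offset; }
  void AdjustCFAOffset(int delta) { cfa_offset_ += delta; }
  void RememberState() { saved_.push_back(cfa_offset_); }                   // DW_CFA_remember_state
  void RestoreState() { cfa_offset_ = saved_.back(); saved_.pop_back(); }   // DW_CFA_restore_state
  int cfa_offset() const { return cfa_offset_; }

 private:
  int cfa_offset_ = 0;
  std::vector<int> saved_;
};

int main() {
  CfiStateSketch cfi;
  cfi.DefCFAOffset(64);        // frame fully set up (assumed example size)
  cfi.RememberState();
  cfi.AdjustCFAOffset(-64);    // frame exit emitted for an early return
  cfi.RestoreState();          // instructions after the return are still in the frame
  assert(cfi.cfa_offset() == 64);
  return 0;
}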
__ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); - __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); - __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + GetAssembler()->cfi().AdjustCFAOffset(frame_size); + SpillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + SpillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); } } void CodeGeneratorARM64::GenerateFrameExit() { if (!HasEmptyFrame()) { int frame_size = GetFrameSize(); - __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); - __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + UnspillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + UnspillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); __ Drop(frame_size); + GetAssembler()->cfi().AdjustCFAOffset(-frame_size); + } +} + +static inline dwarf::Reg DWARFReg(CPURegister reg) { + if (reg.IsFPRegister()) { + return dwarf::Reg::Arm64Fp(reg.code()); + } else { + DCHECK_LT(reg.code(), 31u); // X0 - X30. + return dwarf::Reg::Arm64Core(reg.code()); + } +} + +void CodeGeneratorARM64::SpillRegisters(vixl::CPURegList registers, int offset) { + int size = registers.RegisterSizeInBytes(); + while (registers.Count() >= 2) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + __ Stp(dst0, dst1, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset); + GetAssembler()->cfi().RelOffset(DWARFReg(dst1), offset + size); + offset += 2 * size; } + if (!registers.IsEmpty()) { + const CPURegister& dst0 = registers.PopLowestIndex(); + __ Str(dst0, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset); + } + DCHECK(registers.IsEmpty()); +} + +void CodeGeneratorARM64::UnspillRegisters(vixl::CPURegList registers, int offset) { + int size = registers.RegisterSizeInBytes(); + while (registers.Count() >= 2) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + __ Ldp(dst0, dst1, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().Restore(DWARFReg(dst0)); + GetAssembler()->cfi().Restore(DWARFReg(dst1)); + offset += 2 * size; + } + if (!registers.IsEmpty()) { + const CPURegister& dst0 = registers.PopLowestIndex(); + __ Ldr(dst0, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().Restore(DWARFReg(dst0)); + } + DCHECK(registers.IsEmpty()); } void CodeGeneratorARM64::Bind(HBasicBlock* block) { @@ -1611,25 +1678,18 @@ void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) { } } -void LocationsBuilderARM64::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction, + vixl::Label* true_target, + vixl::Label* false_target, + vixl::Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); HCondition* condition = cond->AsCondition(); - vixl::Label* true_target = 
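A minimal sketch (a plain vector of DWARF register numbers instead of vixl::CPURegList) of the spill layout SpillRegisters produces above: registers are stored in pairs with stp, so the second register of each pair is recorded one slot above the first, and a lone trailing register falls back to a single str.

#include <cstddef>
#include <vector>

struct SpillSlot { int dwarf_reg; int offset; };

std::vector<SpillSlot> LayoutSpills(const std::vector<int>& regs, int offset, int slot_size) {
  std::vector<SpillSlot> slots;
  size_t i = 0;
  for (; i + 1 < regs.size(); i += 2) {
    // stp reg[i], reg[i+1], [sp, #offset]
    slots.push_back({regs[i], offset});                  // cfi().RelOffset(reg[i], offset)
    slots.push_back({regs[i + 1], offset + slot_size});  // cfi().RelOffset(reg[i+1], offset + size)
    offset += 2 * slot_size;
  }
  if (i < regs.size()) {
    // str reg[i], [sp, #offset] for the leftover register
    slots.push_back({regs[i], offset});
  }
  return slots;
}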
codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); if (cond->IsIntConstant()) { int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) { - __ B(true_target); + if (always_true_target != nullptr) { + __ B(always_true_target); } return; } else { @@ -1637,31 +1697,87 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { } } else if (!cond->IsCondition() || condition->NeedsMaterialization()) { // The condition instruction has been materialized, compare the output to 0. - Location cond_val = if_instr->GetLocations()->InAt(0); + Location cond_val = instruction->GetLocations()->InAt(0); DCHECK(cond_val.IsRegister()); - __ Cbnz(InputRegisterAt(if_instr, 0), true_target); + __ Cbnz(InputRegisterAt(instruction, 0), true_target); } else { // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. Register lhs = InputRegisterAt(condition, 0); Operand rhs = InputOperandAt(condition, 1); Condition arm64_cond = ARM64Condition(condition->GetCondition()); - if ((arm64_cond == eq || arm64_cond == ne) && rhs.IsImmediate() && (rhs.immediate() == 0)) { - if (arm64_cond == eq) { - __ Cbz(lhs, true_target); - } else { - __ Cbnz(lhs, true_target); + if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) { + switch (arm64_cond) { + case eq: + __ Cbz(lhs, true_target); + break; + case ne: + __ Cbnz(lhs, true_target); + break; + case lt: + // Test the sign bit and branch accordingly. + __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + case ge: + // Test the sign bit and branch accordingly. + __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + default: + // Without the `static_cast` the compiler throws an error for + // `-Werror=sign-promo`. 
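A minimal standalone check of the identity the lt/ge cases above rely on when the right-hand side is the constant zero: for a two's-complement value, x < 0 holds exactly when the sign bit (bit kWRegSize - 1 for a W register) is set, and x >= 0 exactly when it is clear, so the compare-and-branch folds into a single tbnz or tbz.

#include <cassert>
#include <cstdint>

bool SignBitSet(int32_t x) {
  return (static_cast<uint32_t>(x) >> 31) != 0;  // bit 31 == kWRegSize - 1
}

int main() {
  const int32_t samples[] = {INT32_MIN, -1, 0, 1, INT32_MAX};
  for (int32_t x : samples) {
    assert((x < 0) == SignBitSet(x));    // lt against 0 -> Tbnz on the sign bit
    assert((x >= 0) == !SignBitSet(x));  // ge against 0 -> Tbz on the sign bit
  }
  return 0;
}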
+ LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); } } else { __ Cmp(lhs, rhs); __ B(arm64_cond, true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) { + if (false_target != nullptr) { __ B(false_target); } } +void LocationsBuilderARM64::VisitIf(HIf* if_instr) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { + vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + vixl::Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathARM64(deoptimize); + codegen_->AddSlowPath(slow_path); + vixl::Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} + void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -2349,8 +2465,11 @@ void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) { UNUSED(instruction); + GetAssembler()->cfi().RememberState(); codegen_->GenerateFrameExit(); __ Ret(); + GetAssembler()->cfi().RestoreState(); + GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { @@ -2359,8 +2478,11 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) { UNUSED(instruction); + GetAssembler()->cfi().RememberState(); codegen_->GenerateFrameExit(); __ Ret(); + GetAssembler()->cfi().RestoreState(); + GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM64::VisitShl(HShl* shl) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 2c624d2926..9430e31037 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -23,8 +23,8 @@ #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm64/assembler_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" #include 
"arch/arm64/quick_method_frame_info_arm64.h" namespace art { @@ -165,6 +165,10 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { void HandleShift(HBinaryOperation* instr); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + vixl::Label* true_target, + vixl::Label* false_target, + vixl::Label* always_true_target); Arm64Assembler* const assembler_; CodeGeneratorARM64* const codegen_; @@ -223,6 +227,8 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; + void SpillRegisters(vixl::CPURegList registers, int offset); + void UnspillRegisters(vixl::CPURegList registers, int offset); vixl::CPURegList GetFramePreservedCoreRegisters() const { return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 007e25ab4a..a6fb07fa98 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -52,7 +52,7 @@ class NullCheckSlowPathX86 : public SlowPathCodeX86 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowNullPointer))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -67,7 +67,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCodeX86 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowDivZero))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -116,7 +116,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 { length_location_, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowArrayBounds))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -137,7 +137,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 { __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -295,6 +295,27 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86); }; +class DeoptimizationSlowPathX86 : public SlowPathCodeX86 { + public: + explicit DeoptimizationSlowPathX86(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pDeoptimize))); + // No need to restore live registers. 
+ DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + codegen->RecordPcInfo(instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); +}; + #undef __ #define __ reinterpret_cast<X86Assembler*>(GetAssembler())-> @@ -340,7 +361,9 @@ size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32 return GetFloatingPointSpillSlotSize(); } -CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options) +CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, @@ -353,7 +376,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compile block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this) { + move_resolver_(graph->GetArena(), this), + isa_features_(isa_features) { // Use a fake return address register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -436,7 +460,12 @@ InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGene assembler_(codegen->GetAssembler()), codegen_(codegen) {} +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86Core(static_cast<int>(reg)); +} + void CodeGeneratorX86::GenerateFrameEntry() { + __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address __ Bind(&frame_entry_label_); bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); @@ -455,10 +484,14 @@ void CodeGeneratorX86::GenerateFrameEntry() { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ pushl(reg); + __ cfi().AdjustCFAOffset(kX86WordSize); + __ cfi().RelOffset(DWARFReg(reg), 0); } } - __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ subl(ESP, Immediate(adjust)); + __ cfi().AdjustCFAOffset(adjust); __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); } @@ -467,12 +500,16 @@ void CodeGeneratorX86::GenerateFrameExit() { return; } - __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ addl(ESP, Immediate(adjust)); + __ cfi().AdjustCFAOffset(-adjust); for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ popl(reg); + __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize)); + __ cfi().Restore(DWARFReg(reg)); } } } @@ -491,7 +528,6 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimLong: case Primitive::kPrimDouble: return Location::DoubleStackSlot(GetStackSlot(load->GetLocal())); - break; case Primitive::kPrimInt: case Primitive::kPrimNot: @@ -504,10 +540,11 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimShort: case Primitive::kPrimVoid: LOG(FATAL) << "Unexpected type " << load->GetType(); + UNREACHABLE(); } LOG(FATAL) << "Unreachable"; - return Location(); + UNREACHABLE(); } Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -785,24 +822,17 @@ void InstructionCodeGeneratorX86::VisitExit(HExit* exit) 
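A minimal worked example of the x86 tracking above; the frame size and the single pushed callee-save are assumptions made for the arithmetic. The call instruction has already pushed the return address, so the CFA offset starts at one word even before the prologue runs, which is what SetCurrentCFAOffset(kX86WordSize) records; each pushl then both moves the CFA and pins the saved register, and the final subl brings the tracked offset up to the full frame size.

#include <cassert>

int main() {
  const int kWord = 4;
  const int frame_size = 32;              // assumed example frame size
  const int spill_size = 2 * kWord;       // assumed: return-address slot + one pushed callee-save

  int cfa_offset = kWord;                 // SetCurrentCFAOffset(kX86WordSize): return address
  cfa_offset += kWord;                    // pushl EBP: AdjustCFAOffset(kX86WordSize)
                                          // cfi().RelOffset(DWARFReg(EBP), 0)
  cfa_offset += frame_size - spill_size;  // subl ESP, adjust: AdjustCFAOffset(adjust)
  assert(cfa_offset == frame_size);       // matches the DCHECK after GenerateFrameEntry
  return 0;
}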
{ UNUSED(exit); } -void LocationsBuilderX86::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::Any()); - } -} - -void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (always_true_target != nullptr) { + __ jmp(always_true_target); } return; } else { @@ -815,20 +845,19 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { // evaluated just before the if, we don't need to evaluate it // again. bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr); + && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction); if (materialized) { if (!eflags_set) { // Materialized condition, compare against 0. - Location lhs = if_instr->GetLocations()->InAt(0); + Location lhs = instruction->GetLocations()->InAt(0); if (lhs.IsRegister()) { __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); } else { __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); } - __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(kNotEqual, true_target); } else { - __ j(X86Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); } } else { Location lhs = cond->GetLocations()->InAt(0); @@ -847,14 +876,54 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { } else { __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); } - __ j(X86Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (false_target != nullptr) { + __ jmp(false_target); + } +} + +void LocationsBuilderX86::VisitIf(HIf* if_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } +} + +void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; } + GenerateTestAndBranch(if_instr, true_target, 
false_target, always_true_target); +} + +void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } +} + +void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathX86(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); } void LocationsBuilderX86::VisitLocal(HLocal* local) { @@ -1047,8 +1116,11 @@ void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) { void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderX86::VisitReturn(HReturn* ret) { @@ -1106,12 +1178,15 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType(); } } + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - IntrinsicLocationsBuilderX86 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -2637,16 +2712,16 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. + locations->SetInAt(0, Location::Any()); + // The shift count needs to be in CL or a constant. locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. - locations->SetInAt(1, Location::RegisterLocation(ECX)); + // The shift count needs to be in CL or a constant. 
+ locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2665,38 +2740,87 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { - Register first_reg = first.AsRegister<Register>(); - if (second.IsRegister()) { - Register second_reg = second.AsRegister<Register>(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - __ shll(first_reg, second_reg); - } else if (op->IsShr()) { - __ sarl(first_reg, second_reg); + if (first.IsRegister()) { + Register first_reg = first.AsRegister<Register>(); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + __ shll(first_reg, second_reg); + } else if (op->IsShr()) { + __ sarl(first_reg, second_reg); + } else { + __ shrl(first_reg, second_reg); + } } else { - __ shrl(first_reg, second_reg); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); + if (op->IsShl()) { + __ shll(first_reg, imm); + } else if (op->IsShr()) { + __ sarl(first_reg, imm); + } else { + __ shrl(first_reg, imm); + } } } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); - if (op->IsShl()) { - __ shll(first_reg, imm); - } else if (op->IsShr()) { - __ sarl(first_reg, imm); + DCHECK(first.IsStackSlot()) << first; + Address addr(ESP, first.GetStackIndex()); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + __ shll(addr, second_reg); + } else if (op->IsShr()) { + __ sarl(addr, second_reg); + } else { + __ shrl(addr, second_reg); + } } else { - __ shrl(first_reg, imm); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); + if (op->IsShl()) { + __ shll(addr, imm); + } else if (op->IsShr()) { + __ sarl(addr, imm); + } else { + __ shrl(addr, imm); + } } } + break; } case Primitive::kPrimLong: { - Register second_reg = second.AsRegister<Register>(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - GenerateShlLong(first, second_reg); - } else if (op->IsShr()) { - GenerateShrLong(first, second_reg); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + GenerateShlLong(first, second_reg); + } else if (op->IsShr()) { + GenerateShrLong(first, second_reg); + } else { + GenerateUShrLong(first, second_reg); + } } else { - GenerateUShrLong(first, second_reg); + // Shift by a constant. + int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue; + // Nothing to do if the shift is 0, as the input is already the output. + if (shift != 0) { + if (op->IsShl()) { + GenerateShlLong(first, shift); + } else if (op->IsShr()) { + GenerateShrLong(first, shift); + } else { + GenerateUShrLong(first, shift); + } + } } break; } @@ -2705,6 +2829,26 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { } } +void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. High gets low, and low gets 0. 
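A minimal reference implementation, checked against the native 64-bit operators, of the constant-shift decomposition the new GenerateShlLong/GenerateUShrLong overloads perform on a (low, high) register pair: shift == 32 just moves one half, shift > 32 additionally shifts the moved half by shift - 32, and 1..31 combines bits from both halves the way shld/shrd do. The arithmetic GenerateShrLong differs only in sign-filling the high half with sarl.

#include <cassert>
#include <cstdint>

struct Halves { uint32_t low; uint32_t high; };

Halves ShlLong(Halves v, int shift) {
  if (shift == 32) return {0u, v.low};                    // high = low, low = 0
  if (shift > 32) return {0u, v.low << (shift - 32)};     // movl high, low; shll high; xorl low
  return {v.low << shift,                                 // shll low, #shift
          (v.high << shift) | (v.low >> (32 - shift))};   // shld high, low, #shift
}

Halves UShrLong(Halves v, int shift) {
  if (shift == 32) return {v.high, 0u};                   // low = high, high = 0
  if (shift > 32) return {v.high >> (shift - 32), 0u};    // movl low, high; shrl low; xorl high
  return {(v.low >> shift) | (v.high << (32 - shift)),    // shrd low, high, #shift
          v.high >> shift};                               // shrl high, #shift
}

int main() {
  const uint64_t x = 0x123456789abcdef0ULL;
  const Halves v{static_cast<uint32_t>(x), static_cast<uint32_t>(x >> 32)};
  for (int s = 1; s < 64; ++s) {
    const Halves shl = ShlLong(v, s);
    const Halves ushr = UShrLong(v, s);
    assert(((static_cast<uint64_t>(shl.high) << 32) | shl.low) == (x << s));
    assert(((static_cast<uint64_t>(ushr.high) << 32) | ushr.low) == (x >> s));
  }
  return 0;
}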
+ codegen_->EmitParallelMoves( + loc.ToLow(), loc.ToHigh(), + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToLow()); + } else if (shift > 32) { + // Low part becomes 0. High part is low part << (shift-32). + __ movl(high, low); + __ shll(high, Immediate(shift - 32)); + __ xorl(low, low); + } else { + // Between 1 and 31. + __ shld(high, low, Immediate(shift)); + __ shll(low, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) { Label done; __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter); @@ -2716,6 +2860,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Need to copy the sign. + DCHECK_NE(low, high); + __ movl(low, high); + __ sarl(high, Immediate(31)); + } else if (shift > 32) { + DCHECK_NE(low, high); + // High part becomes sign. Low part is shifted by shift - 32. + __ movl(low, high); + __ sarl(high, Immediate(31)); + __ shrl(low, Immediate(shift - 32)); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ sarl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -2727,6 +2892,26 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. Low gets high, and high gets 0. + codegen_->EmitParallelMoves( + loc.ToHigh(), loc.ToLow(), + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToHigh()); + } else if (shift > 32) { + // Low part is high >> (shift - 32). High part becomes 0. + __ movl(low, high); + __ shrl(low, Immediate(shift - 32)); + __ xorl(high, high); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ shrl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -3301,7 +3486,7 @@ void InstructionCodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruct Location obj = locations->InAt(0); if (obj.IsRegister()) { - __ cmpl(obj.AsRegister<Register>(), Immediate(0)); + __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>()); } else if (obj.IsStackSlot()) { __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0)); } else { @@ -3487,7 +3672,13 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Ensure the value is in a byte register. 
locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2))); } else { - locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); + bool is_fp_type = (value_type == Primitive::kPrimFloat) + || (value_type == Primitive::kPrimDouble); + if (is_fp_type) { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); + } } // Temporary registers for the write barrier. if (needs_write_barrier) { @@ -3766,23 +3957,43 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const { } void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, src + stack_offset)); - __ movl(Address(ESP, dst + stack_offset), temp_reg); + ScratchRegisterScope possible_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp = possible_scratch.GetRegister(); + if (temp == kNoRegister) { + // Use the stack. + __ pushl(Address(ESP, src)); + __ popl(Address(ESP, dst)); + } else { + Register temp_reg = static_cast<Register>(temp); + __ movl(temp_reg, Address(ESP, src)); + __ movl(Address(ESP, dst), temp_reg); + } } void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, src + stack_offset)); - __ movl(Address(ESP, dst + stack_offset), temp_reg); - __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize)); - __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg); + ScratchRegisterScope possible_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp = possible_scratch.GetRegister(); + if (temp == kNoRegister) { + // Use the stack instead. + // Push src low word. + __ pushl(Address(ESP, src)); + // Push src high word. Stack offset = 4. + __ pushl(Address(ESP, src + 4 /* offset */ + kX86WordSize /* high */)); + + // Pop into dst high word. Stack offset = 8. + // Pop with ESP address uses the 'after increment' value of ESP. + __ popl(Address(ESP, dst + 4 /* offset */ + kX86WordSize /* high */)); + // Finally dst low word. Stack offset = 4. + __ popl(Address(ESP, dst)); + } else { + Register temp_reg = static_cast<Register>(temp); + __ movl(temp_reg, Address(ESP, src)); + __ movl(Address(ESP, dst), temp_reg); + __ movl(temp_reg, Address(ESP, src + kX86WordSize)); + __ movl(Address(ESP, dst + kX86WordSize), temp_reg); + } } void ParallelMoveResolverX86::EmitMove(size_t index) { @@ -3847,10 +4058,18 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ xorps(dest, dest); } else { ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp = static_cast<Register>(ensure_scratch.GetRegister()); - __ movl(temp, Immediate(value)); - __ movd(dest, temp); + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = ensure_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + // Avoid spilling/restoring a scratch register by using the stack. 
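  // Illustrative note, not from the patch: the stack fallbacks above rely on two
  // x86 details. Each pushl drops ESP by kX86WordSize, so an operand that was at
  // [ESP + off] before a push is at [ESP + off + 4] after it, and a popl with an
  // ESP-relative destination forms its address with the already-incremented ESP.
  // Worked example for MoveMemoryToMemory64 with src = 8, dst = 16 (offsets from
  // the original ESP):
  //   pushl [ESP + 8]            // src low; ESP is now 4 lower
  //   pushl [ESP + 8 + 4 + 4]    // src high: +4 for the push, +4 for the high word
  //   popl  [ESP + 16 + 4 + 4]   // dst high: address uses ESP after its increment
  //   popl  [ESP + 16]           // dst low: ESP is back at its original value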
+ __ pushl(Immediate(value)); + __ movss(dest, Address(ESP, 0)); + __ addl(ESP, Immediate(4)); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, Immediate(value)); + __ movd(dest, temp); + } } } else { DCHECK(destination.IsStackSlot()) << destination; @@ -3899,42 +4118,96 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { } } -void ParallelMoveResolverX86::Exchange(Register reg, int mem) { - Register suggested_scratch = reg == EAX ? EBX : EAX; - ScratchRegisterScope ensure_scratch( - this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters()); +void ParallelMoveResolverX86::Exchange(Register reg1, Register reg2) { + // Prefer to avoid xchg as it isn't speedy on smaller processors. + ScratchRegisterScope possible_scratch( + this, reg1, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = possible_scratch.GetRegister(); + if (temp_reg == kNoRegister || temp_reg == reg2) { + __ pushl(reg1); + __ movl(reg1, reg2); + __ popl(reg2); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, reg1); + __ movl(reg1, reg2); + __ movl(reg2, temp); + } +} - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset)); - __ movl(Address(ESP, mem + stack_offset), reg); - __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister())); +void ParallelMoveResolverX86::Exchange(Register reg, int mem) { + ScratchRegisterScope possible_scratch( + this, reg, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = possible_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + __ pushl(Address(ESP, mem)); + __ movl(Address(ESP, mem + kX86WordSize), reg); + __ popl(reg); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, Address(ESP, mem)); + __ movl(Address(ESP, mem), reg); + __ movl(reg, temp); + } } void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, mem + stack_offset)); - __ movss(Address(ESP, mem + stack_offset), reg); - __ movd(reg, temp_reg); + ScratchRegisterScope possible_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = possible_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + __ pushl(Address(ESP, mem)); + __ movss(Address(ESP, mem + kX86WordSize), reg); + __ movss(reg, Address(ESP, 0)); + __ addl(ESP, Immediate(kX86WordSize)); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, Address(ESP, mem)); + __ movss(Address(ESP, mem), reg); + __ movd(reg, temp); + } } void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { - ScratchRegisterScope ensure_scratch1( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - - Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX; - ScratchRegisterScope ensure_scratch2( - this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters()); - - int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0; - stack_offset += ensure_scratch2.IsSpilled() ? 
kX86WordSize : 0; - __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); - __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset)); - __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister())); - __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister())); + ScratchRegisterScope possible_scratch1( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp_reg1 = possible_scratch1.GetRegister(); + if (temp_reg1 == kNoRegister) { + // No free registers. Use the stack. + __ pushl(Address(ESP, mem1)); + __ pushl(Address(ESP, mem2 + kX86WordSize)); + // Pop with ESP address uses the 'after increment' value of ESP. + __ popl(Address(ESP, mem1 + kX86WordSize)); + __ popl(Address(ESP, mem2)); + } else { + // Got the first one. Try for a second. + ScratchRegisterScope possible_scratch2( + this, temp_reg1, codegen_->GetNumberOfCoreRegisters()); + int temp_reg2 = possible_scratch2.GetRegister(); + if (temp_reg2 == kNoRegister) { + Register temp = static_cast<Register>(temp_reg1); + // Bummer. Only have one free register to use. + // Save mem1 on the stack. + __ pushl(Address(ESP, mem1)); + + // Copy mem2 into mem1. + __ movl(temp, Address(ESP, mem2 + kX86WordSize)); + __ movl(Address(ESP, mem1 + kX86WordSize), temp); + + // Now pop mem1 into mem2. + // Pop with ESP address uses the 'after increment' value of ESP. + __ popl(Address(ESP, mem2)); + } else { + // Great. We have 2 registers to play with. + Register temp1 = static_cast<Register>(temp_reg1); + Register temp2 = static_cast<Register>(temp_reg2); + DCHECK_NE(temp1, temp2); + __ movl(temp1, Address(ESP, mem1)); + __ movl(temp2, Address(ESP, mem2)); + __ movl(Address(ESP, mem2), temp1); + __ movl(Address(ESP, mem1), temp2); + } + } } void ParallelMoveResolverX86::EmitSwap(size_t index) { @@ -3943,7 +4216,7 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>()); + Exchange(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange(source.AsRegister<Register>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index a5489d2844..8c56e35329 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -106,6 +106,7 @@ class ParallelMoveResolverX86 : public ParallelMoveResolver { X86Assembler* GetAssembler() const; private: + void Exchange(Register reg1, Register Reg2); void Exchange(Register reg, int mem); void Exchange(int mem1, int mem2); void Exchange32(XmmRegister reg, int mem); @@ -171,6 +172,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); + void GenerateShlLong(const Location& loc, int shift); + void GenerateShrLong(const Location& loc, int shift); + void GenerateUShrLong(const Location& loc, int shift); void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void 
HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); @@ -179,6 +183,10 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -188,7 +196,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { class CodeGeneratorX86 : public CodeGenerator { public: - CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options); + CodeGeneratorX86(HGraph* graph, + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86() {} void GenerateFrameEntry() OVERRIDE; @@ -274,6 +284,10 @@ class CodeGeneratorX86 : public CodeGenerator { Label* GetFrameEntryLabel() { return &frame_entry_label_; } + const X86InstructionSetFeatures& GetInstructionSetFeatures() const { + return isa_features_; + } + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -282,6 +296,7 @@ class CodeGeneratorX86 : public CodeGenerator { InstructionCodeGeneratorX86 instruction_visitor_; ParallelMoveResolverX86 move_resolver_; X86Assembler assembler_; + const X86InstructionSetFeatures& isa_features_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86); }; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 2bb0349932..01b24ea33f 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -315,6 +315,27 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64); }; +class DeoptimizationSlowPathX86_64 : public SlowPathCodeX86_64 { + public: + explicit DeoptimizationSlowPathX86_64(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + __ gs()->call( + Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pDeoptimize), true)); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + codegen->RecordPcInfo(instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); +}; + #undef __ #define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())-> @@ -391,7 +412,9 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. 
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); -CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options) +CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, + const X86_64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, @@ -405,7 +428,9 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& c block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this) { + move_resolver_(graph->GetArena(), this), + isa_features_(isa_features), + constant_area_start_(0) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -458,7 +483,15 @@ void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const { } } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86_64Core(static_cast<int>(reg)); +} +static dwarf::Reg DWARFReg(FloatRegister reg) { + return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); +} + void CodeGeneratorX86_64::GenerateFrameEntry() { + __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address __ Bind(&frame_entry_label_); bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); @@ -478,17 +511,22 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ pushq(CpuRegister(reg)); + __ cfi().AdjustCFAOffset(kX86_64WordSize); + __ cfi().RelOffset(DWARFReg(reg), 0); } } - __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize())); + int adjust = GetFrameSize() - GetCoreSpillSize(); + __ subq(CpuRegister(RSP), Immediate(adjust)); + __ cfi().AdjustCFAOffset(adjust); uint32_t xmm_spill_location = GetFpuSpillStart(); size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { - __ movsd(Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)), - XmmRegister(kFpuCalleeSaves[i])); + int offset = xmm_spill_location + (xmm_spill_slot_size * i); + __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i])); + __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset); } } @@ -503,17 +541,22 @@ void CodeGeneratorX86_64::GenerateFrameExit() { size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { - __ movsd(XmmRegister(kFpuCalleeSaves[i]), - Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i))); + int offset = xmm_spill_location + (xmm_spill_slot_size * i); + __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset)); + __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i])); } } - __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize())); + int adjust = GetFrameSize() - GetCoreSpillSize(); + __ addq(CpuRegister(RSP), Immediate(adjust)); + __ cfi().AdjustCFAOffset(-adjust); for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ popq(CpuRegister(reg)); + __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize)); + __ cfi().Restore(DWARFReg(reg)); } } } @@ -532,7 
+575,6 @@ Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimLong: case Primitive::kPrimDouble: return Location::DoubleStackSlot(GetStackSlot(load->GetLocal())); - break; case Primitive::kPrimInt: case Primitive::kPrimNot: @@ -545,10 +587,11 @@ Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimShort: case Primitive::kPrimVoid: LOG(FATAL) << "Unexpected type " << load->GetType(); + UNREACHABLE(); } LOG(FATAL) << "Unreachable"; - return Location(); + UNREACHABLE(); } void CodeGeneratorX86_64::Move(Location destination, Location source) { @@ -607,7 +650,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { source.AsFpuRegister<XmmRegister>()); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - int64_t value = constant->AsLongConstant()->GetValue(); + int64_t value; if (constant->IsDoubleConstant()) { value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue()); } else { @@ -735,24 +778,17 @@ void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) { UNUSED(exit); } -void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::Any()); - } -} - -void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (always_true_target != nullptr) { + __ jmp(always_true_target); } return; } else { @@ -765,21 +801,20 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { // evaluated just before the if, we don't need to evaluate it // again. bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr); + && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction); if (materialized) { if (!eflags_set) { // Materialized condition, compare against 0. 
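  // Illustrative note, not from the patch: GenerateTestAndBranch is shared by
  // HIf and HDeoptimize. A null 'always_true_target' or 'false_target' means the
  // corresponding successor is the fall-through block, so no jump is emitted;
  // HDeoptimize passes the slow-path entry label for both true targets and null
  // for the false target, so execution falls through when no deopt is needed.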
- Location lhs = if_instr->GetLocations()->InAt(0); + Location lhs = instruction->GetLocations()->InAt(0); if (lhs.IsRegister()) { __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); } else { __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); } - __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(kNotEqual, true_target); } else { - __ j(X86_64Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target); } } else { Location lhs = cond->GetLocations()->InAt(0); @@ -797,16 +832,56 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } - __ j(X86_64Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (false_target != nullptr) { + __ jmp(false_target); + } +} + +void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); } } +void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } +} + +void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathX86_64(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} + void LocationsBuilderX86_64::VisitLocal(HLocal* local) { local->SetLocations(nullptr); } @@ -1068,8 +1143,11 @@ void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { @@ -1120,8 +1198,11 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); } } + __ 
cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -1181,7 +1262,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -1242,7 +1323,7 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -1896,7 +1977,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { case Primitive::kPrimDouble: case Primitive::kPrimFloat: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -1960,12 +2041,30 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { } case Primitive::kPrimFloat: { - __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ addss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ addss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ addsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ addsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -1993,7 +2092,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2031,12 +2130,30 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { } case Primitive::kPrimFloat: { - __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ subss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ subss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ subsd(first.AsFpuRegister<XmmRegister>(), 
second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ subsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ subsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2069,7 +2186,7 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2114,13 +2231,31 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { case Primitive::kPrimFloat: { DCHECK(first.Equals(locations->Out())); - __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ mulss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ mulss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { DCHECK(first.Equals(locations->Out())); - __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ mulsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ mulsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2493,7 +2628,7 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2518,12 +2653,30 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { } case Primitive::kPrimFloat: { - __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ divss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ divss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ divsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + 
DCHECK(second.IsDoubleStackSlot()); + __ divsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -3668,15 +3821,27 @@ void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) { ScratchRegisterScope ensure_scratch( - this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + this, TMP, codegen_->GetNumberOfCoreRegisters()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; - __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); - __ movq(CpuRegister(ensure_scratch.GetRegister()), - Address(CpuRegister(RSP), mem2 + stack_offset)); - __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); - __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), - CpuRegister(ensure_scratch.GetRegister())); + int temp_reg = ensure_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + // Use the stack as a temporary. + // Save mem1 on the stack. + __ pushq(Address(CpuRegister(RSP), mem1)); + + // Copy mem2 into mem1. + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem2 + kX86_64WordSize)); + __ movq(Address(CpuRegister(RSP), mem1 + kX86_64WordSize), CpuRegister(TMP)); + + // Now pop mem1 into mem2. + __ popq(Address(CpuRegister(RSP), mem2)); + } else { + CpuRegister temp = CpuRegister(temp_reg); + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1)); + __ movq(temp, Address(CpuRegister(RSP), mem2)); + __ movq(Address(CpuRegister(RSP), mem2), CpuRegister(TMP)); + __ movq(Address(CpuRegister(RSP), mem1), temp); + } } void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { @@ -3685,6 +3850,13 @@ void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { __ movd(reg, CpuRegister(TMP)); } +void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) { + // Prefer to avoid xchg as it isn't speedy on smaller processors. + __ movq(CpuRegister(TMP), reg1); + __ movq(reg1, reg2); + __ movq(reg2, CpuRegister(TMP)); +} + void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); __ movsd(Address(CpuRegister(RSP), mem), reg); @@ -3697,7 +3869,7 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); + Exchange64(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { @@ -4062,5 +4234,66 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) { LOG(FATAL) << "Unreachable"; } +void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { + // Generate the constant area if needed. + X86_64Assembler* assembler = GetAssembler(); + if (!assembler->IsConstantAreaEmpty()) { + // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 + // byte values. If used for vectors at a later time, this will need to be + // updated to 16 bytes with the appropriate offset. + assembler->Align(4, 0); + constant_area_start_ = assembler->CodeSize(); + assembler->AddConstantArea(); + } + + // And finish up. 
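  // Illustrative note, not from the patch: constant_area_start_ recorded above is
  // what the RIPFixup below adds to each literal's offset. RIP-relative operands
  // encode a 32-bit displacement from the end of the instruction, which is why
  // Process() patches the four bytes just before 'pos' (the offset of the next
  // instruction). Worked example: constant area at code offset 0x200, literal
  // 0x10 bytes into it, displacement field ending at pos = 0x54, so the stored
  // value is (0x200 + 0x10) - 0x54 = 0x1bc.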
+ CodeGenerator::Finalize(allocator); +} + +/** + * Class to handle late fixup of offsets into constant area. + */ +class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> { + public: + RIPFixup(const CodeGeneratorX86_64& codegen, int offset) + : codegen_(codegen), offset_into_constant_area_(offset) {} + + private: + void Process(const MemoryRegion& region, int pos) OVERRIDE { + // Patch the correct offset for the instruction. We use the address of the + // 'next' instruction, which is 'pos' (patch the 4 bytes before). + int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_; + int relative_position = constant_offset - pos; + + // Patch in the right value. + region.StoreUnaligned<int32_t>(pos - 4, relative_position); + } + + const CodeGeneratorX86_64& codegen_; + + // Location in constant area that the fixup refers to. + int offset_into_constant_area_; +}; + +Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralFloatAddress(float v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v)); + return Address::RIP(fixup); +} + } // namespace x86_64 } // namespace art diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index f6fbc2e6bc..61bf6ac71d 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -118,6 +118,7 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver { void Exchange32(CpuRegister reg, int mem); void Exchange32(XmmRegister reg, int mem); void Exchange32(int mem1, int mem2); + void Exchange64(CpuRegister reg1, CpuRegister reg2); void Exchange64(CpuRegister reg, int mem); void Exchange64(XmmRegister reg, int mem); void Exchange64(int mem1, int mem2); @@ -185,6 +186,10 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateExplicitNullCheck(HNullCheck* instruction); void PushOntoFPStack(Location source, uint32_t temp_offset, uint32_t stack_adjustment, bool is_float); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); X86_64Assembler* const assembler_; CodeGeneratorX86_64* const codegen_; @@ -194,7 +199,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { class CodeGeneratorX86_64 : public CodeGenerator { public: - CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options); + CodeGeneratorX86_64(HGraph* graph, + const X86_64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86_64() {} void GenerateFrameEntry() OVERRIDE; @@ -240,6 +247,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void Finalize(CodeAllocator* allocator) OVERRIDE; InstructionSet 
GetInstructionSet() const OVERRIDE { return InstructionSet::kX86_64; @@ -267,6 +275,19 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp); + const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const { + return isa_features_; + } + + int ConstantAreaStart() const { + return constant_area_start_; + } + + Address LiteralDoubleAddress(double v); + Address LiteralFloatAddress(float v); + Address LiteralInt32Address(int32_t v); + Address LiteralInt64Address(int64_t v); + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -275,6 +296,11 @@ class CodeGeneratorX86_64 : public CodeGenerator { InstructionCodeGeneratorX86_64 instruction_visitor_; ParallelMoveResolverX86_64 move_resolver_; X86_64Assembler assembler_; + const X86_64InstructionSetFeatures& isa_features_; + + // Offset to the start of the constant area in the assembled code. + // Used for fixups to the constant area. + int constant_area_start_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); }; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 6053ad51f4..2be117bf38 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -19,6 +19,8 @@ #include "arch/instruction_set.h" #include "arch/arm/instruction_set_features_arm.h" #include "arch/arm64/instruction_set_features_arm64.h" +#include "arch/x86/instruction_set_features_x86.h" +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "base/macros.h" #include "builder.h" #include "code_generator_arm.h" @@ -108,7 +110,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { InternalCodeAllocator allocator; CompilerOptions compiler_options; - x86::CodeGeneratorX86 codegenX86(graph, compiler_options); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); // We avoid doing a stack overflow check that requires the runtime being setup, // by making sure the compiler knows the methods we are running are leaf methods. 
codegenX86.CompileBaseline(&allocator, true); @@ -124,7 +128,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { Run(allocator, codegenARM, has_result, expected); } - x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options); + std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( + X86_64InstructionSetFeatures::FromCppDefines()); + x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); codegenX86_64.CompileBaseline(&allocator, true); if (kRuntimeISA == kX86_64) { Run(allocator, codegenX86_64, has_result, expected); @@ -175,10 +181,14 @@ static void RunCodeOptimized(HGraph* graph, compiler_options); RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86) { - x86::CodeGeneratorX86 codegenX86(graph, compiler_options); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86_64) { - x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options); + std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( + X86_64InstructionSetFeatures::FromCppDefines()); + x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected); } } diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index fd8c0c6242..966165bf4c 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -20,8 +20,8 @@ #include "locations.h" #include "nodes.h" #include "utils/arm64/assembler_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" namespace art { namespace arm64 { diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 6853d54c48..02ad675dc3 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -16,6 +16,7 @@ #include <functional> +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "constant_folding.h" #include "dead_code_elimination.h" @@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(expected_before, actual_before); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); HConstantFolding(graph).Run(); SSAChecker ssa_checker_cf(&allocator, graph); ssa_checker_cf.Run(); diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index a644719622..98ae1ec5d3 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "dead_code_elimination.h" #include "driver/compiler_options.h" @@ -40,7 +41,9 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(actual_before, expected_before); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); HDeadCodeElimination(graph).Run(); SSAChecker ssa_checker(&allocator, graph); ssa_checker.Run(); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 49c0d3884f..4c283788b5 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -337,13 +337,11 @@ class HGraphVisualizerPrinter : public HGraphVisitor { HGraphVisualizer::HGraphVisualizer(std::ostream* output, HGraph* graph, - const CodeGenerator& codegen, - const char* method_name) - : output_(output), graph_(graph), codegen_(codegen) { - if (output == nullptr) { - return; - } + const CodeGenerator& codegen) + : output_(output), graph_(graph), codegen_(codegen) {} +void HGraphVisualizer::PrintHeader(const char* method_name) const { + DCHECK(output_ != nullptr); HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_); printer.StartTag("compilation"); printer.PrintProperty("name", method_name); diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index bc553aed74..513bceb369 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -35,9 +35,9 @@ class HGraphVisualizer : public ValueObject { public: HGraphVisualizer(std::ostream* output, HGraph* graph, - const CodeGenerator& codegen, - const char* method_name); + const CodeGenerator& codegen); + void PrintHeader(const char* method_name) const; void DumpGraph(const char* pass_name, bool is_after_pass = true) const; private: diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 628a844cc7..20aa45f197 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -90,7 +90,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; case kIntrinsicReverseBytes: switch (GetType(method.d.data, true)) { case Primitive::kPrimShort: @@ -103,7 +102,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; // Abs. case kIntrinsicAbsDouble: @@ -166,7 +164,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; // Memory.poke. case kIntrinsicPoke: @@ -183,7 +180,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; // String. 
case kIntrinsicCharAt: @@ -211,7 +207,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; case kIntrinsicUnsafeGet: { const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile); switch (GetType(method.d.data, false)) { @@ -225,7 +220,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; } case kIntrinsicUnsafePut: { enum Sync { kNoSync, kVolatile, kOrdered }; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 33176f009c..94e27e912e 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -776,10 +776,10 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ mov(out, ShifterOperand(0), CC); } -void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke ATTRIBUTE_UNUSED) { +void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } -void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke ATTRIBUTE_UNUSED) { +void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 72d303c870..d1176c460f 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -28,8 +28,8 @@ #include "utils/arm64/assembler_arm64.h" #include "utils/arm64/constants_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" using namespace vixl; // NOLINT(build/namespaces) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 384737f55a..aec2d19b1d 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -16,6 +16,7 @@ #include "intrinsics_x86.h" +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" @@ -34,6 +35,11 @@ static constexpr int kDoubleNaNHigh = 0x7FF80000; static constexpr int kDoubleNaNLow = 0x00000000; static constexpr int kFloatNaN = 0x7FC00000; +IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen) + : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) { +} + + X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() { return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); } @@ -152,6 +158,7 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 { if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX); + RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; UNREACHABLE(); @@ -313,6 +320,27 @@ void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); } +void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) { + CreateLongToLongLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) { + 
LocationSummary* locations = invoke->GetLocations(); + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + X86Assembler* assembler = GetAssembler(); + // Assign the inputs to the outputs, mixing low/high. + __ movl(output_lo, input_hi); + __ movl(output_hi, input_lo); + __ bswapl(output_lo); + __ bswapl(output_hi); +} + void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) { CreateIntToIntLocations(arena_, invoke); } @@ -719,6 +747,149 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) { GetAssembler()->sqrtsd(out, in); } +static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) { + MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + + DCHECK(invoke->IsInvokeStaticOrDirect()); + codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX); + codegen->RecordPcInfo(invoke, invoke->GetDexPc()); + + // Copy the result back to the expected output. + Location out = invoke->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); + MoveFromReturnRegister(out, invoke->GetType(), codegen); + } +} + +static void CreateSSE41FPToFPLocations(ArenaAllocator* arena, + HInvoke* invoke, + CodeGeneratorX86* codegen) { + // Do we have instruction support? + if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { + CreateFPToFPLocations(arena, invoke); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + // Needs to be EAX for the invoke. + locations->AddTemp(Location::RegisterLocation(EAX)); +} + +static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen, + HInvoke* invoke, + X86Assembler* assembler, + int round_mode) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen, invoke); + } else { + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + __ roundsd(out, in, Immediate(round_mode)); + } +} + +void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2); +} + +void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1); +} + +void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); +} + +// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble, +// as it needs 64 bit instructions. 
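// Illustrative sketch, not from the patch: the float-rounding contract the
// SSE4.1 sequence below implements, in portable C++ (assumes <cmath>, <cstdint>
// and <limits>). Negative overflow is left out here; the generated code gets it
// from cvttss2si producing the minimum int for out-of-range inputs.
static int32_t RoundFloatReference(float in) {
  if (std::isnan(in)) {
    return 0;                                         // NaN rounds to 0
  }
  const int32_t max_int = std::numeric_limits<int32_t>::max();
  float floored = std::floor(in + 0.5f);              // bias by 0.5, then floor
  if (floored >= static_cast<float>(max_int)) {
    return max_int;                                   // clamp large inputs
  }
  return static_cast<int32_t>(floored);               // in range: plain conversion
}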
+void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { + // Do we have instruction support? + if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(EAX)); + // Needs to be EAX for the invoke. + locations->AddTemp(Location::RegisterLocation(EAX)); +} + +void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen_, invoke); + return; + } + + // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + Register out = locations->Out().AsRegister<Register>(); + XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + Label done, nan; + X86Assembler* assembler = GetAssembler(); + + // Generate 0.5 into inPlusPointFive. + __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f))); + __ movd(inPlusPointFive, out); + + // Add in the input. + __ addss(inPlusPointFive, in); + + // And truncate to an integer. + __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); + + __ movl(out, Immediate(kPrimIntMax)); + // maxInt = int-to-float(out) + __ cvtsi2ss(maxInt, out); + + // if inPlusPointFive >= maxInt goto done + __ comiss(inPlusPointFive, maxInt); + __ j(kAboveEqual, &done); + + // if input == NaN goto nan + __ j(kUnordered, &nan); + + // output = float-to-int-truncate(input) + __ cvttss2si(out, inPlusPointFive); + __ jmp(&done); + __ Bind(&nan); + + // output = 0 + __ xorl(out, out); + __ Bind(&done); +} + void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) { // The inputs plus one temp. LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -1180,6 +1351,181 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); } +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + // Offset is a long, but in 32 bit mode, we only need the low word. + // Can we update the invoke here to remove a TypeConvert to Long? + locations->SetInAt(2, Location::RequiresRegister()); + // Expected value must be in EAX or EDX:EAX. + // For long, new value must be in ECX:EBX. 
+ if (type == Primitive::kPrimLong) { + locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX)); + locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX)); + } else { + locations->SetInAt(3, Location::RegisterLocation(EAX)); + locations->SetInAt(4, Location::RequiresRegister()); + } + + // Force a byte register for the output. + locations->SetOut(Location::RegisterLocation(EAX)); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. + locations->AddTemp(Location::RequiresRegister()); + // Need a byte register for marking. + locations->AddTemp(Location::RegisterLocation(ECX)); + } +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); +} + +static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register base = locations->InAt(1).AsRegister<Register>(); + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); + Location out = locations->Out(); + DCHECK_EQ(out.AsRegister<Register>(), EAX); + + if (type == Primitive::kPrimLong) { + DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX); + DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX); + DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX); + DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX); + __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0)); + } else { + // Integer or object. + DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); + Register value = locations->InAt(4).AsRegister<Register>(); + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), + locations->GetTemp(1).AsRegister<Register>(), + base, + value); + } + + __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + } + + // locked cmpxchg has full barrier semantics, and we don't need scheduling + // barriers at this time. + + // Convert ZF into the boolean result. 
+ __ setb(kZero, out.AsRegister<Register>()); + __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) { + GenCAS(Primitive::kPrimInt, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { + GenCAS(Primitive::kPrimLong, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { + GenCAS(Primitive::kPrimNot, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask, + X86Assembler* assembler) { + Immediate imm_shift(shift); + Immediate imm_mask(mask); + __ movl(temp, reg); + __ shrl(reg, imm_shift); + __ andl(temp, imm_mask); + __ andl(reg, imm_mask); + __ shll(temp, imm_shift); + __ orl(reg, temp); +} + +void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register reg = locations->InAt(0).AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; + * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; + * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; + */ + __ bswapl(reg); + SwapBits(reg, temp, 1, 0x55555555, assembler); + SwapBits(reg, temp, 2, 0x33333333, assembler); + SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); +} + +void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>(); + Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + // We want to swap high/low, then bswap each one, and then do the same + // as a 32 bit reverse. + // Exchange high and low. 
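+ // reverse64(x) = (reverse32(lo(x)) << 32) | reverse32(hi(x)), so swap the halves
+ // first and then run each half through the same bswap + SwapBits sequence.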
+ __ movl(temp, reg_low); + __ movl(reg_low, reg_high); + __ movl(reg_high, temp); + + // bit-reverse low + __ bswapl(reg_low); + SwapBits(reg_low, temp, 1, 0x55555555, assembler); + SwapBits(reg_low, temp, 2, 0x33333333, assembler); + SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler); + + // bit-reverse high + __ bswapl(reg_high); + SwapBits(reg_high, temp, 1, 0x55555555, assembler); + SwapBits(reg_high, temp, 2, 0x33333333, assembler); + SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1188,20 +1534,10 @@ void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(LongReverseBytes) -UNIMPLEMENTED_INTRINSIC(MathFloor) -UNIMPLEMENTED_INTRINSIC(MathCeil) -UNIMPLEMENTED_INTRINSIC(MathRint) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(MathRoundFloat) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) -UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) -UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) } // namespace x86 diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index e1e8260a5f..4292ec7b99 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -32,7 +32,7 @@ class X86Assembler; class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { public: - explicit IntrinsicLocationsBuilderX86(ArenaAllocator* arena) : arena_(arena) {} + explicit IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen); // Define visitor methods. @@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) private: ArenaAllocator* arena_; + CodeGeneratorX86* codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86); }; diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 736cea88cb..cbf94f0f81 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -16,6 +16,7 @@ #include "intrinsics_x86_64.h" +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator_x86_64.h" #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" @@ -30,6 +31,11 @@ namespace art { namespace x86_64 { +IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen) + : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) { +} + + X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() { return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); } @@ -292,25 +298,27 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) // TODO: Allow x86 to work with memory. This requires assembler support, see below. // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly. locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); // Immediate constant. - locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above. + locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. 
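+ // Note: the GP temp that used to hold the 64-bit immediate is gone; MathAbsFP now
+ // loads the sign-clearing mask from the constant area via LiteralInt64Address() /
+ // LiteralInt32Address().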
} -static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { +static void MathAbsFP(LocationSummary* locations, + bool is64bit, + X86_64Assembler* assembler, + CodeGeneratorX86_64* codegen) { Location output = locations->Out(); - CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); if (output.IsFpuRegister()) { // In-register - XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // TODO: Can mask directly with constant area using pand if we can guarantee + // that the literal is aligned on a 16 byte boundary. This will avoid a + // temporary. if (is64bit) { - __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF))); - __ movd(xmm_temp, cpu_temp); + __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); } else { - __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF))); - __ movd(xmm_temp, cpu_temp); + __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); } } else { @@ -335,7 +343,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), true, GetAssembler()); + MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { @@ -343,7 +351,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), false, GetAssembler()); + MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_); } static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { @@ -393,8 +401,11 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); } -static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, - X86_64Assembler* assembler) { +static void GenMinMaxFP(LocationSummary* locations, + bool is_min, + bool is_double, + X86_64Assembler* assembler, + CodeGeneratorX86_64* codegen) { Location op1_loc = locations->InAt(0); Location op2_loc = locations->InAt(1); Location out_loc = locations->Out(); @@ -421,7 +432,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, // // This removes one jmp, but needs to copy one input (op1) to out. // - // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? + // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); @@ -455,14 +466,11 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, // NaN handling. __ Bind(&nan); - CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access. 
if (is_double) { - __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000))); + __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000))); } else { - __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000))); + __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000))); } - __ movd(out, cpu_temp, is_double); __ jmp(&done); // out := op2; @@ -477,7 +485,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, __ Bind(&done); } -static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -486,39 +494,38 @@ static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invo // The following is sub-optimal, but all we can do for now. It would be fine to also accept // the second input to be the output (we can simply swap inputs). locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); // Immediate constant. } void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_); } static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, @@ -614,6 +621,203 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) { GetAssembler()->sqrtsd(out, in); } +static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) { + MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + + DCHECK(invoke->IsInvokeStaticOrDirect()); + codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI)); + codegen->RecordPcInfo(invoke, invoke->GetDexPc()); + + // Copy the result back to the expected output. 
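+ // The call leaves its result in the ABI return register (RAX for integral values,
+ // XMM0 for floating point); move it to wherever the allocator placed the output.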
+ Location out = invoke->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); + MoveFromReturnRegister(out, invoke->GetType(), codegen); + } +} + +static void CreateSSE41FPToFPLocations(ArenaAllocator* arena, + HInvoke* invoke, + CodeGeneratorX86_64* codegen) { + // Do we have instruction support? + if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { + CreateFPToFPLocations(arena, invoke); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + // Needs to be RDI for the invoke. + locations->AddTemp(Location::RegisterLocation(RDI)); +} + +static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen, + HInvoke* invoke, + X86_64Assembler* assembler, + int round_mode) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen, invoke); + } else { + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + __ roundsd(out, in, Immediate(round_mode)); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); +} + +static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + CodeGeneratorX86_64* codegen) { + // Do we have instruction support? + if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(RAX)); + // Needs to be RDI for the invoke. 
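+ // (RDI is the register InvokeOutOfLineIntrinsic hands to GenerateStaticOrDirectCall
+ // for the method pointer.)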
+ locations->AddTemp(Location::RegisterLocation(RDI)); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) { + CreateSSE41FPToIntLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen_, invoke); + return; + } + + // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + Label done, nan; + X86_64Assembler* assembler = GetAssembler(); + + // Generate 0.5 into inPlusPointFive. + __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f))); + __ movd(inPlusPointFive, out, false); + + // Add in the input. + __ addss(inPlusPointFive, in); + + // And truncate to an integer. + __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); + + __ movl(out, Immediate(kPrimIntMax)); + // maxInt = int-to-float(out) + __ cvtsi2ss(maxInt, out); + + // if inPlusPointFive >= maxInt goto done + __ comiss(inPlusPointFive, maxInt); + __ j(kAboveEqual, &done); + + // if input == NaN goto nan + __ j(kUnordered, &nan); + + // output = float-to-int-truncate(input) + __ cvttss2si(out, inPlusPointFive); + __ jmp(&done); + __ Bind(&nan); + + // output = 0 + __ xorl(out, out); + __ Bind(&done); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) { + CreateSSE41FPToIntLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen_, invoke); + return; + } + + // Implement RoundDouble as t1 = floor(input + 0.5); convert to long. + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + Label done, nan; + X86_64Assembler* assembler = GetAssembler(); + + // Generate 0.5 into inPlusPointFive. + __ movq(out, Immediate(bit_cast<int64_t, double>(0.5))); + __ movd(inPlusPointFive, out, true); + + // Add in the input. + __ addsd(inPlusPointFive, in); + + // And truncate to an integer. + __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1)); + + __ movq(out, Immediate(kPrimLongMax)); + // maxLong = long-to-double(out) + __ cvtsi2sd(maxLong, out, true); + + // if inPlusPointFive >= maxLong goto done + __ comisd(inPlusPointFive, maxLong); + __ j(kAboveEqual, &done); + + // if input == NaN goto nan + __ j(kUnordered, &nan); + + // output = double-to-long-truncate(input) + __ cvttsd2si(out, inPlusPointFive, true); + __ jmp(&done); + __ Bind(&nan); + + // output = 0 + __ xorq(out, out); + __ Bind(&done); +} + void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) { // The inputs plus one temp. 
LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -999,6 +1203,175 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); } +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + // expected value must be in EAX/RAX. + locations->SetInAt(3, Location::RegisterLocation(RAX)); + locations->SetInAt(4, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister()); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); +} + +static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); + CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>(); + DCHECK_EQ(expected.AsRegister(), RAX); + CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + if (type == Primitive::kPrimLong) { + __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value); + } else { + // Integer or object. + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), + locations->GetTemp(1).AsRegister<CpuRegister>(), + base, + value); + } + + __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + } + + // locked cmpxchg has full barrier semantics, and we don't need scheduling + // barriers at this time. + + // Convert ZF into the boolean result. 
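+ // As on x86, ZF from the locked cmpxchg becomes the boolean result. Any output
+ // register is fine here: with a REX prefix setcc can address the low byte of
+ // every GP register, so no byte-register pinning is needed.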
+ __ setcc(kZero, out); + __ movzxb(out, out); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) { + GenCAS(Primitive::kPrimInt, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { + GenCAS(Primitive::kPrimLong, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + GenCAS(Primitive::kPrimNot, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask, + X86_64Assembler* assembler) { + Immediate imm_shift(shift); + Immediate imm_mask(mask); + __ movl(temp, reg); + __ shrl(reg, imm_shift); + __ andl(temp, imm_mask); + __ andl(reg, imm_mask); + __ shll(temp, imm_shift); + __ orl(reg, temp); +} + +void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; + * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; + * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; + */ + __ bswapl(reg); + SwapBits(reg, temp, 1, 0x55555555, assembler); + SwapBits(reg, temp, 2, 0x33333333, assembler); + SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); +} + +void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask, + int32_t shift, int64_t mask, X86_64Assembler* assembler) { + Immediate imm_shift(shift); + __ movq(temp_mask, Immediate(mask)); + __ movq(temp, reg); + __ shrq(reg, imm_shift); + __ andq(temp, temp_mask); + __ andq(reg, temp_mask); + __ shlq(temp, imm_shift); + __ orq(reg, temp); +} + +void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a long number x. 
Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555; + * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333; + * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F; + */ + __ bswapq(reg); + SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler); + SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler); + SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1007,19 +1380,9 @@ void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(MathFloor) -UNIMPLEMENTED_INTRINSIC(MathCeil) -UNIMPLEMENTED_INTRINSIC(MathRint) -UNIMPLEMENTED_INTRINSIC(MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(MathRoundFloat) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) -UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) -UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) } // namespace x86_64 diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index dfae7fa90e..0e0e72c1fc 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -32,7 +32,7 @@ class X86_64Assembler; class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { public: - explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {} + explicit IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen); // Define visitor methods. @@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) private: ArenaAllocator* arena_; + CodeGeneratorX86_64* codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64); }; diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index f22b7a7e82..28c5555d57 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -16,6 +16,7 @@ #include <fstream> +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" @@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num graph->TryBuildingSsa(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index c102c4f02f..61d6593f2b 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" @@ -65,7 +66,9 @@ TEST(LiveRangesTest, CFG1) { ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -111,7 +114,9 @@ TEST(LiveRangesTest, CFG2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -160,7 +165,9 @@ TEST(LiveRangesTest, CFG3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -237,7 +244,9 @@ TEST(LiveRangesTest, Loop1) { ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); RemoveSuspendChecks(graph); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -315,7 +324,9 @@ TEST(LiveRangesTest, Loop2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -391,7 +402,9 @@ TEST(LiveRangesTest, CFG4) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 0b0cfde0cf..81250ca133 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" @@ -53,7 +54,9 @@ static void TestCode(const uint16_t* data, const char* expected) { graph->TryBuildingSsa(); // `Inline` conditions into ifs. 
PrepareForRegisterAllocation(graph).Run(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index dca612e6b7..d8a8554610 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -752,8 +752,8 @@ HInstruction* HBinaryOperation::GetLeastConstantLeft() const { } } -bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const { - return this == if_->GetPreviousDisregardingMoves(); +bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const { + return this == instruction->GetPreviousDisregardingMoves(); } bool HInstruction::Equals(HInstruction* other) const { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 21ed3504f1..f764eb421f 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -682,6 +682,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ M(Condition, BinaryOperation) \ + M(Deoptimize, Instruction) \ M(Div, BinaryOperation) \ M(DivZeroCheck, Instruction) \ M(DoubleConstant, Constant) \ @@ -1191,7 +1192,17 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { bool HasEnvironment() const { return environment_ != nullptr; } HEnvironment* GetEnvironment() const { return environment_; } - void SetEnvironment(HEnvironment* environment) { environment_ = environment; } + // Set the `environment_` field. Raw because this method does not + // update the uses lists. + void SetRawEnvironment(HEnvironment* environment) { environment_ = environment; } + + // Set the environment of this instruction, copying it from `environment`. While + // copying, the uses lists are being updated. + void CopyEnvironmentFrom(HEnvironment* environment) { + ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); + environment_ = new (allocator) HEnvironment(allocator, environment->Size()); + environment_->CopyFrom(environment); + } // Returns the number of entries in the environment. Typically, that is the // number of dex registers in a method. It could be more in case of inlining. @@ -1544,12 +1555,31 @@ class HIf : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(If); - virtual bool IsIfInstruction() const { return true; } - private: DISALLOW_COPY_AND_ASSIGN(HIf); }; +// Deoptimize to interpreter, upon checking a condition. +class HDeoptimize : public HTemplateInstruction<1> { + public: + HDeoptimize(HInstruction* cond, uint32_t dex_pc) + : HTemplateInstruction(SideEffects::None()), + dex_pc_(dex_pc) { + SetRawInputAt(0, cond); + } + + bool NeedsEnvironment() const OVERRIDE { return true; } + bool CanThrow() const OVERRIDE { return true; } + uint32_t GetDexPc() const { return dex_pc_; } + + DECLARE_INSTRUCTION(Deoptimize); + + private: + uint32_t dex_pc_; + + DISALLOW_COPY_AND_ASSIGN(HDeoptimize); +}; + class HUnaryOperation : public HExpression<1> { public: HUnaryOperation(Primitive::Type result_type, HInstruction* input) @@ -1667,8 +1697,8 @@ class HCondition : public HBinaryOperation { void ClearNeedsMaterialization() { needs_materialization_ = false; } // For code generation purposes, returns whether this instruction is just before - // `if_`, and disregard moves in between. 
- bool IsBeforeWhenDisregardMoves(HIf* if_) const; + // `instruction`, and disregard moves in between. + bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const; DECLARE_INSTRUCTION(Condition); @@ -2307,6 +2337,9 @@ class HNewArray : public HExpression<1> { // Calls runtime so needs an environment. bool NeedsEnvironment() const OVERRIDE { return true; } + // May throw NegativeArraySizeException, OutOfMemoryError, etc. + bool CanThrow() const OVERRIDE { return true; } + bool CanBeNull() const OVERRIDE { return false; } QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; } diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 4cf22d3b2e..4e83ce576c 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -50,7 +50,7 @@ TEST(Node, RemoveInstruction) { exit_block->AddInstruction(new (&allocator) HExit()); HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1); - null_check->SetEnvironment(environment); + null_check->SetRawEnvironment(environment); environment->SetRawEnvAt(0, parameter); parameter->AddEnvUseAt(null_check->GetEnvironment(), 0); diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc new file mode 100644 index 0000000000..6d986ba7d3 --- /dev/null +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <memory> +#include <vector> + +#include "arch/instruction_set.h" +#include "cfi_test.h" +#include "gtest/gtest.h" +#include "optimizing/code_generator.h" +#include "utils/assembler.h" + +#include "optimizing/optimizing_cfi_test_expected.inc" + +namespace art { + +// Run the tests only on host. +#ifndef HAVE_ANDROID_OS + +class OptimizingCFITest : public CFITest { + public: + // Enable this flag to generate the expected outputs. + static constexpr bool kGenerateExpected = false; + + void TestImpl(InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + // Setup simple context. + ArenaPool pool; + ArenaAllocator allocator(&pool); + CompilerOptions opts; + std::unique_ptr<const InstructionSetFeatures> isa_features; + std::string error; + isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); + HGraph graph(&allocator); + // Generate simple frame with some spills. + std::unique_ptr<CodeGenerator> code_gen( + CodeGenerator::Create(&graph, isa, *isa_features.get(), opts)); + const int frame_size = 64; + int core_reg = 0; + int fp_reg = 0; + for (int i = 0; i < 2; i++) { // Two registers of each kind. 
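+ // Grab the first two callee-save core registers and the first two callee-save
+ // FP registers, so that GenerateFrameEntry() below has registers to spill and
+ // therefore emits the CFI this test checks.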
+ for (; core_reg < 32; core_reg++) { + if (code_gen->IsCoreCalleeSaveRegister(core_reg)) { + auto location = Location::RegisterLocation(core_reg); + code_gen->AddAllocatedRegister(location); + core_reg++; + break; + } + } + for (; fp_reg < 32; fp_reg++) { + if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) { + auto location = Location::FpuRegisterLocation(fp_reg); + code_gen->AddAllocatedRegister(location); + fp_reg++; + break; + } + } + } + code_gen->ComputeSpillMask(); + code_gen->SetFrameSize(frame_size); + code_gen->GenerateFrameEntry(); + code_gen->GetInstructionVisitor()->VisitReturnVoid(new (&allocator) HReturnVoid()); + // Get the outputs. + InternalCodeAllocator code_allocator; + code_gen->Finalize(&code_allocator); + const std::vector<uint8_t>& actual_asm = code_allocator.GetMemory(); + Assembler* opt_asm = code_gen->GetAssembler(); + const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data()); + + if (kGenerateExpected) { + GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + } else { + EXPECT_EQ(expected_asm, actual_asm); + EXPECT_EQ(expected_cfi, actual_cfi); + } + } + + private: + class InternalCodeAllocator : public CodeAllocator { + public: + InternalCodeAllocator() {} + + virtual uint8_t* Allocate(size_t size) { + memory_.resize(size); + return memory_.data(); + } + + const std::vector<uint8_t>& GetMemory() { return memory_; } + + private: + std::vector<uint8_t> memory_; + + DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); + }; +}; + +#define TEST_ISA(isa) \ + TEST_F(OptimizingCFITest, isa) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ + } + +TEST_ISA(kThumb2) +TEST_ISA(kArm64) +TEST_ISA(kX86) +TEST_ISA(kX86_64) + +#endif // HAVE_ANDROID_OS + +} // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc new file mode 100644 index 0000000000..2125f6eb01 --- /dev/null +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -0,0 +1,141 @@ +static constexpr uint8_t expected_asm_kThumb2[] = { + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0, + 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, +}; +static constexpr uint8_t expected_cfi_kThumb2[] = { + 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42, + 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: push {r5, r6, lr} +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: .cfi_offset: r6 at cfa-8 +// 0x00000002: .cfi_offset: r14 at cfa-4 +// 0x00000002: vpush.f32 {s16-s17} +// 0x00000006: .cfi_def_cfa_offset: 20 +// 0x00000006: .cfi_offset_extended: r80 at cfa-20 +// 0x00000006: .cfi_offset_extended: r81 at cfa-16 +// 0x00000006: sub sp, sp, #44 +// 0x00000008: .cfi_def_cfa_offset: 64 +// 0x00000008: str r0, [sp, #0] +// 0x0000000a: .cfi_remember_state +// 0x0000000a: add sp, sp, #44 +// 0x0000000c: .cfi_def_cfa_offset: 20 +// 0x0000000c: vpop.f32 {s16-s17} +// 0x00000010: .cfi_def_cfa_offset: 12 +// 0x00000010: .cfi_restore_extended: r80 +// 0x00000010: .cfi_restore_extended: r81 +// 0x00000010: pop {r5, r6, pc} +// 0x00000012: .cfi_restore_state +// 0x00000012: .cfi_def_cfa_offset: 64 
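+// The remaining blocks follow the same layout: raw assembly bytes, raw CFI bytes,
+// then the interleaved disassembly/.cfi_ listing produced by GenerateExpected()
+// when kGenerateExpected is enabled.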
+ +static constexpr uint8_t expected_asm_kArm64[] = { + 0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9, + 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9, + 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, +}; +static constexpr uint8_t expected_cfi_kArm64[] = { + 0x44, 0x0E, 0x40, 0x44, 0x93, 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44, + 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49, + 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: str w0, [sp, #-64]! +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: stp x19, x20, [sp, #40] +// 0x00000008: .cfi_offset: r19 at cfa-24 +// 0x00000008: .cfi_offset: r20 at cfa-16 +// 0x00000008: str lr, [sp, #56] +// 0x0000000c: .cfi_offset: r30 at cfa-8 +// 0x0000000c: stp d8, d9, [sp, #24] +// 0x00000010: .cfi_offset_extended: r72 at cfa-40 +// 0x00000010: .cfi_offset_extended: r73 at cfa-32 +// 0x00000010: .cfi_remember_state +// 0x00000010: ldp d8, d9, [sp, #24] +// 0x00000014: .cfi_restore_extended: r72 +// 0x00000014: .cfi_restore_extended: r73 +// 0x00000014: ldp x19, x20, [sp, #40] +// 0x00000018: .cfi_restore: r19 +// 0x00000018: .cfi_restore: r20 +// 0x00000018: ldr lr, [sp, #56] +// 0x0000001c: .cfi_restore: r30 +// 0x0000001c: add sp, sp, #0x40 (64) +// 0x00000020: .cfi_def_cfa_offset: 0 +// 0x00000020: ret +// 0x00000024: .cfi_restore_state +// 0x00000024: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86[] = { + 0x56, 0x55, 0x83, 0xEC, 0x34, 0x89, 0x04, 0x24, 0x83, 0xC4, 0x34, 0x5D, + 0x5E, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86[] = { + 0x41, 0x0E, 0x08, 0x86, 0x02, 0x41, 0x0E, 0x0C, 0x85, 0x03, 0x43, 0x0E, + 0x40, 0x43, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E, + 0x04, 0xC6, 0x41, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: push esi +// 0x00000001: .cfi_def_cfa_offset: 8 +// 0x00000001: .cfi_offset: r6 at cfa-8 +// 0x00000001: push ebp +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: sub esp, 52 +// 0x00000005: .cfi_def_cfa_offset: 64 +// 0x00000005: mov [esp], eax +// 0x00000008: .cfi_remember_state +// 0x00000008: add esp, 52 +// 0x0000000b: .cfi_def_cfa_offset: 12 +// 0x0000000b: pop ebp +// 0x0000000c: .cfi_def_cfa_offset: 8 +// 0x0000000c: .cfi_restore: r5 +// 0x0000000c: pop esi +// 0x0000000d: .cfi_def_cfa_offset: 4 +// 0x0000000d: .cfi_restore: r6 +// 0x0000000d: ret +// 0x0000000e: .cfi_restore_state +// 0x0000000e: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86_64[] = { + 0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24, + 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x89, 0x3C, 0x24, 0xF2, + 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, + 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86_64[] = { + 0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E, + 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x43, 0x0A, 0x47, 0xDD, 0x47, + 0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6, + 0x41, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: push rbp +// 0x00000001: .cfi_def_cfa_offset: 16 +// 0x00000001: .cfi_offset: r6 at cfa-16 +// 0x00000001: push rbx +// 0x00000002: .cfi_def_cfa_offset: 24 +// 0x00000002: .cfi_offset: r3 at cfa-24 +// 0x00000002: subq rsp, 40 +// 0x00000006: .cfi_def_cfa_offset: 64 +// 0x00000006: movsd [rsp + 32], xmm13 +// 0x0000000d: 
.cfi_offset: r30 at cfa-32 +// 0x0000000d: movsd [rsp + 24], xmm12 +// 0x00000014: .cfi_offset: r29 at cfa-40 +// 0x00000014: mov [rsp], edi +// 0x00000017: .cfi_remember_state +// 0x00000017: movsd xmm12, [rsp + 24] +// 0x0000001e: .cfi_restore: r29 +// 0x0000001e: movsd xmm13, [rsp + 32] +// 0x00000025: .cfi_restore: r30 +// 0x00000025: addq rsp, 40 +// 0x00000029: .cfi_def_cfa_offset: 24 +// 0x00000029: pop rbx +// 0x0000002a: .cfi_def_cfa_offset: 16 +// 0x0000002a: .cfi_restore: r3 +// 0x0000002a: pop rbp +// 0x0000002b: .cfi_def_cfa_offset: 8 +// 0x0000002b: .cfi_restore: r6 +// 0x0000002b: ret +// 0x0000002c: .cfi_restore_state +// 0x0000002c: .cfi_def_cfa_offset: 64 + diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index b2f9c65153..0e02212867 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -26,11 +26,13 @@ #include "bounds_check_elimination.h" #include "builder.h" #include "code_generator.h" +#include "compiled_method.h" #include "compiler.h" #include "constant_folding.h" #include "dead_code_elimination.h" #include "dex/quick/dex_file_to_method_inliner_map.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "elf_writer_quick.h" #include "graph_visualizer.h" @@ -48,6 +50,7 @@ #include "ssa_builder.h" #include "ssa_phi_elimination.h" #include "ssa_liveness_analysis.h" +#include "utils/assembler.h" #include "reference_type_propagation.h" namespace art { @@ -94,10 +97,13 @@ class PassInfoPrinter : public ValueObject { timing_logger_enabled_(compiler_driver->GetDumpPasses()), timing_logger_(method_name, true, true), visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), - visualizer_(visualizer_output, graph, codegen, method_name_) { + visualizer_(visualizer_output, graph, codegen) { if (strstr(method_name, kStringFilter) == nullptr) { timing_logger_enabled_ = visualizer_enabled_ = false; } + if (visualizer_enabled_) { + visualizer_.PrintHeader(method_name_); + } } ~PassInfoPrinter() { @@ -199,8 +205,13 @@ class OptimizingCompiler FINAL : public Compiler { const std::vector<const art::DexFile*>& dex_files, const std::string& android_root, bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); + if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) { + return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } else { + return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } } void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; @@ -360,6 +371,9 @@ static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) { return ArrayRef<const uint8_t>(vector); } +// TODO: The function below uses too much stack space. 
+#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wframe-larger-than=" CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeGenerator* codegen, @@ -385,12 +399,17 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeVectorAllocator allocator; codegen->CompileOptimized(&allocator); + DefaultSrcMap src_mapping_table; + if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) { + codegen->BuildSourceMap(&src_mapping_table); + } + std::vector<uint8_t> stack_map; codegen->BuildStackMaps(&stack_map); compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized); - return CompiledMethod::SwapAllocCompiledMethodStackMap( + return CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), ArrayRef<const uint8_t>(allocator.GetMemory()), @@ -400,9 +419,15 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const uint8_t>(stack_map)); + &src_mapping_table, + ArrayRef<const uint8_t>(), // mapping_table. + ArrayRef<const uint8_t>(stack_map), + ArrayRef<const uint8_t>(), // native_gc_map. + ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), + ArrayRef<const LinkerPatch>()); } +#pragma GCC diagnostic pop CompiledMethod* OptimizingCompiler::CompileBaseline( CodeGenerator* codegen, @@ -412,9 +437,11 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( codegen->CompileBaseline(&allocator); std::vector<uint8_t> mapping_table; + codegen->BuildMappingTable(&mapping_table); DefaultSrcMap src_mapping_table; - bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(); - codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr); + if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) { + codegen->BuildSourceMap(&src_mapping_table); + } std::vector<uint8_t> vmap_table; codegen->BuildVMapTable(&vmap_table); std::vector<uint8_t> gc_map; @@ -435,7 +462,8 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( AlignVectorSize(mapping_table), AlignVectorSize(vmap_table), AlignVectorSize(gc_map), - ArrayRef<const uint8_t>()); + ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), + ArrayRef<const LinkerPatch>()); } CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item, @@ -501,6 +529,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen); return nullptr; } + codegen->GetAssembler()->cfi().SetEnabled( + compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()); PassInfoPrinter pass_info_printer(graph, method_name.c_str(), diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 7d0641ec13..4936685367 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include <iostream> #include "parallel_move_resolver.h" #include "nodes.h" @@ -63,39 +64,42 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { } } +Location LowOf(Location location) { + if (location.IsRegisterPair()) { + return Location::RegisterLocation(location.low()); + } else if (location.IsFpuRegisterPair()) { + return Location::FpuRegisterLocation(location.low()); + } else if (location.IsDoubleStackSlot()) { + return Location::StackSlot(location.GetStackIndex()); + } else { + return Location::NoLocation(); + } +} + +Location HighOf(Location location) { + if (location.IsRegisterPair()) { + return Location::RegisterLocation(location.high()); + } else if (location.IsFpuRegisterPair()) { + return Location::FpuRegisterLocation(location.high()); + } else if (location.IsDoubleStackSlot()) { + return Location::StackSlot(location.GetHighStackIndex(4)); + } else { + return Location::NoLocation(); + } +} + // Update the source of `move`, knowing that `updated_location` has been swapped // with `new_source`. Note that `updated_location` can be a pair, therefore if // `move` is non-pair, we need to extract which register to use. static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) { Location source = move->GetSource(); - if (new_source.GetKind() == source.GetKind()) { - DCHECK(updated_location.Equals(source)); - move->SetSource(new_source); - } else if (new_source.IsStackSlot() - || new_source.IsDoubleStackSlot() - || source.IsStackSlot() - || source.IsDoubleStackSlot()) { - // Stack slots never take part of a pair/non-pair swap. - DCHECK(updated_location.Equals(source)); + if (LowOf(updated_location).Equals(source)) { + move->SetSource(LowOf(new_source)); + } else if (HighOf(updated_location).Equals(source)) { + move->SetSource(HighOf(new_source)); + } else { + DCHECK(updated_location.Equals(source)) << updated_location << " " << source; move->SetSource(new_source); - } else if (source.IsRegister()) { - DCHECK(new_source.IsRegisterPair()) << new_source; - DCHECK(updated_location.IsRegisterPair()) << updated_location; - if (updated_location.low() == source.reg()) { - move->SetSource(Location::RegisterLocation(new_source.low())); - } else { - DCHECK_EQ(updated_location.high(), source.reg()); - move->SetSource(Location::RegisterLocation(new_source.high())); - } - } else if (source.IsFpuRegister()) { - DCHECK(new_source.IsFpuRegisterPair()) << new_source; - DCHECK(updated_location.IsFpuRegisterPair()) << updated_location; - if (updated_location.low() == source.reg()) { - move->SetSource(Location::FpuRegisterLocation(new_source.low())); - } else { - DCHECK_EQ(updated_location.high(), source.reg()); - move->SetSource(Location::FpuRegisterLocation(new_source.high())); - } } } @@ -265,6 +269,20 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked, } +int ParallelMoveResolver::AllocateScratchRegister(int blocked, + int register_count) { + int scratch = -1; + for (int reg = 0; reg < register_count; ++reg) { + if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) { + scratch = reg; + break; + } + } + + return scratch; +} + + ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers) : resolver_(resolver), @@ -278,6 +296,16 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( } +ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( + ParallelMoveResolver* resolver, int blocked, 
int number_of_registers) + : resolver_(resolver), + reg_(kNoRegister), + spilled_(false) { + // We don't want to spill a register if none are free. + reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers); +} + + ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() { if (spilled_) { resolver_->RestoreScratch(reg_); diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 3fa1b37afd..173cffc71e 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -42,10 +42,15 @@ class ParallelMoveResolver : public ValueObject { protected: class ScratchRegisterScope : public ValueObject { public: + // Spill a scratch register if no regs are free. ScratchRegisterScope(ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers); + // Grab a scratch register only if available. + ScratchRegisterScope(ParallelMoveResolver* resolver, + int blocked, + int number_of_registers); ~ScratchRegisterScope(); int GetRegister() const { return reg_; } @@ -62,6 +67,8 @@ class ParallelMoveResolver : public ValueObject { // Allocate a scratch register for performing a move. The method will try to use // a register that is the destination of a move, but that move has not been emitted yet. int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled); + // As above, but return -1 if no free register. + int AllocateScratchRegister(int blocked, int register_count); // Emit a move. virtual void EmitMove(size_t index) = 0; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 817a44b184..5c502f7ef4 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -31,8 +31,13 @@ class TestParallelMoveResolver : public ParallelMoveResolver { message_ << "C"; } else if (location.IsPair()) { message_ << location.low() << "," << location.high(); - } else { + } else if (location.IsRegister()) { message_ << location.reg(); + } else if (location.IsStackSlot()) { + message_ << location.GetStackIndex() << "(sp)"; + } else { + message_ << "2x" << location.GetStackIndex() << "(sp)"; + DCHECK(location.IsDoubleStackSlot()) << location; } } @@ -279,6 +284,26 @@ TEST(ParallelMoveTest, Pairs) { resolver.EmitNativeCode(moves); ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); } + + { + // Test involving registers used in single context and pair context. 
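+ // The single move 10 -> 5 has its source rewritten by both swaps: register 10 is
+ // the low half of the pair (10, 11), and after that swap the source becomes the low
+ // word of the double stack slot, exercising the new LowOf()/HighOf() handling in
+ // UpdateSourceOf().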
+ TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(10), + Location::RegisterLocation(5), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(4, 5), + Location::DoubleStackSlot(32), + nullptr); + moves->AddMove( + Location::DoubleStackSlot(32), + Location::RegisterPairLocation(10, 11), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str()); + } } } // namespace art diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 2d9a2bf330..f5d8d82571 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -60,11 +60,11 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { bool needs_materialization = false; - if (!condition->GetUses().HasOnlyOneUse()) { + if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) { needs_materialization = true; } else { HInstruction* user = condition->GetUses().GetFirst()->GetUser(); - if (!user->IsIf()) { + if (!user->IsIf() && !user->IsDeoptimize()) { needs_materialization = true; } else { // TODO: if there is no intervening instructions with side-effect between this condition diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index cf38bd3f8c..4bca43499f 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1408,26 +1408,36 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { // Walk over all uses covered by this interval, and update the location // information. - while (use != nullptr && use->GetPosition() <= current->GetEnd()) { - LocationSummary* locations = use->GetUser()->GetLocations(); - if (use->GetIsEnvironment()) { - locations->SetEnvironmentAt(use->GetInputIndex(), source); - } else { - Location expected_location = locations->InAt(use->GetInputIndex()); - // The expected (actual) location may be invalid in case the input is unused. Currently - // this only happens for intrinsics. - if (expected_location.IsValid()) { - if (expected_location.IsUnallocated()) { - locations->SetInAt(use->GetInputIndex(), source); - } else if (!expected_location.IsConstant()) { - AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); - } + + LiveRange* range = current->GetFirstRange(); + while (range != nullptr) { + while (use != nullptr && use->GetPosition() < range->GetStart()) { + DCHECK(use->GetIsEnvironment()); + use = use->GetNext(); + } + while (use != nullptr && use->GetPosition() <= range->GetEnd()) { + DCHECK(current->Covers(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); + LocationSummary* locations = use->GetUser()->GetLocations(); + if (use->GetIsEnvironment()) { + locations->SetEnvironmentAt(use->GetInputIndex(), source); } else { - DCHECK(use->GetUser()->IsInvoke()); - DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); + Location expected_location = locations->InAt(use->GetInputIndex()); + // The expected (actual) location may be invalid in case the input is unused. Currently + // this only happens for intrinsics. 
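// A minimal standalone sketch of the LowOf()/HighOf() decomposition introduced in
// parallel_move_resolver.cc above: UpdateSourceOf() can now match a pending move
// against either half of a swapped pair and rewrite it with the corresponding half
// of the new source.  The Loc/Reg/Pair names below are hypothetical stand-ins for
// ART's Location class, not the real API.
#include <cassert>

struct Loc {
  enum Kind { kNone, kRegister, kRegisterPair };
  Kind kind;
  int low;
  int high;
};

bool operator==(const Loc& a, const Loc& b) {
  return a.kind == b.kind && a.low == b.low && a.high == b.high;
}

Loc Reg(int r)           { return Loc{Loc::kRegister, r, -1}; }
Loc Pair(int lo, int hi) { return Loc{Loc::kRegisterPair, lo, hi}; }
Loc None()               { return Loc{Loc::kNone, -1, -1}; }
Loc LowOf(const Loc& l)  { return l.kind == Loc::kRegisterPair ? Reg(l.low)  : None(); }
Loc HighOf(const Loc& l) { return l.kind == Loc::kRegisterPair ? Reg(l.high) : None(); }

int main() {
  // Pair (4,5) was swapped with pair (10,11); a pending single-register move
  // whose source was register 5 must now read register 11, the high half.
  Loc updated_location = Pair(4, 5);
  Loc new_source = Pair(10, 11);
  Loc source = Reg(5);
  assert(!(LowOf(updated_location) == source));
  assert(HighOf(updated_location) == source);
  assert(HighOf(new_source) == Reg(11));
  return 0;
}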
+ if (expected_location.IsValid()) { + if (expected_location.IsUnallocated()) { + locations->SetInAt(use->GetInputIndex(), source); + } else if (!expected_location.IsConstant()) { + AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); + } + } else { + DCHECK(use->GetUser()->IsInvoke()); + DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); + } } + use = use->GetNext(); } - use = use->GetNext(); + range = range->GetNext(); } // If the next interval starts just after this one, and has a register, @@ -1503,7 +1513,15 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } current = next_sibling; } while (current != nullptr); - DCHECK(use == nullptr); + + if (kIsDebugBuild) { + // Following uses can only be environment uses. The location for + // these environments will be none. + while (use != nullptr) { + DCHECK(use->GetIsEnvironment()); + use = use->GetNext(); + } + } } void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 7c3a0357d6..3951439881 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" @@ -42,7 +43,9 @@ static bool Check(const uint16_t* data) { const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); graph->TryBuildingSsa(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -58,7 +61,9 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); GrowableArray<LiveInterval*> intervals(&allocator, 0); // Test with two intervals of the same range. 
@@ -298,7 +303,9 @@ TEST(RegisterAllocatorTest, Loop3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -330,7 +337,9 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -383,7 +392,9 @@ TEST(RegisterAllocatorTest, DeadPhi) { ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); SsaDeadPhiElimination(graph).Run(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -405,7 +416,9 @@ TEST(RegisterAllocatorTest, FreeUntil) { ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); SsaDeadPhiElimination(graph).Run(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -507,7 +520,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -522,7 +537,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -539,7 +556,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -556,7 +575,9 @@ 
TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -608,7 +629,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { { HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -621,7 +644,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { { HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -671,7 +696,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { { HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -685,7 +712,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { { HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -734,7 +763,9 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { { HGraph* graph = BuildDiv(&allocator, &div); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -822,7 +853,9 @@ TEST(RegisterAllocatorTest, SpillInactive) { locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); RegisterAllocator register_allocator(&allocator, &codegen, liveness); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index fcc4e69b37..e154ea4ee6 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -487,7 +487,7 @@ void 
SsaBuilder::VisitInstruction(HInstruction* instruction) { HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( GetGraph()->GetArena(), current_locals_->Size()); environment->CopyFrom(current_locals_); - instruction->SetEnvironment(environment); + instruction->SetRawEnvironment(environment); } void SsaBuilder::VisitTemporary(HTemporary* temp) { diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 0f3973e5fb..95da6ef551 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -218,28 +218,34 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { current->GetLiveInterval()->SetFrom(current->GetLifetimePosition()); } - // All inputs of an instruction must be live. - for (size_t i = 0, e = current->InputCount(); i < e; ++i) { - HInstruction* input = current->InputAt(i); - // Some instructions 'inline' their inputs, that is they do not need - // to be materialized. - if (input->HasSsaIndex()) { - live_in->SetBit(input->GetSsaIndex()); - input->GetLiveInterval()->AddUse(current, i, false); - } - } - + // Process the environment first, because we know their uses come after + // or at the same liveness position of inputs. if (current->HasEnvironment()) { // Handle environment uses. See statements (b) and (c) of the // SsaLivenessAnalysis. HEnvironment* environment = current->GetEnvironment(); for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* instruction = environment->GetInstructionAt(i); - if (ShouldBeLiveForEnvironment(instruction)) { + bool should_be_live = ShouldBeLiveForEnvironment(instruction); + if (should_be_live) { DCHECK(instruction->HasSsaIndex()); live_in->SetBit(instruction->GetSsaIndex()); - instruction->GetLiveInterval()->AddUse(current, i, true); } + if (instruction != nullptr) { + instruction->GetLiveInterval()->AddUse( + current, i, /* is_environment */ true, should_be_live); + } + } + } + + // All inputs of an instruction must be live. + for (size_t i = 0, e = current->InputCount(); i < e; ++i) { + HInstruction* input = current->InputAt(i); + // Some instructions 'inline' their inputs, that is they do not need + // to be materialized. + if (input->HasSsaIndex()) { + live_in->SetBit(input->GetSsaIndex()); + input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false); } } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index bc78dc2e76..d2da84c0c0 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -189,7 +189,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { AddRange(position, position + 1); } - void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) { + void AddUse(HInstruction* instruction, + size_t input_index, + bool is_environment, + bool keep_alive = false) { // Set the use within the instruction. size_t position = instruction->GetLifetimePosition() + 1; LocationSummary* locations = instruction->GetLocations(); @@ -211,6 +214,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { && (first_use_->GetPosition() < position)) { // The user uses the instruction multiple times, and one use dies before the other. // We update the use list so that the latter is first. 
+ DCHECK(!is_environment); UsePosition* cursor = first_use_; while ((cursor->GetNext() != nullptr) && (cursor->GetNext()->GetPosition() < position)) { cursor = cursor->GetNext(); @@ -225,6 +229,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return; } + first_use_ = new (allocator_) UsePosition( + instruction, input_index, is_environment, position, first_use_); + + if (is_environment && !keep_alive) { + // If this environment use does not keep the instruction live, it does not + // affect the live range of that instruction. + return; + } + size_t start_block_position = instruction->GetBlock()->GetLifetimeStart(); if (first_range_ == nullptr) { // First time we see a use of that interval. @@ -246,8 +259,6 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // and the check line 205 would succeed. first_range_ = new (allocator_) LiveRange(start_block_position, position, first_range_); } - first_use_ = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, first_use_); } void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) { @@ -425,9 +436,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { UsePosition* use = first_use_; size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { - size_t use_position = use->GetPosition(); - if (use_position > position) { - return use_position; + if (!use->GetIsEnvironment()) { + size_t use_position = use->GetPosition(); + if (use_position > position) { + return use_position; + } } use = use->GetNext(); } diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 5818a37a46..a73c8d77f3 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -27,6 +27,32 @@ namespace art { +// Helper to build art::StackMapStream::LocationCatalogEntriesIndices. +class LocationCatalogEntriesIndicesEmptyFn { + public: + void MakeEmpty(std::pair<DexRegisterLocation, size_t>& item) const { + item.first = DexRegisterLocation::None(); + } + bool IsEmpty(const std::pair<DexRegisterLocation, size_t>& item) const { + return item.first == DexRegisterLocation::None(); + } +}; + +// Hash function for art::StackMapStream::LocationCatalogEntriesIndices. +// This hash function does not create collisions. +class DexRegisterLocationHashFn { + public: + size_t operator()(DexRegisterLocation key) const { + // Concatenate `key`s fields to create a 64-bit value to be hashed. + int64_t kind_and_value = + (static_cast<int64_t>(key.kind_) << 32) | static_cast<int64_t>(key.value_); + return inner_hash_fn_(kind_and_value); + } + private: + std::hash<int64_t> inner_hash_fn_; +}; + + /** * Collects and builds stack maps for a method. All the stack maps * for a method are placed in a CodeInfo object. 
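// A standalone sketch of the key packing used by DexRegisterLocationHashFn above:
// the location kind goes in the high 32 bits and the value in the low 32 bits, so
// distinct (kind, value) pairs pack to distinct 64-bit keys.  The Kind enum below
// is an illustrative subset, and the explicit zero-extension of the low half is
// this sketch's choice to keep negative values out of the kind bits; neither is
// meant to mirror the real DexRegisterLocation exactly.
#include <cassert>
#include <cstdint>
#include <functional>

enum class Kind : int32_t { kNone, kInStack, kInRegister, kConstant };

int64_t PackKey(Kind kind, int32_t value) {
  return (static_cast<int64_t>(kind) << 32) |
         static_cast<int64_t>(static_cast<uint32_t>(value));
}

int main() {
  std::hash<int64_t> inner_hash_fn;
  // Different kinds with the same value still produce different keys...
  assert(PackKey(Kind::kConstant, -2) != PackKey(Kind::kInStack, -2));
  // ...and equal pairs produce equal keys, hence equal hashes.
  assert(inner_hash_fn(PackKey(Kind::kInRegister, 3)) ==
         inner_hash_fn(PackKey(Kind::kInRegister, 3)));
  return 0;
}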
@@ -36,11 +62,13 @@ class StackMapStream : public ValueObject { explicit StackMapStream(ArenaAllocator* allocator) : allocator_(allocator), stack_maps_(allocator, 10), + location_catalog_entries_(allocator, 4), dex_register_locations_(allocator, 10 * 4), inline_infos_(allocator, 2), stack_mask_max_(-1), dex_pc_max_(0), native_pc_offset_max_(0), + register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()) {} @@ -101,6 +129,7 @@ class StackMapStream : public ValueObject { dex_pc_max_ = std::max(dex_pc_max_, dex_pc); native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset); + register_mask_max_ = std::max(register_mask_max_, register_mask); } void AddInlineInfoEntry(uint32_t method_index) { @@ -111,6 +140,7 @@ class StackMapStream : public ValueObject { size_t ComputeNeededSize() { size_t size = CodeInfo::kFixedSize + + ComputeDexRegisterLocationCatalogSize() + ComputeStackMapsSize() + ComputeDexRegisterMapsSize() + ComputeInlineInfoSize(); @@ -128,24 +158,43 @@ class StackMapStream : public ValueObject { ComputeInlineInfoSize(), ComputeDexRegisterMapsSize(), dex_pc_max_, - native_pc_offset_max_); + native_pc_offset_max_, + register_mask_max_); + } + + // Compute the size of the Dex register location catalog of `entry`. + size_t ComputeDexRegisterLocationCatalogSize() const { + size_t size = DexRegisterLocationCatalog::kFixedSize; + for (size_t location_catalog_entry_index = 0; + location_catalog_entry_index < location_catalog_entries_.Size(); + ++location_catalog_entry_index) { + DexRegisterLocation dex_register_location = + location_catalog_entries_.Get(location_catalog_entry_index); + size += DexRegisterLocationCatalog::EntrySize(dex_register_location); + } + return size; } - // Compute the size of the Dex register map of `entry`. size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const { + // Size of the map in bytes. size_t size = DexRegisterMap::kFixedSize; - // Add the bit mask for the dex register liveness. - size += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers); - for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; + // Add the live bit mask for the Dex register liveness. + size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers); + // Compute the size of the set of live Dex register entries. 
+ size_t number_of_live_dex_registers = 0; + for (size_t dex_register_number = 0; dex_register_number < entry.num_dex_registers; ++dex_register_number) { if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - DexRegisterLocation dex_register_location = dex_register_locations_.Get( - entry.dex_register_locations_start_index + index_in_dex_register_locations); - size += DexRegisterMap::EntrySize(dex_register_location); - index_in_dex_register_locations++; + ++number_of_live_dex_registers; } } + size_t map_entries_size_in_bits = + DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size()) + * number_of_live_dex_registers; + size_t map_entries_size_in_bytes = + RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; + size += map_entries_size_in_bytes; return size; } @@ -168,8 +217,16 @@ class StackMapStream : public ValueObject { + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); } + size_t ComputeDexRegisterLocationCatalogStart() const { + return CodeInfo::kFixedSize; + } + + size_t ComputeStackMapsStart() const { + return ComputeDexRegisterLocationCatalogStart() + ComputeDexRegisterLocationCatalogSize(); + } + size_t ComputeDexRegisterMapsStart() { - return CodeInfo::kFixedSize + ComputeStackMapsSize(); + return ComputeStackMapsStart() + ComputeStackMapsSize(); } size_t ComputeInlineInfoStart() { @@ -194,11 +251,32 @@ class StackMapStream : public ValueObject { ComputeInlineInfoStart(), inline_info_size); - code_info.SetEncoding( - inline_info_size, dex_register_map_size, dex_pc_max_, native_pc_offset_max_); + code_info.SetEncoding(inline_info_size, + dex_register_map_size, + dex_pc_max_, + native_pc_offset_max_, + register_mask_max_); code_info.SetNumberOfStackMaps(stack_maps_.Size()); code_info.SetStackMaskSize(stack_mask_size); - DCHECK_EQ(code_info.StackMapsSize(), ComputeStackMapsSize()); + DCHECK_EQ(code_info.GetStackMapsSize(), ComputeStackMapsSize()); + + // Set the Dex register location catalog. + code_info.SetNumberOfDexRegisterLocationCatalogEntries( + location_catalog_entries_.Size()); + MemoryRegion dex_register_location_catalog_region = region.Subregion( + ComputeDexRegisterLocationCatalogStart(), + ComputeDexRegisterLocationCatalogSize()); + DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); + // Offset in `dex_register_location_catalog` where to store the next + // register location. + size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; + for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) { + DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i); + dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); + location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); + } + // Ensure we reached the end of the Dex registers location_catalog. + DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size()); uintptr_t next_dex_register_map_offset = 0; uintptr_t next_inline_info_offset = 0; @@ -234,25 +312,25 @@ class StackMapStream : public ValueObject { stack_map.SetDexRegisterMapOffset( code_info, register_region.start() - dex_register_locations_region.start()); - // Offset in `dex_register_map` where to store the next register entry. 
- size_t offset = DexRegisterMap::kFixedSize; - dex_register_map.SetLiveBitMask(offset, - entry.num_dex_registers, - *entry.live_dex_registers_mask); - offset += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers); + // Set the live bit mask. + dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask); + + // Set the dex register location mapping data. for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; dex_register_number < entry.num_dex_registers; ++dex_register_number) { if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - DexRegisterLocation dex_register_location = dex_register_locations_.Get( - entry.dex_register_locations_start_index + index_in_dex_register_locations); - dex_register_map.SetRegisterInfo(offset, dex_register_location); - offset += DexRegisterMap::EntrySize(dex_register_location); + size_t location_catalog_entry_index = + dex_register_locations_.Get(entry.dex_register_locations_start_index + + index_in_dex_register_locations); + dex_register_map.SetLocationCatalogEntryIndex( + index_in_dex_register_locations, + location_catalog_entry_index, + entry.num_dex_registers, + location_catalog_entries_.Size()); ++index_in_dex_register_locations; } } - // Ensure we reached the end of the Dex registers region. - DCHECK_EQ(offset, register_region.size()); } } @@ -282,12 +360,31 @@ class StackMapStream : public ValueObject { } void AddDexRegisterEntry(uint16_t dex_register, DexRegisterLocation::Kind kind, int32_t value) { + StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1); + DCHECK_LT(dex_register, entry.num_dex_registers); + if (kind != DexRegisterLocation::Kind::kNone) { // Ensure we only use non-compressed location kind at this stage. DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) << DexRegisterLocation::PrettyDescriptor(kind); - dex_register_locations_.Add(DexRegisterLocation(kind, value)); - StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1); + DexRegisterLocation location(kind, value); + + // Look for Dex register `location` in the location catalog (using the + // companion hash map of locations to indices). Use its index if it + // is already in the location catalog. If not, insert it (in the + // location catalog and the hash map) and use the newly created index. + auto it = location_catalog_entries_indices_.Find(location); + if (it != location_catalog_entries_indices_.end()) { + // Retrieve the index from the hash map. + dex_register_locations_.Add(it->second); + } else { + // Create a new entry in the location catalog and the hash map. 
+ size_t index = location_catalog_entries_.Size(); + location_catalog_entries_.Add(location); + dex_register_locations_.Add(index); + location_catalog_entries_indices_.Insert(std::make_pair(location, index)); + } + entry.live_dex_registers_mask->SetBit(dex_register); entry.dex_register_map_hash += (1 << dex_register); entry.dex_register_map_hash += static_cast<uint32_t>(value); @@ -354,9 +451,9 @@ class StackMapStream : public ValueObject { return false; } if (a.live_dex_registers_mask->IsBitSet(i)) { - DexRegisterLocation a_loc = dex_register_locations_.Get( + size_t a_loc = dex_register_locations_.Get( a.dex_register_locations_start_index + index_in_dex_register_locations); - DexRegisterLocation b_loc = dex_register_locations_.Get( + size_t b_loc = dex_register_locations_.Get( b.dex_register_locations_start_index + index_in_dex_register_locations); if (a_loc != b_loc) { return false; @@ -369,21 +466,29 @@ class StackMapStream : public ValueObject { ArenaAllocator* allocator_; GrowableArray<StackMapEntry> stack_maps_; - GrowableArray<DexRegisterLocation> dex_register_locations_; + + // A catalog of unique [location_kind, register_value] pairs (per method). + GrowableArray<DexRegisterLocation> location_catalog_entries_; + // Map from Dex register location catalog entries to their indices in the + // location catalog. + typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn, + DexRegisterLocationHashFn> LocationCatalogEntriesIndices; + LocationCatalogEntriesIndices location_catalog_entries_indices_; + + // A set of concatenated maps of Dex register locations indices to + // `location_catalog_entries_`. + GrowableArray<size_t> dex_register_locations_; GrowableArray<InlineInfoEntry> inline_infos_; int stack_mask_max_; uint32_t dex_pc_max_; uint32_t native_pc_offset_max_; + uint32_t register_mask_max_; size_t number_of_stack_maps_with_inline_info_; ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_; static constexpr uint32_t kNoSameDexMapFound = -1; - ART_FRIEND_TEST(StackMapTest, Test1); - ART_FRIEND_TEST(StackMapTest, Test2); - ART_FRIEND_TEST(StackMapTest, TestNonLiveDexRegisters); - DISALLOW_COPY_AND_ASSIGN(StackMapStream); }; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index e5a9790254..8d160bc81e 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -31,6 +31,8 @@ static bool SameBits(MemoryRegion region, const BitVector& bit_vector) { return true; } +using Kind = DexRegisterLocation::Kind; + TEST(StackMapTest, Test1) { ArenaPool pool; ArenaAllocator arena(&pool); @@ -39,8 +41,8 @@ TEST(StackMapTest, Test1) { ArenaBitVector sp_mask(&arena, 0, false); size_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Short location. 
size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -51,6 +53,16 @@ TEST(StackMapTest, Test1) { ASSERT_EQ(0u, code_info.GetStackMaskSize()); ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(2u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + // The Dex register location catalog contains: + // - one 1-byte short Dex register location, and + // - one 5-byte large Dex register location. + size_t expected_location_catalog_size = 1u + 5u; + ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); + StackMap stack_map = code_info.GetStackMapAt(0); ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); @@ -62,14 +74,40 @@ TEST(StackMapTest, Test1) { ASSERT_TRUE(SameBits(stack_mask, sp_mask)); ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(7u, dex_registers.Size()); - DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); - DexRegisterLocation location1 = dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind()); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask, and + // - one 1-byte set of location catalog entry indices composed of two 2-bit values. 
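// A worked sketch of the size arithmetic behind the expected_* constants in these
// stack map tests: a byte-aligned live bit mask followed by a per-live-register
// catalog index of roughly ceil(log2(catalog size)) bits (one bit for a two-entry
// catalog, per the SingleEntrySizeInBits assertion in DexRegisterMapOffsetOverflow
// further down), rounded up to whole bytes.  It assumes, as the tests suggest,
// that DexRegisterMap::kFixedSize adds nothing in this layout; the helper names
// below are hypothetical, not the real API.
#include <cassert>
#include <cstddef>

size_t BitsPerCatalogIndex(size_t catalog_entries) {
  if (catalog_entries <= 1) return 0;  // An empty or single-entry catalog needs no index data.
  size_t bits = 0;
  for (size_t n = catalog_entries - 1; n != 0; n >>= 1) ++bits;
  return bits;
}

size_t DexRegisterMapSizeInBytes(size_t num_regs, size_t num_live, size_t catalog_entries) {
  size_t live_mask_bytes = (num_regs + 7) / 8;
  size_t mapping_bits = num_live * BitsPerCatalogIndex(catalog_entries);
  return live_mask_bytes + (mapping_bits + 7) / 8;
}

int main() {
  // (registers, live registers, catalog entries) -> map size, matching the values
  // asserted in Test1, Test2, TestNonLiveDexRegisters and DexRegisterMapOffsetOverflow.
  assert(DexRegisterMapSizeInBytes(2, 2, 2) == 1u + 1u);
  assert(DexRegisterMapSizeInBytes(2, 2, 4) == 1u + 1u);
  assert(DexRegisterMapSizeInBytes(2, 1, 1) == 1u + 0u);
  assert(DexRegisterMapSizeInBytes(1024, 1016, 2) == 128u + 127u);
  return 0;
}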
+ size_t expected_dex_register_map_size = 1u + 1u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstant, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstantLargeValue, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info)); + ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(0u, index0); + ASSERT_EQ(1u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kInStack, location0.GetKind()); + ASSERT_EQ(Kind::kConstant, location1.GetKind()); + ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); + ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); @@ -86,8 +124,8 @@ TEST(StackMapTest, Test2) { sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. stream.AddInlineInfoEntry(42); stream.AddInlineInfoEntry(82); @@ -95,8 +133,8 @@ TEST(StackMapTest, Test2) { sp_mask2.SetBit(3); sp_mask1.SetBit(8); stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 18); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kInFpuRegister, 3); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 18); // Short location. + stream.AddDexRegisterEntry(1, Kind::kInFpuRegister, 3); // Short location. size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -107,6 +145,16 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(1u, code_info.GetStackMaskSize()); ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(4u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + // The Dex register location catalog contains: + // - three 1-byte short Dex register locations, and + // - one 5-byte large Dex register location. + size_t expected_location_catalog_size = 3u * 1u + 5u; + ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); + // First stack map. 
{ StackMap stack_map = code_info.GetStackMapAt(0); @@ -120,17 +168,40 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(7u, dex_registers.Size()); - DexRegisterLocation location0 = - dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); - DexRegisterLocation location1 = - dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind()); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask, and + // - one 1-byte set of location catalog entry indices composed of two 2-bit values. + size_t expected_dex_register_map_size = 1u + 1u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstant, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstantLargeValue, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info)); + ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(0u, index0); + ASSERT_EQ(1u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kInStack, location0.GetKind()); + ASSERT_EQ(Kind::kConstant, location1.GetKind()); + ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); + ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); @@ -154,17 +225,40 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(SameBits(stack_mask, sp_mask2)); ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(3u, dex_registers.Size()); - DexRegisterLocation location0 = - dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); - DexRegisterLocation location1 = - dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); - ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetInternalKind()); - 
ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetInternalKind()); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask, and + // - one 1-byte set of location catalog entry indices composed of two 2-bit values. + size_t expected_dex_register_map_size = 1u + 1u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kInRegister, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInFpuRegister, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInRegister, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInFpuRegister, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(18, dex_register_map.GetMachineRegister(0, number_of_dex_registers, code_info)); + ASSERT_EQ(3, dex_register_map.GetMachineRegister(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(2u, index0); + ASSERT_EQ(3u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kInRegister, location0.GetKind()); + ASSERT_EQ(Kind::kInFpuRegister, location1.GetKind()); + ASSERT_EQ(Kind::kInRegister, location0.GetInternalKind()); + ASSERT_EQ(Kind::kInFpuRegister, location1.GetInternalKind()); ASSERT_EQ(18, location0.GetValue()); ASSERT_EQ(3, location1.GetValue()); @@ -180,8 +274,8 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kNone, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kNone, 0); // No location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -189,14 +283,62 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { stream.FillIn(region); CodeInfo code_info(region); + ASSERT_EQ(0u, code_info.GetStackMaskSize()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(1u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + // The Dex register location catalog contains: + // - one 5-byte large Dex register location. 
+ size_t expected_location_catalog_size = 5u; + ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); + ASSERT_EQ(0u, stack_map.GetDexPc(code_info)); + ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info)); + ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info)); + ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); - ASSERT_EQ(DexRegisterLocation::Kind::kNone, - dex_registers.GetLocationKind(0, number_of_dex_registers)); - ASSERT_EQ(DexRegisterLocation::Kind::kConstant, - dex_registers.GetLocationKind(1, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers.GetConstant(1, number_of_dex_registers)); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_FALSE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask. + // No space is allocated for the sole location catalog entry index, as it is useless. + size_t expected_dex_register_map_size = 1u + 0u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kNone, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstant, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kNone, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstantLargeValue, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0); + ASSERT_EQ(0u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kNone, location0.GetKind()); + ASSERT_EQ(Kind::kConstant, location1.GetKind()); + ASSERT_EQ(Kind::kNone, location0.GetInternalKind()); + ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); + ASSERT_EQ(0, location0.GetValue()); + ASSERT_EQ(-2, location1.GetValue()); + ASSERT_FALSE(stack_map.HasInlineInfo(code_info)); } @@ -209,14 +351,21 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { StackMapStream stream(&arena); ArenaBitVector sp_mask(&arena, 0, false); - uint32_t number_of_dex_registers = 0xEA; + uint32_t number_of_dex_registers = 1024; + // Create the first stack map (and its Dex register map). 
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - for (uint32_t i = 0; i < number_of_dex_registers - 9; ++i) { - stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0); + uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8; + for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) { + // Use two different Dex register locations to populate this map, + // as using a single value (in the whole CodeInfo object) would + // make this Dex register mapping data empty (see + // art::DexRegisterMap::SingleEntrySizeInBits). + stream.AddDexRegisterEntry(i, Kind::kConstant, i % 2); // Short location. } + // Create the second stack map (and its Dex register map). stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); for (uint32_t i = 0; i < number_of_dex_registers; ++i) { - stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0); + stream.AddDexRegisterEntry(i, Kind::kConstant, 0); // Short location. } size_t size = stream.ComputeNeededSize(); @@ -225,10 +374,35 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { stream.FillIn(region); CodeInfo code_info(region); - StackMap stack_map = code_info.GetStackMapAt(1); - ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - ASSERT_NE(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap); - ASSERT_EQ(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMapSmallEncoding); + // The location catalog contains two entries (DexRegisterLocation(kConstant, 0) + // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index + // has a size of 1 bit. + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(2u, number_of_location_catalog_entries); + ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_location_catalog_entries)); + + // The first Dex register map contains: + // - a live register bit mask for 1024 registers (that is, 128 bytes of + // data); and + // - Dex register mapping information for 1016 1-bit Dex (live) register + // locations (that is, 127 bytes of data). + // Hence it has a size of 255 bytes, and therefore... + ASSERT_EQ(128u, DexRegisterMap::GetLiveBitMaskSize(number_of_dex_registers)); + StackMap stack_map0 = code_info.GetStackMapAt(0); + DexRegisterMap dex_register_map0 = + code_info.GetDexRegisterMapOf(stack_map0, number_of_dex_registers); + ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers, + number_of_location_catalog_entries)); + ASSERT_EQ(255u, dex_register_map0.Size()); + + StackMap stack_map1 = code_info.GetStackMapAt(1); + ASSERT_TRUE(stack_map1.HasDexRegisterMap(code_info)); + // ...the offset of the second Dex register map (relative to the + // beginning of the Dex register maps region) is 255 (i.e., + // kNoDexRegisterMapSmallEncoding). + ASSERT_NE(stack_map1.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap); + ASSERT_EQ(stack_map1.GetDexRegisterMapOffset(code_info), 0xFFu); } TEST(StackMapTest, TestShareDexRegisterMap) { @@ -240,16 +414,16 @@ TEST(StackMapTest, TestShareDexRegisterMap) { uint32_t number_of_dex_registers = 2; // First stack map. 
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. // Second stack map, which should share the same dex register map. stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. // Third stack map (doesn't share the dex register map). stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 2); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 2); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -260,20 +434,20 @@ TEST(StackMapTest, TestShareDexRegisterMap) { // Verify first stack map. StackMap sm0 = ci.GetStackMapAt(0); DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, number_of_dex_registers); - ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers)); + ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers, ci)); + ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers, ci)); // Verify second stack map. StackMap sm1 = ci.GetStackMapAt(1); DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, number_of_dex_registers); - ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers)); + ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers, ci)); + ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers, ci)); // Verify third stack map. StackMap sm2 = ci.GetStackMapAt(2); DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, number_of_dex_registers); - ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers)); + ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers, ci)); + ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci)); // Verify dex register map offsets. 
ASSERT_EQ(sm0.GetDexRegisterMapOffset(ci), sm1.GetDexRegisterMapOffset(ci)); @@ -281,4 +455,39 @@ TEST(StackMapTest, TestShareDexRegisterMap) { ASSERT_NE(sm1.GetDexRegisterMapOffset(ci), sm2.GetDexRegisterMapOffset(ci)); } +TEST(StackMapTest, TestNoDexRegisterMap) { + ArenaPool pool; + ArenaAllocator arena(&pool); + StackMapStream stream(&arena); + + ArenaBitVector sp_mask(&arena, 0, false); + uint32_t number_of_dex_registers = 0; + stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + + size_t size = stream.ComputeNeededSize(); + void* memory = arena.Alloc(size, kArenaAllocMisc); + MemoryRegion region(memory, size); + stream.FillIn(region); + + CodeInfo code_info(region); + ASSERT_EQ(0u, code_info.GetStackMaskSize()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(0u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + ASSERT_EQ(0u, location_catalog.Size()); + + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); + ASSERT_EQ(0u, stack_map.GetDexPc(code_info)); + ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info)); + ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info)); + + ASSERT_FALSE(stack_map.HasDexRegisterMap(code_info)); + ASSERT_FALSE(stack_map.HasInlineInfo(code_info)); +} + } // namespace art diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index a02191bc13..c41066027d 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -89,7 +89,6 @@ uint32_t ShifterOperand::encodingArm() const { } else { return immed_; } - break; case kRegister: if (is_shift_) { uint32_t shift_type; @@ -121,7 +120,6 @@ uint32_t ShifterOperand::encodingArm() const { // Simple register return static_cast<uint32_t>(rm_); } - break; default: // Can't get here. LOG(FATAL) << "Invalid shifter operand for ARM"; @@ -156,13 +154,11 @@ uint32_t ShifterOperand::encodingThumb() const { // Simple register return static_cast<uint32_t>(rm_); } - break; default: // Can't get here. LOG(FATAL) << "Invalid shifter operand for thumb"; - return 0; + UNREACHABLE(); } - return 0; } uint32_t Address::encodingArm() const { @@ -374,40 +370,46 @@ void ArmAssembler::Pad(uint32_t bytes) { } } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::ArmCore(static_cast<int>(reg)); +} + +static dwarf::Reg DWARFReg(SRegister reg) { + return dwarf::Reg::ArmFp(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 4; void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { + CHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet CHECK_ALIGNED(frame_size, kStackAlignment); CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister()); // Push callee saves and link register. 
- RegList push_list = 1 << LR; - size_t pushed_values = 1; - int32_t min_s = kNumberOfSRegisters; - int32_t max_s = -1; - for (size_t i = 0; i < callee_save_regs.size(); i++) { - if (callee_save_regs.at(i).AsArm().IsCoreRegister()) { - Register reg = callee_save_regs.at(i).AsArm().AsCoreRegister(); - push_list |= 1 << reg; - pushed_values++; + RegList core_spill_mask = 1 << LR; + uint32_t fp_spill_mask = 0; + for (const ManagedRegister& reg : callee_save_regs) { + if (reg.AsArm().IsCoreRegister()) { + core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); } else { - CHECK(callee_save_regs.at(i).AsArm().IsSRegister()); - min_s = std::min(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), min_s); - max_s = std::max(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), max_s); + fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); } } - PushList(push_list); - if (max_s != -1) { - pushed_values += 1 + max_s - min_s; - vpushs(static_cast<SRegister>(min_s), 1 + max_s - min_s); + PushList(core_spill_mask); + cfi_.AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize); + cfi_.RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize); + if (fp_spill_mask != 0) { + vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask)); + cfi_.AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize); + cfi_.RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize); } // Increase frame to required size. + int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); CHECK_GT(frame_size, pushed_values * kFramePointerSize); // Must at least have space for Method*. - size_t adjust = frame_size - (pushed_values * kFramePointerSize); - IncreaseFrameSize(adjust); + IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize); // handles CFI as well. // Write out Method*. StoreToOffset(kStoreWord, R0, SP, 0); @@ -436,46 +438,46 @@ void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void ArmAssembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); + // Compute callee saves to pop and PC. - RegList pop_list = 1 << PC; - size_t pop_values = 1; - int32_t min_s = kNumberOfSRegisters; - int32_t max_s = -1; - for (size_t i = 0; i < callee_save_regs.size(); i++) { - if (callee_save_regs.at(i).AsArm().IsCoreRegister()) { - Register reg = callee_save_regs.at(i).AsArm().AsCoreRegister(); - pop_list |= 1 << reg; - pop_values++; + RegList core_spill_mask = 1 << PC; + uint32_t fp_spill_mask = 0; + for (const ManagedRegister& reg : callee_save_regs) { + if (reg.AsArm().IsCoreRegister()) { + core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); } else { - CHECK(callee_save_regs.at(i).AsArm().IsSRegister()); - min_s = std::min(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), min_s); - max_s = std::max(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), max_s); + fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); } } - if (max_s != -1) { - pop_values += 1 + max_s - min_s; - } - // Decrease frame to start of callee saves. + int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); CHECK_GT(frame_size, pop_values * kFramePointerSize); - size_t adjust = frame_size - (pop_values * kFramePointerSize); - DecreaseFrameSize(adjust); + DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize)); // handles CFI as well. 
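// A small worked example of the CFA bookkeeping added around PushList() in
// BuildFrame() above: pushing the callee saves moves SP down by
// POPCOUNT(mask) * 4 bytes (hence the AdjustCFAOffset call), and each saved core
// register lands in a fixed slot relative to the new SP.  The mask below is just
// an example spill set, not the runtime's actual callee-save set, and the slot
// layout assumes the usual ARM PUSH ordering (lowest-numbered register at the
// lowest address).
#include <cassert>
#include <cstdint>

// Count set bits, standing in for ART's POPCOUNT.
int PopCount(uint32_t mask) {
  int n = 0;
  for (; mask != 0; mask &= mask - 1) ++n;
  return n;
}

int main() {
  constexpr int kFramePointerSize = 4;
  // Example spill set: R5, R6 and LR (register 14).
  uint32_t core_spill_mask = (1u << 5) | (1u << 6) | (1u << 14);
  // After the push, the CFA sits POPCOUNT(mask) * 4 bytes above the new SP.
  int cfa_offset = PopCount(core_spill_mask) * kFramePointerSize;
  assert(cfa_offset == 12);
  // The k-th set bit (counting from the least significant) is saved at SP + k * 4,
  // i.e. at CFA - (POPCOUNT(mask) - k) * 4.
  int k = 0;
  for (int reg = 0; reg < 16; ++reg) {
    if ((core_spill_mask & (1u << reg)) != 0) {
      int offset_from_sp = k * kFramePointerSize;
      assert(offset_from_sp == (reg == 5 ? 0 : reg == 6 ? 4 : 8));
      ++k;
    }
  }
  return 0;
}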
- if (max_s != -1) { - vpops(static_cast<SRegister>(min_s), 1 + max_s - min_s); + if (fp_spill_mask != 0) { + vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask)); + cfi_.AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask)); + cfi_.RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask); } // Pop callee saves and PC. - PopList(pop_list); + PopList(core_spill_mask); + + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void ArmAssembler::IncreaseFrameSize(size_t adjust) { AddConstant(SP, -adjust); + cfi_.AdjustCFAOffset(adjust); } void ArmAssembler::DecreaseFrameSize(size_t adjust) { AddConstant(SP, adjust); + cfi_.AdjustCFAOffset(-adjust); } void ArmAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index a894319c99..3b42f63509 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -373,24 +373,34 @@ void Thumb2Assembler::ldrsh(Register rd, const Address& ad, Condition cond) { void Thumb2Assembler::ldrd(Register rd, const Address& ad, Condition cond) { + ldrd(rd, Register(rd + 1), ad, cond); +} + + +void Thumb2Assembler::ldrd(Register rd, Register rd2, const Address& ad, Condition cond) { CheckCondition(cond); - CHECK_EQ(rd % 2, 0); + // Encoding T1. // This is different from other loads. The encoding is like ARM. int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 | static_cast<int32_t>(rd) << 12 | - (static_cast<int32_t>(rd) + 1) << 8 | + static_cast<int32_t>(rd2) << 8 | ad.encodingThumbLdrdStrd(); Emit32(encoding); } void Thumb2Assembler::strd(Register rd, const Address& ad, Condition cond) { + strd(rd, Register(rd + 1), ad, cond); +} + + +void Thumb2Assembler::strd(Register rd, Register rd2, const Address& ad, Condition cond) { CheckCondition(cond); - CHECK_EQ(rd % 2, 0); + // Encoding T1. // This is different from other loads. The encoding is like ARM. int32_t encoding = B31 | B30 | B29 | B27 | B22 | static_cast<int32_t>(rd) << 12 | - (static_cast<int32_t>(rd) + 1) << 8 | + static_cast<int32_t>(rd2) << 8 | ad.encodingThumbLdrdStrd(); Emit32(encoding); } @@ -683,7 +693,7 @@ void Thumb2Assembler::Emit16(int16_t value) { bool Thumb2Assembler::Is32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED, Opcode opcode, - bool set_cc ATTRIBUTE_UNUSED, + bool set_cc, Register rn, Register rd, const ShifterOperand& so) { @@ -749,7 +759,6 @@ bool Thumb2Assembler::Is32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED, break; case TEQ: return true; - break; case ADD: case SUB: break; @@ -2614,14 +2623,16 @@ void Thumb2Assembler::StoreToOffset(StoreOperandType type, Register tmp_reg = kNoRegister; if (!Address::CanHoldStoreOffsetThumb(type, offset)) { CHECK_NE(base, IP); - if (reg != IP) { + if (reg != IP && + (type != kStoreWordPair || reg + 1 != IP)) { tmp_reg = IP; } else { - // Be careful not to use IP twice (for `reg` and to build the - // Address object used by the store instruction(s) below). - // Instead, save R5 on the stack (or R6 if R5 is not available), - // use it as secondary temporary register, and restore it after - // the store instruction has been emitted. + // Be careful not to use IP twice (for `reg` (or `reg` + 1 in + // the case of a word-pair store)) and to build the Address + // object used by the store instruction(s) below). 
Instead, + // save R5 on the stack (or R6 if R5 is not available), use it + // as secondary temporary register, and restore it after the + // store instruction has been emitted. tmp_reg = base != R5 ? R5 : R6; Push(tmp_reg); if (base == SP) { diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 81dd13894f..e33c240dbf 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -135,9 +135,17 @@ class Thumb2Assembler FINAL : public ArmAssembler { void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; + // Load/store register dual instructions using registers `rd` and `rd` + 1. void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; + // Load/store register dual instructions using registers `rd` and `rd2`. + // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding + // does not require `rd` to be even, nor `rd2' to be equal to `rd` + 1. + void ldrd(Register rd, Register rd2, const Address& ad, Condition cond); + void strd(Register rd, Register rd2, const Address& ad, Condition cond); + + void ldm(BlockAddressMode am, Register base, RegList regs, Condition cond = AL) OVERRIDE; void stm(BlockAddressMode am, Register base, diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 813996b0db..5f5561a499 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -247,4 +247,103 @@ TEST_F(AssemblerThumb2Test, add) { DriverStr(expected, "add"); } +TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) { + arm::StoreOperandType type = arm::kStoreWord; + int32_t offset = 4092; + ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset)); + + __ StoreToOffset(type, arm::R0, arm::SP, offset); + __ StoreToOffset(type, arm::IP, arm::SP, offset); + __ StoreToOffset(type, arm::IP, arm::R5, offset); + + const char* expected = + "str r0, [sp, #4092]\n" + "str ip, [sp, #4092]\n" + "str ip, [r5, #4092]\n"; + DriverStr(expected, "StoreWordToThumbOffset"); +} + +TEST_F(AssemblerThumb2Test, StoreWordToNonThumbOffset) { + arm::StoreOperandType type = arm::kStoreWord; + int32_t offset = 4096; + ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset)); + + __ StoreToOffset(type, arm::R0, arm::SP, offset); + __ StoreToOffset(type, arm::IP, arm::SP, offset); + __ StoreToOffset(type, arm::IP, arm::R5, offset); + + const char* expected = + "mov ip, #4096\n" // LoadImmediate(ip, 4096) + "add ip, ip, sp\n" + "str r0, [ip, #0]\n" + + "str r5, [sp, #-4]!\n" // Push(r5) + "movw r5, #4100\n" // LoadImmediate(r5, 4096 + kRegisterSize) + "add r5, r5, sp\n" + "str ip, [r5, #0]\n" + "ldr r5, [sp], #4\n" // Pop(r5) + + "str r6, [sp, #-4]!\n" // Push(r6) + "mov r6, #4096\n" // LoadImmediate(r6, 4096) + "add r6, r6, r5\n" + "str ip, [r6, #0]\n" + "ldr r6, [sp], #4\n"; // Pop(r6) + DriverStr(expected, "StoreWordToNonThumbOffset"); +} + +TEST_F(AssemblerThumb2Test, StoreWordPairToThumbOffset) { + arm::StoreOperandType type = arm::kStoreWordPair; + int32_t offset = 1020; + ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset)); + + __ StoreToOffset(type, arm::R0, arm::SP, offset); + // We cannot use IP (i.e. R12) as first source register, as it would + // force us to use SP (i.e. 
R13) as second source register, which + would have an "unpredictable" effect according to the ARMv7 + specification (the T1 encoding describes the result as + UNPREDICTABLE when one of the source registers is R13). + // + // So we use (R11, IP) (e.g. (R11, R12)) as source registers in the + following instructions. + __ StoreToOffset(type, arm::R11, arm::SP, offset); + __ StoreToOffset(type, arm::R11, arm::R5, offset); + + const char* expected = + "strd r0, r1, [sp, #1020]\n" + "strd r11, ip, [sp, #1020]\n" + "strd r11, ip, [r5, #1020]\n"; + DriverStr(expected, "StoreWordPairToThumbOffset"); +} + +TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) { + arm::StoreOperandType type = arm::kStoreWordPair; + int32_t offset = 1024; + ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset)); + + __ StoreToOffset(type, arm::R0, arm::SP, offset); + // Same comment as in AssemblerThumb2Test.StoreWordPairToThumbOffset + // regarding the use of (R11, IP) (e.g. (R11, R12)) as source + // registers in the following instructions. + __ StoreToOffset(type, arm::R11, arm::SP, offset); + __ StoreToOffset(type, arm::R11, arm::R5, offset); + + const char* expected = + "mov ip, #1024\n" // LoadImmediate(ip, 1024) + "add ip, ip, sp\n" + "strd r0, r1, [ip, #0]\n" + + "str r5, [sp, #-4]!\n" // Push(r5) + "movw r5, #1028\n" // LoadImmediate(r5, 1024 + kRegisterSize) + "add r5, r5, sp\n" + "strd r11, ip, [r5, #0]\n" + "ldr r5, [sp], #4\n" // Pop(r5) + + "str r6, [sp, #-4]!\n" // Push(r6) + "mov r6, #1024\n" // LoadImmediate(r6, 1024) + "add r6, r6, r5\n" + "strd r11, ip, [r6, #0]\n" + "ldr r6, [sp], #4\n"; // Pop(r6) + DriverStr(expected, "StoreWordPairToNonThumbOffset"); +} + } // namespace art diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h index a496c87150..5fde9e8856 100644 --- a/compiler/utils/arm/managed_register_arm.h +++ b/compiler/utils/arm/managed_register_arm.h @@ -19,6 +19,7 @@ #include "base/logging.h" #include "constants_arm.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 58c73674da..fbd04114e4 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -63,12 +63,14 @@ void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scr void Arm64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant(SP, -adjust); + cfi().AdjustCFAOffset(adjust); } // See Arm64 PCS Section 5.2.2.1. void Arm64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant(SP, adjust); + cfi().AdjustCFAOffset(-adjust); } void Arm64Assembler::AddConstant(XRegister rd, int32_t value, Condition cond) { @@ -638,6 +640,14 @@ void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) { ___ Brk(); } +static dwarf::Reg DWARFReg(XRegister reg) { + return dwarf::Reg::Arm64Core(static_cast<int>(reg)); +} + +static dwarf::Reg DWARFReg(DRegister reg) { + return dwarf::Reg::Arm64Fp(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 8; constexpr unsigned int kJniRefSpillRegsSize = 11 + 8; @@ -660,45 +670,20 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // TUNING: Use stp. // Note: Must match Arm64JniCallingConvention::CoreSpillMask().
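For reference on the two-register strd/ldrd overloads exercised above, here is my reading of the Thumb-2 T1 STRD (immediate, offset addressing) field layout as a standalone encoder. The bit positions follow the ARMv7 manual as I recall them, so treat the expected value in the comment as a cross-check rather than ground truth.

#include <cstdint>
#include <cstdio>

uint32_t StrdT1Offset(uint32_t rn, uint32_t rt, uint32_t rt2, uint32_t byte_offset) {
  uint32_t imm8 = byte_offset / 4;  // T1 scales the 8-bit immediate by 4
  return 0xE8400000u                // base pattern: bits 31-29, 27 and 22 set (store, dual)
       | (1u << 24)                 // P = 1: offset addressing
       | (1u << 23)                 // U = 1: add the immediate
       | (rn << 16) | (rt << 12) | (rt2 << 8) | imm8;
}

int main() {
  // strd r0, r1, [sp, #1020] -- should come out as 0xe9cd01ff if the layout above is right.
  std::printf("0x%08x\n", StrdT1Offset(/*rn=*/13, /*rt=*/0, /*rt2=*/1, /*byte_offset=*/1020));
  return 0;
}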
size_t reg_offset = frame_size; - reg_offset -= 8; - StoreToOffset(LR, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X29, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X28, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X27, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X26, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X25, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X24, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X23, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X22, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X21, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X20, SP, reg_offset); - - reg_offset -= 8; - StoreDToOffset(D15, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D14, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D13, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D12, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D11, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D10, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D9, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D8, SP, reg_offset); + static constexpr XRegister x_spills[] = { + LR, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20 }; + for (size_t i = 0; i < arraysize(x_spills); i++) { + XRegister reg = x_spills[i]; + reg_offset -= 8; + StoreToOffset(reg, SP, reg_offset); + cfi_.RelOffset(DWARFReg(reg), reg_offset); + } + for (int d = 15; d >= 8; d--) { + DRegister reg = static_cast<DRegister>(d); + reg_offset -= 8; + StoreDToOffset(reg, SP, reg_offset); + cfi_.RelOffset(DWARFReg(reg), reg_offset); + } // Move TR(Caller saved) to ETR(Callee saved). The original (ETR)X21 has been saved on stack. // This way we make sure that TR is not trashed by native code. @@ -734,6 +719,7 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); // For now we only check that the size of the frame is greater than the spill size. CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize); @@ -748,51 +734,30 @@ void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedReg // TUNING: Use ldp. // Note: Must match Arm64JniCallingConvention::CoreSpillMask(). 
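The unrolled stores above become a loop over a register table. The sketch below (plain C++, register names as strings, an example frame size) just prints the resulting save-slot layout and the CFA-relative positions that the cfi_.RelOffset() calls describe; it is an illustration, not ART's XRegister/DRegister code.

#include <cstddef>
#include <cstdio>

int main() {
  const char* x_spills[] = { "LR", "X29", "X28", "X27", "X26", "X25",
                             "X24", "X23", "X22", "X21", "X20" };
  const size_t frame_size = 176;  // example value, 16-byte aligned
  size_t reg_offset = frame_size;
  for (const char* reg : x_spills) {
    reg_offset -= 8;
    std::printf("%-3s saved at [sp, #%zu]  (CFA - %zu)\n",
                reg, reg_offset, frame_size - reg_offset);
  }
  for (int d = 15; d >= 8; d--) {
    reg_offset -= 8;
    std::printf("D%-2d saved at [sp, #%zu]  (CFA - %zu)\n",
                d, reg_offset, frame_size - reg_offset);
  }
  return 0;
}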
size_t reg_offset = frame_size; - reg_offset -= 8; - LoadFromOffset(LR, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X29, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X28, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X27, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X26, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X25, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X24, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X23, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X22, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X21, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X20, SP, reg_offset); - - reg_offset -= 8; - LoadDFromOffset(D15, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D14, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D13, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D12, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D11, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D10, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D9, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D8, SP, reg_offset); + static constexpr XRegister x_spills[] = { + LR, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20 }; + for (size_t i = 0; i < arraysize(x_spills); i++) { + XRegister reg = x_spills[i]; + reg_offset -= 8; + LoadFromOffset(reg, SP, reg_offset); + cfi_.Restore(DWARFReg(reg)); + } + for (int d = 15; d >= 8; d--) { + DRegister reg = static_cast<DRegister>(d); + reg_offset -= 8; + LoadDFromOffset(reg, SP, reg_offset); + cfi_.Restore(DWARFReg(reg)); + } // Decrease frame size to start of callee saved regs. DecreaseFrameSize(frame_size); // Pop callee saved and return to LR. ___ Ret(); + + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } } // namespace arm64 diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index a69be2599e..8973b9ca8a 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -30,9 +30,11 @@ // TODO: make vixl clean wrt -Wshadow. #pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunknown-pragmas" #pragma GCC diagnostic ignored "-Wshadow" -#include "a64/macro-assembler-a64.h" -#include "a64/disasm-a64.h" +#pragma GCC diagnostic ignored "-Wmissing-noreturn" +#include "vixl/a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" #pragma GCC diagnostic pop namespace art { diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h index e1d6f3179d..62c1d4dbee 100644 --- a/compiler/utils/arm64/managed_register_arm64.h +++ b/compiler/utils/arm64/managed_register_arm64.h @@ -19,6 +19,7 @@ #include "base/logging.h" #include "constants_arm64.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h index b1b0ee5e53..ff5a77c97a 100644 --- a/compiler/utils/array_ref.h +++ b/compiler/utils/array_ref.h @@ -89,6 +89,8 @@ class ArrayRef { : array_(v.data()), size_(v.size()) { } + ArrayRef(const ArrayRef&) = default; + // Assignment operators. 
ArrayRef& operator=(const ArrayRef& other) { diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index 5340dd3a25..36342c61c5 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -105,6 +105,9 @@ void AssemblerBuffer::ExtendCapacity() { CHECK_EQ(Size(), old_size); } +void DebugFrameOpCodeWriterForAssembler::ImplicitlyAdvancePC() { + this->AdvancePC(assembler_->CodeSize()); +} Assembler* Assembler::Create(InstructionSet instruction_set) { switch (instruction_set) { diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 923ecdbd9d..ebafd3dd1e 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -29,6 +29,7 @@ #include "offsets.h" #include "x86/constants_x86.h" #include "x86_64/constants_x86_64.h" +#include "dwarf/debug_frame_opcode_writer.h" namespace art { @@ -354,6 +355,23 @@ class AssemblerBuffer { friend class AssemblerFixup; }; +// The purpose of this class is to ensure that we do not have to explicitly +// call the AdvancePC method (which is good for convenience and correctness). +class DebugFrameOpCodeWriterForAssembler FINAL + : public dwarf::DebugFrameOpCodeWriter<> { + public: + // This method is called by the opcode writers. + virtual void ImplicitlyAdvancePC() FINAL; + + explicit DebugFrameOpCodeWriterForAssembler(Assembler* buffer) + : dwarf::DebugFrameOpCodeWriter<>(), + assembler_(buffer) { + } + + private: + Assembler* assembler_; +}; + class Assembler { public: static Assembler* Create(InstructionSet instruction_set); @@ -504,18 +522,20 @@ class Assembler { // and branch to a ExceptionSlowPath if it is. virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0; - virtual void InitializeFrameDescriptionEntry() {} - virtual void FinalizeFrameDescriptionEntry() {} - // Give a vector containing FDE data, or null if not used. Note: the assembler must take care - // of handling the lifecycle. - virtual std::vector<uint8_t>* GetFrameDescriptionEntry() { return nullptr; } - virtual ~Assembler() {} + /** + * @brief Buffer of DWARF's Call Frame Information opcodes. + * @details It is used by debuggers and other tools to unwind the call stack.
+ */ + DebugFrameOpCodeWriterForAssembler& cfi() { return cfi_; } + protected: - Assembler() : buffer_() {} + Assembler() : buffer_(), cfi_(this) {} AssemblerBuffer buffer_; + + DebugFrameOpCodeWriterForAssembler cfi_; }; } // namespace art diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 6f8b3012a4..3fe1a31d70 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -44,7 +44,9 @@ static std::string tmpnam_; enum class RegisterView { // private kUsePrimaryName, - kUseSecondaryName + kUseSecondaryName, + kUseTertiaryName, + kUseQuaternaryName, }; template<typename Ass, typename Reg, typename FPReg, typename Imm> @@ -97,6 +99,15 @@ class AssemblerTest : public testing::Test { fmt); } + std::string Repeatrb(void (Ass::*f)(Reg, Reg), std::string fmt) { + return RepeatTemplatedRegisters<Reg, Reg>(f, + GetRegisters(), + GetRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>, + &AssemblerTest::GetRegName<RegisterView::kUseQuaternaryName>, + fmt); + } + std::string RepeatRr(void (Ass::*f)(Reg, Reg), std::string fmt) { return RepeatTemplatedRegisters<Reg, Reg>(f, GetRegisters(), @@ -123,6 +134,16 @@ class AssemblerTest : public testing::Test { fmt); } + std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&), size_t imm_bytes, std::string fmt) { + return RepeatTemplatedRegistersImm<FPReg, FPReg>(f, + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + imm_bytes, + fmt); + } + std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) { return RepeatTemplatedRegisters<FPReg, Reg>(f, GetFPRegisters(), @@ -230,6 +251,18 @@ class AssemblerTest : public testing::Test { UNREACHABLE(); } + // Tertiary register names are the tertiary view on registers, e.g., 16b on 64b systems. + virtual std::string GetTertiaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "Architecture does not support tertiary registers"; + UNREACHABLE(); + } + + // Quaternary register names are the quaternary view on registers, e.g., 8b on 64b systems. 
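To make the new tertiary/quaternary register views concrete: on x86-64 they correspond to the 16-bit and 8-bit names of a register. A small table-driven illustration (the four registers listed are just examples, not the harness's register set):

#include <array>
#include <cstdio>

struct RegViews { const char* r64; const char* r32; const char* r16; const char* r8; };

int main() {
  const std::array<RegViews, 4> regs = {{
      {"rax", "eax", "ax", "al"},
      {"rbx", "ebx", "bx", "bl"},
      {"rcx", "ecx", "cx", "cl"},
      {"rdx", "edx", "dx", "dl"},
  }};
  for (const RegViews& r : regs) {
    std::printf("%s -> %s -> %s -> %s\n", r.r64, r.r32, r.r16, r.r8);  // primary..quaternary
  }
  return 0;
}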
+ virtual std::string GetQuaternaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "Architecture does not support quaternary registers"; + UNREACHABLE(); + } + std::string GetRegisterName(const Reg& reg) { return GetRegName<RegisterView::kUsePrimaryName>(reg); } @@ -448,6 +481,57 @@ class AssemblerTest : public testing::Test { return str; } + template <typename Reg1, typename Reg2> + std::string RepeatTemplatedRegistersImm(void (Ass::*f)(Reg1, Reg2, const Imm&), + const std::vector<Reg1*> reg1_registers, + const std::vector<Reg2*> reg2_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + size_t imm_bytes, + std::string fmt) { + std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); + WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size()); + + std::string str; + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (int64_t imm : imms) { + Imm new_imm = CreateImmediate(imm); + (assembler_.get()->*f)(*reg1, *reg2, new_imm); + std::string base = fmt; + + std::string reg1_string = (this->*GetName1)(*reg1); + size_t reg1_index; + while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { + base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); + } + + std::string reg2_string = (this->*GetName2)(*reg2); + size_t reg2_index; + while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { + base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); + } + + size_t imm_index = base.find(IMM_TOKEN); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm; + std::string imm_string = sreg.str(); + base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + } + } + // Add a newline at the end. + str += "\n"; + return str; + } + template <RegisterView kRegView> std::string GetRegName(const Reg& reg) { std::ostringstream sreg; @@ -459,6 +543,14 @@ class AssemblerTest : public testing::Test { case RegisterView::kUseSecondaryName: sreg << GetSecondaryRegisterName(reg); break; + + case RegisterView::kUseTertiaryName: + sreg << GetTertiaryRegisterName(reg); + break; + + case RegisterView::kUseQuaternaryName: + sreg << GetQuaternaryRegisterName(reg); + break; } return sreg.str(); } diff --git a/compiler/utils/dex_cache_arrays_layout-inl.h b/compiler/utils/dex_cache_arrays_layout-inl.h new file mode 100644 index 0000000000..7d02ce35d8 --- /dev/null +++ b/compiler/utils/dex_cache_arrays_layout-inl.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_INL_H_ +#define ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_INL_H_ + +#include "dex_cache_arrays_layout.h" + +#include "base/logging.h" +#include "globals.h" +#include "mirror/array-inl.h" +#include "primitive.h" +#include "utils.h" + +namespace mirror { +class ArtField; +class ArtMethod; +class Class; +class String; +} // namespace mirror + +namespace art { + +inline DexCacheArraysLayout::DexCacheArraysLayout(const DexFile* dex_file) + : /* types_offset_ is always 0u */ + methods_offset_(types_offset_ + ArraySize<mirror::Class>(dex_file->NumTypeIds())), + strings_offset_(methods_offset_ + ArraySize<mirror::ArtMethod>(dex_file->NumMethodIds())), + fields_offset_(strings_offset_ + ArraySize<mirror::String>(dex_file->NumStringIds())), + size_(fields_offset_ + ArraySize<mirror::ArtField>(dex_file->NumFieldIds())) { +} + +inline size_t DexCacheArraysLayout::TypeOffset(uint32_t type_idx) const { + return types_offset_ + ElementOffset<mirror::Class>(type_idx); +} + +inline size_t DexCacheArraysLayout::MethodOffset(uint32_t method_idx) const { + return methods_offset_ + ElementOffset<mirror::ArtMethod>(method_idx); +} + +inline size_t DexCacheArraysLayout::StringOffset(uint32_t string_idx) const { + return strings_offset_ + ElementOffset<mirror::String>(string_idx); +} + +inline size_t DexCacheArraysLayout::FieldOffset(uint32_t field_idx) const { + return fields_offset_ + ElementOffset<mirror::ArtField>(field_idx); +} + +template <typename MirrorType> +inline size_t DexCacheArraysLayout::ElementOffset(uint32_t idx) { + return mirror::Array::DataOffset(sizeof(mirror::HeapReference<MirrorType>)).Uint32Value() + + sizeof(mirror::HeapReference<MirrorType>) * idx; +} + +template <typename MirrorType> +inline size_t DexCacheArraysLayout::ArraySize(uint32_t num_elements) { + size_t array_size = mirror::ComputeArraySize( + num_elements, ComponentSizeShiftWidth<sizeof(mirror::HeapReference<MirrorType>)>()); + DCHECK_NE(array_size, 0u); // No overflow expected for dex cache arrays. + return RoundUp(array_size, kObjectAlignment); +} + +} // namespace art + +#endif // ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_INL_H_ diff --git a/compiler/utils/dex_cache_arrays_layout.h b/compiler/utils/dex_cache_arrays_layout.h new file mode 100644 index 0000000000..b461256f63 --- /dev/null +++ b/compiler/utils/dex_cache_arrays_layout.h @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_ +#define ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_ + +namespace art { + +/** + * @class DexCacheArraysLayout + * @details This class provides the layout information for the type, method, field and + * string arrays for a DexCache with a fixed arrays' layout (such as in the boot image), + */ +class DexCacheArraysLayout { + public: + // Construct an invalid layout. 
+ DexCacheArraysLayout() + : /* types_offset_ is always 0u */ + methods_offset_(0u), + strings_offset_(0u), + fields_offset_(0u), + size_(0u) { + } + + // Construct a layout for a particular dex file. + explicit DexCacheArraysLayout(const DexFile* dex_file); + + bool Valid() const { + return Size() != 0u; + } + + size_t Size() const { + return size_; + } + + size_t TypesOffset() const { + return types_offset_; + } + + size_t TypeOffset(uint32_t type_idx) const; + + size_t MethodsOffset() const { + return methods_offset_; + } + + size_t MethodOffset(uint32_t method_idx) const; + + size_t StringsOffset() const { + return strings_offset_; + } + + size_t StringOffset(uint32_t string_idx) const; + + size_t FieldsOffset() const { + return fields_offset_; + } + + size_t FieldOffset(uint32_t field_idx) const; + + private: + static constexpr size_t types_offset_ = 0u; + const size_t methods_offset_; + const size_t strings_offset_; + const size_t fields_offset_; + const size_t size_; + + template <typename MirrorType> + static size_t ElementOffset(uint32_t idx); + + template <typename MirrorType> + static size_t ArraySize(uint32_t num_elements); +}; + +} // namespace art + +#endif // ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_ diff --git a/compiler/utils/dwarf_cfi.cc b/compiler/utils/dwarf_cfi.cc deleted file mode 100644 index a7e09c6517..0000000000 --- a/compiler/utils/dwarf_cfi.cc +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "leb128.h" -#include "utils.h" - -#include "dwarf_cfi.h" - -namespace art { - -void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment) { - if (increment < 64) { - // Encoding in opcode. - buf->push_back(0x1 << 6 | increment); - } else if (increment < 256) { - // Single byte delta. - buf->push_back(0x02); - buf->push_back(increment); - } else if (increment < 256 * 256) { - // Two byte delta. - buf->push_back(0x03); - buf->push_back(increment & 0xff); - buf->push_back((increment >> 8) & 0xff); - } else { - // Four byte delta. - buf->push_back(0x04); - Push32(buf, increment); - } -} - -void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset) { - buf->push_back(0x11); - EncodeUnsignedLeb128(reg, buf); - EncodeSignedLeb128(offset, buf); -} - -void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset) { - buf->push_back((0x2 << 6) | reg); - EncodeUnsignedLeb128(offset, buf); -} - -void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset) { - buf->push_back(0x0e); - EncodeUnsignedLeb128(offset, buf); -} - -void DW_CFA_remember_state(std::vector<uint8_t>* buf) { - buf->push_back(0x0a); -} - -void DW_CFA_restore_state(std::vector<uint8_t>* buf) { - buf->push_back(0x0b); -} - -void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit) { - // 'length' (filled in by other functions). 
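Back to the DexCacheArraysLayout header added above: the offsets are just running sums of rounded-up array sizes. A back-of-the-envelope model follows; the reference size, array header size and object alignment below are placeholder assumptions for the example, not the real mirror::Array / kObjectAlignment values.

#include <cstdint>
#include <cstdio>

constexpr size_t kHeapRefSize = 4;       // assumed sizeof(mirror::HeapReference<T>)
constexpr size_t kArrayDataOffset = 12;  // assumed array header size before the data
constexpr size_t kObjectAlign = 8;       // assumed kObjectAlignment

constexpr size_t RoundUp(size_t x, size_t n) { return (x + n - 1) / n * n; }
constexpr size_t ArraySize(size_t num_elements) {
  return RoundUp(kArrayDataOffset + kHeapRefSize * num_elements, kObjectAlign);
}

int main() {
  const size_t num_types = 100, num_methods = 2000, num_strings = 3000, num_fields = 1500;
  const size_t types_offset = 0;
  const size_t methods_offset = types_offset + ArraySize(num_types);
  const size_t strings_offset = methods_offset + ArraySize(num_methods);
  const size_t fields_offset = strings_offset + ArraySize(num_strings);
  const size_t total = fields_offset + ArraySize(num_fields);
  std::printf("types@%zu methods@%zu strings@%zu fields@%zu size=%zu\n",
              types_offset, methods_offset, strings_offset, fields_offset, total);
  // Element offset within one array, mirroring ElementOffset<MirrorType>():
  const size_t method_idx = 42;
  std::printf("method %zu at %zu\n", method_idx,
              methods_offset + kArrayDataOffset + kHeapRefSize * method_idx);
  return 0;
}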
- if (is_64bit) { - Push32(buf, 0xffffffff); // Indicates 64bit - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // 'CIE_pointer' (filled in by linker). - if (is_64bit) { - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // 'initial_location' (filled in by linker). - if (is_64bit) { - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // 'address_range' (filled in by other functions). - if (is_64bit) { - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // Augmentation length: 0 - buf->push_back(0); -} - -void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit) { - const size_t kOffsetOfAddressRange = is_64bit? 28 : 12; - CHECK(buf->size() >= kOffsetOfAddressRange + (is_64bit? 8 : 4)); - - uint8_t *p = buf->data() + kOffsetOfAddressRange; - if (is_64bit) { - p[0] = data; - p[1] = data >> 8; - p[2] = data >> 16; - p[3] = data >> 24; - p[4] = data >> 32; - p[5] = data >> 40; - p[6] = data >> 48; - p[7] = data >> 56; - } else { - p[0] = data; - p[1] = data >> 8; - p[2] = data >> 16; - p[3] = data >> 24; - } -} - -void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit) { - uint64_t length = is_64bit ? buf->size() - 12 : buf->size() - 4; - DCHECK_EQ((length & 0x3), 0U); - - uint8_t *p = is_64bit? buf->data() + 4 : buf->data(); - if (is_64bit) { - p[0] = length; - p[1] = length >> 8; - p[2] = length >> 16; - p[3] = length >> 24; - p[4] = length >> 32; - p[5] = length >> 40; - p[6] = length >> 48; - p[7] = length >> 56; - } else { - p[0] = length; - p[1] = length >> 8; - p[2] = length >> 16; - p[3] = length >> 24; - } -} - -void PadCFI(std::vector<uint8_t>* buf) { - while (buf->size() & 0x3) { - buf->push_back(0); - } -} - -} // namespace art diff --git a/compiler/utils/dwarf_cfi.h b/compiler/utils/dwarf_cfi.h deleted file mode 100644 index 0c8b1516dd..0000000000 --- a/compiler/utils/dwarf_cfi.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_DWARF_CFI_H_ -#define ART_COMPILER_UTILS_DWARF_CFI_H_ - -#include <vector> - -namespace art { - -/** - * @brief Enter a 'DW_CFA_advance_loc' into an FDE buffer - * @param buf FDE buffer. - * @param increment Amount by which to increase the current location. - */ -void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment); - -/** - * @brief Enter a 'DW_CFA_offset_extended_sf' into an FDE buffer - * @param buf FDE buffer. - * @param reg Register number. - * @param offset Offset of register address from CFA. - */ -void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset); - -/** - * @brief Enter a 'DW_CFA_offset' into an FDE buffer - * @param buf FDE buffer. - * @param reg Register number. - * @param offset Offset of register address from CFA. 
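The deleted DW_CFA_advance_loc helper above picks the shortest of four encodings depending on the PC delta; the replacement dwarf::DebugFrameOpCodeWriter produces equivalent bytes. A standalone re-statement of that size-dependent encoding, assuming little-endian byte order as the old Push32 helper did:

#include <cstdint>
#include <cstdio>
#include <vector>

void AdvanceLoc(std::vector<uint8_t>* buf, uint32_t delta) {
  if (delta < 64) {
    buf->push_back(static_cast<uint8_t>((0x1 << 6) | delta));  // delta packed into the opcode
  } else if (delta < 256) {
    buf->push_back(0x02);                                      // DW_CFA_advance_loc1
    buf->push_back(static_cast<uint8_t>(delta));
  } else if (delta < 256 * 256) {
    buf->push_back(0x03);                                      // DW_CFA_advance_loc2
    buf->push_back(static_cast<uint8_t>(delta));
    buf->push_back(static_cast<uint8_t>(delta >> 8));
  } else {
    buf->push_back(0x04);                                      // DW_CFA_advance_loc4
    for (int i = 0; i < 4; i++) buf->push_back(static_cast<uint8_t>(delta >> (8 * i)));
  }
}

int main() {
  std::vector<uint8_t> buf;
  AdvanceLoc(&buf, 12);
  AdvanceLoc(&buf, 300);
  for (uint8_t b : buf) std::printf("%02x ", b);
  std::printf("\n");
  return 0;
}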
- */ -void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset); - -/** - * @brief Enter a 'DW_CFA_def_cfa_offset' into an FDE buffer - * @param buf FDE buffer. - * @param offset New offset of CFA. - */ -void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset); - -/** - * @brief Enter a 'DW_CFA_remember_state' into an FDE buffer - * @param buf FDE buffer. - */ -void DW_CFA_remember_state(std::vector<uint8_t>* buf); - -/** - * @brief Enter a 'DW_CFA_restore_state' into an FDE buffer - * @param buf FDE buffer. - */ -void DW_CFA_restore_state(std::vector<uint8_t>* buf); - -/** - * @brief Write FDE header into an FDE buffer - * @param buf FDE buffer. - * @param is_64bit If FDE is for 64bit application. - */ -void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit); - -/** - * @brief Set 'address_range' field of an FDE buffer - * @param buf FDE buffer. - * @param data Data value. - * @param is_64bit If FDE is for 64bit application. - */ -void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit); - -/** - * @brief Set 'length' field of an FDE buffer - * @param buf FDE buffer. - * @param is_64bit If FDE is for 64bit application. - */ -void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit); - -/** - * @brief Pad an FDE buffer with 0 until its size is a multiple of 4 - * @param buf FDE buffer. - */ -void PadCFI(std::vector<uint8_t>* buf); -} // namespace art - -#endif // ART_COMPILER_UTILS_DWARF_CFI_H_ diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index b5437b0eda..709a911f6a 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -536,6 +536,10 @@ void MipsAssembler::StoreDToOffset(DRegister reg, Register base, int32_t offset) Sdc1(reg, base, offset); } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::MipsCore(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 4; void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, @@ -549,10 +553,12 @@ void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // Push callee saves and return address int stack_offset = frame_size - kFramePointerSize; StoreToOffset(kStoreWord, RA, SP, stack_offset); + cfi_.RelOffset(DWARFReg(RA), stack_offset); for (int i = callee_save_regs.size() - 1; i >= 0; --i) { stack_offset -= kFramePointerSize; Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister(); StoreToOffset(kStoreWord, reg, SP, stack_offset); + cfi_.RelOffset(DWARFReg(reg), stack_offset); } // Write out Method*. @@ -568,31 +574,40 @@ void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void MipsAssembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); // Pop callee saves and return address int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize; for (size_t i = 0; i < callee_save_regs.size(); ++i) { Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister(); LoadFromOffset(kLoadWord, reg, SP, stack_offset); + cfi_.Restore(DWARFReg(reg)); stack_offset += kFramePointerSize; } LoadFromOffset(kLoadWord, RA, SP, stack_offset); + cfi_.Restore(DWARFReg(RA)); // Decrease frame to required size. DecreaseFrameSize(frame_size); // Then jump to the return address. Jr(RA); + + // The CFI should be restored for any code that follows the exit block. 
+ cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void MipsAssembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant(SP, SP, -adjust); + cfi_.AdjustCFAOffset(adjust); } void MipsAssembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant(SP, SP, adjust); + cfi_.AdjustCFAOffset(-adjust); } void MipsAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { diff --git a/compiler/utils/mips/managed_register_mips.h b/compiler/utils/mips/managed_register_mips.h index dd55cc4e6a..40d39e3386 100644 --- a/compiler/utils/mips/managed_register_mips.h +++ b/compiler/utils/mips/managed_register_mips.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_MIPS_MANAGED_REGISTER_MIPS_H_ #include "constants_mips.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 233ae7db3c..282ab96ce4 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -568,6 +568,10 @@ void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, G } } +static dwarf::Reg DWARFReg(GpuRegister reg) { + return dwarf::Reg::Mips64Core(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 8; void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, @@ -581,10 +585,12 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // Push callee saves and return address int stack_offset = frame_size - kFramePointerSize; StoreToOffset(kStoreDoubleword, RA, SP, stack_offset); + cfi_.RelOffset(DWARFReg(RA), stack_offset); for (int i = callee_save_regs.size() - 1; i >= 0; --i) { stack_offset -= kFramePointerSize; GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister(); StoreToOffset(kStoreDoubleword, reg, SP, stack_offset); + cfi_.RelOffset(DWARFReg(reg), stack_offset); } // Write out Method*. @@ -612,31 +618,40 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Mips64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); // Pop callee saves and return address int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize; for (size_t i = 0; i < callee_save_regs.size(); ++i) { GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister(); LoadFromOffset(kLoadDoubleword, reg, SP, stack_offset); + cfi_.Restore(DWARFReg(reg)); stack_offset += kFramePointerSize; } LoadFromOffset(kLoadDoubleword, RA, SP, stack_offset); + cfi_.Restore(DWARFReg(RA)); // Decrease frame to required size. DecreaseFrameSize(frame_size); // Then jump to the return address. Jr(RA); + + // The CFI should be restored for any code that follows the exit block. 
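The RememberState/RestoreState pairing that now appears in every RemoveFrame is easiest to see with a toy CFA tracker. This models only the offset bookkeeping, not ART's DebugFrameOpCodeWriter: the epilogue unwinds the frame, but any code emitted after the return instruction still executes with the full frame, so the remembered state is reinstated and the CFA offset re-asserted.

#include <cassert>
#include <cstdio>
#include <vector>

class CfaTracker {
 public:
  void AdjustCFAOffset(int delta) { offset_ += delta; }
  void DefCFAOffset(int offset) { offset_ = offset; }
  void RememberState() { saved_.push_back(offset_); }
  void RestoreState() { assert(!saved_.empty()); offset_ = saved_.back(); saved_.pop_back(); }
  int offset() const { return offset_; }
 private:
  int offset_ = 0;
  std::vector<int> saved_;
};

int main() {
  const int frame_size = 64;
  CfaTracker cfi;
  cfi.AdjustCFAOffset(frame_size);   // prologue: sp -= frame_size
  cfi.RememberState();               // about to emit the epilogue
  cfi.AdjustCFAOffset(-frame_size);  // epilogue: sp += frame_size, then return
  std::printf("after epilogue: CFA = sp + %d\n", cfi.offset());
  cfi.RestoreState();                // code after the exit block still lives in the frame
  cfi.DefCFAOffset(frame_size);      // mirrors the explicit re-assert in the code above
  std::printf("after restore:  CFA = sp + %d\n", cfi.offset());
  return 0;
}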
+ cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void Mips64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant64(SP, SP, -adjust); + cfi_.AdjustCFAOffset(adjust); } void Mips64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant64(SP, SP, adjust); + cfi_.AdjustCFAOffset(-adjust); } void Mips64Assembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { @@ -1025,7 +1040,7 @@ void Mips64ExceptionSlowPath::Emit(Assembler* sasm) { __ Move(A0, scratch_.AsGpuRegister()); // Set up call to Thread::Current()->pDeliverException __ LoadFromOffset(kLoadDoubleword, T9, S1, - QUICK_ENTRYPOINT_OFFSET(4, pDeliverException).Int32Value()); + QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()); __ Jr(T9); // Call never returns __ Break(); diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h index 924a928389..4c4705bbfb 100644 --- a/compiler/utils/mips64/managed_register_mips64.h +++ b/compiler/utils/mips64/managed_register_mips64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_ #include "constants_mips64.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 5773459ff5..f8bba07f84 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -20,7 +20,6 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "memory_region.h" #include "thread.h" -#include "utils/dwarf_cfi.h" namespace art { namespace x86 { @@ -695,6 +694,28 @@ void X86Assembler::ucomisd(XmmRegister a, XmmRegister b) { } +void X86Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0B); + EmitXmmRegisterOperand(dst, src); + EmitUint8(imm.value()); +} + + +void X86Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0A); + EmitXmmRegisterOperand(dst, src); + EmitUint8(imm.value()); +} + + void X86Assembler::sqrtsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); @@ -1264,32 +1285,62 @@ void X86Assembler::decl(const Address& address) { void X86Assembler::shll(Register reg, const Immediate& imm) { - EmitGenericShift(4, reg, imm); + EmitGenericShift(4, Operand(reg), imm); } void X86Assembler::shll(Register operand, Register shifter) { - EmitGenericShift(4, operand, shifter); + EmitGenericShift(4, Operand(operand), shifter); +} + + +void X86Assembler::shll(const Address& address, const Immediate& imm) { + EmitGenericShift(4, address, imm); +} + + +void X86Assembler::shll(const Address& address, Register shifter) { + EmitGenericShift(4, address, shifter); } void X86Assembler::shrl(Register reg, const Immediate& imm) { - EmitGenericShift(5, reg, imm); + EmitGenericShift(5, Operand(reg), imm); } void X86Assembler::shrl(Register operand, Register shifter) { - EmitGenericShift(5, operand, shifter); + EmitGenericShift(5, Operand(operand), shifter); +} + + +void X86Assembler::shrl(const Address& address, const Immediate& imm) { + EmitGenericShift(5, address, imm); +} + + +void X86Assembler::shrl(const Address& address, Register shifter) { + 
EmitGenericShift(5, address, shifter); } void X86Assembler::sarl(Register reg, const Immediate& imm) { - EmitGenericShift(7, reg, imm); + EmitGenericShift(7, Operand(reg), imm); } void X86Assembler::sarl(Register operand, Register shifter) { - EmitGenericShift(7, operand, shifter); + EmitGenericShift(7, Operand(operand), shifter); +} + + +void X86Assembler::sarl(const Address& address, const Immediate& imm) { + EmitGenericShift(7, address, imm); +} + + +void X86Assembler::sarl(const Address& address, Register shifter) { + EmitGenericShift(7, address, shifter); } @@ -1302,6 +1353,15 @@ void X86Assembler::shld(Register dst, Register src, Register shifter) { } +void X86Assembler::shld(Register dst, Register src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xA4); + EmitRegisterOperand(src, dst); + EmitUint8(imm.value() & 0xFF); +} + + void X86Assembler::shrd(Register dst, Register src, Register shifter) { DCHECK_EQ(ECX, shifter); AssemblerBuffer::EnsureCapacity ensured(&buffer_); @@ -1311,6 +1371,15 @@ void X86Assembler::shrd(Register dst, Register src, Register shifter) { } +void X86Assembler::shrd(Register dst, Register src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xAC); + EmitRegisterOperand(src, dst); + EmitUint8(imm.value() & 0xFF); +} + + void X86Assembler::negl(Register reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF7); @@ -1445,6 +1514,15 @@ void X86Assembler::cmpxchgl(const Address& address, Register reg) { EmitOperand(reg, address); } + +void X86Assembler::cmpxchg8b(const Address& address) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xC7); + EmitOperand(1, address); +} + + void X86Assembler::mfence() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -1585,38 +1663,32 @@ void X86Assembler::EmitLabelLink(Label* label) { void X86Assembler::EmitGenericShift(int reg_or_opcode, - Register reg, + const Operand& operand, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK(imm.is_int8()); if (imm.value() == 1) { EmitUint8(0xD1); - EmitOperand(reg_or_opcode, Operand(reg)); + EmitOperand(reg_or_opcode, operand); } else { EmitUint8(0xC1); - EmitOperand(reg_or_opcode, Operand(reg)); + EmitOperand(reg_or_opcode, operand); EmitUint8(imm.value() & 0xFF); } } void X86Assembler::EmitGenericShift(int reg_or_opcode, - Register operand, + const Operand& operand, Register shifter) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK_EQ(shifter, ECX); EmitUint8(0xD3); - EmitOperand(reg_or_opcode, Operand(operand)); -} - -void X86Assembler::InitializeFrameDescriptionEntry() { - WriteFDEHeader(&cfi_info_, false /* is_64bit */); + EmitOperand(reg_or_opcode, operand); } -void X86Assembler::FinalizeFrameDescriptionEntry() { - WriteFDEAddressRange(&cfi_info_, buffer_.Size(), false /* is_64bit */); - PadCFI(&cfi_info_); - WriteCFILength(&cfi_info_, false /* is_64bit */); +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86Core(static_cast<int>(reg)); } constexpr size_t kFramePointerSize = 4; @@ -1624,54 +1696,33 @@ constexpr size_t kFramePointerSize = 4; void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& spill_regs, const ManagedRegisterEntrySpills& entry_spills) { - cfi_cfa_offset_ = kFramePointerSize; // Only return address on stack - cfi_pc_ = buffer_.Size(); // Nothing emitted yet 
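The EmitGenericShift overloads above share one encoding path for shl/shr/sar: the operation is selected by the ModRM reg field (4, 5 and 7 respectively), with opcode 0xD1 for shift-by-one and 0xC1 plus an immediate byte otherwise. A register-form, immediate-only sketch; the expected byte sequences in the comments are my own cross-check:

#include <cstdint>
#include <cstdio>
#include <vector>

void EmitShiftByImm(std::vector<uint8_t>* out, int opcode_ext, int reg, uint8_t imm) {
  uint8_t modrm = static_cast<uint8_t>(0xC0 | (opcode_ext << 3) | reg);  // mod=11: register operand
  if (imm == 1) {
    out->push_back(0xD1);  // shift by 1 has its own opcode, no immediate byte
    out->push_back(modrm);
  } else {
    out->push_back(0xC1);  // shift by imm8
    out->push_back(modrm);
    out->push_back(imm);
  }
}

int main() {
  std::vector<uint8_t> buf;
  EmitShiftByImm(&buf, /*shl*/ 4, /*eax*/ 0, 3);  // expect c1 e0 03
  EmitShiftByImm(&buf, /*sar*/ 7, /*edx*/ 2, 1);  // expect d1 fa
  for (uint8_t b : buf) std::printf("%02x ", b);
  std::printf("\n");
  return 0;
}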
- DCHECK_EQ(cfi_pc_, 0U); - - uint32_t reg_offset = 1; + DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet. + cfi_.SetCurrentCFAOffset(4); // Return address on stack. CHECK_ALIGNED(frame_size, kStackAlignment); int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { - x86::X86ManagedRegister spill = spill_regs.at(i).AsX86(); - DCHECK(spill.IsCpuRegister()); - pushl(spill.AsCpuRegister()); + Register spill = spill_regs.at(i).AsX86().AsCpuRegister(); + pushl(spill); gpr_count++; - - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += kFramePointerSize; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); - // DW_CFA_offset reg offset - reg_offset++; - DW_CFA_offset(&cfi_info_, spill_regs.at(i).AsX86().DWARFRegId(), reg_offset); + cfi_.AdjustCFAOffset(kFramePointerSize); + cfi_.RelOffset(DWARFReg(spill), 0); } - // return address then method on stack + // return address then method on stack. int32_t adjust = frame_size - (gpr_count * kFramePointerSize) - sizeof(StackReference<mirror::ArtMethod>) /*method*/ - kFramePointerSize /*return address*/; addl(ESP, Immediate(-adjust)); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += adjust; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); - + cfi_.AdjustCFAOffset(adjust); pushl(method_reg.AsX86().AsCpuRegister()); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += kFramePointerSize; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); + cfi_.AdjustCFAOffset(kFramePointerSize); + DCHECK_EQ(static_cast<size_t>(cfi_.GetCurrentCFAOffset()), frame_size); for (size_t i = 0; i < entry_spills.size(); ++i) { ManagedRegisterSpill spill = entry_spills.at(i); if (spill.AsX86().IsCpuRegister()) { - movl(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsCpuRegister()); + int offset = frame_size + spill.getSpillOffset(); + movl(Address(ESP, offset), spill.AsX86().AsCpuRegister()); } else { DCHECK(spill.AsX86().IsXmmRegister()); if (spill.getSize() == 8) { @@ -1687,30 +1738,33 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void X86Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); - addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - - sizeof(StackReference<mirror::ArtMethod>))); + cfi_.RememberState(); + int adjust = frame_size - (spill_regs.size() * kFramePointerSize) - + sizeof(StackReference<mirror::ArtMethod>); + addl(ESP, Immediate(adjust)); + cfi_.AdjustCFAOffset(-adjust); for (size_t i = 0; i < spill_regs.size(); ++i) { - x86::X86ManagedRegister spill = spill_regs.at(i).AsX86(); - DCHECK(spill.IsCpuRegister()); - popl(spill.AsCpuRegister()); + Register spill = spill_regs.at(i).AsX86().AsCpuRegister(); + popl(spill); + cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize)); + cfi_.Restore(DWARFReg(spill)); } ret(); + // The CFI should be restored for any code that follows the exit block. 
+ cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void X86Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addl(ESP, Immediate(-adjust)); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += adjust; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); + cfi_.AdjustCFAOffset(adjust); } void X86Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addl(ESP, Immediate(adjust)); + cfi_.AdjustCFAOffset(-adjust); } void X86Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) { diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 6ccf2e365d..37acb6ef16 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -205,7 +205,7 @@ class Address : public Operand { class X86Assembler FINAL : public Assembler { public: - explicit X86Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {} + explicit X86Assembler() {} virtual ~X86Assembler() {} /* @@ -312,6 +312,9 @@ class X86Assembler FINAL : public Assembler { void ucomiss(XmmRegister a, XmmRegister b); void ucomisd(XmmRegister a, XmmRegister b); + void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm); + void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm); + void sqrtsd(XmmRegister dst, XmmRegister src); void sqrtss(XmmRegister dst, XmmRegister src); @@ -426,12 +429,20 @@ class X86Assembler FINAL : public Assembler { void shll(Register reg, const Immediate& imm); void shll(Register operand, Register shifter); + void shll(const Address& address, const Immediate& imm); + void shll(const Address& address, Register shifter); void shrl(Register reg, const Immediate& imm); void shrl(Register operand, Register shifter); + void shrl(const Address& address, const Immediate& imm); + void shrl(const Address& address, Register shifter); void sarl(Register reg, const Immediate& imm); void sarl(Register operand, Register shifter); + void sarl(const Address& address, const Immediate& imm); + void sarl(const Address& address, Register shifter); void shld(Register dst, Register src, Register shifter); + void shld(Register dst, Register src, const Immediate& imm); void shrd(Register dst, Register src, Register shifter); + void shrd(Register dst, Register src, const Immediate& imm); void negl(Register reg); void notl(Register reg); @@ -454,6 +465,7 @@ class X86Assembler FINAL : public Assembler { X86Assembler* lock(); void cmpxchgl(const Address& address, Register reg); + void cmpxchg8b(const Address& address); void mfence(); @@ -473,6 +485,10 @@ class X86Assembler FINAL : public Assembler { lock()->cmpxchgl(address, reg); } + void LockCmpxchg8b(const Address& address) { + lock()->cmpxchg8b(address); + } + // // Misc. functionality // @@ -596,12 +612,6 @@ class X86Assembler FINAL : public Assembler { // and branch to a ExceptionSlowPath if it is. 
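For readers unfamiliar with cmpxchg8b: the new LockCmpxchg8b utility above lets 32-bit x86 code compare-and-swap a 64-bit value atomically. As an analogy only (not a claim about how ART invokes it), the same operation expressed with portable C++ atomics, which a 32-bit x86 compiler typically lowers to lock cmpxchg8b:

#include <atomic>
#include <cstdint>
#include <cstdio>

int main() {
  std::atomic<uint64_t> field{0x0123456789abcdefULL};
  uint64_t expected = 0x0123456789abcdefULL;
  const uint64_t desired = 0xfedcba9876543210ULL;
  // Succeeds only if `field` still holds `expected`; otherwise `expected` is updated.
  bool ok = field.compare_exchange_strong(expected, desired);
  std::printf("swap %s, value is now 0x%016llx\n", ok ? "succeeded" : "failed",
              static_cast<unsigned long long>(field.load()));
  return 0;
}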
void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - void InitializeFrameDescriptionEntry() OVERRIDE; - void FinalizeFrameDescriptionEntry() OVERRIDE; - std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE { - return &cfi_info_; - } - private: inline void EmitUint8(uint8_t value); inline void EmitInt32(int32_t value); @@ -617,11 +627,8 @@ class X86Assembler FINAL : public Assembler { void EmitLabelLink(Label* label); void EmitNearLabelLink(Label* label); - void EmitGenericShift(int rm, Register reg, const Immediate& imm); - void EmitGenericShift(int rm, Register operand, Register shifter); - - std::vector<uint8_t> cfi_info_; - uint32_t cfi_cfa_offset_, cfi_pc_; + void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm); + void EmitGenericShift(int rm, const Operand& operand, Register shifter); DISALLOW_COPY_AND_ASSIGN(X86Assembler); }; diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index fccb510afb..dba3b6ba67 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -127,4 +127,49 @@ TEST_F(AssemblerX86Test, LoadLongConstant) { DriverStr(expected, "LoadLongConstant"); } +TEST_F(AssemblerX86Test, LockCmpxchgl) { + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12), + x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12), + x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12), + x86::Register(x86::EDI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EBP), 0), x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0), + x86::Register(x86::ESI)); + const char* expected = + "lock cmpxchgl %ESI, 0xc(%EDI,%EBX,4)\n" + "lock cmpxchgl %ESI, 0xc(%EDI,%ESI,4)\n" + "lock cmpxchgl %EDI, 0xc(%EDI,%ESI,4)\n" + "lock cmpxchgl %ESI, (%EBP)\n" + "lock cmpxchgl %ESI, (%EBP,%ESI,1)\n"; + + DriverStr(expected, "lock_cmpxchgl"); +} + +TEST_F(AssemblerX86Test, LockCmpxchg8b) { + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address(x86::Register(x86::EBP), 0)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0)); + const char* expected = + "lock cmpxchg8b 0xc(%EDI,%EBX,4)\n" + "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n" + "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n" + "lock cmpxchg8b (%EBP)\n" + "lock cmpxchg8b (%EBP,%ESI,1)\n"; + + DriverStr(expected, "lock_cmpxchg8b"); +} + } // namespace art diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h index 5d46ee25cd..4e8c41e217 100644 --- a/compiler/utils/x86/managed_register_x86.h +++ b/compiler/utils/x86/managed_register_x86.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_X86_MANAGED_REGISTER_X86_H_ #include "constants_x86.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { @@ -88,14 +89,6 @@ const int kNumberOfAllocIds = 
kNumberOfCpuAllocIds + kNumberOfXmmAllocIds + // There is a one-to-one mapping between ManagedRegister and register id. class X86ManagedRegister : public ManagedRegister { public: - int DWARFRegId() const { - CHECK(IsCpuRegister()); - // For all the X86 registers we care about: - // EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - // DWARF register id is the same as id_. - return static_cast<int>(id_); - } - ByteRegister AsByteRegister() const { CHECK(IsCpuRegister()); CHECK_LT(AsCpuRegister(), ESP); // ESP, EBP, ESI and EDI cannot be encoded as byte registers. diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index b8c757c05d..638659d635 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -20,7 +20,6 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "memory_region.h" #include "thread.h" -#include "utils/dwarf_cfi.h" namespace art { namespace x86_64 { @@ -210,7 +209,9 @@ void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) { void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitOptionalByteRegNormalizingRex32(dst, src); + // Byte register is only in the source register form, so we don't use + // EmitOptionalByteRegNormalizingRex32(dst, src); + EmitOptionalRex32(dst, src); EmitUint8(0x0F); EmitUint8(0xB6); EmitOperand(dst.LowBits(), src); @@ -228,7 +229,9 @@ void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) { void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitOptionalByteRegNormalizingRex32(dst, src); + // Byte register is only in the source register form, so we don't use + // EmitOptionalByteRegNormalizingRex32(dst, src); + EmitOptionalRex32(dst, src); EmitUint8(0x0F); EmitUint8(0xBE); EmitOperand(dst.LowBits(), src); @@ -796,6 +799,30 @@ void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) { } +void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0B); + EmitXmmRegisterOperand(dst.LowBits(), src); + EmitUint8(imm.value()); +} + + +void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0A); + EmitXmmRegisterOperand(dst.LowBits(), src); + EmitUint8(imm.value()); +} + + void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); @@ -1838,11 +1865,22 @@ X86_64Assembler* X86_64Assembler::lock() { void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(reg, address); + EmitUint8(0x0F); + EmitUint8(0xB1); + EmitOperand(reg.LowBits(), address); +} + + +void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(reg, address); EmitUint8(0x0F); EmitUint8(0xB1); EmitOperand(reg.LowBits(), address); } + void X86_64Assembler::mfence() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -1941,6 +1979,10 @@ void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) for (int i = 
1; i < length; i++) { EmitUint8(operand.encoding_[i]); } + AssemblerFixup* fixup = operand.GetFixup(); + if (fixup != nullptr) { + EmitFixup(fixup); + } } @@ -2139,11 +2181,18 @@ void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) { } void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) { - EmitOptionalRex(true, false, dst.NeedsRex(), false, src.NeedsRex()); + // For src, SPL, BPL, SIL, DIL need the rex prefix. + bool force = src.AsRegister() > 3; + EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex()); } void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) { - uint8_t rex = 0x40 | operand.rex(); // REX.0000 + uint8_t rex = operand.rex(); + // For dst, SPL, BPL, SIL, DIL need the rex prefix. + bool force = dst.AsRegister() > 3; + if (force) { + rex |= 0x40; // REX.0000 + } if (dst.NeedsRex()) { rex |= 0x44; // REX.0R00 } @@ -2152,14 +2201,11 @@ void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const } } -void X86_64Assembler::InitializeFrameDescriptionEntry() { - WriteFDEHeader(&cfi_info_, true /* is_64bit */); +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86_64Core(static_cast<int>(reg)); } - -void X86_64Assembler::FinalizeFrameDescriptionEntry() { - WriteFDEAddressRange(&cfi_info_, buffer_.Size(), true /* is_64bit */); - PadCFI(&cfi_info_); - WriteCFILength(&cfi_info_, true /* is_64bit */); +static dwarf::Reg DWARFReg(FloatRegister reg) { + return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); } constexpr size_t kFramePointerSize = 8; @@ -2167,11 +2213,8 @@ constexpr size_t kFramePointerSize = 8; void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& spill_regs, const ManagedRegisterEntrySpills& entry_spills) { - cfi_cfa_offset_ = kFramePointerSize; // Only return address on stack - cfi_pc_ = buffer_.Size(); // Nothing emitted yet - DCHECK_EQ(cfi_pc_, 0U); - - uint32_t reg_offset = 1; + DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet. + cfi_.SetCurrentCFAOffset(8); // Return address on stack. CHECK_ALIGNED(frame_size, kStackAlignment); int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { @@ -2179,29 +2222,16 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, if (spill.IsCpuRegister()) { pushq(spill.AsCpuRegister()); gpr_count++; - - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += kFramePointerSize; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); - // DW_CFA_offset reg offset - reg_offset++; - DW_CFA_offset(&cfi_info_, spill.DWARFRegId(), reg_offset); + cfi_.AdjustCFAOffset(kFramePointerSize); + cfi_.RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0); } } - // return address then method on stack + // return address then method on stack. 
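For reference, the cfi_ calls that now accompany each register spill correspond to the DWARF opcodes the deleted code emitted by hand (DW_CFA_advance_loc, DW_CFA_def_cfa_offset, DW_CFA_offset). A sketch for one spilled register, written with GNU-style directive names purely for illustration:

  pushq %rbx                  # the stack pointer moves down by 8
  .cfi_adjust_cfa_offset 8    # <- cfi_.AdjustCFAOffset(kFramePointerSize)
  .cfi_rel_offset %rbx, 0     # <- cfi_.RelOffset(DWARFReg(RBX), 0): RBX saved at SP + 0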
int64_t rest_of_frame = static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize /*return address*/; subq(CpuRegister(RSP), Immediate(rest_of_frame)); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += rest_of_frame; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); + cfi_.AdjustCFAOffset(rest_of_frame); // spill xmms int64_t offset = rest_of_frame; @@ -2210,6 +2240,7 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, if (spill.IsXmmRegister()) { offset -= sizeof(double); movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister()); + cfi_.RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset); } } @@ -2241,6 +2272,7 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void X86_64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); int gpr_count = 0; // unspill xmms int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize; @@ -2249,34 +2281,38 @@ void X86_64Assembler::RemoveFrame(size_t frame_size, if (spill.IsXmmRegister()) { offset += sizeof(double); movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset)); + cfi_.Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister())); } else { gpr_count++; } } - addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize)); + int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize; + addq(CpuRegister(RSP), Immediate(adjust)); + cfi_.AdjustCFAOffset(-adjust); for (size_t i = 0; i < spill_regs.size(); ++i) { x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); if (spill.IsCpuRegister()) { popq(spill.AsCpuRegister()); + cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize)); + cfi_.Restore(DWARFReg(spill.AsCpuRegister().AsRegister())); } } ret(); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void X86_64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust))); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += adjust; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); + cfi_.AdjustCFAOffset(adjust); } void X86_64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addq(CpuRegister(RSP), Immediate(adjust)); + cfi_.AdjustCFAOffset(-adjust); } void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) { @@ -2704,5 +2740,55 @@ void X86_64ExceptionSlowPath::Emit(Assembler *sasm) { #undef __ } +void X86_64Assembler::AddConstantArea() { + const std::vector<int32_t>& area = constant_area_.GetBuffer(); + for (size_t i = 0, e = area.size(); i < e; i++) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitInt32(area[i]); + } +} + +int ConstantArea::AddInt32(int32_t v) { + for (size_t i = 0, e = buffer_.size(); i < e; i++) { + if (v == buffer_[i]) { + return i * elem_size_; + } + } + + // Didn't match anything. 
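RememberState/RestoreState bracket the epilogue so the unwind information stays valid for code that follows the exit block, where the frame is conceptually still live. In DWARF terms this is the remember/restore-state pair; roughly:

  .cfi_remember_state               # <- cfi_.RememberState(): snapshot the current rules
  ...pops with .cfi_restore / .cfi_adjust_cfa_offset...
  ret
  .cfi_restore_state                # <- cfi_.RestoreState(): back to the in-frame rules
  .cfi_def_cfa_offset <frame_size>  # <- cfi_.DefCFAOffset(frame_size)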
+ int result = buffer_.size() * elem_size_; + buffer_.push_back(v); + return result; +} + +int ConstantArea::AddInt64(int64_t v) { + int32_t v_low = v; + int32_t v_high = v >> 32; + if (buffer_.size() > 1) { + // Ensure we don't pass the end of the buffer. + for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) { + if (v_low == buffer_[i] && v_high == buffer_[i + 1]) { + return i * elem_size_; + } + } + } + + // Didn't match anything. + int result = buffer_.size() * elem_size_; + buffer_.push_back(v_low); + buffer_.push_back(v_high); + return result; +} + +int ConstantArea::AddDouble(double v) { + // Treat the value as a 64-bit integer value. + return AddInt64(bit_cast<int64_t, double>(v)); +} + +int ConstantArea::AddFloat(float v) { + // Treat the value as a 32-bit integer value. + return AddInt32(bit_cast<int32_t, float>(v)); +} + } // namespace x86_64 } // namespace art diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index e2fd5fbb80..15b8b15c74 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -97,9 +97,13 @@ class Operand : public ValueObject { && (reg.NeedsRex() == ((rex_ & 1) != 0)); // REX.000B bits match. } + AssemblerFixup* GetFixup() const { + return fixup_; + } + protected: // Operand can be sub classed (e.g: Address). - Operand() : rex_(0), length_(0) { } + Operand() : rex_(0), length_(0), fixup_(nullptr) { } void SetModRM(uint8_t mod_in, CpuRegister rm_in) { CHECK_EQ(mod_in & ~3, 0); @@ -136,12 +140,17 @@ class Operand : public ValueObject { length_ += disp_size; } + void SetFixup(AssemblerFixup* fixup) { + fixup_ = fixup; + } + private: uint8_t rex_; uint8_t length_; uint8_t encoding_[6]; + AssemblerFixup* fixup_; - explicit Operand(CpuRegister reg) : rex_(0), length_(0) { SetModRM(3, reg); } + explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); } // Get the operand encoding byte at the given index. uint8_t encoding_at(int index_in) const { @@ -226,12 +235,25 @@ class Address : public Operand { result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP)); result.SetDisp32(addr); } else { + // RIP addressing is done using RBP as the base register. + // The value in RBP isn't used. Instead the offset is added to RIP. result.SetModRM(0, CpuRegister(RBP)); result.SetDisp32(addr); } return result; } + // An RIP relative address that will be fixed up later. + static Address RIP(AssemblerFixup* fixup) { + Address result; + // RIP addressing is done using RBP as the base register. + // The value in RBP isn't used. Instead the offset is added to RIP. + result.SetModRM(0, CpuRegister(RBP)); + result.SetDisp32(0); + result.SetFixup(fixup); + return result; + } + // If no_rip is true then the Absolute address isn't RIP relative. static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) { return Absolute(addr.Int32Value(), no_rip); @@ -242,9 +264,46 @@ class Address : public Operand { }; +/** + * Class to handle constant area values. + */ +class ConstantArea { + public: + ConstantArea() {} + + // Add a double to the constant area, returning the offset into + // the constant area where the literal resides. + int AddDouble(double v); + + // Add a float to the constant area, returning the offset into + // the constant area where the literal resides. + int AddFloat(float v); + + // Add an int32_t to the constant area, returning the offset into + // the constant area where the literal resides. 
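A small worked example of how the deduplicating constant area hands out byte offsets (the values are made up; the arithmetic follows the AddInt32/AddInt64 implementations above):

  x86_64::ConstantArea area;
  int a = area.AddInt32(42);                  // offset 0
  int b = area.AddInt64(0x0102030405060708);  // offset 4: low word at byte 4, high word at byte 8
  int c = area.AddInt32(42);                  // offset 0 again - the existing slot is reused
  int d = area.AddDouble(2.0);                // offset 12: stored as the raw 64-bit bit pattern of 2.0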
+ int AddInt32(int32_t v); + + // Add an int64_t to the constant area, returning the offset into + // the constant area where the literal resides. + int AddInt64(int64_t v); + + int GetSize() const { + return buffer_.size() * elem_size_; + } + + const std::vector<int32_t>& GetBuffer() const { + return buffer_; + } + + private: + static constexpr size_t elem_size_ = sizeof(int32_t); + std::vector<int32_t> buffer_; +}; + + class X86_64Assembler FINAL : public Assembler { public: - X86_64Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {} + X86_64Assembler() {} virtual ~X86_64Assembler() {} /* @@ -353,6 +412,9 @@ class X86_64Assembler FINAL : public Assembler { void ucomiss(XmmRegister a, XmmRegister b); void ucomisd(XmmRegister a, XmmRegister b); + void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm); + void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm); + void sqrtsd(XmmRegister dst, XmmRegister src); void sqrtss(XmmRegister dst, XmmRegister src); @@ -515,6 +577,7 @@ class X86_64Assembler FINAL : public Assembler { X86_64Assembler* lock(); void cmpxchgl(const Address& address, CpuRegister reg); + void cmpxchgq(const Address& address, CpuRegister reg); void mfence(); @@ -537,6 +600,10 @@ class X86_64Assembler FINAL : public Assembler { lock()->cmpxchgl(address, reg); } + void LockCmpxchgq(const Address& address, CpuRegister reg) { + lock()->cmpxchgq(address, reg); + } + // // Misc. functionality // @@ -661,11 +728,27 @@ class X86_64Assembler FINAL : public Assembler { // and branch to a ExceptionSlowPath if it is. void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - void InitializeFrameDescriptionEntry() OVERRIDE; - void FinalizeFrameDescriptionEntry() OVERRIDE; - std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE { - return &cfi_info_; - } + // Add a double to the constant area, returning the offset into + // the constant area where the literal resides. + int AddDouble(double v) { return constant_area_.AddDouble(v); } + + // Add a float to the constant area, returning the offset into + // the constant area where the literal resides. + int AddFloat(float v) { return constant_area_.AddFloat(v); } + + // Add an int32_t to the constant area, returning the offset into + // the constant area where the literal resides. + int AddInt32(int32_t v) { return constant_area_.AddInt32(v); } + + // Add an int64_t to the constant area, returning the offset into + // the constant area where the literal resides. + int AddInt64(int64_t v) { return constant_area_.AddInt64(v); } + + // Add the contents of the constant area to the assembler buffer. + void AddConstantArea(); + + // Is the constant area empty? Return true if there are no literals in the constant area. 
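How the code generator consumes this API is outside this hunk; a plausible pattern (hypothetical, not taken from the patch) is to pool literals while emitting instructions and append the area once after the method body:

  // While generating code: remember the offset, to be referenced later
  // through a RIP-relative Address fixup.
  int pi_offset = assembler->AddDouble(3.141592653589793);
  // After the last instruction of the method:
  if (!assembler->IsConstantAreaEmpty()) {
    assembler->AddConstantArea();  // appends the pooled 32-bit words to the code buffer
  }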
+ bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; } private: void EmitUint8(uint8_t value); @@ -712,8 +795,7 @@ class X86_64Assembler FINAL : public Assembler { void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src); void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand); - std::vector<uint8_t> cfi_info_; - uint32_t cfi_cfa_offset_, cfi_pc_; + ConstantArea constant_area_; DISALLOW_COPY_AND_ASSIGN(X86_64Assembler); }; diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index c2052c7732..116190a832 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -174,6 +174,40 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64 secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14d"); secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15d"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "ax"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "bx"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "cx"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "dx"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "bp"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "sp"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "si"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "di"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15w"); + + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "al"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "bl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "cl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "dl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "bpl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "spl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "sil"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "dil"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15b"); + fp_registers_.push_back(new 
x86_64::XmmRegister(x86_64::XMM0)); fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM1)); fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM2)); @@ -216,9 +250,21 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64 return secondary_register_names_[reg]; } + std::string GetTertiaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE { + CHECK(tertiary_register_names_.find(reg) != tertiary_register_names_.end()); + return tertiary_register_names_[reg]; + } + + std::string GetQuaternaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE { + CHECK(quaternary_register_names_.find(reg) != quaternary_register_names_.end()); + return quaternary_register_names_[reg]; + } + private: std::vector<x86_64::CpuRegister*> registers_; std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> secondary_register_names_; + std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> tertiary_register_names_; + std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> quaternary_register_names_; std::vector<x86_64::XmmRegister*> fp_registers_; }; @@ -543,6 +589,56 @@ TEST_F(AssemblerX86_64Test, Xchgl) { // DriverStr(Repeatrr(&x86_64::X86_64Assembler::xchgl, "xchgl %{reg2}, %{reg1}"), "xchgl"); } +TEST_F(AssemblerX86_64Test, LockCmpxchgl) { + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::R8)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), + x86_64::CpuRegister(x86_64::RSI)); + const char* expected = + "lock cmpxchgl %ESI, 0xc(%RDI,%RBX,4)\n" + "lock cmpxchgl %ESI, 0xc(%RDI,%R9,4)\n" + "lock cmpxchgl %R8d, 0xc(%RDI,%R9,4)\n" + "lock cmpxchgl %ESI, (%R13)\n" + "lock cmpxchgl %ESI, (%R13,%R9,1)\n"; + + DriverStr(expected, "lock_cmpxchgl"); +} + +TEST_F(AssemblerX86_64Test, LockCmpxchgq) { + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::R8)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), + x86_64::CpuRegister(x86_64::RSI)); + const char* expected = + "lock cmpxchg %RSI, 0xc(%RDI,%RBX,4)\n" + "lock cmpxchg %RSI, 0xc(%RDI,%R9,4)\n" + "lock cmpxchg %R8, 0xc(%RDI,%R9,4)\n" + "lock cmpxchg %RSI, (%R13)\n" + "lock cmpxchg %RSI, (%R13,%R9,1)\n"; + + DriverStr(expected, "lock_cmpxchg"); +} 
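The only difference between the two locked compare-exchange forms tested above is the REX.W bit: cmpxchgl goes through EmitOptionalRex32 while cmpxchgq uses EmitRex64. Hand-assembled for illustration (these bytes are not captured from the test output):

  lock cmpxchgl %esi, (%r13)   # f0 41 0f b1 75 00   (REX.B for R13)
  lock cmpxchgq %rsi, (%r13)   # f0 49 0f b1 75 00   (REX.W | REX.B)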
+ TEST_F(AssemblerX86_64Test, Movl) { GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address( x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); @@ -696,6 +792,14 @@ TEST_F(AssemblerX86_64Test, Sqrtsd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::sqrtsd, "sqrtsd %{reg2}, %{reg1}"), "sqrtsd"); } +TEST_F(AssemblerX86_64Test, Roundss) { + DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundss, 1, "roundss ${imm}, %{reg2}, %{reg1}"), "roundss"); +} + +TEST_F(AssemblerX86_64Test, Roundsd) { + DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundsd, 1, "roundsd ${imm}, %{reg2}, %{reg1}"), "roundsd"); +} + TEST_F(AssemblerX86_64Test, Xorps) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorps, "xorps %{reg2}, %{reg1}"), "xorps"); } @@ -820,31 +924,12 @@ std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test, "l", "ge", "le" }; std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); - - std::string byte_regs[16]; - byte_regs[x86_64::RAX] = "al"; - byte_regs[x86_64::RBX] = "bl"; - byte_regs[x86_64::RCX] = "cl"; - byte_regs[x86_64::RDX] = "dl"; - byte_regs[x86_64::RBP] = "bpl"; - byte_regs[x86_64::RSP] = "spl"; - byte_regs[x86_64::RSI] = "sil"; - byte_regs[x86_64::RDI] = "dil"; - byte_regs[x86_64::R8] = "r8b"; - byte_regs[x86_64::R9] = "r9b"; - byte_regs[x86_64::R10] = "r10b"; - byte_regs[x86_64::R11] = "r11b"; - byte_regs[x86_64::R12] = "r12b"; - byte_regs[x86_64::R13] = "r13b"; - byte_regs[x86_64::R14] = "r14b"; - byte_regs[x86_64::R15] = "r15b"; - std::ostringstream str; for (auto reg : registers) { for (size_t i = 0; i < 15; ++i) { assembler->setcc(static_cast<x86_64::Condition>(i), *reg); - str << "set" << suffixes[i] << " %" << byte_regs[reg->AsRegister()] << "\n"; + str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(*reg) << "\n"; } } @@ -975,4 +1060,12 @@ TEST_F(AssemblerX86_64Test, DecreaseFrame) { DriverFn(&decreaseframe_test_fn, "DecreaseFrame"); } +TEST_F(AssemblerX86_64Test, MovzxbRegs) { + DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb"); +} + +TEST_F(AssemblerX86_64Test, MovsxbRegs) { + DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb"); +} + } // namespace art diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h index 3a96ad0b51..47bbb44fc8 100644 --- a/compiler/utils/x86_64/managed_register_x86_64.h +++ b/compiler/utils/x86_64/managed_register_x86_64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_ #include "constants_x86_64.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { @@ -87,21 +88,6 @@ const int kNumberOfAllocIds = kNumberOfCpuAllocIds + kNumberOfXmmAllocIds + // There is a one-to-one mapping between ManagedRegister and register id. class X86_64ManagedRegister : public ManagedRegister { public: - int DWARFRegId() const { - CHECK(IsCpuRegister()); - switch (id_) { - case RAX: return 0; - case RDX: return 1; - case RCX: return 2; - case RBX: return 3; - case RSI: return 4; - case RDI: return 5; - case RBP: return 6; - case RSP: return 7; - default: return static_cast<int>(id_); // R8 ~ R15 - } - } - CpuRegister AsCpuRegister() const { CHECK(IsCpuRegister()); return CpuRegister(static_cast<Register>(id_)); |
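The MovzxbRegs/MovsxbRegs tests added above exercise the byte-register fix in EmitOptionalByteRegNormalizingRex32: without a REX prefix, register encodings 4-7 name the legacy high-byte registers AH/CH/DH/BH, so SPL/BPL/SIL/DIL must force an otherwise empty REX byte. Hand-assembled illustration (not output from the tests):

  movzbl %bl,  %eax   # 0f b6 c3      (BL is encoding 3, no REX required)
  movzbl %sil, %eax   # 40 0f b6 c6   (bare REX 0x40 makes encoding 6 mean SIL rather than DH)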