90 files changed, 3419 insertions, 1270 deletions
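Most of this change threads call frame information (CFI) through the Quick backend: every prologue/epilogue instruction that moves SP or spills/restores a callee-save register is now paired with a cfi_ bookkeeping call (AdjustCFAOffset, RelOffset, Restore, RememberState/RestoreState), and the per-method result is emitted through the new dex/quick/lazy_debug_frame_opcode_writer.cc instead of the removed utils/dwarf_cfi.cc. As a mental model only — hypothetical class and member names, not the actual implementation — the state those calls maintain behaves roughly like this:

// Minimal sketch of CFA-offset bookkeeping, assuming a downward-growing stack.
// AdjustCFAOffset(+n) mirrors a push/sub-sp of n bytes; RememberState()/RestoreState()
// bracket an epilogue so that code emitted after the exit block still carries the
// frame's CFI, matching the GenExitSequence() changes below.
class CfaTracker {  // hypothetical name
 public:
  void AdjustCFAOffset(int delta) { offset_ += delta; }
  int GetCurrentCFAOffset() const { return offset_; }
  void RememberState() { saved_offset_ = offset_; }
  void RestoreState() { offset_ = saved_offset_; }
 private:
  int offset_ = 0;  // CFA = SP + offset_; 0 means an empty frame.
  int saved_offset_ = 0;
};

This invariant is what the new DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0) assertions at method entry and after the final pop are checking.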
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 0e2dad9355..10bb90b5ad 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -188,6 +188,7 @@ COMPILER_GTEST_COMMON_SRC_FILES := \ compiler/dex/local_value_numbering_test.cc \ compiler/dex/mir_graph_test.cc \ compiler/dex/mir_optimization_test.cc \ + compiler/dex/quick/quick_cfi_test.cc \ compiler/dwarf/dwarf_test.cc \ compiler/driver/compiler_driver_test.cc \ compiler/elf_writer_test.cc \ @@ -405,7 +406,7 @@ define define-art-gtest LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION) LOCAL_SRC_FILES := $$(art_gtest_filename) LOCAL_C_INCLUDES += $$(ART_C_INCLUDES) art/runtime $$(art_gtest_extra_c_includes) - LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest + LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest libart-disassembler LOCAL_WHOLE_STATIC_LIBRARIES += libsigchain LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk diff --git a/compiler/Android.mk b/compiler/Android.mk index eaea031b62..94322a8315 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -41,6 +41,7 @@ LIBART_COMPILER_SRC_FILES := \ dex/quick/gen_common.cc \ dex/quick/gen_invoke.cc \ dex/quick/gen_loadstore.cc \ + dex/quick/lazy_debug_frame_opcode_writer.cc \ dex/quick/local_optimizations.cc \ dex/quick/mips/assemble_mips.cc \ dex/quick/mips/call_mips.cc \ @@ -103,6 +104,7 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/code_generator_arm64.cc \ optimizing/code_generator_x86.cc \ optimizing/code_generator_x86_64.cc \ + optimizing/code_generator_utils.cc \ optimizing/constant_folding.cc \ optimizing/dead_code_elimination.cc \ optimizing/graph_checker.cc \ @@ -138,7 +140,6 @@ LIBART_COMPILER_SRC_FILES := \ utils/arm64/assembler_arm64.cc \ utils/arm64/managed_register_arm64.cc \ utils/assembler.cc \ - utils/dwarf_cfi.cc \ utils/mips/assembler_mips.cc \ utils/mips/managed_register_mips.cc \ utils/mips64/assembler_mips64.cc \ diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h new file mode 100644 index 0000000000..f550395dad --- /dev/null +++ b/compiler/cfi_test.h @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_CFI_TEST_H_ +#define ART_COMPILER_CFI_TEST_H_ + +#include <vector> +#include <memory> +#include <sstream> + +#include "arch/instruction_set.h" +#include "dwarf/debug_frame_writer.h" +#include "dwarf/dwarf_test.h" +#include "disassembler/disassembler.h" +#include "gtest/gtest.h" + +namespace art { + +class CFITest : public dwarf::DwarfTest { + public: + void GenerateExpected(FILE* f, InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& actual_asm, + const std::vector<uint8_t>& actual_cfi) { + std::vector<std::string> lines; + // Print the raw bytes. 
+ fprintf(f, "static constexpr uint8_t expected_asm_%s[] = {", isa_str); + HexDump(f, actual_asm); + fprintf(f, "\n};\n"); + fprintf(f, "static constexpr uint8_t expected_cfi_%s[] = {", isa_str); + HexDump(f, actual_cfi); + fprintf(f, "\n};\n"); + // Pretty-print CFI opcodes. + dwarf::DebugFrameWriter<> eh_frame(&eh_frame_data_, false); + eh_frame.WriteCIE(dwarf::Reg(8), {}); + eh_frame.WriteFDE(0, actual_asm.size(), actual_cfi.data(), actual_cfi.size()); + ReformatCfi(Objdump(false, "-W"), &lines); + // Pretty-print assembly. + auto* opts = new DisassemblerOptions(false, actual_asm.data(), true); + std::unique_ptr<Disassembler> disasm(Disassembler::Create(isa, opts)); + std::stringstream stream; + const uint8_t* base = actual_asm.data() + (isa == kThumb2 ? 1 : 0); + disasm->Dump(stream, base, base + actual_asm.size()); + ReformatAsm(&stream, &lines); + // Print CFI and assembly interleaved. + std::stable_sort(lines.begin(), lines.end(), CompareByAddress); + for (const std::string& line : lines) { + fprintf(f, "// %s\n", line.c_str()); + } + fprintf(f, "\n"); + } + + private: + // Helper - get offset just past the end of given string. + static size_t FindEndOf(const std::string& str, const char* substr) { + size_t pos = str.find(substr); + CHECK_NE(std::string::npos, pos); + return pos + strlen(substr); + } + + // Spit to lines and remove raw instruction bytes. + static void ReformatAsm(std::stringstream* stream, + std::vector<std::string>* output) { + std::string line; + while (std::getline(*stream, line)) { + line = line.substr(0, FindEndOf(line, ": ")) + + line.substr(FindEndOf(line, "\t")); + size_t pos; + while ((pos = line.find(" ")) != std::string::npos) { + line = line.replace(pos, 2, " "); + } + while (!line.empty() && line.back() == ' ') { + line.pop_back(); + } + output->push_back(line); + } + } + + // Find interesting parts of objdump output and prefix the lines with address. + static void ReformatCfi(const std::vector<std::string>& lines, + std::vector<std::string>* output) { + std::string address; + for (const std::string& line : lines) { + if (line.find("DW_CFA_nop") != std::string::npos) { + // Ignore. + } else if (line.find("DW_CFA_advance_loc") != std::string::npos) { + // The last 8 characters are the address. + address = "0x" + line.substr(line.size() - 8); + } else if (line.find("DW_CFA_") != std::string::npos) { + std::string new_line(line); + // "bad register" warning is caused by always using host (x86) objdump. + const char* bad_reg = "bad register: "; + size_t pos; + if ((pos = new_line.find(bad_reg)) != std::string::npos) { + new_line = new_line.replace(pos, strlen(bad_reg), ""); + } + // Remove register names in parentheses since they have x86 names. + if ((pos = new_line.find(" (")) != std::string::npos) { + new_line = new_line.replace(pos, FindEndOf(new_line, ")") - pos, ""); + } + // Use the .cfi_ prefix. + new_line = ".cfi_" + new_line.substr(FindEndOf(new_line, "DW_CFA_")); + output->push_back(address + ": " + new_line); + } + } + } + + // Compare strings by the address prefix. + static bool CompareByAddress(const std::string& lhs, const std::string& rhs) { + EXPECT_EQ(lhs[10], ':'); + EXPECT_EQ(rhs[10], ':'); + return strncmp(lhs.c_str(), rhs.c_str(), 10) < 0; + } + + // Pretty-print byte array. 12 bytes per line. + static void HexDump(FILE* f, const std::vector<uint8_t>& data) { + for (size_t i = 0; i < data.size(); i++) { + fprintf(f, i % 12 == 0 ? "\n " : " "); // Whitespace. 
+ fprintf(f, "0x%02X,", data[i]); + } + } +}; + +} // namespace art + +#endif // ART_COMPILER_CFI_TEST_H_ diff --git a/compiler/compiler.h b/compiler/compiler.h index 6ec39f9605..a04641e3fa 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -107,6 +107,9 @@ class Compiler { return driver_; } + // Whether to produce 64-bit ELF files for 64-bit targets. Leave this off for now. + static constexpr bool kProduce64BitELFFiles = false; + private: CompilerDriver* const driver_; const uint64_t maximum_compilation_time_before_warning_; diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index 93d83c6fd4..0850f42a9a 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -403,13 +403,6 @@ class SuspendCheckElimination : public PassME { DCHECK(bb != nullptr); return c_unit->mir_graph->EliminateSuspendChecks(bb); } - - void End(PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(c_unit != nullptr); - c_unit->mir_graph->EliminateSuspendChecksEnd(); - } }; } // namespace art diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index f638b0bf4d..2a920a4e29 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -1396,6 +1396,13 @@ void MIRGraph::CompilerInitializeSSAConversion() { InitializeBasicBlockDataFlow(); } +uint32_t MIRGraph::GetUseCountWeight(BasicBlock* bb) const { + // Each level of nesting adds *100 to count, up to 3 levels deep. + uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth)); + uint32_t weight = std::max(1U, depth * 100); + return weight; +} + /* * Count uses, weighting by loop nesting depth. This code only * counts explicitly used s_regs. A later phase will add implicit @@ -1405,9 +1412,7 @@ void MIRGraph::CountUses(BasicBlock* bb) { if (bb->block_type != kDalvikByteCode) { return; } - // Each level of nesting adds *100 to count, up to 3 levels deep. - uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth)); - uint32_t weight = std::max(1U, depth * 100); + uint32_t weight = GetUseCountWeight(bb); for (MIR* mir = bb->first_mir_insn; (mir != NULL); mir = mir->next) { if (mir->ssa_rep == NULL) { continue; @@ -1417,23 +1422,6 @@ void MIRGraph::CountUses(BasicBlock* bb) { raw_use_counts_[s_reg] += 1u; use_counts_[s_reg] += weight; } - if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) { - uint64_t df_attributes = GetDataFlowAttributes(mir); - // Implicit use of Method* ? */ - if (df_attributes & DF_UMS) { - /* - * Some invokes will not use Method* - need to perform test similar - * to that found in GenInvoke() to decide whether to count refs - * for Method* on invoke-class opcodes. This is a relatively expensive - * operation, so should only be done once. - * TODO: refactor InvokeUsesMethodStar() to perform check at parse time, - * and save results for both here and GenInvoke. For now, go ahead - * and assume all invokes use method*. - */ - raw_use_counts_[method_sreg_] += 1u; - use_counts_[method_sreg_] += weight; - } - } } } diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 58f12c94e4..4d340387f2 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -1609,8 +1609,8 @@ void MIRGraph::ReplaceSpecialChars(std::string& str) { } std::string MIRGraph::GetSSAName(int ssa_reg) { - // TODO: This value is needed for LLVM and debugging. Currently, we compute this and then copy to - // the arena. 
We should be smarter and just place straight into the arena, or compute the + // TODO: This value is needed for debugging. Currently, we compute this and then copy to the + // arena. We should be smarter and just place straight into the arena, or compute the // value more lazily. int vreg = SRegToVReg(ssa_reg); if (vreg >= static_cast<int>(GetFirstTempVR())) { diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 3298af1162..85b13448da 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -960,6 +960,12 @@ class MIRGraph { */ CompilerTemp* GetNewCompilerTemp(CompilerTempType ct_type, bool wide); + /** + * @brief Used to remove last created compiler temporary when it's not needed. + * @param temp the temporary to remove. + */ + void RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp); + bool MethodIsLeaf() { return attributes_ & METHOD_IS_LEAF; } @@ -1079,7 +1085,6 @@ class MIRGraph { void EliminateDeadCodeEnd(); bool EliminateSuspendChecksGate(); bool EliminateSuspendChecks(BasicBlock* bb); - void EliminateSuspendChecksEnd(); uint16_t GetGvnIFieldId(MIR* mir) const { DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode)); @@ -1185,6 +1190,12 @@ class MIRGraph { void DoConstantPropagation(BasicBlock* bb); /** + * @brief Get use count weight for a given block. + * @param bb the BasicBlock. + */ + uint32_t GetUseCountWeight(BasicBlock* bb) const; + + /** * @brief Count the uses in the BasicBlock * @param bb the BasicBlock */ @@ -1396,10 +1407,6 @@ class MIRGraph { uint16_t* sfield_ids; // Ditto. GvnDeadCodeElimination* dce; } gvn; - // Suspend check elimination. - struct { - DexFileMethodInliner* inliner; - } sce; } temp_; static const int kInvalidEntry = -1; ArenaVector<BasicBlock*> block_list_; @@ -1451,6 +1458,7 @@ class MIRGraph { friend class GvnDeadCodeEliminationTest; friend class LocalValueNumberingTest; friend class TopologicalSortOrderTest; + friend class QuickCFITest; }; } // namespace art diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc index 831ad42682..0c84b82edd 100644 --- a/compiler/dex/mir_method_info.cc +++ b/compiler/dex/mir_method_info.cc @@ -16,6 +16,8 @@ # include "mir_method_info.h" +#include "dex/quick/dex_file_method_inliner.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/verified_method.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" @@ -64,6 +66,9 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, const DexFile* const dex_file = mUnit->GetDexFile(); const bool use_jit = runtime->UseJit(); const VerifiedMethod* const verified_method = mUnit->GetVerifiedMethod(); + DexFileToMethodInlinerMap* inliner_map = compiler_driver->GetMethodInlinerMap(); + DexFileMethodInliner* default_inliner = + (inliner_map != nullptr) ? inliner_map->GetMethodInliner(dex_file) : nullptr; for (auto it = method_infos, end = method_infos + count; it != end; ++it) { // For quickened invokes, the dex method idx is actually the mir offset. 
@@ -122,6 +127,7 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, if (UNLIKELY(resolved_method == nullptr)) { continue; } + compiler_driver->GetResolvedMethodDexFileLocation(resolved_method, &it->declaring_dex_file_, &it->declaring_class_idx_, &it->declaring_method_idx_); if (!it->IsQuickened()) { @@ -133,6 +139,7 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, it->vtable_idx_ = compiler_driver->GetResolvedMethodVTableIndex(resolved_method, invoke_type); } + MethodReference target_method(it->target_dex_file_, it->target_method_idx_); int fast_path_flags = compiler_driver->IsFastInvoke( soa, current_dex_cache, class_loader, mUnit, referrer_class.Get(), resolved_method, @@ -140,10 +147,23 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, const bool is_referrers_class = referrer_class.Get() == resolved_method->GetDeclaringClass(); const bool is_class_initialized = compiler_driver->IsMethodsClassInitialized(referrer_class.Get(), resolved_method); + + // Check if the target method is intrinsic or special. + InlineMethodFlags is_intrinsic_or_special = kNoInlineMethodFlags; + if (inliner_map != nullptr) { + auto* inliner = (target_method.dex_file == dex_file) + ? default_inliner + : inliner_map->GetMethodInliner(target_method.dex_file); + is_intrinsic_or_special = inliner->IsIntrinsicOrSpecial(target_method.dex_method_index); + } + uint16_t other_flags = it->flags_ & - ~(kFlagFastPath | kFlagClassIsInitialized | (kInvokeTypeMask << kBitSharpTypeBegin)); + ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized | + (kInvokeTypeMask << kBitSharpTypeBegin)); it->flags_ = other_flags | (fast_path_flags != 0 ? kFlagFastPath : 0u) | + ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) | + ((is_intrinsic_or_special & kInlineSpecial) != 0 ? kFlagIsSpecial : 0u) | (static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) | (is_referrers_class ? kFlagIsReferrersClass : 0u) | (is_class_initialized ? kFlagClassIsInitialized : 0u); diff --git a/compiler/dex/mir_method_info.h b/compiler/dex/mir_method_info.h index e131c96a81..7230c462cd 100644 --- a/compiler/dex/mir_method_info.h +++ b/compiler/dex/mir_method_info.h @@ -127,6 +127,14 @@ class MirMethodLoweringInfo : public MirMethodInfo { return (flags_ & kFlagFastPath) != 0u; } + bool IsIntrinsic() const { + return (flags_ & kFlagIsIntrinsic) != 0u; + } + + bool IsSpecial() const { + return (flags_ & kFlagIsSpecial) != 0u; + } + bool IsReferrersClass() const { return (flags_ & kFlagIsReferrersClass) != 0; } @@ -188,9 +196,11 @@ class MirMethodLoweringInfo : public MirMethodInfo { private: enum { kBitFastPath = kMethodInfoBitEnd, + kBitIsIntrinsic, + kBitIsSpecial, kBitInvokeTypeBegin, kBitInvokeTypeEnd = kBitInvokeTypeBegin + 3, // 3 bits for invoke type. - kBitSharpTypeBegin, + kBitSharpTypeBegin = kBitInvokeTypeEnd, kBitSharpTypeEnd = kBitSharpTypeBegin + 3, // 3 bits for sharp type. 
kBitIsReferrersClass = kBitSharpTypeEnd, kBitClassIsInitialized, @@ -199,6 +209,8 @@ class MirMethodLoweringInfo : public MirMethodInfo { }; static_assert(kMethodLoweringInfoBitEnd <= 16, "Too many flags"); static constexpr uint16_t kFlagFastPath = 1u << kBitFastPath; + static constexpr uint16_t kFlagIsIntrinsic = 1u << kBitIsIntrinsic; + static constexpr uint16_t kFlagIsSpecial = 1u << kBitIsSpecial; static constexpr uint16_t kFlagIsReferrersClass = 1u << kBitIsReferrersClass; static constexpr uint16_t kFlagClassIsInitialized = 1u << kBitClassIsInitialized; static constexpr uint16_t kFlagQuickened = 1u << kBitQuickened; diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index c85c3b6f21..9d7b4b4dfd 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -318,9 +318,11 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) // Since VR temps cannot be requested once the BE temps are requested, we // allow reservation of VR temps as well for BE. We size_t available_temps = reserved_temps_for_backend_ + GetNumAvailableVRTemps(); - if (available_temps <= 0 || (available_temps <= 1 && wide)) { + size_t needed_temps = wide ? 2u : 1u; + if (available_temps < needed_temps) { if (verbose) { - LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str << " are available."; + LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str + << " are available."; } return nullptr; } @@ -328,12 +330,8 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) // Update the remaining reserved temps since we have now used them. // Note that the code below is actually subtracting to remove them from reserve // once they have been claimed. It is careful to not go below zero. - if (reserved_temps_for_backend_ >= 1) { - reserved_temps_for_backend_--; - } - if (wide && reserved_temps_for_backend_ >= 1) { - reserved_temps_for_backend_--; - } + reserved_temps_for_backend_ = + std::max(reserved_temps_for_backend_, needed_temps) - needed_temps; // The new non-special compiler temp must receive a unique v_reg. compiler_temp->v_reg = GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_; @@ -407,6 +405,36 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) return compiler_temp; } +void MIRGraph::RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp) { + // Once the compiler temps have been committed, it's too late for any modifications. + DCHECK_EQ(compiler_temps_committed_, false); + + size_t used_temps = wide ? 2u : 1u; + + if (ct_type == kCompilerTempBackend) { + DCHECK(requested_backend_temp_); + + // Make the temps available to backend again. + reserved_temps_for_backend_ += used_temps; + } else if (ct_type == kCompilerTempVR) { + DCHECK(!requested_backend_temp_); + } else { + UNIMPLEMENTED(FATAL) << "No handling for compiler temp type " << static_cast<int>(ct_type); + } + + // Reduce the number of non-special compiler temps. + DCHECK_LE(used_temps, num_non_special_compiler_temps_); + num_non_special_compiler_temps_ -= used_temps; + + // Check that this was really the last temp. 
+ DCHECK_EQ(static_cast<size_t>(temp->v_reg), + GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_); + + if (cu_->verbose) { + LOG(INFO) << "Last temporary has been removed."; + } +} + static bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) { bool is_taken; switch (opcode) { @@ -1489,7 +1517,7 @@ void MIRGraph::InlineSpecialMethods(BasicBlock* bb) { continue; } const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(mir); - if (!method_info.FastPath()) { + if (!method_info.FastPath() || !method_info.IsSpecial()) { continue; } @@ -1631,10 +1659,6 @@ bool MIRGraph::EliminateSuspendChecksGate() { !HasInvokes()) { // No invokes to actually eliminate any suspend checks. return false; } - if (cu_->compiler_driver != nullptr && cu_->compiler_driver->GetMethodInlinerMap() != nullptr) { - temp_.sce.inliner = - cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file); - } suspend_checks_in_loops_ = arena_->AllocArray<uint32_t>(GetNumBlocks(), kArenaAllocMisc); return true; } @@ -1652,9 +1676,9 @@ bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) { uint32_t suspend_checks_in_loops = (1u << bb->nesting_depth) - 1u; // Start with all loop heads. bool found_invoke = false; for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { - if (IsInstructionInvoke(mir->dalvikInsn.opcode) && - (temp_.sce.inliner == nullptr || - !temp_.sce.inliner->IsIntrinsic(mir->dalvikInsn.vB, nullptr))) { + if ((IsInstructionInvoke(mir->dalvikInsn.opcode) || + IsInstructionQuickInvoke(mir->dalvikInsn.opcode)) && + !GetMethodLoweringInfo(mir).IsIntrinsic()) { // Non-intrinsic invoke, rely on a suspend point in the invoked method. found_invoke = true; break; @@ -1717,10 +1741,6 @@ bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) { return true; } -void MIRGraph::EliminateSuspendChecksEnd() { - temp_.sce.inliner = nullptr; -} - bool MIRGraph::CanThrow(MIR* mir) const { if ((mir->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) { return false; diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 9ce5ebbc1b..10a4337cf5 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -474,7 +474,6 @@ class SuspendCheckEliminationTest : public MirOptimizationTest { for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) { change = cu_.mir_graph->EliminateSuspendChecks(bb); } - cu_.mir_graph->EliminateSuspendChecksEnd(); } SuspendCheckEliminationTest() diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index e6158c3200..3d18af6169 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -29,6 +29,7 @@ #include "mirror/object_array-inl.h" #include "entrypoints/quick/quick_entrypoints.h" #include "utils.h" +#include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -354,7 +355,16 @@ void ArmMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::ArmCore(num); +} + +static dwarf::Reg DwarfFpReg(int num) { + return dwarf::Reg::ArmFp(num); +} + void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. int spill_count = num_core_spills_ + num_fp_spills_; /* * On entry, r0, r1, r2 & r3 are live. 
Let the register allocation @@ -402,28 +412,32 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } } /* Spill core callee saves */ - if (core_spill_mask_ == 0u) { - // Nothing to spill. - } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) { - // Spilling only low regs and/or LR, use 16-bit PUSH. - constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8; - NewLIR1(kThumbPush, - (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | - ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift)); - } else if (IsPowerOfTwo(core_spill_mask_)) { - // kThumb2Push cannot be used to spill a single register. - NewLIR1(kThumb2Push1, CTZ(core_spill_mask_)); - } else { - NewLIR1(kThumb2Push, core_spill_mask_); + if (core_spill_mask_ != 0u) { + if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) { + // Spilling only low regs and/or LR, use 16-bit PUSH. + constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8; + NewLIR1(kThumbPush, + (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | + ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift)); + } else if (IsPowerOfTwo(core_spill_mask_)) { + // kThumb2Push cannot be used to spill a single register. + NewLIR1(kThumb2Push1, CTZ(core_spill_mask_)); + } else { + NewLIR1(kThumb2Push, core_spill_mask_); + } + cfi_.AdjustCFAOffset(num_core_spills_ * kArmPointerSize); + cfi_.RelOffsetForMany(DwarfCoreReg(0), 0, core_spill_mask_, kArmPointerSize); } /* Need to spill any FP regs? */ - if (num_fp_spills_) { + if (num_fp_spills_ != 0u) { /* * NOTE: fp spills are a little different from core spills in that * they are pushed as a contiguous block. When promoting from * the fp set, we must allocate all singles from s16..highest-promoted */ NewLIR1(kThumb2VPushCS, num_fp_spills_); + cfi_.AdjustCFAOffset(num_fp_spills_ * kArmPointerSize); + cfi_.RelOffsetForMany(DwarfFpReg(0), 0, fp_spill_mask_, kArmPointerSize); } const int spill_size = spill_count * 4; @@ -444,12 +458,14 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR); } m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow); // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes // codegen and target are in thumb2 mode. // NOTE: native pointer. m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -464,6 +480,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { // Need to restore LR since we used it as a temp. 
AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size)); OpRegCopy(rs_rARM_SP, rs_rARM_LR); // Establish stack + cfi_.AdjustCFAOffset(frame_size_without_spills); } else { /* * If the frame is small enough we are guaranteed to have enough space that remains to @@ -474,6 +491,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { MarkTemp(rs_rARM_LR); FreeTemp(rs_rARM_LR); OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); Clobber(rs_rARM_LR); UnmarkTemp(rs_rARM_LR); LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr); @@ -483,13 +501,23 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { // Implicit stack overflow check has already been done. Just make room on the // stack for the frame now. OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } } else { OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } FlushIns(ArgLocs, rl_method); + // We can promote a PC-relative reference to dex cache arrays to a register + // if it's used at least twice. Without investigating where we should lazily + // load the reference, we conveniently load it after flushing inputs. + if (dex_cache_arrays_base_reg_.Valid()) { + OpPcRelDexCacheArrayAddr(cu_->dex_file, dex_cache_arrays_min_offset_, + dex_cache_arrays_base_reg_); + } + FreeTemp(rs_r0); FreeTemp(rs_r1); FreeTemp(rs_r2); @@ -498,7 +526,9 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } void ArmMir2Lir::GenExitSequence() { + cfi_.RememberState(); int spill_count = num_core_spills_ + num_fp_spills_; + /* * In the exit path, r0/r1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -506,34 +536,47 @@ void ArmMir2Lir::GenExitSequence() { LockTemp(rs_r0); LockTemp(rs_r1); - OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4)); + int adjust = frame_size_ - (spill_count * kArmPointerSize); + OpRegImm(kOpAdd, rs_rARM_SP, adjust); + cfi_.AdjustCFAOffset(-adjust); /* Need to restore any FP callee saves? */ if (num_fp_spills_) { NewLIR1(kThumb2VPopCS, num_fp_spills_); + cfi_.AdjustCFAOffset(-num_fp_spills_ * kArmPointerSize); + cfi_.RestoreMany(DwarfFpReg(0), fp_spill_mask_); } - if ((core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0) { - /* Unspill rARM_LR to rARM_PC */ + bool unspill_LR_to_PC = (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0; + if (unspill_LR_to_PC) { core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum()); core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum()); } - if (core_spill_mask_ == 0u) { - // Nothing to unspill. - } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) { - // Unspilling only low regs and/or PC, use 16-bit POP. - constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8; - NewLIR1(kThumbPop, - (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) | - ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift)); - } else if (IsPowerOfTwo(core_spill_mask_)) { - // kThumb2Pop cannot be used to unspill a single register. - NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_)); - } else { - NewLIR1(kThumb2Pop, core_spill_mask_); + if (core_spill_mask_ != 0u) { + if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) { + // Unspilling only low regs and/or PC, use 16-bit POP. 
+ constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8; + NewLIR1(kThumbPop, + (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) | + ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift)); + } else if (IsPowerOfTwo(core_spill_mask_)) { + // kThumb2Pop cannot be used to unspill a single register. + NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_)); + } else { + NewLIR1(kThumb2Pop, core_spill_mask_); + } + // If we pop to PC, there is no further epilogue code. + if (!unspill_LR_to_PC) { + cfi_.AdjustCFAOffset(-num_core_spills_ * kArmPointerSize); + cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. + } } - if ((core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum())) == 0) { + if (!unspill_LR_to_PC) { /* We didn't pop to rARM_PC, so must do a bx rARM_LR */ NewLIR1(kThumbBx, rs_rARM_LR.GetReg()); } + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void ArmMir2Lir::GenSpecialExitSequence() { @@ -555,11 +598,16 @@ void ArmMir2Lir::GenSpecialEntryForSuspend() { NewLIR1(kThumbPush, (1u << rs_r0.GetRegNum()) | // ArtMethod* (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | // Spills other than LR. (1u << 8)); // LR encoded for 16-bit push. + cfi_.AdjustCFAOffset(frame_size_); + // Do not generate CFI for scratch register r0. + cfi_.RelOffsetForMany(DwarfCoreReg(0), 4, core_spill_mask_, kArmPointerSize); } void ArmMir2Lir::GenSpecialExitForSuspend() { // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) NewLIR1(kThumb2Pop, (1u << rs_r0.GetRegNum()) | core_spill_mask_); // 32-bit because of LR. + cfi_.AdjustCFAOffset(-frame_size_); + cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_); } static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { @@ -571,12 +619,12 @@ static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& targe * Bit of a hack here - in the absence of a real scheduling pass, * emit the next instruction in static & direct invoke sequences. */ -static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, - int state, const MethodReference& target_method, - uint32_t unused_idx ATTRIBUTE_UNUSED, - uintptr_t direct_code, uintptr_t direct_method, - InvokeType type) { - Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); +int ArmMir2Lir::ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, + int state, const MethodReference& target_method, + uint32_t unused_idx ATTRIBUTE_UNUSED, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) { + ArmMir2Lir* cg = static_cast<ArmMir2Lir*>(cu->cg.get()); if (direct_code != 0 && direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] @@ -597,17 +645,24 @@ static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSE return -1; } } else { + bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad(); RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { case 0: // Get the current Method* [sets kArg0] // TUNING: we can save a reg copy if Method* has been promoted.
- cg->LoadCurrMethodDirect(arg0_ref); - break; + if (!use_pc_rel) { + cg->LoadCurrMethodDirect(arg0_ref); + break; + } + ++state; + FALLTHROUGH_INTENDED; case 1: // Get method->dex_cache_resolved_methods_ - cg->LoadRefDisp(arg0_ref, - mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), + arg0_ref, + kNotVolatile); + } // Set up direct code if known. if (direct_code != 0) { if (direct_code != static_cast<uintptr_t>(-1)) { @@ -619,14 +674,23 @@ static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSE cg->LoadCodeAddress(target_method, type, kInvokeTgt); } } - break; + if (!use_pc_rel || direct_code != 0) { + break; + } + ++state; + FALLTHROUGH_INTENDED; case 2: // Grab target method* CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadRefDisp(arg0_ref, - mirror::ObjectArray<mirror::Object>::OffsetOfElement( - target_method.dex_method_index).Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ObjectArray<mirror::Object>::OffsetOfElement( + target_method.dex_method_index).Int32Value(), + arg0_ref, + kNotVolatile); + } else { + size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index); + cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref); + } break; case 3: // Grab the code from the method* if (direct_code == 0) { diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 4141bcfe98..83b27df939 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -82,6 +82,9 @@ class ArmMir2Lir FINAL : public Mir2Lir { /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage) void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE; + bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE; + void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE; + // Required for target - register utilities. RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE; RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE { @@ -257,6 +260,9 @@ class ArmMir2Lir FINAL : public Mir2Lir { */ LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE; + void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE; + void DoPromotion() OVERRIDE; + /* * @brief Handle ARM specific literals. */ @@ -300,6 +306,13 @@ class ArmMir2Lir FINAL : public Mir2Lir { ArenaVector<LIR*> call_method_insns_; + // Instructions needing patching with PC relative code addresses. + ArenaVector<LIR*> dex_cache_access_insns_; + + // Register with a reference to the dex cache arrays at dex_cache_arrays_min_offset_, + // if promoted. + RegStorage dex_cache_arrays_base_reg_; + /** * @brief Given float register pair, returns Solo64 float register. * @param reg #RegStorage containing a float register pair (e.g. @c s2 and @c s3). 
@@ -329,6 +342,14 @@ class ArmMir2Lir FINAL : public Mir2Lir { } int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; + + static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, + int state, const MethodReference& target_method, + uint32_t unused_idx ATTRIBUTE_UNUSED, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type); + + void OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest); }; } // namespace art diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 9193e1b23c..47669db979 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -1087,6 +1087,36 @@ void ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { lir->target = target; } +bool ArmMir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + return dex_cache_arrays_layout_.Valid(); +} + +void ArmMir2Lir::OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest) { + LIR* movw = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), 0); + LIR* movt = NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), 0); + ArmOpcode add_pc_opcode = (r_dest.GetRegNum() < 8) ? kThumbAddRRLH : kThumbAddRRHH; + LIR* add_pc = NewLIR2(add_pc_opcode, r_dest.GetReg(), rs_rARM_PC.GetReg()); + add_pc->flags.fixup = kFixupLabel; + movw->operands[2] = WrapPointer(dex_file); + movw->operands[3] = offset; + movw->operands[4] = WrapPointer(add_pc); + movt->operands[2] = movw->operands[2]; + movt->operands[3] = movw->operands[3]; + movt->operands[4] = movw->operands[4]; + dex_cache_access_insns_.push_back(movw); + dex_cache_access_insns_.push_back(movt); +} + +void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) { + if (dex_cache_arrays_base_reg_.Valid()) { + LoadRefDisp(dex_cache_arrays_base_reg_, offset - dex_cache_arrays_min_offset_, + r_dest, kNotVolatile); + } else { + OpPcRelDexCacheArrayAddr(dex_file, offset, r_dest); + LoadRefDisp(r_dest, 0, r_dest, kNotVolatile); + } +} + LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) { return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count); } diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 9812d9ff99..5f27338e6b 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -575,7 +575,9 @@ RegisterClass ArmMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil ArmMir2Lir::ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), - call_method_insns_(arena->Adapter()) { + call_method_insns_(arena->Adapter()), + dex_cache_access_insns_(arena->Adapter()), + dex_cache_arrays_base_reg_(RegStorage::InvalidReg()) { call_method_insns_.reserve(100); // Sanity check - make sure encoding map lines up. for (int i = 0; i < kArmLast; i++) { @@ -901,14 +903,28 @@ RegStorage ArmMir2Lir::AllocPreservedSingle(int s_reg) { } void ArmMir2Lir::InstallLiteralPools() { + patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size()); + // PC-relative calls to methods. 
- patches_.reserve(call_method_insns_.size()); for (LIR* p : call_method_insns_) { - DCHECK_EQ(p->opcode, kThumb2Bl); - uint32_t target_method_idx = p->operands[1]; - const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]); - patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset, - target_dex_file, target_method_idx)); + DCHECK_EQ(p->opcode, kThumb2Bl); + uint32_t target_method_idx = p->operands[1]; + const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]); + patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset, + target_dex_file, target_method_idx)); + } + + // PC-relative dex cache array accesses. + for (LIR* p : dex_cache_access_insns_) { + DCHECK(p->opcode == kThumb2MovImm16 || p->opcode == kThumb2MovImm16H); + const LIR* add_pc = UnwrapPointer<LIR>(p->operands[4]); + DCHECK(add_pc->opcode == kThumbAddRRLH || add_pc->opcode == kThumbAddRRHH); + const DexFile* dex_file = UnwrapPointer<DexFile>(p->operands[2]); + uint32_t offset = p->operands[3]; + DCHECK(!p->flags.is_nop); + DCHECK(!add_pc->flags.is_nop); + patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset, + dex_file, add_pc->offset, offset)); } // And do the normal processing. diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index e4bd2a33ae..c3371cf329 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -19,6 +19,7 @@ #include "arch/arm/instruction_set_features_arm.h" #include "arm_lir.h" #include "base/logging.h" +#include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "dex/reg_storage_eq.h" #include "driver/compiler_driver.h" @@ -1266,4 +1267,38 @@ size_t ArmMir2Lir::GetInstructionOffset(LIR* lir) { return offset; } +void ArmMir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { + // Start with the default counts. + Mir2Lir::CountRefs(core_counts, fp_counts, num_regs); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is used only once outside any loops (weight = 1), + // avoid the promotion, otherwise boost the weight by factor 4 because the full PC-relative + // load sequence is 4 instructions long. + int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low); + if (core_counts[p_map_idx].count == 1) { + core_counts[p_map_idx].count = 0; + } else { + core_counts[p_map_idx].count *= 4; + } + } +} + +void ArmMir2Lir::DoPromotion() { + if (CanUseOpPcRelDexCacheArrayLoad()) { + pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false); + } + + Mir2Lir::DoPromotion(); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is promoted, remember the register but + // always remove the temp's stack location to avoid unnecessarily bloating the stack.
+ dex_cache_arrays_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg; + DCHECK(!dex_cache_arrays_base_reg_.Valid() || !dex_cache_arrays_base_reg_.IsFloat()); + mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_); + pc_rel_temp_ = nullptr; + } +} + } // namespace art diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 6b47bba884..4abbd77d88 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -282,7 +282,13 @@ void Arm64Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::Arm64Core(num); +} + void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. + /* * On entry, x0 to x7 are live. Let the register allocation * mechanism know so it doesn't try to use any of them when @@ -345,6 +351,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) if (spilled_already != frame_size_) { OpRegImm(kOpSub, rs_sp, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } if (!skip_overflow_check) { @@ -361,12 +368,14 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) GenerateTargetLabel(kPseudoThrowTarget); // Unwinds stack. m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow); m2l_->LockTemp(rs_xIP0); m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0); m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg()); m2l_->FreeTemp(rs_xIP0); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -393,6 +402,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) } void Arm64Mir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, r0/r1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -403,6 +413,9 @@ void Arm64Mir2Lir::GenExitSequence() { // Finally return. NewLIR0(kA64Ret); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void Arm64Mir2Lir::GenSpecialExitSequence() { @@ -419,11 +432,16 @@ void Arm64Mir2Lir::GenSpecialEntryForSuspend() { core_vmap_table_.clear(); fp_vmap_table_.clear(); NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8); + cfi_.AdjustCFAOffset(frame_size_); + // Do not generate CFI for scratch register x0. + cfi_.RelOffset(DwarfCoreReg(rxLR), 8); } void Arm64Mir2Lir::GenSpecialExitForSuspend() { // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) 
NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8); + cfi_.AdjustCFAOffset(-frame_size_); + cfi_.Restore(DwarfCoreReg(rxLR)); } static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index a9d9f3d463..20f61f2261 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -1458,6 +1458,14 @@ static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) { return reg_mask; } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::Arm64Core(num); +} + +static dwarf::Reg DwarfFpReg(int num) { + return dwarf::Reg::Arm64Fp(num); +} + static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) { int reg1 = -1, reg2 = -1; const int reg_log2_size = 3; @@ -1466,9 +1474,12 @@ static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32 reg_mask = GenPairWise(reg_mask, & reg1, & reg2); if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), offset << reg_log2_size); } else { m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), offset << reg_log2_size); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (offset + 1) << reg_log2_size); } } } @@ -1483,9 +1494,12 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfFpReg(reg1), offset << reg_log2_size); } else { m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfFpReg(reg2), offset << reg_log2_size); + m2l->cfi().RelOffset(DwarfFpReg(reg1), (offset + 1) << reg_log2_size); } } } @@ -1493,6 +1507,7 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t static int SpillRegsPreSub(Arm64Mir2Lir* m2l, uint32_t core_reg_mask, uint32_t fp_reg_mask, int frame_size) { m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size); + m2l->cfi().AdjustCFAOffset(frame_size); int core_count = POPCOUNT(core_reg_mask); @@ -1552,11 +1567,15 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core RegStorage::FloatSolo64(reg1).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } else { m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg1).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), 0); cur_offset = 0; // That core reg needs to go into the upper half. 
} } else { @@ -1564,10 +1583,15 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2); m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg2), 0); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } else { fp_reg_mask = ExtractReg(fp_reg_mask, &reg1); m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } } } else { @@ -1580,12 +1604,19 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = ExtractReg(core_reg_mask, &reg1); m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize); } else { core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2); m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), 0); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize); } } + DCHECK_EQ(m2l->cfi().GetCurrentCFAOffset(), + static_cast<int>(all_offset * kArm64PointerSize)); if (fp_count != 0) { for (; fp_reg_mask != 0;) { fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2); if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfFpReg(reg1), cur_offset * kArm64PointerSize); // Do not increment offset here, as the second half will be filled by a core reg. } else { m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfFpReg(reg2), cur_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), (cur_offset + 1) * kArm64PointerSize); cur_offset += 2; } } @@ -1610,6 +1644,7 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = ExtractReg(core_reg_mask, &reg1); m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset + 1); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize); cur_offset += 2; // Half-slot filled now.
} } @@ -1620,6 +1655,8 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2); m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), cur_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize); } DCHECK_EQ(cur_offset, all_offset); @@ -1650,10 +1687,13 @@ static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint reg_mask = GenPairWise(reg_mask, & reg1, & reg2); if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfCoreReg(reg1)); } else { DCHECK_LE(offset, 63); m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfCoreReg(reg2)); + m2l->cfi().Restore(DwarfCoreReg(reg1)); } } } @@ -1667,9 +1707,12 @@ static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32 if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfFpReg(reg1)); } else { m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfFpReg(reg2)); + m2l->cfi().Restore(DwarfFpReg(reg1)); } } } @@ -1711,6 +1754,7 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t early_drop = RoundDown(early_drop, 16); OpRegImm64(kOpAdd, rs_sp, early_drop); + cfi_.AdjustCFAOffset(-early_drop); } // Unspill. @@ -1724,7 +1768,9 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t } // Drop the (rest of) the frame. - OpRegImm64(kOpAdd, rs_sp, frame_size - early_drop); + int adjust = frame_size - early_drop; + OpRegImm64(kOpAdd, rs_sp, adjust); + cfi_.AdjustCFAOffset(-adjust); } bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index f944c11931..ff5f735255 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1070,6 +1070,11 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena mask_cache_(arena), safepoints_(arena->Adapter()), dex_cache_arrays_layout_(cu->compiler_driver->GetDexCacheArraysLayout(cu->dex_file)), + pc_rel_temp_(nullptr), + dex_cache_arrays_min_offset_(std::numeric_limits<uint32_t>::max()), + cfi_(&last_lir_insn_, + cu->compiler_driver->GetCompilerOptions().GetGenerateGDBInformation(), + arena), in_to_reg_storage_mapping_(arena) { switch_tables_.reserve(4); fill_array_data_.reserve(4); @@ -1154,14 +1159,6 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { return lhs.LiteralOffset() < rhs.LiteralOffset(); }); - std::unique_ptr<std::vector<uint8_t>> cfi_info( - cu_->compiler_driver->GetCompilerOptions().GetGenerateGDBInformation() ?
- ReturnFrameDescriptionEntry() : - nullptr); - ArrayRef<const uint8_t> cfi_ref; - if (cfi_info.get() != nullptr) { - cfi_ref = ArrayRef<const uint8_t>(*cfi_info); - } return CompiledMethod::SwapAllocCompiledMethod( cu_->compiler_driver, cu_->instruction_set, ArrayRef<const uint8_t>(code_buffer_), @@ -1170,7 +1167,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { ArrayRef<const uint8_t>(encoded_mapping_table_), ArrayRef<const uint8_t>(vmap_encoder.GetData()), ArrayRef<const uint8_t>(native_gc_map_), - cfi_ref, + ArrayRef<const uint8_t>(*cfi_.Patch(code_buffer_.size())), ArrayRef<const LinkerPatch>(patches_)); } @@ -1332,11 +1329,6 @@ void Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file ATTRIBUTE_UNUSED, UNREACHABLE(); } -std::vector<uint8_t>* Mir2Lir::ReturnFrameDescriptionEntry() { - // Default case is to do nothing. - return nullptr; -} - RegLocation Mir2Lir::NarrowRegLoc(RegLocation loc) { if (loc.location == kLocPhysReg) { DCHECK(!loc.reg.Is32Bit()); diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 8e3f4ef726..4ac6c0c5b5 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -413,6 +413,17 @@ bool DexFileMethodInliner::AnalyseMethodCode(verifier::MethodVerifier* verifier) return success && AddInlineMethod(verifier->GetMethodReference().dex_method_index, method); } +InlineMethodFlags DexFileMethodInliner::IsIntrinsicOrSpecial(uint32_t method_index) { + ReaderMutexLock mu(Thread::Current(), lock_); + auto it = inline_methods_.find(method_index); + if (it != inline_methods_.end()) { + DCHECK_NE(it->second.flags & (kInlineIntrinsic | kInlineSpecial), 0); + return it->second.flags; + } else { + return kNoInlineMethodFlags; + } +} + bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) { ReaderMutexLock mu(Thread::Current(), lock_); auto it = inline_methods_.find(method_index); diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h index cb521da9df..d1e562119c 100644 --- a/compiler/dex/quick/dex_file_method_inliner.h +++ b/compiler/dex/quick/dex_file_method_inliner.h @@ -65,6 +65,11 @@ class DexFileMethodInliner { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_); /** + * Check whether a particular method index corresponds to an intrinsic or special function. + */ + InlineMethodFlags IsIntrinsicOrSpecial(uint32_t method_index) LOCKS_EXCLUDED(lock_); + + /** * Check whether a particular method index corresponds to an intrinsic function. */ bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) LOCKS_EXCLUDED(lock_); diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 1813e0939e..b132c4cc54 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -94,6 +94,97 @@ void Mir2Lir::GenIfNullUseHelperImmMethod( r_method, r_result)); } +RegStorage Mir2Lir::GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info, + int opt_flags) { + DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex); + // May do runtime call so everything to home locations. + FlushAllRegs(); + RegStorage r_base = TargetReg(kArg0, kRef); + LockTemp(r_base); + RegStorage r_method = RegStorage::InvalidReg(); // Loaded lazily, maybe in the slow-path. 
+ if (CanUseOpPcRelDexCacheArrayLoad()) { + uint32_t offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex()); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, r_base); + } else { + // Using fixed register to sync with possible call to runtime support. + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, + kNotVolatile); + int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); + LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); + } + // r_base now points at static storage (Class*) or nullptr if the type is not yet resolved. + LIR* unresolved_branch = nullptr; + if (!field_info.IsClassInDexCache() && (opt_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) { + // Check if r_base is nullptr. + unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, nullptr); + } + LIR* uninit_branch = nullptr; + if (!field_info.IsClassInitialized() && (opt_flags & MIR_CLASS_IS_INITIALIZED) == 0) { + // Check if r_base is not yet initialized class. + RegStorage r_tmp = TargetReg(kArg2, kNotWide); + LockTemp(r_tmp); + uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base, + mirror::Class::StatusOffset().Int32Value(), + mirror::Class::kStatusInitialized, nullptr, nullptr); + FreeTemp(r_tmp); + } + if (unresolved_branch != nullptr || uninit_branch != nullptr) { + // + // Slow path to ensure a class is initialized for sget/sput. + // + class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath { + public: + // There are up to two branches to the static field slow path, the "unresolved" when the type + // entry in the dex cache is nullptr, and the "uninit" when the class is not yet initialized. + // At least one will be non-nullptr here, otherwise we wouldn't generate the slow path. + StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index, + RegStorage r_base_in, RegStorage r_method_in) + : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont), + second_branch_(unresolved != nullptr ? uninit : nullptr), + storage_index_(storage_index), r_base_(r_base_in), r_method_(r_method_in) { + } + + void Compile() { + LIR* target = GenerateTargetLabel(); + if (second_branch_ != nullptr) { + second_branch_->target = target; + } + if (r_method_.Valid()) { + // ArtMethod* was loaded in normal path - use it. + m2l_->CallRuntimeHelperImmReg(kQuickInitializeStaticStorage, storage_index_, r_method_, + true); + } else { + // ArtMethod* wasn't loaded in normal path - use a helper that loads it. + m2l_->CallRuntimeHelperImmMethod(kQuickInitializeStaticStorage, storage_index_, true); + } + // Copy helper's result into r_base, a no-op on all but MIPS. + m2l_->OpRegCopy(r_base_, m2l_->TargetReg(kRet0, kRef)); + + m2l_->OpUnconditionalBranch(cont_); + } + + private: + // Second branch to the slow path, or nullptr if there's only one branch. + LIR* const second_branch_; + + const int storage_index_; + const RegStorage r_base_; + RegStorage r_method_; + }; + + // The slow path is invoked if the r_base is nullptr or the class pointed + // to by it is not initialized. + LIR* cont = NewLIR0(kPseudoTargetLabel); + AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont, + field_info.StorageIndex(), r_base, r_method)); + } + if (IsTemp(r_method)) { + FreeTemp(r_method); + } + return r_base; +} + /* * Generate a kPseudoBarrier marker to indicate the boundary of special * blocks. 
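The fast/slow structure that GenGetOtherTypeForSgetSput emits is easier to see stripped of the LIR plumbing. Below is a stand-alone C++ sketch of the same control-flow shape; Class, dex_cache_type_slot and ResolveAndInitializeSlow are hypothetical stand-ins for the dex-cache entry and the kQuickInitializeStaticStorage entrypoint, not ART APIs:

#include <cstdio>

struct Class { bool initialized; };

static Class resolved_class{false};
static Class* dex_cache_type_slot = nullptr;  // nullptr while the type is unresolved.

// Hypothetical stand-in for the kQuickInitializeStaticStorage entrypoint.
static Class* ResolveAndInitializeSlow() {
  resolved_class.initialized = true;
  dex_cache_type_slot = &resolved_class;
  return dex_cache_type_slot;
}

static Class* GetStorageBase(bool class_in_dex_cache, bool class_initialized) {
  Class* r_base = dex_cache_type_slot;        // Dex-cache or PC-relative load.
  if (!class_in_dex_cache && r_base == nullptr) {
    return ResolveAndInitializeSlow();        // The "unresolved" branch.
  }
  if (!class_initialized && !r_base->initialized) {
    return ResolveAndInitializeSlow();        // The "uninit" branch.
  }
  return r_base;                              // Fast path.
}

int main() {
  std::printf("%d\n", GetStorageBase(false, false)->initialized);  // Prints 1.
}

Either guard falls into the same slow path, just as the two branches above share a single GenerateTargetLabel() target.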
@@ -609,41 +700,6 @@ void Mir2Lir::GenFillArrayData(MIR* mir, DexOffset table_offset, RegLocation rl_ CallRuntimeHelperImmRegLocation(kQuickHandleFillArrayData, table_offset_from_start, rl_src, true); } -// -// Slow path to ensure a class is initialized for sget/sput. -// -class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath { - public: - // There are up to two branches to the static field slow path, the "unresolved" when the type - // entry in the dex cache is null, and the "uninit" when the class is not yet initialized. - // At least one will be non-null here, otherwise we wouldn't generate the slow path. - StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index, - RegStorage r_base) - : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont), - second_branch_(unresolved != nullptr ? uninit : nullptr), - storage_index_(storage_index), r_base_(r_base) { - } - - void Compile() { - LIR* target = GenerateTargetLabel(); - if (second_branch_ != nullptr) { - second_branch_->target = target; - } - m2l_->CallRuntimeHelperImm(kQuickInitializeStaticStorage, storage_index_, true); - // Copy helper's result into r_base, a no-op on all but MIPS. - m2l_->OpRegCopy(r_base_, m2l_->TargetReg(kRet0, kRef)); - - m2l_->OpUnconditionalBranch(cont_); - } - - private: - // Second branch to the slow path, or null if there's only one branch. - LIR* const second_branch_; - - const int storage_index_; - const RegStorage r_base_; -}; - void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) { const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir); DCHECK_EQ(SPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType()); @@ -653,65 +709,23 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) { RegStorage r_base; if (field_info.IsReferrersClass()) { // Fast path, static storage base is this method's class - RegLocation rl_method = LoadCurrMethod(); r_base = AllocTempRef(); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, + RegStorage r_method = LoadCurrMethodWithHint(r_base); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, kNotVolatile); - if (IsTemp(rl_method.reg)) { - FreeTemp(rl_method.reg); - } } else { // Medium path, static storage base in a different class which requires checks that the other // class is initialized. - // TODO: remove initialized check now that we are initializing classes in the compiler driver. - DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex); - // May do runtime call so everything to home locations. - FlushAllRegs(); - // Using fixed register to sync with possible call to runtime support. - RegStorage r_method = TargetReg(kArg1, kRef); - LockTemp(r_method); - LoadCurrMethodDirect(r_method); - r_base = TargetReg(kArg0, kRef); - LockTemp(r_base); - LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, - kNotVolatile); - int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); - LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); - // r_base now points at static storage (Class*) or NULL if the type is not yet resolved. - LIR* unresolved_branch = nullptr; - if (!field_info.IsClassInDexCache() && - (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) { - // Check if r_base is NULL. 
- unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL); - } - LIR* uninit_branch = nullptr; + r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags); if (!field_info.IsClassInitialized() && (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) { - // Check if r_base is not yet initialized class. - RegStorage r_tmp = TargetReg(kArg2, kNotWide); - LockTemp(r_tmp); - uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base, - mirror::Class::StatusOffset().Int32Value(), - mirror::Class::kStatusInitialized, nullptr, nullptr); - FreeTemp(r_tmp); - } - if (unresolved_branch != nullptr || uninit_branch != nullptr) { - // The slow path is invoked if the r_base is NULL or the class pointed - // to by it is not initialized. - LIR* cont = NewLIR0(kPseudoTargetLabel); - AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont, - field_info.StorageIndex(), r_base)); - - if (uninit_branch != nullptr) { - // Ensure load of status and store of value don't re-order. - // TODO: Presumably the actual value store is control-dependent on the status load, - // and will thus not be reordered in any case, since stores are never speculated. - // Does later code "know" that the class is now initialized? If so, we still - // need the barrier to guard later static loads. - GenMemBarrier(kLoadAny); - } + // Ensure load of status and store of value don't re-order. + // TODO: Presumably the actual value store is control-dependent on the status load, + // and will thus not be reordered in any case, since stores are never speculated. + // Does later code "know" that the class is now initialized? If so, we still + // need the barrier to guard later static loads. + GenMemBarrier(kLoadAny); } - FreeTemp(r_method); } // rBase now holds static storage base RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile()); @@ -773,57 +787,19 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, OpSize size, Primitive::Typ RegStorage r_base; if (field_info.IsReferrersClass()) { // Fast path, static storage base is this method's class - RegLocation rl_method = LoadCurrMethod(); r_base = AllocTempRef(); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, + RegStorage r_method = LoadCurrMethodWithHint(r_base); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, kNotVolatile); } else { // Medium path, static storage base in a different class which requires checks that the other // class is initialized - DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex); - // May do runtime call so everything to home locations. - FlushAllRegs(); - // Using fixed register to sync with possible call to runtime support. - RegStorage r_method = TargetReg(kArg1, kRef); - LockTemp(r_method); - LoadCurrMethodDirect(r_method); - r_base = TargetReg(kArg0, kRef); - LockTemp(r_base); - LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, - kNotVolatile); - int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); - LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); - // r_base now points at static storage (Class*) or NULL if the type is not yet resolved. - LIR* unresolved_branch = nullptr; - if (!field_info.IsClassInDexCache() && - (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) { - // Check if r_base is NULL. 
- unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL); - } - LIR* uninit_branch = nullptr; + r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags); if (!field_info.IsClassInitialized() && (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) { - // Check if r_base is not yet initialized class. - RegStorage r_tmp = TargetReg(kArg2, kNotWide); - LockTemp(r_tmp); - uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base, - mirror::Class::StatusOffset().Int32Value(), - mirror::Class::kStatusInitialized, nullptr, nullptr); - FreeTemp(r_tmp); - } - if (unresolved_branch != nullptr || uninit_branch != nullptr) { - // The slow path is invoked if the r_base is NULL or the class pointed - // to by it is not initialized. - LIR* cont = NewLIR0(kPseudoTargetLabel); - AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont, - field_info.StorageIndex(), r_base)); - - if (uninit_branch != nullptr) { - // Ensure load of status and load of value don't re-order. - GenMemBarrier(kLoadAny); - } + // Ensure load of status and load of value don't re-order. + GenMemBarrier(kLoadAny); } - FreeTemp(r_method); } // r_base now holds static storage base RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile()); diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index e747239894..db7095dafb 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -1435,10 +1435,12 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, void Mir2Lir::GenInvoke(CallInfo* info) { DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); - const DexFile* dex_file = info->method_ref.dex_file; - if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file) - ->GenIntrinsic(this, info)) { - return; + if (mir_graph_->GetMethodLoweringInfo(info->mir).IsIntrinsic()) { + const DexFile* dex_file = info->method_ref.dex_file; + auto* inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file); + if (inliner->GenIntrinsic(this, info)) { + return; + } } GenInvokeNoInline(info); } diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc new file mode 100644 index 0000000000..03cf4bef8b --- /dev/null +++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lazy_debug_frame_opcode_writer.h" +#include "mir_to_lir.h" + +namespace art { +namespace dwarf { + +const ArenaVector<uint8_t>* LazyDebugFrameOpCodeWriter::Patch(size_t code_size) { + if (!enable_writes_) { + DCHECK(this->data()->empty()); + return this->data(); + } + if (!patched_) { + patched_ = true; + // Move our data buffer to temporary variable. 
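+ // (Swapping is O(1) and keeps the storage in the same arena; the vector is
+ // rebuilt below with the PC-advance opcodes spliced in at the recorded
+ // positions.)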
+ ArenaVector<uint8_t> old_opcodes(this->opcodes_.get_allocator()); + old_opcodes.swap(this->opcodes_); + // Refill our data buffer with patched opcodes. + this->opcodes_.reserve(old_opcodes.size() + advances_.size() + 4); + size_t pos = 0; + for (auto advance : advances_) { + DCHECK_GE(advance.pos, pos); + // Copy old data up to the point when advance was issued. + this->opcodes_.insert(this->opcodes_.end(), + old_opcodes.begin() + pos, + old_opcodes.begin() + advance.pos); + pos = advance.pos; + // This may be null if there is no slow-path code after return. + LIR* next_lir = NEXT_LIR(advance.last_lir_insn); + // Insert the advance command with its final offset. + Base::AdvancePC(next_lir != nullptr ? next_lir->offset : code_size); + } + // Copy the final segment. + this->opcodes_.insert(this->opcodes_.end(), + old_opcodes.begin() + pos, + old_opcodes.end()); + Base::AdvancePC(code_size); + } + return this->data(); +} + +} // namespace dwarf +} // namespace art diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h new file mode 100644 index 0000000000..d71a87d567 --- /dev/null +++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ +#define ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ + +#include "base/arena_allocator.h" +#include "base/arena_containers.h" +#include "dwarf/debug_frame_opcode_writer.h" + +namespace art { +struct LIR; +namespace dwarf { + +// When we are generating the CFI code, we do not know the instruction offsets, +// so this class stores the LIR references and patches the instruction stream later. +class LazyDebugFrameOpCodeWriter FINAL + : private DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> { + typedef DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> Base; + public: + // This method is implicitly called by the opcode writers. + virtual void ImplicitlyAdvancePC() OVERRIDE { + DCHECK_EQ(patched_, false); + DCHECK_EQ(this->current_pc_, 0); + advances_.push_back({this->data()->size(), *last_lir_insn_}); + } + + // The register was unspilled. + void Restore(Reg reg) { + if (enable_writes_) { + Base::Restore(reg); + } + } + + // Custom alias - unspill many registers based on bitmask. + void RestoreMany(Reg reg_base, uint32_t reg_mask) { + if (enable_writes_) { + Base::RestoreMany(reg_base, reg_mask); + } + } + + // Remember the state of register spills. + void RememberState() { + if (enable_writes_) { + Base::RememberState(); + } + } + + // Restore the state of register spills. + void RestoreState() { + if (enable_writes_) { + Base::RestoreState(); + } + } + + // Set the frame pointer (CFA) to (stack_pointer + offset). 
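+ // For example, a prologue "sub sp, sp, #64" is followed by DefCFAOffset(64)
+ // (usually via AdjustCFAOffset(+64) below), and the matching epilogue add by
+ // AdjustCFAOffset(-64). The offset is tracked even when writes are disabled,
+ // so the DCHECKs against frame_size_ in Mir2Lir still hold.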
+ void DefCFAOffset(int offset) { + if (enable_writes_) { + Base::DefCFAOffset(offset); + } + this->current_cfa_offset_ = offset; + } + + // The stack size was increased by given delta. + void AdjustCFAOffset(int delta) { + DefCFAOffset(this->current_cfa_offset_ + delta); + } + + // The register was spilled to (stack_pointer + offset). + void RelOffset(Reg reg, int offset) { + if (enable_writes_) { + Base::RelOffset(reg, offset); + } + } + + // Custom alias - spill many registers based on bitmask. + void RelOffsetForMany(Reg reg_base, int offset, uint32_t reg_mask, int reg_size) { + if (enable_writes_) { + Base::RelOffsetForMany(reg_base, offset, reg_mask, reg_size); + } + } + + using Base::GetCurrentCFAOffset; + using Base::SetCurrentCFAOffset; + using Base::GetCurrentPC; + + const ArenaVector<uint8_t>* Patch(size_t code_size); + + explicit LazyDebugFrameOpCodeWriter(LIR** last_lir_insn, bool enable_writes, + ArenaAllocator* allocator) + : Base(allocator->Adapter()), + last_lir_insn_(last_lir_insn), + enable_writes_(enable_writes), + advances_(allocator->Adapter()), + patched_(false) { + } + + private: + typedef struct { + size_t pos; + LIR* last_lir_insn; + } Advance; + + LIR** last_lir_insn_; + bool enable_writes_; + ArenaVector<Advance> advances_; + bool patched_; + + DISALLOW_COPY_AND_ASSIGN(LazyDebugFrameOpCodeWriter); +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index c932df6dc9..7d4f20e335 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -238,7 +238,12 @@ void MipsMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::MipsCore(num); +} + void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); int spill_count = num_core_spills_ + num_fp_spills_; /* * On entry, A0, A1, A2 & A3 are live. On Mips64, A4, A5, A6 & A7 are also live. @@ -304,10 +309,12 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) // RA is offset 0 since we push in reverse order. m2l_->LoadWordDisp(m2l_->TargetPtrReg(kSp), 0, m2l_->TargetPtrReg(kLr)); m2l_->OpRegImm(kOpAdd, m2l_->TargetPtrReg(kSp), sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); RegStorage r_tgt = m2l_->CallHelperSetup(kQuickThrowStackOverflow); // Doesn't clobber LR. m2l_->CallHelper(r_tgt, kQuickThrowStackOverflow, false /* MarkSafepointPC */, false /* UseLink */); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -318,8 +325,10 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_count * ptr_size)); // TODO: avoid copy for small frame sizes. OpRegCopy(rs_sp, new_sp); // Establish stack. + cfi_.AdjustCFAOffset(frame_sub); } else { OpRegImm(kOpSub, rs_sp, frame_sub); + cfi_.AdjustCFAOffset(frame_sub); } FlushIns(ArgLocs, rl_method); @@ -337,6 +346,7 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) } void MipsMir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, rMIPS_RET0/rMIPS_RET1 are live - make sure they aren't * allocated by the register utilities as temps. 
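Since the splice loop in LazyDebugFrameOpCodeWriter::Patch() is the heart of the lazy CFI plumbing, here is a minimal stand-alone model of it; this sketch uses plain std::vector instead of ArenaVector, carries the final PC directly in the Advance record, and assumes every delta fits the one-byte DW_CFA_advance_loc form:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// (pos, pc): at buffer position `pos`, the PC is known to have advanced to `pc`.
struct Advance { size_t pos; uint8_t pc; };

// Splice DW_CFA_advance_loc opcodes into an opcode stream whose PC advances
// were only recorded, not emitted, during code generation.
std::vector<uint8_t> Patch(const std::vector<uint8_t>& ops,
                           const std::vector<Advance>& advances) {
  std::vector<uint8_t> out;
  size_t pos = 0;
  uint8_t pc = 0;
  for (const Advance& a : advances) {
    assert(a.pos >= pos && a.pc >= pc);
    // Copy old data up to the point where the advance was recorded.
    out.insert(out.end(), ops.begin() + pos, ops.begin() + a.pos);
    pos = a.pos;
    if (a.pc > pc) {
      assert(a.pc - pc < 0x40);           // One-byte form only, for brevity.
      out.push_back(0x40 | (a.pc - pc));  // DW_CFA_advance_loc <delta>.
      pc = a.pc;
    }
  }
  out.insert(out.end(), ops.begin() + pos, ops.end());  // Final segment.
  return out;
}

int main() {
  // def_cfa_offset 64 recorded at PC 0; one more opcode after a 4-byte advance.
  std::vector<uint8_t> ops = {0x0E, 0x40, 0x0B};
  std::vector<uint8_t> patched = Patch(ops, {{2, 4}, {3, 8}});
  assert((patched == std::vector<uint8_t>{0x0E, 0x40, 0x44, 0x0B, 0x44}));
}

The real Patch() derives the final PC from NEXT_LIR(advance.last_lir_insn) and lets the base writer pick whichever advance encoding the delta needs.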
@@ -346,6 +356,9 @@ void MipsMir2Lir::GenExitSequence() { UnSpillCoreRegs(); OpReg(kOpBx, TargetPtrReg(kLr)); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void MipsMir2Lir::GenSpecialExitSequence() { @@ -364,15 +377,20 @@ void MipsMir2Lir::GenSpecialEntryForSuspend() { fp_vmap_table_.clear(); const RegStorage rs_sp = TargetPtrReg(kSp); OpRegImm(kOpSub, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(frame_size_); StoreWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr)); + cfi_.RelOffset(DwarfCoreReg(rRA), frame_size_ - (cu_->target64 ? 8 : 4)); StoreWordDisp(rs_sp, 0, TargetPtrReg(kArg0)); + // Do not generate CFI for scratch register A0. } void MipsMir2Lir::GenSpecialExitForSuspend() { // Pop the frame. Don't pop ArtMethod*, it's no longer needed. const RegStorage rs_sp = TargetPtrReg(kSp); LoadWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr)); + cfi_.Restore(DwarfCoreReg(rRA)); OpRegImm(kOpAdd, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(-frame_size_); } /* diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index a94fad7534..4c0bd8378b 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -830,6 +830,10 @@ LIR* MipsMir2Lir::GenAtomic64Store(RegStorage r_base, int displacement, RegStora return OpReg(kOpBlx, r_tgt); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::MipsCore(num); +} + void MipsMir2Lir::SpillCoreRegs() { if (num_core_spills_ == 0) { return; @@ -839,11 +843,13 @@ void MipsMir2Lir::SpillCoreRegs() { int offset = num_core_spills_ * ptr_size; const RegStorage rs_sp = TargetPtrReg(kSp); OpRegImm(kOpSub, rs_sp, offset); + cfi_.AdjustCFAOffset(offset); for (int reg = 0; mask; mask >>= 1, reg++) { if (mask & 0x1) { offset -= ptr_size; StoreWordDisp(rs_sp, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg)); + cfi_.RelOffset(DwarfCoreReg(reg), offset); } } } @@ -861,9 +867,11 @@ void MipsMir2Lir::UnSpillCoreRegs() { offset -= ptr_size; LoadWordDisp(rs_sp, offset, cu_->target64 ? 
RegStorage::Solo64(reg) : RegStorage::Solo32(reg)); + cfi_.Restore(DwarfCoreReg(reg)); } } OpRegImm(kOpAdd, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(-frame_size_); } bool MipsMir2Lir::IsUnconditionalBranch(LIR* lir) { diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index ed8e21e817..961cd4f06b 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -1253,11 +1253,14 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { AppendLIR(NewLIR0(kPseudoPrologueBegin)); GenEntrySequence(&mir_graph_->reg_location_[start_vreg], mir_graph_->GetMethodLoc()); AppendLIR(NewLIR0(kPseudoPrologueEnd)); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); } else if (bb->block_type == kExitBlock) { ResetRegPool(); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); AppendLIR(NewLIR0(kPseudoEpilogueBegin)); GenExitSequence(); AppendLIR(NewLIR0(kPseudoEpilogueEnd)); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); } for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index bb8fbae8f6..5995f33e18 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -29,6 +29,7 @@ #include "dex/quick/resource_mask.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "invoke_type.h" +#include "lazy_debug_frame_opcode_writer.h" #include "leb128.h" #include "safe_map.h" #include "utils/array_ref.h" @@ -135,6 +136,7 @@ class BasicBlock; class BitVector; struct CallInfo; struct CompilationUnit; +struct CompilerTemp; struct InlineMethod; class MIR; struct LIR; @@ -142,6 +144,7 @@ struct RegisterInfo; class DexFileMethodInliner; class MIRGraph; class MirMethodLoweringInfo; +class MirSFieldLoweringInfo; typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int, const MethodReference& target_method, @@ -774,9 +777,10 @@ class Mir2Lir { */ virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update); - void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs); + void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight); + virtual void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs); void DumpCounts(const RefCounts* arr, int size, const char* msg); - void DoPromotion(); + virtual void DoPromotion(); int VRegOffset(int v_reg); int SRegOffset(int s_reg); RegLocation GetReturnWide(RegisterClass reg_class); @@ -1505,6 +1509,12 @@ class Mir2Lir { return 0; } + /** + * @brief Buffer of DWARF's Call Frame Information opcodes. + * @details It is used by debuggers and other tools to unwind the call stack. + */ + dwarf::LazyDebugFrameOpCodeWriter& cfi() { return cfi_; } + protected: Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -1570,11 +1580,6 @@ class Mir2Lir { bool can_assume_type_is_in_dex_cache, uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src); - /* - * @brief Generate the eh_frame FDE information if possible. - * @returns pointer to vector containg FDE information, or NULL. - */ - virtual std::vector<uint8_t>* ReturnFrameDescriptionEntry(); /** * @brief Used to insert marker that can be used to associate MIR with LIR. @@ -1692,6 +1697,13 @@ class Mir2Lir { void GenIfNullUseHelperImmMethod( RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method); + /** + * @brief Generate code to retrieve Class* for another type to be used by SGET/SPUT. + * @param field_info information about the field to be accessed. 
+ * @param opt_flags the optimization flags of the MIR. + */ + RegStorage GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info, int opt_flags); + void AddDivZeroCheckSlowPath(LIR* branch); // Copy arg0 and arg1 to kArg0 and kArg1 safely, possibly using @@ -1765,6 +1777,13 @@ class Mir2Lir { // Update references from prev_mir to mir. void UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references); + /** + * Returns true if the frame spills the given core register. + */ + bool CoreSpillMaskContains(int reg) { + return (core_spill_mask_ & (1u << reg)) != 0; + } + public: // TODO: add accessors for these. LIR* literal_list_; // Constants. @@ -1841,6 +1860,20 @@ class Mir2Lir { // The layout of the cu_->dex_file's dex cache arrays for PC-relative addressing. const DexCacheArraysLayout dex_cache_arrays_layout_; + // For architectures that don't have true PC-relative addressing, we can promote + // a PC of an instruction (or another PC-relative address such as a pointer to + // the dex cache arrays if supported) to a register. This is indicated to the + // register promotion pass by allocating a backend temp. + CompilerTemp* pc_rel_temp_; + + // For architectures that don't have true PC-relative addressing (see pc_rel_temp_ + // above) and also have a limited range of offsets for loads, it's useful to + // know the minimum offset into the dex cache arrays, so we calculate that as well + // if pc_rel_temp_ isn't nullptr. + uint32_t dex_cache_arrays_min_offset_; + + dwarf::LazyDebugFrameOpCodeWriter cfi_; + // ABI support class ShortyArg { public: @@ -1900,6 +1933,8 @@ class Mir2Lir { private: static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type); + + friend class QuickCFITest; }; // Class Mir2Lir } // namespace art diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc new file mode 100644 index 0000000000..0540a8c962 --- /dev/null +++ b/compiler/dex/quick/quick_cfi_test.cc @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vector> +#include <memory> + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "cfi_test.h" +#include "dex/compiler_ir.h" +#include "dex/mir_graph.h" +#include "dex/pass_manager.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "dex/quick/quick_compiler.h" +#include "dex/quick/mir_to_lir.h" +#include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" +#include "gtest/gtest.h" + +#include "dex/quick/quick_cfi_test_expected.inc" + +namespace art { + +// Run the tests only on host. +#ifndef HAVE_ANDROID_OS + +class QuickCFITest : public CFITest { + public: + // Enable this flag to generate the expected outputs. 
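+ // (Run the test on the host with the flag set to true and paste the stdout
+ // dump into quick_cfi_test_expected.inc; GenerateExpected() in cfi_test.h
+ // prints the byte arrays together with the interleaved disassembly.)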
+ static constexpr bool kGenerateExpected = false; + + void TestImpl(InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + // Setup simple compiler context. + ArenaPool pool; + ArenaAllocator arena(&pool); + CompilerOptions compiler_options( + CompilerOptions::kDefaultCompilerFilter, + CompilerOptions::kDefaultHugeMethodThreshold, + CompilerOptions::kDefaultLargeMethodThreshold, + CompilerOptions::kDefaultSmallMethodThreshold, + CompilerOptions::kDefaultTinyMethodThreshold, + CompilerOptions::kDefaultNumDexMethodsThreshold, + true, // generate_gdb_information. + false, + CompilerOptions::kDefaultTopKProfileThreshold, + false, + true, // include_debug_symbols. + false, + false, + false, + false, + nullptr, + new PassManagerOptions(), + nullptr, + false); + VerificationResults verification_results(&compiler_options); + DexFileToMethodInlinerMap method_inliner_map; + std::unique_ptr<const InstructionSetFeatures> isa_features; + std::string error; + isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); + CompilerDriver driver(&compiler_options, &verification_results, &method_inliner_map, + Compiler::kQuick, isa, isa_features.get(), + false, 0, 0, 0, false, false, "", 0, -1, ""); + ClassLinker* linker = nullptr; + CompilationUnit cu(&pool, isa, &driver, linker); + DexFile::CodeItem code_item { 0, 0, 0, 0, 0, 0, { 0 } }; // NOLINT + cu.mir_graph.reset(new MIRGraph(&cu, &arena)); + cu.mir_graph->current_code_item_ = &code_item; + + // Generate empty method with some spills. + Mir2Lir* m2l = QuickCompiler::GetCodeGenerator(&cu, NULL); + m2l->frame_size_ = 64u; + m2l->CompilerInitializeRegAlloc(); + for (const auto& info : m2l->reg_pool_->core_regs_) { + if (m2l->num_core_spills_ < 2 && !info->IsTemp() && !info->InUse()) { + m2l->core_spill_mask_ |= 1 << info->GetReg().GetReg(); + m2l->num_core_spills_++; + } + } + for (const auto& info : m2l->reg_pool_->sp_regs_) { + if (m2l->num_fp_spills_ < 2 && !info->IsTemp() && !info->InUse()) { + m2l->fp_spill_mask_ |= 1 << info->GetReg().GetReg(); + m2l->num_fp_spills_++; + } + } + m2l->AdjustSpillMask(); + m2l->GenEntrySequence(NULL, m2l->LocCReturnRef()); + m2l->GenExitSequence(); + m2l->HandleSlowPaths(); + m2l->AssembleLIR(); + std::vector<uint8_t> actual_asm(m2l->code_buffer_.begin(), m2l->code_buffer_.end()); + auto const& cfi_data = m2l->cfi().Patch(actual_asm.size()); + std::vector<uint8_t> actual_cfi(cfi_data->begin(), cfi_data->end()); + EXPECT_EQ(m2l->cfi().GetCurrentPC(), static_cast<int>(actual_asm.size())); + + if (kGenerateExpected) { + GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + } else { + EXPECT_EQ(expected_asm, actual_asm); + EXPECT_EQ(expected_cfi, actual_cfi); + } + } +}; + +#define TEST_ISA(isa) \ + TEST_F(QuickCFITest, isa) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ + } + +TEST_ISA(kThumb2) +TEST_ISA(kArm64) +TEST_ISA(kX86) +TEST_ISA(kX86_64) +TEST_ISA(kMips) +TEST_ISA(kMips64) + +#endif // HAVE_ANDROID_OS + +} // namespace art diff --git a/compiler/dex/quick/quick_cfi_test_expected.inc b/compiler/dex/quick/quick_cfi_test_expected.inc new file mode 100644 index 0000000000..634fdeead0 --- /dev/null +++ b/compiler/dex/quick/quick_cfi_test_expected.inc @@ -0,0 +1,217 @@ 
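+// How to read the expected_cfi arrays below (DWARF call-frame opcodes):
+// 0x40|delta = DW_CFA_advance_loc (delta in code bytes here),
+// 0x80|reg <n> = DW_CFA_offset (register saved at CFA - 4*n),
+// 0xC0|reg = DW_CFA_restore, 0x0E <n> = DW_CFA_def_cfa_offset,
+// 0x05/0x06 = DW_CFA_offset_extended/restore_extended (used for FP registers),
+// 0x0A/0x0B = DW_CFA_remember_state/restore_state.
+// E.g. kThumb2 opens with 0x42 0x0E 0x0C: advance 2 bytes, then def_cfa_offset 12.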
+static constexpr uint8_t expected_asm_kThumb2[] = { + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0, + 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kThumb2[] = { + 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42, + 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x44, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: push {r5, r6, lr} +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: .cfi_offset: r6 at cfa-8 +// 0x00000002: .cfi_offset: r14 at cfa-4 +// 0x00000002: vpush.f32 {s16-s17} +// 0x00000006: .cfi_def_cfa_offset: 20 +// 0x00000006: .cfi_offset_extended: r80 at cfa-20 +// 0x00000006: .cfi_offset_extended: r81 at cfa-16 +// 0x00000006: sub sp, sp, #44 +// 0x00000008: .cfi_def_cfa_offset: 64 +// 0x00000008: str r0, [sp, #0] +// 0x0000000a: .cfi_remember_state +// 0x0000000a: add sp, sp, #44 +// 0x0000000c: .cfi_def_cfa_offset: 20 +// 0x0000000c: vpop.f32 {s16-s17} +// 0x00000010: .cfi_def_cfa_offset: 12 +// 0x00000010: .cfi_restore_extended: r80 +// 0x00000010: .cfi_restore_extended: r81 +// 0x00000010: pop {r5, r6, pc} +// 0x00000012: lsls r0, r0, #0 +// 0x00000014: .cfi_restore_state +// 0x00000014: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kArm64[] = { + 0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF4, 0xD7, 0x02, 0xA9, + 0xFE, 0x1F, 0x00, 0xF9, 0xE0, 0x03, 0x00, 0xB9, 0xE8, 0xA7, 0x41, 0x6D, + 0xF4, 0xD7, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, + 0xC0, 0x03, 0x5F, 0xD6, +}; +static constexpr uint8_t expected_cfi_kArm64[] = { + 0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x94, + 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06, + 0x49, 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: sub sp, sp, #0x40 (64) +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: stp d8, d9, [sp, #24] +// 0x00000008: .cfi_offset_extended: r72 at cfa-40 +// 0x00000008: .cfi_offset_extended: r73 at cfa-32 +// 0x00000008: stp x20, x21, [sp, #40] +// 0x0000000c: .cfi_offset: r20 at cfa-24 +// 0x0000000c: .cfi_offset: r21 at cfa-16 +// 0x0000000c: str lr, [sp, #56] +// 0x00000010: .cfi_offset: r30 at cfa-8 +// 0x00000010: str w0, [sp] +// 0x00000014: .cfi_remember_state +// 0x00000014: ldp d8, d9, [sp, #24] +// 0x00000018: .cfi_restore_extended: r72 +// 0x00000018: .cfi_restore_extended: r73 +// 0x00000018: ldp x20, x21, [sp, #40] +// 0x0000001c: .cfi_restore: r20 +// 0x0000001c: .cfi_restore: r21 +// 0x0000001c: ldr lr, [sp, #56] +// 0x00000020: .cfi_restore: r30 +// 0x00000020: add sp, sp, #0x40 (64) +// 0x00000024: .cfi_def_cfa_offset: 0 +// 0x00000024: ret +// 0x00000028: .cfi_restore_state +// 0x00000028: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86[] = { + 0x83, 0xEC, 0x3C, 0x89, 0x6C, 0x24, 0x34, 0x89, 0x74, 0x24, 0x38, 0x89, + 0x04, 0x24, 0x8B, 0x6C, 0x24, 0x34, 0x8B, 0x74, 0x24, 0x38, 0x83, 0xC4, + 0x3C, 0xC3, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kX86[] = { + 0x43, 0x0E, 0x40, 0x44, 0x85, 0x03, 0x44, 0x86, 0x02, 0x43, 0x0A, 0x44, + 0xC5, 0x44, 0xC6, 0x43, 0x0E, 0x04, 0x43, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: sub esp, 60 +// 0x00000003: .cfi_def_cfa_offset: 64 +// 0x00000003: mov [esp + 52], ebp +// 0x00000007: .cfi_offset: r5 at cfa-12 +// 0x00000007: mov [esp + 56], esi +// 0x0000000b: .cfi_offset: r6 at 
cfa-8 +// 0x0000000b: mov [esp], eax +// 0x0000000e: .cfi_remember_state +// 0x0000000e: mov ebp, [esp + 52] +// 0x00000012: .cfi_restore: r5 +// 0x00000012: mov esi, [esp + 56] +// 0x00000016: .cfi_restore: r6 +// 0x00000016: add esp, 60 +// 0x00000019: .cfi_def_cfa_offset: 4 +// 0x00000019: ret +// 0x0000001a: addb [eax], al +// 0x0000001c: .cfi_restore_state +// 0x0000001c: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86_64[] = { + 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0x5C, 0x24, 0x28, 0x48, 0x89, 0x6C, + 0x24, 0x30, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, + 0x11, 0x6C, 0x24, 0x20, 0x48, 0x8B, 0xC7, 0x89, 0x3C, 0x24, 0x48, 0x8B, + 0x5C, 0x24, 0x28, 0x48, 0x8B, 0x6C, 0x24, 0x30, 0xF2, 0x44, 0x0F, 0x10, + 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, 0x20, 0x48, 0x83, + 0xC4, 0x38, 0xC3, 0x00, +}; +static constexpr uint8_t expected_cfi_kX86_64[] = { + 0x44, 0x0E, 0x40, 0x45, 0x83, 0x06, 0x45, 0x86, 0x04, 0x47, 0x9D, 0x0A, + 0x47, 0x9E, 0x08, 0x46, 0x0A, 0x45, 0xC3, 0x45, 0xC6, 0x47, 0xDD, 0x47, + 0xDE, 0x44, 0x0E, 0x08, 0x42, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: subq rsp, 56 +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: movq [rsp + 40], rbx +// 0x00000009: .cfi_offset: r3 at cfa-24 +// 0x00000009: movq [rsp + 48], rbp +// 0x0000000e: .cfi_offset: r6 at cfa-16 +// 0x0000000e: movsd [rsp + 24], xmm12 +// 0x00000015: .cfi_offset: r29 at cfa-40 +// 0x00000015: movsd [rsp + 32], xmm13 +// 0x0000001c: .cfi_offset: r30 at cfa-32 +// 0x0000001c: movq rax, rdi +// 0x0000001f: mov [rsp], edi +// 0x00000022: .cfi_remember_state +// 0x00000022: movq rbx, [rsp + 40] +// 0x00000027: .cfi_restore: r3 +// 0x00000027: movq rbp, [rsp + 48] +// 0x0000002c: .cfi_restore: r6 +// 0x0000002c: movsd xmm12, [rsp + 24] +// 0x00000033: .cfi_restore: r29 +// 0x00000033: movsd xmm13, [rsp + 32] +// 0x0000003a: .cfi_restore: r30 +// 0x0000003a: addq rsp, 56 +// 0x0000003e: .cfi_def_cfa_offset: 8 +// 0x0000003e: ret +// 0x0000003f: addb al, al +// 0x00000040: .cfi_restore_state +// 0x00000040: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips[] = { + 0xF4, 0xFF, 0xBD, 0x27, 0x08, 0x00, 0xB2, 0xAF, 0x04, 0x00, 0xB3, 0xAF, + 0x00, 0x00, 0xBF, 0xAF, 0xCC, 0xFF, 0xBD, 0x27, 0x25, 0x10, 0x80, 0x00, + 0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xB2, 0x8F, 0x38, 0x00, 0xB3, 0x8F, + 0x34, 0x00, 0xBF, 0x8F, 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, + 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips[] = { + 0x44, 0x0E, 0x0C, 0x44, 0x92, 0x01, 0x44, 0x93, 0x02, 0x44, 0x9F, 0x03, + 0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44, + 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: addiu r29, r29, -12 +// 0x00000004: .cfi_def_cfa_offset: 12 +// 0x00000004: sw r18, +8(r29) +// 0x00000008: .cfi_offset: r18 at cfa-4 +// 0x00000008: sw r19, +4(r29) +// 0x0000000c: .cfi_offset: r19 at cfa-8 +// 0x0000000c: sw r31, +0(r29) +// 0x00000010: .cfi_offset: r31 at cfa-12 +// 0x00000010: addiu r29, r29, -52 +// 0x00000014: .cfi_def_cfa_offset: 64 +// 0x00000014: or r2, r4, r0 +// 0x00000018: sw r4, +0(r29) +// 0x0000001c: .cfi_remember_state +// 0x0000001c: lw r18, +60(r29) +// 0x00000020: .cfi_restore: r18 +// 0x00000020: lw r19, +56(r29) +// 0x00000024: .cfi_restore: r19 +// 0x00000024: lw r31, +52(r29) +// 0x00000028: .cfi_restore: r31 +// 0x00000028: addiu r29, r29, 64 +// 0x0000002c: .cfi_def_cfa_offset: 0 +// 0x0000002c: jalr r0, r31 +// 0x00000030: nop +// 0x00000034: .cfi_restore_state +// 0x00000034: 
.cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64[] = { + 0xE8, 0xFF, 0xBD, 0x67, 0x10, 0x00, 0xB2, 0xFF, 0x08, 0x00, 0xB3, 0xFF, + 0x00, 0x00, 0xBF, 0xFF, 0xD8, 0xFF, 0xBD, 0x67, 0x25, 0x10, 0x80, 0x00, + 0x00, 0x00, 0xA4, 0xAF, 0x38, 0x00, 0xB2, 0xDF, 0x30, 0x00, 0xB3, 0xDF, + 0x28, 0x00, 0xBF, 0xDF, 0x40, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, + 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64[] = { + 0x44, 0x0E, 0x18, 0x44, 0x92, 0x02, 0x44, 0x93, 0x04, 0x44, 0x9F, 0x06, + 0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44, + 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: daddiu r29, r29, -24 +// 0x00000004: .cfi_def_cfa_offset: 24 +// 0x00000004: sd r18, +16(r29) +// 0x00000008: .cfi_offset: r18 at cfa-8 +// 0x00000008: sd r19, +8(r29) +// 0x0000000c: .cfi_offset: r19 at cfa-16 +// 0x0000000c: sd r31, +0(r29) +// 0x00000010: .cfi_offset: r31 at cfa-24 +// 0x00000010: daddiu r29, r29, -40 +// 0x00000014: .cfi_def_cfa_offset: 64 +// 0x00000014: or r2, r4, r0 +// 0x00000018: sw r4, +0(r29) +// 0x0000001c: .cfi_remember_state +// 0x0000001c: ld r18, +56(r29) +// 0x00000020: .cfi_restore: r18 +// 0x00000020: ld r19, +48(r29) +// 0x00000024: .cfi_restore: r19 +// 0x00000024: ld r31, +40(r29) +// 0x00000028: .cfi_restore: r31 +// 0x00000028: daddiu r29, r29, 64 +// 0x0000002c: .cfi_def_cfa_offset: 0 +// 0x0000002c: jr r31 +// 0x00000030: nop +// 0x00000034: .cfi_restore_state +// 0x00000034: .cfi_def_cfa_offset: 64 + diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 8baafc7fd2..2c0bd47405 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -798,11 +798,16 @@ bool QuickCompiler::WriteElf(art::File* file, const std::vector<const art::DexFile*>& dex_files, const std::string& android_root, bool is_host) const { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); + if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) { + return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } else { + return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } } -Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const { +Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) { UNUSED(compilation_unit); Mir2Lir* mir_to_lir = nullptr; switch (cu->instruction_set) { diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h index 5153a9e82e..09b08ace77 100644 --- a/compiler/dex/quick/quick_compiler.h +++ b/compiler/dex/quick/quick_compiler.h @@ -60,7 +60,7 @@ class QuickCompiler : public Compiler { OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const; + static Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit); void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 741657bc69..e779479780 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -19,9 +19,11 @@ #include "mir_to_lir-inl.h" #include "dex/compiler_ir.h" +#include "dex/dataflow_iterator-inl.h" #include "dex/mir_graph.h" #include 
"driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" +#include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -1128,6 +1130,152 @@ RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) { return loc; } +void Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) { + // NOTE: This should be in sync with functions that actually generate code for + // the opcodes below. However, if we get this wrong, the generated code will + // still be correct even if it may be sub-optimal. + int opcode = mir->dalvikInsn.opcode; + bool uses_method = false; + bool uses_pc_rel_load = false; + uint32_t dex_cache_array_offset = std::numeric_limits<uint32_t>::max(); + switch (opcode) { + case Instruction::CHECK_CAST: + case Instruction::INSTANCE_OF: { + if ((opcode == Instruction::CHECK_CAST) && + (mir->optimization_flags & MIR_IGNORE_CHECK_CAST) != 0) { + break; // No code generated. + } + uint32_t type_idx = + (opcode == Instruction::CHECK_CAST) ? mir->dalvikInsn.vB : mir->dalvikInsn.vC; + bool type_known_final, type_known_abstract, use_declaring_class; + bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks( + cu_->method_idx, *cu_->dex_file, type_idx, + &type_known_final, &type_known_abstract, &use_declaring_class); + if (opcode == Instruction::CHECK_CAST && !needs_access_check && + cu_->compiler_driver->IsSafeCast( + mir_graph_->GetCurrentDexCompilationUnit(), mir->offset)) { + break; // No code generated. + } + if (!needs_access_check && !use_declaring_class && CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + } else { + uses_method = true; + } + break; + } + + case Instruction::CONST_CLASS: + if (CanUseOpPcRelDexCacheArrayLoad() && + cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, + mir->dalvikInsn.vB)) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(mir->dalvikInsn.vB); + } else { + uses_method = true; + } + break; + + case Instruction::CONST_STRING: + case Instruction::CONST_STRING_JUMBO: + if (CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.StringOffset(mir->dalvikInsn.vB); + } else { + uses_method = true; + } + break; + + case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_SUPER: + case Instruction::INVOKE_DIRECT: + case Instruction::INVOKE_STATIC: + case Instruction::INVOKE_INTERFACE: + case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_SUPER_RANGE: + case Instruction::INVOKE_DIRECT_RANGE: + case Instruction::INVOKE_STATIC_RANGE: + case Instruction::INVOKE_INTERFACE_RANGE: + case Instruction::INVOKE_VIRTUAL_QUICK: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: { + const MirMethodLoweringInfo& info = mir_graph_->GetMethodLoweringInfo(mir); + InvokeType sharp_type = info.GetSharpType(); + if (info.IsIntrinsic()) { + // Nothing to do, if an intrinsic uses ArtMethod* it's in the slow-path - don't count it. + } else if (!info.FastPath() || (sharp_type != kStatic && sharp_type != kDirect)) { + // Nothing to do, the generated code or entrypoint uses method from the stack. + } else if (info.DirectCode() != 0 && info.DirectMethod() != 0) { + // Nothing to do, the generated code uses method from the stack. 
+ } else if (CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; + dex_cache_array_offset = dex_cache_arrays_layout_.MethodOffset(mir->dalvikInsn.vB); + } else { + uses_method = true; + } + break; + } + + case Instruction::NEW_INSTANCE: + case Instruction::NEW_ARRAY: + case Instruction::FILLED_NEW_ARRAY: + case Instruction::FILLED_NEW_ARRAY_RANGE: + uses_method = true; + break; + case Instruction::FILL_ARRAY_DATA: + // Nothing to do, the entrypoint uses method from the stack. + break; + case Instruction::THROW: + // Nothing to do, the entrypoint uses method from the stack. + break; + + case Instruction::SGET: + case Instruction::SGET_WIDE: + case Instruction::SGET_OBJECT: + case Instruction::SGET_BOOLEAN: + case Instruction::SGET_BYTE: + case Instruction::SGET_CHAR: + case Instruction::SGET_SHORT: + case Instruction::SPUT: + case Instruction::SPUT_WIDE: + case Instruction::SPUT_OBJECT: + case Instruction::SPUT_BOOLEAN: + case Instruction::SPUT_BYTE: + case Instruction::SPUT_CHAR: + case Instruction::SPUT_SHORT: { + const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir); + bool fast = IsInstructionSGet(static_cast<Instruction::Code>(opcode)) + ? field_info.FastGet() + : field_info.FastPut(); + if (fast && (cu_->enable_debug & (1 << kDebugSlowFieldPath)) == 0) { + if (!field_info.IsReferrersClass() && CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex()); + } else { + uses_method = true; + } + } else { + // Nothing to do, the entrypoint uses method from the stack. + } + break; + } + + default: + break; + } + if (uses_method) { + core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count += weight; + } + if (uses_pc_rel_load) { + if (pc_rel_temp_ != nullptr) { + core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight; + DCHECK_NE(dex_cache_array_offset, std::numeric_limits<uint32_t>::max()); + dex_cache_arrays_min_offset_ = std::min(dex_cache_arrays_min_offset_, dex_cache_array_offset); + } else { + // Nothing to do, using PC-relative addressing without promoting base PC to register. + } + } +} + /* USE SSA names to count references of base Dalvik v_regs. */ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) { @@ -1157,6 +1305,22 @@ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num } } } + + // Now analyze the ArtMethod* and pc_rel_temp_ uses. 
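+ // These are synthetic uses: every MIR that needs ArtMethod* or the promoted
+ // PC base contributes the block's use-count weight, so the promotion
+ // heuristic can weigh them against ordinary Dalvik vregs.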
+ DCHECK_EQ(core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count, 0); + if (pc_rel_temp_ != nullptr) { + DCHECK_EQ(core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count, 0); + } + PreOrderDfsIterator iter(mir_graph_); + for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) { + if (bb->block_type == kDead) { + continue; + } + uint32_t weight = mir_graph_->GetUseCountWeight(bb); + for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { + AnalyzeMIR(core_counts, mir, weight); + } + } } /* qsort callback function, sort descending */ diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index fd23692d24..7f42536c35 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -150,6 +150,10 @@ void X86Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { /* * On entry, rX86_ARG0, rX86_ARG1, rX86_ARG2 are live. Let the register @@ -184,8 +188,9 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } /* Build frame, return address already on stack */ - stack_decrement_ = OpRegImm(kOpSub, rs_rSP, frame_size_ - - GetInstructionSetPointerSize(cu_->instruction_set)); + cfi_.SetCurrentCFAOffset(GetInstructionSetPointerSize(cu_->instruction_set)); + OpRegImm(kOpSub, rs_rSP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set)); + cfi_.DefCFAOffset(frame_size_); /* Spill core callee saves */ SpillCoreRegs(); @@ -202,10 +207,12 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { GenerateTargetLabel(kPseudoThrowTarget); const RegStorage local_rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; m2l_->OpRegImm(kOpAdd, local_rs_rSP, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); // Assumes codegen and target are in thumb2 mode. m2l_->CallHelper(RegStorage::InvalidReg(), kQuickThrowStackOverflow, false /* MarkSafepointPC */, false /* UseLink */); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -252,6 +259,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } void X86Mir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, rX86_RET0/rX86_RET1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -263,9 +271,14 @@ void X86Mir2Lir::GenExitSequence() { UnSpillFPRegs(); /* Remove frame except for return address */ const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - stack_increment_ = OpRegImm(kOpAdd, rs_rSP, - frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set)); + int adjust = frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set); + OpRegImm(kOpAdd, rs_rSP, adjust); + cfi_.AdjustCFAOffset(-adjust); + // There is only the return PC on the stack now. NewLIR0(kX86Ret); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void X86Mir2Lir::GenSpecialExitSequence() { @@ -276,6 +289,8 @@ void X86Mir2Lir::GenSpecialEntryForSuspend() { // Keep 16-byte stack alignment, there's already the return address, so // - for 32-bit push EAX, i.e. ArtMethod*, ESI, EDI, // - for 64-bit push RAX, i.e. ArtMethod*. 
+ const int kRegSize = cu_->target64 ? 8 : 4; + cfi_.SetCurrentCFAOffset(kRegSize); // Return address. if (!cu_->target64) { DCHECK(!IsTemp(rs_rSI)); DCHECK(!IsTemp(rs_rDI)); @@ -293,17 +308,29 @@ void X86Mir2Lir::GenSpecialEntryForSuspend() { fp_vmap_table_.clear(); if (!cu_->target64) { NewLIR1(kX86Push32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()), 0); NewLIR1(kX86Push32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()), 0); } NewLIR1(kX86Push32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* + cfi_.AdjustCFAOffset(kRegSize); + // Do not generate CFI for scratch register. } void X86Mir2Lir::GenSpecialExitForSuspend() { + const int kRegSize = cu_->target64 ? 8 : 4; // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) NewLIR1(kX86Pop32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* + cfi_.AdjustCFAOffset(-kRegSize); if (!cu_->target64) { NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum())); NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum())); } } diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 758684e835..a98a99ec4e 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -380,12 +380,6 @@ class X86Mir2Lir : public Mir2Lir { */ void InstallLiteralPools() OVERRIDE; - /* - * @brief Generate the debug_frame FDE information. - * @returns pointer to vector containing CFE information - */ - std::vector<uint8_t>* ReturnFrameDescriptionEntry() OVERRIDE; - LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; protected: @@ -958,12 +952,6 @@ class X86Mir2Lir : public Mir2Lir { // Instructions needing patching with PC relative code addresses. ArenaVector<LIR*> dex_cache_access_insns_; - // Prologue decrement of stack pointer. - LIR* stack_decrement_; - - // Epilogue increment of stack pointer. - LIR* stack_increment_; - // The list of const vector literals. LIR* const_vectors_; diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 5def5c8bb0..931294e2ff 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -830,6 +830,10 @@ RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, return rl_result; } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); @@ -928,6 +932,7 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { // Do we have a free register for intermediate calculations? RegStorage tmp = AllocTemp(false); + const int kRegSize = cu_->target64 ? 8 : 4; if (tmp == RegStorage::InvalidReg()) { /* * No, will use 'edi'. @@ -946,6 +951,11 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { IsTemp(rl_result.reg.GetHigh())); tmp = rs_rDI; NewLIR1(kX86Push32R, tmp.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. 
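+ // (A register already in core_spill_mask_ has a CFI save slot from the
+ // prologue that stays valid across this push/pop; re-recording it here would
+ // point the unwinder at a slot that disappears on the pop.)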
+ if (!CoreSpillMaskContains(tmp.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0); + } } // Now we are ready to do calculations. @@ -957,6 +967,10 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { // Let's put pop 'edi' here to break a bit the dependency chain. if (tmp == rs_rDI) { NewLIR1(kX86Pop32R, tmp.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(tmp.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg())); + } } else { FreeTemp(tmp); } @@ -1104,6 +1118,7 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // If is_long, high half is in info->args[5] RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object // If is_long, high half is in info->args[7] + const int kRegSize = cu_->target64 ? 8 : 4; if (is_long && cu_->target64) { // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX. @@ -1125,7 +1140,6 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { FreeTemp(rs_r0q); } else if (is_long) { // TODO: avoid unnecessary loads of SI and DI when the values are in registers. - // TODO: CFI support. FlushAllRegs(); LockCallTemps(); RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX); @@ -1148,11 +1162,21 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { NewLIR1(kX86Push32R, rs_rDI.GetReg()); MarkTemp(rs_rDI); LockTemp(rs_rDI); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0); + } } if (push_si) { NewLIR1(kX86Push32R, rs_rSI.GetReg()); MarkTemp(rs_rSI); LockTemp(rs_rSI); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(rs_rSI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0); + } } ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u); @@ -1183,11 +1207,19 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { FreeTemp(rs_rSI); UnmarkTemp(rs_rSI); NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(rs_rSI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum())); + } } if (push_di) { FreeTemp(rs_rDI); UnmarkTemp(rs_rDI); NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum())); + } } FreeCallTemps(); } else { diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index cad82a183e..926b75e35f 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -32,7 +32,6 @@ #include "mirror/string.h" #include "oat.h" #include "x86_lir.h" -#include "utils/dwarf_cfi.h" namespace art { @@ -725,6 +724,14 @@ int X86Mir2Lir::NumReservableVectorRegisters(bool long_or_fp) { return long_or_fp ? num_vector_temps - 2 : num_vector_temps - 1; } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + +static dwarf::Reg DwarfFpReg(bool is_x86_64, int num) { + return is_x86_64 ? 
dwarf::Reg::X86_64Fp(num) : dwarf::Reg::X86Fp(num); +} + void X86Mir2Lir::SpillCoreRegs() { if (num_core_spills_ == 0) { return; @@ -735,11 +742,11 @@ void X86Mir2Lir::SpillCoreRegs() { frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); OpSize size = cu_->target64 ? k64 : k32; const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - StoreBaseDisp(rs_rSP, offset, - cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg), - size, kNotVolatile); + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { + RegStorage r_src = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg); + StoreBaseDisp(rs_rSP, offset, r_src, size, kNotVolatile); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, reg), offset); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } @@ -754,10 +761,11 @@ void X86Mir2Lir::UnSpillCoreRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); OpSize size = cu_->target64 ? k64 : k32; const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - LoadBaseDisp(rs_rSP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg), - size, kNotVolatile); + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { + RegStorage r_dest = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg); + LoadBaseDisp(rs_rSP, offset, r_dest, size, kNotVolatile); + cfi_.Restore(DwarfCoreReg(cu_->target64, reg)); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } @@ -771,9 +779,10 @@ void X86Mir2Lir::SpillFPRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { StoreBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile); + cfi_.RelOffset(DwarfFpReg(cu_->target64, reg), offset); offset += sizeof(double); } } @@ -786,10 +795,11 @@ void X86Mir2Lir::UnSpillFPRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { LoadBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile); + cfi_.Restore(DwarfFpReg(cu_->target64, reg)); offset += sizeof(double); } } @@ -830,7 +840,6 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* class_type_address_insns_(arena->Adapter()), call_method_insns_(arena->Adapter()), dex_cache_access_insns_(arena->Adapter()), - stack_decrement_(nullptr), stack_increment_(nullptr), const_vectors_(nullptr) { method_address_insns_.reserve(100); class_type_address_insns_.reserve(100); @@ -1317,6 +1326,11 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { if (!cu_->target64) { // EDI is promotable in 32-bit mode. NewLIR1(kX86Push32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(4); + // Record cfi only if it is not already spilled. 
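The recurring "only if it is not already spilled" guard deserves a note: when the pushed register is in the method's core spill mask, the prologue has already emitted a save-slot rule for it, and that rule stays correct while the register is reused as a scratch (the prologue slot still holds the caller's value). Emitting a second rule here, and a .cfi_restore on the pop, would clobber the valid rule. A one-line sketch of the predicate (illustrative, not the ART API):

```cpp
#include <cstdint>

// Track a scratch push in the CFI only when the prologue did not already
// record a save slot for this register (mirrors !CoreSpillMaskContains(reg)).
bool ShouldTrackScratchPush(uint32_t core_spill_mask, int reg_num) {
  return (core_spill_mask & (1u << reg_num)) == 0u;
}
```

This is exactly what the CoreSpillMaskContains checks below test before recording the offset.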
+ if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0); + } } if (zero_based) { @@ -1412,8 +1426,13 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { // And join up at the end. all_done->target = NewLIR0(kPseudoTargetLabel); - if (!cu_->target64) + if (!cu_->target64) { NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-4); + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetReg())); + } + } // Out of line code returns here. if (slowpath_branch != nullptr) { @@ -1426,100 +1445,6 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { return true; } -static bool ARTRegIDToDWARFRegID(bool is_x86_64, int art_reg_id, int* dwarf_reg_id) { - if (is_x86_64) { - switch (art_reg_id) { - case 3 : *dwarf_reg_id = 3; return true; // %rbx - // This is the only discrepancy between ART & DWARF register numbering. - case 5 : *dwarf_reg_id = 6; return true; // %rbp - case 12: *dwarf_reg_id = 12; return true; // %r12 - case 13: *dwarf_reg_id = 13; return true; // %r13 - case 14: *dwarf_reg_id = 14; return true; // %r14 - case 15: *dwarf_reg_id = 15; return true; // %r15 - default: return false; // Should not get here - } - } else { - switch (art_reg_id) { - case 5: *dwarf_reg_id = 5; return true; // %ebp - case 6: *dwarf_reg_id = 6; return true; // %esi - case 7: *dwarf_reg_id = 7; return true; // %edi - default: return false; // Should not get here - } - } -} - -std::vector<uint8_t>* X86Mir2Lir::ReturnFrameDescriptionEntry() { - std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>; - - // Generate the FDE for the method. - DCHECK_NE(data_offset_, 0U); - - WriteFDEHeader(cfi_info, cu_->target64); - WriteFDEAddressRange(cfi_info, data_offset_, cu_->target64); - - // The instructions in the FDE. - if (stack_decrement_ != nullptr) { - // Advance LOC to just past the stack decrement. - uint32_t pc = NEXT_LIR(stack_decrement_)->offset; - DW_CFA_advance_loc(cfi_info, pc); - - // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size. - DW_CFA_def_cfa_offset(cfi_info, frame_size_); - - // Handle register spills - const uint32_t kSpillInstLen = (cu_->target64) ? 5 : 4; - const int kDataAlignmentFactor = (cu_->target64) ? -8 : -4; - uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum()); - int offset = -(GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - pc += kSpillInstLen; - - // Advance LOC to pass this instruction - DW_CFA_advance_loc(cfi_info, kSpillInstLen); - - int dwarf_reg_id; - if (ARTRegIDToDWARFRegID(cu_->target64, reg, &dwarf_reg_id)) { - // DW_CFA_offset_extended_sf reg offset - DW_CFA_offset_extended_sf(cfi_info, dwarf_reg_id, offset / kDataAlignmentFactor); - } - - offset += GetInstructionSetPointerSize(cu_->instruction_set); - } - } - - // We continue with that stack until the epilogue. - if (stack_increment_ != nullptr) { - uint32_t new_pc = NEXT_LIR(stack_increment_)->offset; - DW_CFA_advance_loc(cfi_info, new_pc - pc); - - // We probably have code snippets after the epilogue, so save the - // current state: DW_CFA_remember_state. - DW_CFA_remember_state(cfi_info); - - // We have now popped the stack: DW_CFA_def_cfa_offset 4/8. - // There is only the return PC on the stack now. 
- DW_CFA_def_cfa_offset(cfi_info, GetInstructionSetPointerSize(cu_->instruction_set)); - - // Everything after that is the same as before the epilogue. - // Stack bump was followed by RET instruction. - LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_)); - if (post_ret_insn != nullptr) { - pc = new_pc; - new_pc = post_ret_insn->offset; - DW_CFA_advance_loc(cfi_info, new_pc - pc); - // Restore the state: DW_CFA_restore_state. - DW_CFA_restore_state(cfi_info); - } - } - } - - PadCFI(cfi_info); - WriteCFILength(cfi_info, cu_->target64); - - return cfi_info; -} - void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) { switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) { case kMirOpReserveVectorRegisters: diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index f6b217a635..c2b837512c 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -2370,44 +2370,6 @@ bool CompilerDriver::WriteElf(const std::string& android_root, SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host); } -void CompilerDriver::InstructionSetToLLVMTarget(InstructionSet instruction_set, - std::string* target_triple, - std::string* target_cpu, - std::string* target_attr) { - switch (instruction_set) { - case kThumb2: - *target_triple = "thumb-none-linux-gnueabi"; - *target_cpu = "cortex-a9"; - *target_attr = "+thumb2,+neon,+neonfp,+vfp3,+db"; - break; - - case kArm: - *target_triple = "armv7-none-linux-gnueabi"; - // TODO: Fix for Nexus S. - *target_cpu = "cortex-a9"; - // TODO: Fix for Xoom. - *target_attr = "+v7,+neon,+neonfp,+vfp3,+db"; - break; - - case kX86: - *target_triple = "i386-pc-linux-gnu"; - *target_attr = ""; - break; - - case kX86_64: - *target_triple = "x86_64-pc-linux-gnu"; - *target_attr = ""; - break; - - case kMips: - *target_triple = "mipsel-unknown-linux"; - *target_attr = "mips32r2"; - break; - - default: - LOG(FATAL) << "Unknown instruction set: " << instruction_set; - } - } bool CompilerDriver::SkipCompilation(const std::string& method_name) { if (!profile_present_) { diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index edd1bd263f..a6ed5590dc 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -385,12 +385,6 @@ class CompilerDriver { OatWriter* oat_writer, File* file); - // TODO: move to a common home for llvm helpers once quick/portable are merged. - static void InstructionSetToLLVMTarget(InstructionSet instruction_set, - std::string* target_triple, - std::string* target_cpu, - std::string* target_attr); - void SetCompilerContext(void* compiler_context) { compiler_context_ = compiler_context; } diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h index cc4ef8fde1..85186bbc22 100644 --- a/compiler/dwarf/debug_frame_opcode_writer.h +++ b/compiler/dwarf/debug_frame_opcode_writer.h @@ -150,7 +150,7 @@ class DebugFrameOpCodeWriter : private Writer<Allocator> { } void RememberState() { - // Note that we do not need to advance the PC. 
+ ImplicitlyAdvancePC(); this->PushUint8(DW_CFA_remember_state); } @@ -236,6 +236,10 @@ class DebugFrameOpCodeWriter : private Writer<Allocator> { this->PushData(expr, expr_size); } + int GetCurrentPC() const { + return current_pc_; + } + int GetCurrentCFAOffset() const { return current_cfa_offset_; } diff --git a/compiler/dwarf/debug_frame_writer.h b/compiler/dwarf/debug_frame_writer.h index 6de45f5526..b104cc9408 100644 --- a/compiler/dwarf/debug_frame_writer.h +++ b/compiler/dwarf/debug_frame_writer.h @@ -33,8 +33,15 @@ class DebugFrameWriter FINAL : private Writer<Allocator> { int initial_opcodes_size) { DCHECK(cie_header_start_ == ~0u); cie_header_start_ = this->data()->size(); - this->PushUint32(0); // Length placeholder. - this->PushUint32(0); // CIE id. + if (use_64bit_address_) { + // TODO: This is not related to being 64bit. + this->PushUint32(0xffffffff); + this->PushUint64(0); // Length placeholder. + this->PushUint64(0); // CIE id. + } else { + this->PushUint32(0); // Length placeholder. + this->PushUint32(0); // CIE id. + } this->PushUint8(1); // Version. this->PushString("zR"); this->PushUleb128(DebugFrameOpCodeWriter<Allocator>::kCodeAlignmentFactor); @@ -48,7 +55,11 @@ class DebugFrameWriter FINAL : private Writer<Allocator> { } this->PushData(initial_opcodes, initial_opcodes_size); this->Pad(use_64bit_address_ ? 8 : 4); - this->UpdateUint32(cie_header_start_, this->data()->size() - cie_header_start_ - 4); + if (use_64bit_address_) { + this->UpdateUint64(cie_header_start_ + 4, this->data()->size() - cie_header_start_ - 12); + } else { + this->UpdateUint32(cie_header_start_, this->data()->size() - cie_header_start_ - 4); + } } void WriteCIE(Reg return_address_register, @@ -62,8 +73,15 @@ class DebugFrameWriter FINAL : private Writer<Allocator> { int unwind_opcodes_size) { DCHECK(cie_header_start_ != ~0u); size_t fde_header_start = this->data()->size(); - this->PushUint32(0); // Length placeholder. - this->PushUint32(this->data()->size() - cie_header_start_); // 'CIE_pointer' + if (use_64bit_address_) { + // TODO: This is not related to being 64bit. + this->PushUint32(0xffffffff); + this->PushUint64(0); // Length placeholder. + this->PushUint64(this->data()->size() - cie_header_start_); // 'CIE_pointer' + } else { + this->PushUint32(0); // Length placeholder. + this->PushUint32(this->data()->size() - cie_header_start_); // 'CIE_pointer' + } if (use_64bit_address_) { this->PushUint64(initial_address); this->PushUint64(address_range); @@ -74,7 +92,11 @@ class DebugFrameWriter FINAL : private Writer<Allocator> { this->PushUleb128(0); // Augmentation data size. this->PushData(unwind_opcodes, unwind_opcodes_size); this->Pad(use_64bit_address_ ? 8 : 4); - this->UpdateUint32(fde_header_start, this->data()->size() - fde_header_start - 4); + if (use_64bit_address_) { + this->UpdateUint64(fde_header_start + 4, this->data()->size() - fde_header_start - 12); + } else { + this->UpdateUint32(fde_header_start, this->data()->size() - fde_header_start - 4); + } } DebugFrameWriter(std::vector<uint8_t, Allocator>* buffer, bool use_64bit_address) diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc index f3553bcc99..2b051c9e12 100644 --- a/compiler/dwarf/dwarf_test.cc +++ b/compiler/dwarf/dwarf_test.cc @@ -127,7 +127,8 @@ TEST_F(DwarfTest, DebugFrame) { CheckObjdumpOutput(is64bit, "-W"); } -TEST_F(DwarfTest, DebugFrame64) { +// TODO: objdump seems to have trouble with 64bit CIE length. 
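For context on the DebugFrameWriter hunks above: in the DWARF 64-bit format the 4-byte initial length is set to 0xffffffff as an escape marker, a genuine 64-bit length follows, and the CIE id / CIE_pointer fields widen to 8 bytes as well. That is why the patch-back writes at cie_header_start_ + 4 and subtracts 12 (the 4-byte escape plus the 8-byte length field, both excluded from the length). A sketch of the encoding as the DWARF spec defines it (illustrative helper, not the ART writer):

```cpp
#include <cstdint>
#include <vector>

// DWARF 64-bit "initial length": 0xffffffff escape, then a little-endian
// 64-bit length that does not count the escape or the length field itself.
void PushInitialLength64(std::vector<uint8_t>* out, uint64_t length) {
  for (int i = 0; i < 4; i++) {
    out->push_back(0xff);  // Escape marker.
  }
  for (int i = 0; i < 8; i++) {
    out->push_back(static_cast<uint8_t>(length >> (8 * i)));
  }
}
```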
+TEST_F(DwarfTest, DISABLED_DebugFrame64) { const bool is64bit = true; DebugFrameWriter<> eh_frame(&eh_frame_data_, is64bit); DebugFrameOpCodeWriter<> no_opcodes; diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index 24cb364d08..354c71ec12 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -89,114 +89,126 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, return elf_writer.Write(oat_writer, dex_files, android_root, is_host); } -std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) { - std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>; - - // Length (will be filled in later in this routine). - if (is_x86_64) { - Push32(cfi_info, 0xffffffff); // Indicates 64bit - Push32(cfi_info, 0); - Push32(cfi_info, 0); - } else { - Push32(cfi_info, 0); - } - - // CIE id: always 0. - if (is_x86_64) { - Push32(cfi_info, 0); - Push32(cfi_info, 0); - } else { - Push32(cfi_info, 0); - } - - // Version: always 1. - cfi_info->push_back(0x01); - - // Augmentation: 'zR\0' - cfi_info->push_back(0x7a); - cfi_info->push_back(0x52); - cfi_info->push_back(0x0); - - // Code alignment: 1. - EncodeUnsignedLeb128(1, cfi_info); - - // Data alignment. - if (is_x86_64) { - EncodeSignedLeb128(-8, cfi_info); - } else { - EncodeSignedLeb128(-4, cfi_info); - } - - // Return address register. - if (is_x86_64) { - // R16(RIP) - cfi_info->push_back(0x10); - } else { - // R8(EIP) - cfi_info->push_back(0x08); - } - - // Augmentation length: 1. - cfi_info->push_back(1); - - // Augmentation data. - if (is_x86_64) { - // 0x04 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8). - cfi_info->push_back(0x04); - } else { - // 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4). - cfi_info->push_back(0x03); - } - - // Initial instructions. - if (is_x86_64) { - // DW_CFA_def_cfa R7(RSP) 8. - cfi_info->push_back(0x0c); - cfi_info->push_back(0x07); - cfi_info->push_back(0x08); - - // DW_CFA_offset R16(RIP) 1 (* -8). - cfi_info->push_back(0x90); - cfi_info->push_back(0x01); - } else { - // DW_CFA_def_cfa R4(ESP) 4. - cfi_info->push_back(0x0c); - cfi_info->push_back(0x04); - cfi_info->push_back(0x04); - - // DW_CFA_offset R8(EIP) 1 (* -4). - cfi_info->push_back(0x88); - cfi_info->push_back(0x01); - } - - // Padding to a multiple of 4 - while ((cfi_info->size() & 3) != 0) { - // DW_CFA_nop is encoded as 0. - cfi_info->push_back(0); - } - - // Set the length of the CIE inside the generated bytes. - if (is_x86_64) { - uint32_t length = cfi_info->size() - 12; - UpdateWord(cfi_info, 4, length); - } else { - uint32_t length = cfi_info->size() - 4; - UpdateWord(cfi_info, 0, length); - } - return cfi_info; -} - -std::vector<uint8_t>* ConstructCIEFrame(InstructionSet isa) { +void WriteCIE(dwarf::DebugFrameWriter<>* cfi, InstructionSet isa) { + // Scratch registers should be marked as undefined. This tells the + // debugger that its value in the previous frame is not recoverable. switch (isa) { - case kX86: - return ConstructCIEFrameX86(false); - case kX86_64: - return ConstructCIEFrameX86(true); - - default: - // Not implemented. - return nullptr; + case kArm: + case kThumb2: { + dwarf::DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(dwarf::Reg::ArmCore(13), 0); // R13(SP). + // core registers. + for (int reg = 0; reg < 13; reg++) { + if (reg < 4 || reg == 12) { + opcodes.Undefined(dwarf::Reg::ArmCore(reg)); + } else { + opcodes.SameValue(dwarf::Reg::ArmCore(reg)); + } + } + // fp registers. 
+ for (int reg = 0; reg < 32; reg++) { + if (reg < 16) { + opcodes.Undefined(dwarf::Reg::ArmFp(reg)); + } else { + opcodes.SameValue(dwarf::Reg::ArmFp(reg)); + } + } + auto return_address_reg = dwarf::Reg::ArmCore(14); // R14(LR). + cfi->WriteCIE(return_address_reg, opcodes); + return; + } + case kArm64: { + dwarf::DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(dwarf::Reg::Arm64Core(31), 0); // R31(SP). + // core registers. + for (int reg = 0; reg < 30; reg++) { + if (reg < 8 || reg == 16 || reg == 17) { + opcodes.Undefined(dwarf::Reg::Arm64Core(reg)); + } else { + opcodes.SameValue(dwarf::Reg::Arm64Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 32; reg++) { + if (reg < 8 || reg >= 16) { + opcodes.Undefined(dwarf::Reg::Arm64Fp(reg)); + } else { + opcodes.SameValue(dwarf::Reg::Arm64Fp(reg)); + } + } + auto return_address_reg = dwarf::Reg::Arm64Core(30); // R30(LR). + cfi->WriteCIE(return_address_reg, opcodes); + return; + } + case kMips: + case kMips64: { + dwarf::DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(dwarf::Reg::MipsCore(29), 0); // R29(SP). + // core registers. + for (int reg = 1; reg < 26; reg++) { + if (reg < 16 || reg == 24 || reg == 25) { // AT, V*, A*, T*. + opcodes.Undefined(dwarf::Reg::MipsCore(reg)); + } else { + opcodes.SameValue(dwarf::Reg::MipsCore(reg)); + } + } + auto return_address_reg = dwarf::Reg::MipsCore(31); // R31(RA). + cfi->WriteCIE(return_address_reg, opcodes); + return; + } + case kX86: { + dwarf::DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(dwarf::Reg::X86Core(4), 4); // R4(ESP). + opcodes.Offset(dwarf::Reg::X86Core(8), -4); // R8(EIP). + // core registers. + for (int reg = 0; reg < 8; reg++) { + if (reg <= 3) { + opcodes.Undefined(dwarf::Reg::X86Core(reg)); + } else if (reg == 4) { + // Stack pointer. + } else { + opcodes.SameValue(dwarf::Reg::X86Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 8; reg++) { + opcodes.Undefined(dwarf::Reg::X86Fp(reg)); + } + auto return_address_reg = dwarf::Reg::X86Core(8); // R8(EIP). + cfi->WriteCIE(return_address_reg, opcodes); + return; + } + case kX86_64: { + dwarf::DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(dwarf::Reg::X86_64Core(4), 8); // R4(RSP). + opcodes.Offset(dwarf::Reg::X86_64Core(16), -8); // R16(RIP). + // core registers. + for (int reg = 0; reg < 16; reg++) { + if (reg == 4) { + // Stack pointer. + } else if (reg < 12 && reg != 3 && reg != 5) { // except EBX and EBP. + opcodes.Undefined(dwarf::Reg::X86_64Core(reg)); + } else { + opcodes.SameValue(dwarf::Reg::X86_64Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 16; reg++) { + if (reg < 12) { + opcodes.Undefined(dwarf::Reg::X86_64Fp(reg)); + } else { + opcodes.SameValue(dwarf::Reg::X86_64Fp(reg)); + } + } + auto return_address_reg = dwarf::Reg::X86_64Core(16); // R16(RIP). 
+ cfi->WriteCIE(return_address_reg, opcodes); + return; + } + case kNone: + break; } + LOG(FATAL) << "Can not write CIE frame for ISA " << isa; + UNREACHABLE(); } class OatWriterWrapper FINAL : public CodeOutput { @@ -621,8 +633,10 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, ElfBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, Elf_Sym, Elf_Ehdr, Elf_Phdr, Elf_Shdr>* builder, OatWriter* oat_writer) { - std::unique_ptr<std::vector<uint8_t>> cfi_info( - ConstructCIEFrame(compiler_driver->GetInstructionSet())); + std::vector<uint8_t> cfi_data; + bool is_64bit = Is64BitInstructionSet(compiler_driver->GetInstructionSet()); + dwarf::DebugFrameWriter<> cfi(&cfi_data, is_64bit); + WriteCIE(&cfi, compiler_driver->GetInstructionSet()); Elf_Addr text_section_address = builder->GetTextBuilder().GetSection()->sh_addr; @@ -644,62 +658,17 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, 0, STB_LOCAL, STT_NOTYPE); } - // Include CFI for compiled method, if possible. - if (cfi_info.get() != nullptr) { - DCHECK(it->compiled_method_ != nullptr); - - // Copy in the FDE, if present - const SwapVector<uint8_t>* fde = it->compiled_method_->GetCFIInfo(); - if (fde != nullptr) { - // Copy the information into cfi_info and then fix the address in the new copy. - int cur_offset = cfi_info->size(); - cfi_info->insert(cfi_info->end(), fde->begin(), fde->end()); - - bool is_64bit = *(reinterpret_cast<const uint32_t*>(fde->data())) == 0xffffffff; - - // Set the 'CIE_pointer' field. - uint64_t CIE_pointer = cur_offset + (is_64bit ? 12 : 4); - uint64_t offset_to_update = CIE_pointer; - if (is_64bit) { - (*cfi_info)[offset_to_update+0] = CIE_pointer; - (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8; - (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16; - (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24; - (*cfi_info)[offset_to_update+4] = CIE_pointer >> 32; - (*cfi_info)[offset_to_update+5] = CIE_pointer >> 40; - (*cfi_info)[offset_to_update+6] = CIE_pointer >> 48; - (*cfi_info)[offset_to_update+7] = CIE_pointer >> 56; - } else { - (*cfi_info)[offset_to_update+0] = CIE_pointer; - (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8; - (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16; - (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24; - } - - // Set the 'initial_location' field. - offset_to_update += is_64bit ? 8 : 4; - if (is_64bit) { - const uint64_t quick_code_start = it->low_pc_ + text_section_address; - (*cfi_info)[offset_to_update+0] = quick_code_start; - (*cfi_info)[offset_to_update+1] = quick_code_start >> 8; - (*cfi_info)[offset_to_update+2] = quick_code_start >> 16; - (*cfi_info)[offset_to_update+3] = quick_code_start >> 24; - (*cfi_info)[offset_to_update+4] = quick_code_start >> 32; - (*cfi_info)[offset_to_update+5] = quick_code_start >> 40; - (*cfi_info)[offset_to_update+6] = quick_code_start >> 48; - (*cfi_info)[offset_to_update+7] = quick_code_start >> 56; - } else { - const uint32_t quick_code_start = it->low_pc_ + text_section_address; - (*cfi_info)[offset_to_update+0] = quick_code_start; - (*cfi_info)[offset_to_update+1] = quick_code_start >> 8; - (*cfi_info)[offset_to_update+2] = quick_code_start >> 16; - (*cfi_info)[offset_to_update+3] = quick_code_start >> 24; - } - } + // Include FDE for compiled method, if possible. + DCHECK(it->compiled_method_ != nullptr); + const SwapVector<uint8_t>* unwind_opcodes = it->compiled_method_->GetCFIInfo(); + if (unwind_opcodes != nullptr) { + // TUNING: The headers take a lot of space. 
Can we have 1 FDE per file? + // TUNING: Some tools support compressed DWARF sections (.zdebug_*). + cfi.WriteFDE(text_section_address + it->low_pc_, it->high_pc_ - it->low_pc_, + unwind_opcodes->data(), unwind_opcodes->size()); } } - bool hasCFI = (cfi_info.get() != nullptr); bool hasLineInfo = false; for (auto& dbg_info : oat_writer->GetCFIMethodInfo()) { if (dbg_info.dbgstream_ != nullptr && @@ -709,7 +678,8 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, } } - if (hasLineInfo || hasCFI) { + if (!method_info.empty() && + compiler_driver->GetCompilerOptions().GetGenerateGDBInformation()) { ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0); @@ -731,14 +701,12 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, builder->RegisterRawSection(debug_info); builder->RegisterRawSection(debug_abbrev); - if (hasCFI) { - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> eh_frame(".eh_frame", - SHT_PROGBITS, - SHF_ALLOC, - nullptr, 0, 4, 0); - eh_frame.SetBuffer(std::move(*cfi_info.get())); - builder->RegisterRawSection(eh_frame); - } + ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> eh_frame(".eh_frame", + SHT_PROGBITS, + SHF_ALLOC, + nullptr, 0, 4, 0); + eh_frame.SetBuffer(std::move(cfi_data)); + builder->RegisterRawSection(eh_frame); if (hasLineInfo) { builder->RegisterRawSection(debug_line); diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 2d9e03a718..45e2fd0ffe 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -93,7 +93,6 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, // Assembler that holds generated instructions std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set)); - jni_asm->InitializeFrameDescriptionEntry(); // Offsets into data structures // TODO: if cross compiling these offsets are for the host not the target @@ -432,19 +431,14 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); __ FinalizeInstructions(code); - jni_asm->FinalizeFrameDescriptionEntry(); - std::vector<uint8_t>* fde(jni_asm->GetFrameDescriptionEntry()); - ArrayRef<const uint8_t> cfi_ref; - if (fde != nullptr) { - cfi_ref = ArrayRef<const uint8_t>(*fde); - } + return CompiledMethod::SwapAllocCompiledMethodCFI(driver, instruction_set, ArrayRef<const uint8_t>(managed_code), frame_size, main_jni_conv->CoreSpillMask(), main_jni_conv->FpSpillMask(), - cfi_ref); + ArrayRef<const uint8_t>()); } // Copy a single parameter from the managed to the JNI calling convention diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index 4267743097..b17cbca2d2 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -48,22 +48,30 @@ void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t liter uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11; value |= 0xf000d000; // BL - uint8_t* addr = &(*code)[literal_offset]; // Check that we're just overwriting an existing BL. - DCHECK_EQ(addr[1] & 0xf8, 0xf0); - DCHECK_EQ(addr[3] & 0xd0, 0xd0); + DCHECK_EQ(GetInsn32(code, literal_offset) & 0xf800d000, 0xf000d000); // Write the new BL. 
- addr[0] = (value >> 16) & 0xff; - addr[1] = (value >> 24) & 0xff; - addr[2] = (value >> 0) & 0xff; - addr[3] = (value >> 8) & 0xff; + SetInsn32(code, literal_offset, value); } -void Thumb2RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unexpected relative dex cache array patch."; +void Thumb2RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + uint32_t literal_offset = patch.LiteralOffset(); + uint32_t pc_literal_offset = patch.PcInsnOffset(); + uint32_t pc_base = patch_offset + (pc_literal_offset - literal_offset) + 4u /* PC adjustment */; + uint32_t diff = target_offset - pc_base; + + uint32_t insn = GetInsn32(code, literal_offset); + DCHECK_EQ(insn & 0xff7ff0ffu, 0xf2400000u); // MOVW/MOVT, unpatched (imm16 == 0). + uint32_t diff16 = ((insn & 0x00800000u) != 0u) ? (diff >> 16) : (diff & 0xffffu); + uint32_t imm4 = (diff16 >> 12) & 0xfu; + uint32_t imm = (diff16 >> 11) & 0x1u; + uint32_t imm3 = (diff16 >> 8) & 0x7u; + uint32_t imm8 = diff16 & 0xffu; + insn = (insn & 0xfbf08f00u) | (imm << 26) | (imm4 << 16) | (imm3 << 12) | imm8; + SetInsn32(code, literal_offset, insn); } std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() { @@ -80,5 +88,31 @@ std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() { return thunk_code; } +void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { + DCHECK_LE(offset + 4u, code->size()); + DCHECK_EQ(offset & 1u, 0u); + uint8_t* addr = &(*code)[offset]; + addr[0] = (value >> 16) & 0xff; + addr[1] = (value >> 24) & 0xff; + addr[2] = (value >> 0) & 0xff; + addr[3] = (value >> 8) & 0xff; +} + +uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) { + DCHECK_LE(offset + 4u, code.size()); + DCHECK_EQ(offset & 1u, 0u); + const uint8_t* addr = &code[offset]; + return + (static_cast<uint32_t>(addr[0]) << 16) + + (static_cast<uint32_t>(addr[1]) << 24) + + (static_cast<uint32_t>(addr[2]) << 0)+ + (static_cast<uint32_t>(addr[3]) << 8); +} + +template <typename Alloc> +uint32_t Thumb2RelativePatcher::GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset) { + return GetInsn32(ArrayRef<const uint8_t>(*code), offset); +} + } // namespace linker } // namespace art diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h index 561130305e..2d474c2db0 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -34,6 +34,12 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { private: static std::vector<uint8_t> CompileThunkCode(); + void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); + static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset); + + template <typename Alloc> + static uint32_t GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset); + // PC displacement from patch location; Thumb2 PC is always at instruction address + 4. 
static constexpr int32_t kPcDisplacement = 4; diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc index 3b397cc5fb..a057a4cf16 100644 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -121,6 +121,48 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { result.push_back(static_cast<uint8_t>(bl >> 8)); return result; } + + void TestDexCachereference(uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + dex_cache_arrays_begin_ = dex_cache_arrays_begin; + static const uint8_t raw_code[] = { + 0x40, 0xf2, 0x00, 0x00, // MOVW r0, #0 (placeholder) + 0xc0, 0xf2, 0x00, 0x00, // MOVT r0, #0 (placeholder) + 0x78, 0x44, // ADD r0, pc + }; + constexpr uint32_t pc_insn_offset = 8u; + const ArrayRef<const uint8_t> code(raw_code); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(0u, nullptr, pc_insn_offset, element_offset), + LinkerPatch::DexCacheArrayPatch(4u, nullptr, pc_insn_offset, element_offset), + }; + AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t pc_base_offset = method1_offset + pc_insn_offset + 4u /* PC adjustment */; + uint32_t diff = dex_cache_arrays_begin_ + element_offset - pc_base_offset; + // Distribute the bits of the diff between the MOVW and MOVT: + uint32_t diffw = diff & 0xffffu; + uint32_t difft = diff >> 16; + uint32_t movw = 0xf2400000u | // MOVW r0, #0 (placeholder), + ((diffw & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, + ((diffw & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, + ((diffw & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, + ((diffw & 0x00ffu)); // keep imm8 at bits 0-7. + uint32_t movt = 0xf2c00000u | // MOVT r0, #0 (placeholder), + ((difft & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, + ((difft & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, + ((difft & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, + ((difft & 0x00ffu)); // keep imm8 at bits 0-7. 
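The bit shuffling in the test above mirrors the patcher: a Thumb2 MOVW/MOVT spreads its 16-bit immediate across four fields of the 32-bit encoding, and the instruction itself is stored as two little-endian 16-bit half-words, which is why GetInsn32/SetInsn32 assemble bytes as addr[0]<<16, addr[1]<<24, addr[2]<<0, addr[3]<<8 instead of doing a single 32-bit load. A sketch of the packing under those assumptions (plain helper, not ART code):

```cpp
#include <cstdint>

// Scatter a 16-bit immediate into the imm4/i/imm3/imm8 fields of a Thumb2
// MOVW (base 0xf2400000) or MOVT (base 0xf2c00000) encoding.
uint32_t PackMovImm16(uint32_t base_opcode, uint32_t imm16) {
  return base_opcode |
         ((imm16 & 0xf000u) << (16 - 12)) |  // imm4 -> bits 16-19.
         ((imm16 & 0x0800u) << (26 - 11)) |  // i    -> bit 26.
         ((imm16 & 0x0700u) << (12 - 8)) |   // imm3 -> bits 12-14.
         (imm16 & 0x00ffu);                  // imm8 stays in bits 0-7.
}
// E.g. the MOVW above is PackMovImm16(0xf2400000u, diff & 0xffffu) and the
// MOVT is PackMovImm16(0xf2c00000u, diff >> 16).
```

The expected_code bytes that follow then store each 32-bit instruction as two little-endian half-words.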
+ const uint8_t expected_code[] = { + static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24), + static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8), + static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24), + static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8), + 0x78, 0x44, + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + } }; const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { @@ -285,5 +327,25 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { EXPECT_TRUE(CheckThunk(thunk_offset)); } +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm8) { + TestDexCachereference(0x00ff0000u, 0x00fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm3) { + TestDexCachereference(0x02ff0000u, 0x05fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm) { + TestDexCachereference(0x08ff0000u, 0x08fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceimm4) { + TestDexCachereference(0xd0ff0000u, 0x60fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + } // namespace linker } // namespace art diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 439e85ca6c..5cb02172d2 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1659,11 +1659,26 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct Register lhs = InputRegisterAt(condition, 0); Operand rhs = InputOperandAt(condition, 1); Condition arm64_cond = ARM64Condition(condition->GetCondition()); - if ((arm64_cond == eq || arm64_cond == ne) && rhs.IsImmediate() && (rhs.immediate() == 0)) { - if (arm64_cond == eq) { - __ Cbz(lhs, true_target); - } else { - __ Cbnz(lhs, true_target); + if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) { + switch (arm64_cond) { + case eq: + __ Cbz(lhs, true_target); + break; + case ne: + __ Cbnz(lhs, true_target); + break; + case lt: + // Test the sign bit and branch accordingly. + __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + case ge: + // Test the sign bit and branch accordingly. + __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + default: + // Without the `static_cast` the compiler throws an error for + // `-Werror=sign-promo`. + LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); } } else { __ Cmp(lhs, rhs); diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc new file mode 100644 index 0000000000..26cab2ff09 --- /dev/null +++ b/compiler/optimizing/code_generator_utils.cc @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "code_generator_utils.h" + +#include "base/logging.h" + +void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, + int64_t* magic, int* shift) { + // It does not make sense to calculate magic and shift for a zero divisor. + DCHECK_NE(divisor, 0); + + /* According to the implementation from H. S. Warren's "Hacker's Delight" (Addison Wesley, 2002) + * Chapter 10 and T. Granlund and P. L. Montgomery's "Division by Invariant Integers Using + * Multiplication" (PLDI 1994). + * The magic number M and shift S can be calculated in the following way: + * Let nc be the most positive value of numerator(n) such that nc = kd - 1, + * where divisor(d) >= 2. + * Let nc be the most negative value of numerator(n) such that nc = kd + 1, + * where divisor(d) <= -2. + * Thus nc can be calculated like: + * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long + * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long + * + * So the shift p is the smallest p satisfying + * 2^p > nc * (d - 2^p % d), where d >= 2 + * 2^p > nc * (d + 2^p % d), where d <= -2. + * + * The magic number M is calculated by + * M = (2^p + d - 2^p % d) / d, where d >= 2 + * M = (2^p - d - 2^p % d) / d, where d <= -2. + * + * Notice that p is always greater than or equal to 32 (resp. 64), so we just return p - 32 + * (resp. p - 64) as the shift number S. + */ + + int64_t p = is_long ? 63 : 31; + const uint64_t exp = is_long ? (UINT64_C(1) << 63) : (UINT32_C(1) << 31); + + // Initialize the computations. + uint64_t abs_d = (divisor >= 0) ? divisor : -divisor; + uint64_t tmp = exp + (is_long ? static_cast<uint64_t>(divisor) >> 63 : + static_cast<uint32_t>(divisor) >> 31); + uint64_t abs_nc = tmp - 1 - tmp % abs_d; + uint64_t quotient1 = exp / abs_nc; + uint64_t remainder1 = exp % abs_nc; + uint64_t quotient2 = exp / abs_d; + uint64_t remainder2 = exp % abs_d; + + /* + * To avoid handling positive and negative divisors separately, + * "Hacker's Delight" introduces a method to handle these 2 cases together. + */ + uint64_t delta; + do { + p++; + quotient1 = 2 * quotient1; + remainder1 = 2 * remainder1; + if (remainder1 >= abs_nc) { + quotient1++; + remainder1 = remainder1 - abs_nc; + } + quotient2 = 2 * quotient2; + remainder2 = 2 * remainder2; + if (remainder2 >= abs_d) { + quotient2++; + remainder2 = remainder2 - abs_d; + } + delta = abs_d - remainder2; + } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0)); + + *magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1); + + if (!is_long) { + *magic = static_cast<int>(*magic); + } + + *shift = is_long ? p - 64 : p - 32; +} + diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h new file mode 100644 index 0000000000..742d67565a --- /dev/null +++ b/compiler/optimizing/code_generator_utils.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ +#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ + +#include <cstdint> + +// Computes the magic number and the shift needed in the div/rem by constant algorithm +void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift); + +#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 92b62e2c84..c4fbc1d51c 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -16,6 +16,7 @@ #include "code_generator_x86.h" +#include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" @@ -2278,6 +2279,134 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { __ addl(ESP, Immediate(2 * elem_size)); } + +void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(1).IsConstant()); + + Register out_register = locations->Out().AsRegister<Register>(); + Register input_register = locations->InAt(0).AsRegister<Register>(); + int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ xorl(out_register, out_register); + } else { + __ movl(out_register, input_register); + if (imm == -1) { + __ negl(out_register); + } + } +} + + +void InstructionCodeGeneratorX86::DivByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv()); + + LocationSummary* locations = instruction->GetLocations(); + + Register out_register = locations->Out().AsRegister<Register>(); + Register input_register = locations->InAt(0).AsRegister<Register>(); + int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + + DCHECK(instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))); + Register num = locations->GetTemp(0).AsRegister<Register>(); + + __ leal(num, Address(input_register, std::abs(imm) - 1)); + __ testl(input_register, input_register); + __ cmovl(kGreaterEqual, num, input_register); + int shift = CTZ(imm); + __ sarl(num, Immediate(shift)); + + if (imm < 0) { + __ negl(num); + } + + __ movl(out_register, num); +} + +void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + + Register eax = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + Register num; + Register edx; + + if (instruction->IsDiv()) { + edx = locations->GetTemp(0).AsRegister<Register>(); + num = locations->GetTemp(1).AsRegister<Register>(); + } else { + edx = locations->Out().AsRegister<Register>(); + num = locations->GetTemp(0).AsRegister<Register>(); + } + + DCHECK_EQ(EAX, eax); + DCHECK_EQ(EDX, edx); + if (instruction->IsDiv()) { + DCHECK_EQ(EAX, out); + } else { + DCHECK_EQ(EDX, out); + } + + int64_t magic; + int shift; + CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + + Label ndiv; + Label end; + // If numerator is 0, the result is 0, no computation needed. 
+ __ testl(eax, eax); + __ j(kNotEqual, &ndiv); + + __ xorl(out, out); + __ jmp(&end); + + __ Bind(&ndiv); + + // Save the numerator. + __ movl(num, eax); + + // EAX = magic + __ movl(eax, Immediate(magic)); + + // EDX:EAX = magic * numerator + __ imull(num); + + if (imm > 0 && magic < 0) { + // EDX += num + __ addl(edx, num); + } else if (imm < 0 && magic > 0) { + __ subl(edx, num); + } + + // Shift if needed. + if (shift != 0) { + __ sarl(edx, Immediate(shift)); + } + + // EDX += 1 if EDX < 0 + __ movl(eax, edx); + __ shrl(edx, Immediate(31)); + __ addl(edx, eax); + + if (instruction->IsRem()) { + __ movl(eax, num); + __ imull(edx, Immediate(imm)); + __ subl(eax, edx); + __ movl(edx, eax); + } else { + __ movl(eax, edx); + } + __ Bind(&end); +} + void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); @@ -2289,28 +2418,42 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr switch (instruction->GetResultType()) { case Primitive::kPrimInt: { - Register second_reg = second.AsRegister<Register>(); DCHECK_EQ(EAX, first.AsRegister<Register>()); DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>()); - SlowPathCodeX86* slow_path = + if (second.IsConstant()) { + int imm = second.GetConstant()->AsIntConstant()->GetValue(); + + if (imm == 0) { + // Do not generate anything for 0. DivZeroCheck would forbid any generated code. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (is_div && IsPowerOfTwo(std::abs(imm))) { + DivByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(), - is_div); - codegen_->AddSlowPath(slow_path); + is_div); + codegen_->AddSlowPath(slow_path); - // 0x80000000/-1 triggers an arithmetic exception! - // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so - // it's safe to just use negl instead of more complex comparisons. - - __ cmpl(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); + Register second_reg = second.AsRegister<Register>(); + // 0x80000000/-1 triggers an arithmetic exception! + // Dividing by -1 is actually negation and -0x80000000 = 0x80000000 so + // it's safe to just use negl instead of more complex comparisons. - // edx:eax <- sign-extended of eax - __ cdq(); - // eax = quotient, edx = remainder - __ idivl(second_reg); + __ cmpl(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // edx:eax <- sign-extended of eax + __ cdq(); + // eax = quotient, edx = remainder + __ idivl(second_reg); + __ Bind(slow_path->GetExitLabel()); + } break; } @@ -2350,10 +2493,16 @@ void LocationsBuilderX86::VisitDiv(HDiv* div) { switch (div->GetResultType()) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); // Intel uses edx:eax as the dividend. locations->AddTemp(Location::RegisterLocation(EDX)); + // We need to save the numerator while we tweak eax and edx. As we are using imul in a way + // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as + // output and request another temp.
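To make the sequence above concrete, here is the same computation in plain C++ for one divisor. The constants are the ones CalculateMagicAndShiftForDivRem produces for d = 7 on 32 bits (magic 0x92492493, shift 2, per the Hacker's Delight tables); an arithmetic right shift for signed values is assumed, as on the targets at hand:

```cpp
#include <cassert>
#include <cstdint>

// Worked example (not ART code) of the instruction sequence that
// GenerateDivRemWithAnyConstant emits, written out for division by 7.
int32_t DivBy7(int32_t n) {
  const int32_t magic = static_cast<int32_t>(0x92492493);  // Negative as signed.
  int64_t product = static_cast<int64_t>(n) * magic;       // imull: EDX:EAX.
  int32_t hi = static_cast<int32_t>(product >> 32);        // EDX after imull.
  hi += n;                                // imm > 0 && magic < 0: addl edx, num.
  hi >>= 2;                               // sarl edx, shift.
  hi += static_cast<uint32_t>(hi) >> 31;  // Add 1 if negative (shrl + addl).
  return hi;                              // Quotient; remainder would be n - 7 * hi.
}

int main() {
  assert(DivBy7(100) == 14);
  assert(DivBy7(-100) == -14);
  return 0;
}
```

The extra temp requested just below exists so the numerator survives while EAX and EDX are clobbered by imul.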
+ if (div->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } case Primitive::kPrimLong: { @@ -2411,6 +2560,7 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { void LocationsBuilderX86::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); + LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong) ? LocationSummary::kCall : LocationSummary::kNoCall; @@ -2419,8 +2569,14 @@ void LocationsBuilderX86::VisitRem(HRem* rem) { switch (type) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RegisterLocation(EDX)); + // We need to save the numerator while we tweak eax and edx. As we are using imul in a way + // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as + // output and request another temp. + if (rem->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } case Primitive::kPrimLong: { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 0cc3c6533a..20f14fb3f4 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -163,6 +163,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateRemFP(HRem *rem); void HandleShift(HBinaryOperation* instruction); void GenerateShlLong(const Location& loc, Register shifter); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index cdbc7780a8..9e08558c8e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -16,6 +16,7 @@ #include "code_generator_x86_64.h" +#include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" @@ -2259,6 +2260,228 @@ void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) { __ addq(CpuRegister(RSP), Immediate(2 * elem_size)); } +void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); + CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>(); + int64_t imm; + if (second.GetConstant()->IsLongConstant()) { + imm = second.GetConstant()->AsLongConstant()->GetValue(); + } else { + imm = second.GetConstant()->AsIntConstant()->GetValue(); + } + + DCHECK(imm == 1 || imm == -1); + + switch (instruction->GetResultType()) { + case Primitive::kPrimInt: { + if (instruction->IsRem()) { + __ xorl(output_register, output_register); + } else { + __ movl(output_register, input_register); + if (imm == -1) { + __ negl(output_register); + } + } + break; + } + + case Primitive::kPrimLong: { + if 
(instruction->IsRem()) { + __ xorq(output_register, output_register); + } else { + __ movq(output_register, input_register); + if (imm == -1) { + __ negq(output_register); + } + } + break; + } + + default: + LOG(FATAL) << "Unreachable"; + } +} + +void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + + CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); + CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); + + int64_t imm; + if (instruction->GetResultType() == Primitive::kPrimLong) { + imm = second.GetConstant()->AsLongConstant()->GetValue(); + } else { + imm = second.GetConstant()->AsIntConstant()->GetValue(); + } + + DCHECK(IsPowerOfTwo(std::abs(imm))); + + CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); + + if (instruction->GetResultType() == Primitive::kPrimInt) { + __ leal(tmp, Address(numerator, std::abs(imm) - 1)); + __ testl(numerator, numerator); + __ cmov(kGreaterEqual, tmp, numerator); + int shift = CTZ(imm); + __ sarl(tmp, Immediate(shift)); + + if (imm < 0) { + __ negl(tmp); + } + + __ movl(output_register, tmp); + } else { + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); + + __ movq(rdx, Immediate(std::abs(imm) - 1)); + __ addq(rdx, numerator); + __ testq(numerator, numerator); + __ cmov(kGreaterEqual, rdx, numerator); + int shift = CTZ(imm); + __ sarq(rdx, Immediate(shift)); + + if (imm < 0) { + __ negq(rdx); + } + + __ movq(output_register, rdx); + } +} + +void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + + CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>() + : locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>() + : locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + DCHECK_EQ(RAX, eax.AsRegister()); + DCHECK_EQ(RDX, edx.AsRegister()); + if (instruction->IsDiv()) { + DCHECK_EQ(RAX, out.AsRegister()); + } else { + DCHECK_EQ(RDX, out.AsRegister()); + } + + int64_t magic; + int shift; + + // TODO: can these branches be written as one?
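Before the int and long branches that follow, it is worth spelling out what the lea/test/cmov/sar sequence in DivByPowerOfTwo above computes: signed division truncates toward zero, so a plain arithmetic shift would round negative numerators the wrong way, and biasing them by |d| - 1 first corrects that. In plain C++ (illustrative, with __builtin_ctz standing in for the CTZ helper and an arithmetic right shift assumed):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdlib>

// Sketch of the int path of DivByPowerOfTwo, not ART code.
int32_t DivByPowerOfTwo(int32_t n, int32_t d) {
  int32_t abs_d = std::abs(d);
  int shift = __builtin_ctz(abs_d);      // CTZ(imm) in the hunk above.
  int32_t biased = n + (abs_d - 1);      // leal tmp, [n + |d| - 1]
  int32_t num = (n >= 0) ? n : biased;   // testl + cmov(kGreaterEqual)
  int32_t q = num >> shift;              // sarl
  return (d < 0) ? -q : q;               // negl when the divisor is negative.
}

int main() {
  assert(DivByPowerOfTwo(-7, 8) == 0);   // A plain -7 >> 3 would give -1.
  assert(DivByPowerOfTwo(7, -8) == 0);
  assert(DivByPowerOfTwo(-16, 8) == -2);
  return 0;
}
```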
+ if (instruction->GetResultType() == Primitive::kPrimInt) { + int imm = second.GetConstant()->AsIntConstant()->GetValue(); + + CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + + __ movl(numerator, eax); + + Label no_div; + Label end; + __ testl(eax, eax); + __ j(kNotEqual, &no_div); + + __ xorl(out, out); + __ jmp(&end); + + __ Bind(&no_div); + + __ movl(eax, Immediate(magic)); + __ imull(numerator); + + if (imm > 0 && magic < 0) { + __ addl(edx, numerator); + } else if (imm < 0 && magic > 0) { + __ subl(edx, numerator); + } + + if (shift != 0) { + __ sarl(edx, Immediate(shift)); + } + + __ movl(eax, edx); + __ shrl(edx, Immediate(31)); + __ addl(edx, eax); + + if (instruction->IsRem()) { + __ movl(eax, numerator); + __ imull(edx, Immediate(imm)); + __ subl(eax, edx); + __ movl(edx, eax); + } else { + __ movl(eax, edx); + } + __ Bind(&end); + } else { + int64_t imm = second.GetConstant()->AsLongConstant()->GetValue(); + + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + + CpuRegister rax = eax; + CpuRegister rdx = edx; + + CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift); + + // Save the numerator. + __ movq(numerator, rax); + + // RAX = magic + __ movq(rax, Immediate(magic)); + + // RDX:RAX = magic * numerator + __ imulq(numerator); + + if (imm > 0 && magic < 0) { + // RDX += numerator + __ addq(rdx, numerator); + } else if (imm < 0 && magic > 0) { + // RDX -= numerator + __ subq(rdx, numerator); + } + + // Shift if needed. + if (shift != 0) { + __ sarq(rdx, Immediate(shift)); + } + + // RDX += 1 if RDX < 0 + __ movq(rax, rdx); + __ shrq(rdx, Immediate(63)); + __ addq(rdx, rax); + + if (instruction->IsRem()) { + __ movq(rax, numerator); + + if (IsInt<32>(imm)) { + __ imulq(rdx, Immediate(static_cast<int32_t>(imm))); + } else { + __ movq(numerator, Immediate(imm)); + __ imulq(rdx, numerator); + } + + __ subq(rax, rdx); + __ movq(rdx, rax); + } else { + __ movq(rax, rdx); + } + } +} + void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); @@ -2267,37 +2490,57 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in bool is_div = instruction->IsDiv(); LocationSummary* locations = instruction->GetLocations(); - CpuRegister out_reg = locations->Out().AsRegister<CpuRegister>(); - CpuRegister second_reg = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location second = locations->InAt(1); DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister()); - DCHECK_EQ(is_div ? RAX : RDX, out_reg.AsRegister()); + DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister()); - SlowPathCodeX86_64* slow_path = - new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64( - out_reg.AsRegister(), type, is_div); - codegen_->AddSlowPath(slow_path); + if (second.IsConstant()) { + int64_t imm; + if (second.GetConstant()->AsLongConstant()) { + imm = second.GetConstant()->AsLongConstant()->GetValue(); + } else { + imm = second.GetConstant()->AsIntConstant()->GetValue(); + } - // 0x80000000(00000000)/-1 triggers an arithmetic exception! - // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000) - // so it's safe to just use negl instead of more complex comparisons.
- if (type == Primitive::kPrimInt) { - __ cmpl(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); - // edx:eax <- sign-extended of eax - __ cdq(); - // eax = quotient, edx = remainder - __ idivl(second_reg); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) { + DivByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } } else { - __ cmpq(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); - // rdx:rax <- sign-extended of rax - __ cqo(); - // rax = quotient, rdx = remainder - __ idivq(second_reg); - } + SlowPathCodeX86_64* slow_path = + new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64( + out.AsRegister(), type, is_div); + codegen_->AddSlowPath(slow_path); - __ Bind(slow_path->GetExitLabel()); + CpuRegister second_reg = second.AsRegister<CpuRegister>(); + // 0x80000000(00000000)/-1 triggers an arithmetic exception! + // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000) + // so it's safe to just use negl instead of more complex comparisons. + if (type == Primitive::kPrimInt) { + __ cmpl(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); + // edx:eax <- sign-extended of eax + __ cdq(); + // eax = quotient, edx = remainder + __ idivl(second_reg); + } else { + __ cmpq(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); + // rdx:rax <- sign-extended of rax + __ cqo(); + // rax = quotient, rdx = remainder + __ idivq(second_reg); + } + __ Bind(slow_path->GetExitLabel()); + } } void LocationsBuilderX86_64::VisitDiv(HDiv* div) { @@ -2307,10 +2550,16 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) { case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RegisterLocation(RAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); // Intel uses edx:eax as the dividend. locations->AddTemp(Location::RegisterLocation(RDX)); + // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way + // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as + // output and request another temp. + if (div->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } @@ -2365,9 +2614,15 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) { case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RegisterLocation(RAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); // Intel uses rdx:rax as the dividend and puts the remainder in rdx locations->SetOut(Location::RegisterLocation(RDX)); + // We need to save the numerator while we tweak eax and edx. As we are using imul in a way + // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as + // output and request another temp. 
+ if (rem->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 375c0b03b9..be2a79e55e 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -173,6 +173,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateRemFP(HRem *rem); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleShift(HBinaryOperation* operation); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index b6e451057a..aec2d19b1d 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -320,6 +320,27 @@ void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); } +void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) { + CreateLongToLongLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + X86Assembler* assembler = GetAssembler(); + // Assign the inputs to the outputs, mixing low/high. + __ movl(output_lo, input_hi); + __ movl(output_hi, input_lo); + __ bswapl(output_lo); + __ bswapl(output_hi); +} + void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) { CreateIntToIntLocations(arena_, invoke); } @@ -1330,6 +1351,181 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); } +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + // Offset is a long, but in 32 bit mode, we only need the low word. + // Can we update the invoke here to remove a TypeConvert to Long? + locations->SetInAt(2, Location::RequiresRegister()); + // Expected value must be in EAX or EDX:EAX. + // For long, new value must be in ECX:EBX. + if (type == Primitive::kPrimLong) { + locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX)); + locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX)); + } else { + locations->SetInAt(3, Location::RegisterLocation(EAX)); + locations->SetInAt(4, Location::RequiresRegister()); + } + + // Force a byte register for the output. + locations->SetOut(Location::RegisterLocation(EAX)); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. 
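+    // (Only EAX, EBX, ECX and EDX have byte forms on x86, and EAX is already
+    // taken as the expected value and output, hence the explicit ECX below.)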
+ locations->AddTemp(Location::RequiresRegister()); + // Need a byte register for marking. + locations->AddTemp(Location::RegisterLocation(ECX)); + } +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); +} + +static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register base = locations->InAt(1).AsRegister<Register>(); + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); + Location out = locations->Out(); + DCHECK_EQ(out.AsRegister<Register>(), EAX); + + if (type == Primitive::kPrimLong) { + DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX); + DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX); + DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX); + DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX); + __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0)); + } else { + // Integer or object. + DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); + Register value = locations->InAt(4).AsRegister<Register>(); + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), + locations->GetTemp(1).AsRegister<Register>(), + base, + value); + } + + __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + } + + // locked cmpxchg has full barrier semantics, and we don't need scheduling + // barriers at this time. + + // Convert ZF into the boolean result. 
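+  // (setb only writes the low byte of the output, so it is zero-extended into
+  // the full 32-bit register by the movzxb that follows.)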
+ __ setb(kZero, out.AsRegister<Register>()); + __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) { + GenCAS(Primitive::kPrimInt, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { + GenCAS(Primitive::kPrimLong, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { + GenCAS(Primitive::kPrimNot, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask, + X86Assembler* assembler) { + Immediate imm_shift(shift); + Immediate imm_mask(mask); + __ movl(temp, reg); + __ shrl(reg, imm_shift); + __ andl(temp, imm_mask); + __ andl(reg, imm_mask); + __ shll(temp, imm_shift); + __ orl(reg, temp); +} + +void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register reg = locations->InAt(0).AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; + * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; + * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; + */ + __ bswapl(reg); + SwapBits(reg, temp, 1, 0x55555555, assembler); + SwapBits(reg, temp, 2, 0x33333333, assembler); + SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); +} + +void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>(); + Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + // We want to swap high/low, then bswap each one, and then do the same + // as a 32 bit reverse. + // Exchange high and low. 
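+  // reverse(hi:lo) == reverse(lo):reverse(hi), so the two words are swapped
+  // first and each half is then bit-reversed in place.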
+ __ movl(temp, reg_low); + __ movl(reg_low, reg_high); + __ movl(reg_high, temp); + + // bit-reverse low + __ bswapl(reg_low); + SwapBits(reg_low, temp, 1, 0x55555555, assembler); + SwapBits(reg_low, temp, 2, 0x33333333, assembler); + SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler); + + // bit-reverse high + __ bswapl(reg_high); + SwapBits(reg_high, temp, 1, 0x55555555, assembler); + SwapBits(reg_high, temp, 2, 0x33333333, assembler); + SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1338,16 +1534,10 @@ void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(LongReverseBytes) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) -UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) -UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) } // namespace x86 diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index f6fa013cc6..5122a00d92 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -1202,6 +1202,175 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); } +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + // expected value must be in EAX/RAX. + locations->SetInAt(3, Location::RegisterLocation(RAX)); + locations->SetInAt(4, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister()); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. 
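+    // (Presumably one temp serves as scratch and the other holds the card
+    // table address in MarkGCCard below.)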
+ locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); +} + +static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); + CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>(); + DCHECK_EQ(expected.AsRegister(), RAX); + CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + if (type == Primitive::kPrimLong) { + __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value); + } else { + // Integer or object. + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), + locations->GetTemp(1).AsRegister<CpuRegister>(), + base, + value); + } + + __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + } + + // locked cmpxchg has full barrier semantics, and we don't need scheduling + // barriers at this time. + + // Convert ZF into the boolean result. + __ setcc(kZero, out); + __ movzxb(out, out); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) { + GenCAS(Primitive::kPrimInt, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { + GenCAS(Primitive::kPrimLong, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + GenCAS(Primitive::kPrimNot, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask, + X86_64Assembler* assembler) { + Immediate imm_shift(shift); + Immediate imm_mask(mask); + __ movl(temp, reg); + __ shrl(reg, imm_shift); + __ andl(temp, imm_mask); + __ andl(reg, imm_mask); + __ shll(temp, imm_shift); + __ orl(reg, temp); +} + +void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. 
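+   * bswap performs the byte-level (8- and 16-bit) swaps; the three masked
+   * rounds then swap the remaining 1-, 2- and 4-bit groups: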
+ * x = bswap x + * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; + * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; + * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; + */ + __ bswapl(reg); + SwapBits(reg, temp, 1, 0x55555555, assembler); + SwapBits(reg, temp, 2, 0x33333333, assembler); + SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); +} + +void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask, + int32_t shift, int64_t mask, X86_64Assembler* assembler) { + Immediate imm_shift(shift); + __ movq(temp_mask, Immediate(mask)); + __ movq(temp, reg); + __ shrq(reg, imm_shift); + __ andq(temp, temp_mask); + __ andq(reg, temp_mask); + __ shlq(temp, imm_shift); + __ orq(reg, temp); +} + +void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a long number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555; + * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333; + * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F; + */ + __ bswapq(reg); + SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler); + SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler); + SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler); +} + // Unimplemented intrinsics. 
#define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1210,14 +1379,9 @@ void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) -UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) -UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) } // namespace x86_64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 90a530aa5e..12798edac5 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -204,8 +204,13 @@ class OptimizingCompiler FINAL : public Compiler { const std::vector<const art::DexFile*>& dex_files, const std::string& android_root, bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); + if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) { + return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } else { + return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } } void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 6286b106aa..3b42f63509 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -373,24 +373,34 @@ void Thumb2Assembler::ldrsh(Register rd, const Address& ad, Condition cond) { void Thumb2Assembler::ldrd(Register rd, const Address& ad, Condition cond) { + ldrd(rd, Register(rd + 1), ad, cond); +} + + +void Thumb2Assembler::ldrd(Register rd, Register rd2, const Address& ad, Condition cond) { CheckCondition(cond); - CHECK_EQ(rd % 2, 0); + // Encoding T1. // This is different from other loads. The encoding is like ARM. int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 | static_cast<int32_t>(rd) << 12 | - (static_cast<int32_t>(rd) + 1) << 8 | + static_cast<int32_t>(rd2) << 8 | ad.encodingThumbLdrdStrd(); Emit32(encoding); } void Thumb2Assembler::strd(Register rd, const Address& ad, Condition cond) { + strd(rd, Register(rd + 1), ad, cond); +} + + +void Thumb2Assembler::strd(Register rd, Register rd2, const Address& ad, Condition cond) { CheckCondition(cond); - CHECK_EQ(rd % 2, 0); + // Encoding T1. // This is different from other loads. The encoding is like ARM. int32_t encoding = B31 | B30 | B29 | B27 | B22 | static_cast<int32_t>(rd) << 12 | - (static_cast<int32_t>(rd) + 1) << 8 | + static_cast<int32_t>(rd2) << 8 | ad.encodingThumbLdrdStrd(); Emit32(encoding); } @@ -2613,14 +2623,16 @@ void Thumb2Assembler::StoreToOffset(StoreOperandType type, Register tmp_reg = kNoRegister; if (!Address::CanHoldStoreOffsetThumb(type, offset)) { CHECK_NE(base, IP); - if (reg != IP) { + if (reg != IP && + (type != kStoreWordPair || reg + 1 != IP)) { tmp_reg = IP; } else { - // Be careful not to use IP twice (for `reg` and to build the - // Address object used by the store instruction(s) below). 
-      // Instead, save R5 on the stack (or R6 if R5 is not available),
-      // use it as secondary temporary register, and restore it after
-      // the store instruction has been emitted.
+      // Be careful not to use IP twice (for `reg` (or `reg` + 1 in
+      // the case of a word-pair store) and to build the Address
+      // object used by the store instruction(s) below). Instead,
+      // save R5 on the stack (or R6 if R5 is not available), use it
+      // as secondary temporary register, and restore it after the
+      // store instruction has been emitted.
       tmp_reg = base != R5 ? R5 : R6;
       Push(tmp_reg);
       if (base == SP) {
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 81dd13894f..e33c240dbf 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -135,9 +135,17 @@ class Thumb2Assembler FINAL : public ArmAssembler {
   void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
   void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
 
+  // Load/store register dual instructions using registers `rd` and `rd` + 1.
   void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
   void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  // Load/store register dual instructions using registers `rd` and `rd2`.
+  // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding
+  // does not require `rd` to be even, nor `rd2` to be equal to `rd` + 1.
+  void ldrd(Register rd, Register rd2, const Address& ad, Condition cond);
+  void strd(Register rd, Register rd2, const Address& ad, Condition cond);
+
+
   void ldm(BlockAddressMode am,
            Register base,
            RegList regs,
            Condition cond = AL) OVERRIDE;
 
   void stm(BlockAddressMode am,
            Register base,
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 813996b0db..5f5561a499 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -247,4 +247,103 @@ TEST_F(AssemblerThumb2Test, add) {
   DriverStr(expected, "add");
 }
 
+TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWord;
+  int32_t offset = 4092;
+  ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  __ StoreToOffset(type, arm::IP, arm::SP, offset);
+  __ StoreToOffset(type, arm::IP, arm::R5, offset);
+
+  const char* expected =
+      "str r0, [sp, #4092]\n"
+      "str ip, [sp, #4092]\n"
+      "str ip, [r5, #4092]\n";
+  DriverStr(expected, "StoreWordToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordToNonThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWord;
+  int32_t offset = 4096;
+  ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  __ StoreToOffset(type, arm::IP, arm::SP, offset);
+  __ StoreToOffset(type, arm::IP, arm::R5, offset);
+
+  const char* expected =
+      "mov ip, #4096\n"       // LoadImmediate(ip, 4096)
+      "add ip, ip, sp\n"
+      "str r0, [ip, #0]\n"
+
+      "str r5, [sp, #-4]!\n"  // Push(r5)
+      "movw r5, #4100\n"      // LoadImmediate(r5, 4096 + kRegisterSize)
+      "add r5, r5, sp\n"
+      "str ip, [r5, #0]\n"
+      "ldr r5, [sp], #4\n"    // Pop(r5)
+
+      "str r6, [sp, #-4]!\n"  // Push(r6)
+      "mov r6, #4096\n"       // LoadImmediate(r6, 4096)
+      "add r6, r6, r5\n"
+      "str ip, [r6, #0]\n"
+      "ldr r6, [sp], #4\n";   // Pop(r6)
+  DriverStr(expected, "StoreWordToNonThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordPairToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWordPair;
+  int32_t offset = 1020;
+  ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  // We cannot use IP (i.e. R12) as first source register, as it would
+  // force us to use SP (i.e. R13) as second source register, which
+  // would have an "unpredictable" effect according to the ARMv7
+  // specification (the T1 encoding describes the result as
+  // UNPREDICTABLE when one of the source registers is R13).
+  //
+  // So we use (R11, IP) (i.e. (R11, R12)) as source registers in the
+  // following instructions.
+  __ StoreToOffset(type, arm::R11, arm::SP, offset);
+  __ StoreToOffset(type, arm::R11, arm::R5, offset);
+
+  const char* expected =
+      "strd r0, r1, [sp, #1020]\n"
+      "strd r11, ip, [sp, #1020]\n"
+      "strd r11, ip, [r5, #1020]\n";
+  DriverStr(expected, "StoreWordPairToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWordPair;
+  int32_t offset = 1024;
+  ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  // Same comment as in AssemblerThumb2Test.StoreWordPairToThumbOffset
+  // regarding the use of (R11, IP) (i.e. (R11, R12)) as source
+  // registers in the following instructions.
+  __ StoreToOffset(type, arm::R11, arm::SP, offset);
+  __ StoreToOffset(type, arm::R11, arm::R5, offset);
+
+  const char* expected =
+      "mov ip, #1024\n"          // LoadImmediate(ip, 1024)
+      "add ip, ip, sp\n"
+      "strd r0, r1, [ip, #0]\n"
+
+      "str r5, [sp, #-4]!\n"     // Push(r5)
+      "movw r5, #1028\n"         // LoadImmediate(r5, 1024 + kRegisterSize)
+      "add r5, r5, sp\n"
+      "strd r11, ip, [r5, #0]\n"
+      "ldr r5, [sp], #4\n"       // Pop(r5)
+
+      "str r6, [sp, #-4]!\n"     // Push(r6)
+      "mov r6, #1024\n"          // LoadImmediate(r6, 1024)
+      "add r6, r6, r5\n"
+      "strd r11, ip, [r6, #0]\n"
+      "ldr r6, [sp], #4\n";      // Pop(r6)
+  DriverStr(expected, "StoreWordPairToNonThumbOffset");
+}
+
 }  // namespace art
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 923ecdbd9d..323f93cb42 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -504,12 +504,6 @@ class Assembler {
   // and branch to a ExceptionSlowPath if it is.
   virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
 
-  virtual void InitializeFrameDescriptionEntry() {}
-  virtual void FinalizeFrameDescriptionEntry() {}
-  // Give a vector containing FDE data, or null if not used. Note: the assembler must take care
-  // of handling the lifecycle.
-  virtual std::vector<uint8_t>* GetFrameDescriptionEntry() { return nullptr; }
-
   virtual ~Assembler() {}
 
  protected:
diff --git a/compiler/utils/dwarf_cfi.cc b/compiler/utils/dwarf_cfi.cc
deleted file mode 100644
index a7e09c6517..0000000000
--- a/compiler/utils/dwarf_cfi.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "leb128.h" -#include "utils.h" - -#include "dwarf_cfi.h" - -namespace art { - -void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment) { - if (increment < 64) { - // Encoding in opcode. - buf->push_back(0x1 << 6 | increment); - } else if (increment < 256) { - // Single byte delta. - buf->push_back(0x02); - buf->push_back(increment); - } else if (increment < 256 * 256) { - // Two byte delta. - buf->push_back(0x03); - buf->push_back(increment & 0xff); - buf->push_back((increment >> 8) & 0xff); - } else { - // Four byte delta. - buf->push_back(0x04); - Push32(buf, increment); - } -} - -void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset) { - buf->push_back(0x11); - EncodeUnsignedLeb128(reg, buf); - EncodeSignedLeb128(offset, buf); -} - -void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset) { - buf->push_back((0x2 << 6) | reg); - EncodeUnsignedLeb128(offset, buf); -} - -void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset) { - buf->push_back(0x0e); - EncodeUnsignedLeb128(offset, buf); -} - -void DW_CFA_remember_state(std::vector<uint8_t>* buf) { - buf->push_back(0x0a); -} - -void DW_CFA_restore_state(std::vector<uint8_t>* buf) { - buf->push_back(0x0b); -} - -void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit) { - // 'length' (filled in by other functions). - if (is_64bit) { - Push32(buf, 0xffffffff); // Indicates 64bit - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // 'CIE_pointer' (filled in by linker). - if (is_64bit) { - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // 'initial_location' (filled in by linker). - if (is_64bit) { - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // 'address_range' (filled in by other functions). - if (is_64bit) { - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // Augmentation length: 0 - buf->push_back(0); -} - -void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit) { - const size_t kOffsetOfAddressRange = is_64bit? 28 : 12; - CHECK(buf->size() >= kOffsetOfAddressRange + (is_64bit? 8 : 4)); - - uint8_t *p = buf->data() + kOffsetOfAddressRange; - if (is_64bit) { - p[0] = data; - p[1] = data >> 8; - p[2] = data >> 16; - p[3] = data >> 24; - p[4] = data >> 32; - p[5] = data >> 40; - p[6] = data >> 48; - p[7] = data >> 56; - } else { - p[0] = data; - p[1] = data >> 8; - p[2] = data >> 16; - p[3] = data >> 24; - } -} - -void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit) { - uint64_t length = is_64bit ? buf->size() - 12 : buf->size() - 4; - DCHECK_EQ((length & 0x3), 0U); - - uint8_t *p = is_64bit? buf->data() + 4 : buf->data(); - if (is_64bit) { - p[0] = length; - p[1] = length >> 8; - p[2] = length >> 16; - p[3] = length >> 24; - p[4] = length >> 32; - p[5] = length >> 40; - p[6] = length >> 48; - p[7] = length >> 56; - } else { - p[0] = length; - p[1] = length >> 8; - p[2] = length >> 16; - p[3] = length >> 24; - } -} - -void PadCFI(std::vector<uint8_t>* buf) { - while (buf->size() & 0x3) { - buf->push_back(0); - } -} - -} // namespace art diff --git a/compiler/utils/dwarf_cfi.h b/compiler/utils/dwarf_cfi.h deleted file mode 100644 index 0c8b1516dd..0000000000 --- a/compiler/utils/dwarf_cfi.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_DWARF_CFI_H_ -#define ART_COMPILER_UTILS_DWARF_CFI_H_ - -#include <vector> - -namespace art { - -/** - * @brief Enter a 'DW_CFA_advance_loc' into an FDE buffer - * @param buf FDE buffer. - * @param increment Amount by which to increase the current location. - */ -void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment); - -/** - * @brief Enter a 'DW_CFA_offset_extended_sf' into an FDE buffer - * @param buf FDE buffer. - * @param reg Register number. - * @param offset Offset of register address from CFA. - */ -void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset); - -/** - * @brief Enter a 'DW_CFA_offset' into an FDE buffer - * @param buf FDE buffer. - * @param reg Register number. - * @param offset Offset of register address from CFA. - */ -void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset); - -/** - * @brief Enter a 'DW_CFA_def_cfa_offset' into an FDE buffer - * @param buf FDE buffer. - * @param offset New offset of CFA. - */ -void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset); - -/** - * @brief Enter a 'DW_CFA_remember_state' into an FDE buffer - * @param buf FDE buffer. - */ -void DW_CFA_remember_state(std::vector<uint8_t>* buf); - -/** - * @brief Enter a 'DW_CFA_restore_state' into an FDE buffer - * @param buf FDE buffer. - */ -void DW_CFA_restore_state(std::vector<uint8_t>* buf); - -/** - * @brief Write FDE header into an FDE buffer - * @param buf FDE buffer. - * @param is_64bit If FDE is for 64bit application. - */ -void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit); - -/** - * @brief Set 'address_range' field of an FDE buffer - * @param buf FDE buffer. - * @param data Data value. - * @param is_64bit If FDE is for 64bit application. - */ -void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit); - -/** - * @brief Set 'length' field of an FDE buffer - * @param buf FDE buffer. - * @param is_64bit If FDE is for 64bit application. - */ -void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit); - -/** - * @brief Pad an FDE buffer with 0 until its size is a multiple of 4 - * @param buf FDE buffer. 
- */ -void PadCFI(std::vector<uint8_t>* buf); -} // namespace art - -#endif // ART_COMPILER_UTILS_DWARF_CFI_H_ diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index b3a1376727..4cca529258 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -20,7 +20,6 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "memory_region.h" #include "thread.h" -#include "utils/dwarf_cfi.h" namespace art { namespace x86 { @@ -1467,6 +1466,15 @@ void X86Assembler::cmpxchgl(const Address& address, Register reg) { EmitOperand(reg, address); } + +void X86Assembler::cmpxchg8b(const Address& address) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xC7); + EmitOperand(1, address); +} + + void X86Assembler::mfence() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -1631,69 +1639,32 @@ void X86Assembler::EmitGenericShift(int reg_or_opcode, EmitOperand(reg_or_opcode, Operand(operand)); } -void X86Assembler::InitializeFrameDescriptionEntry() { - WriteFDEHeader(&cfi_info_, false /* is_64bit */); -} - -void X86Assembler::FinalizeFrameDescriptionEntry() { - WriteFDEAddressRange(&cfi_info_, buffer_.Size(), false /* is_64bit */); - PadCFI(&cfi_info_); - WriteCFILength(&cfi_info_, false /* is_64bit */); -} - constexpr size_t kFramePointerSize = 4; void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& spill_regs, const ManagedRegisterEntrySpills& entry_spills) { - cfi_cfa_offset_ = kFramePointerSize; // Only return address on stack - cfi_pc_ = buffer_.Size(); // Nothing emitted yet - DCHECK_EQ(cfi_pc_, 0U); - - uint32_t reg_offset = 1; + DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet. CHECK_ALIGNED(frame_size, kStackAlignment); int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { - x86::X86ManagedRegister spill = spill_regs.at(i).AsX86(); - DCHECK(spill.IsCpuRegister()); - pushl(spill.AsCpuRegister()); + Register spill = spill_regs.at(i).AsX86().AsCpuRegister(); + pushl(spill); gpr_count++; - - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += kFramePointerSize; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); - // DW_CFA_offset reg offset - reg_offset++; - DW_CFA_offset(&cfi_info_, spill_regs.at(i).AsX86().DWARFRegId(), reg_offset); } - // return address then method on stack + // return address then method on stack. 
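+  // (The return address was pushed by the caller's call and the spills by the
+  // pushes above, so only the remainder of the frame is allocated here; the
+  // method pointer is pushed separately below.)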
int32_t adjust = frame_size - (gpr_count * kFramePointerSize) - sizeof(StackReference<mirror::ArtMethod>) /*method*/ - kFramePointerSize /*return address*/; addl(ESP, Immediate(-adjust)); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += adjust; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); - pushl(method_reg.AsX86().AsCpuRegister()); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += kFramePointerSize; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); for (size_t i = 0; i < entry_spills.size(); ++i) { ManagedRegisterSpill spill = entry_spills.at(i); if (spill.AsX86().IsCpuRegister()) { - movl(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsCpuRegister()); + int offset = frame_size + spill.getSpillOffset(); + movl(Address(ESP, offset), spill.AsX86().AsCpuRegister()); } else { DCHECK(spill.AsX86().IsXmmRegister()); if (spill.getSize() == 8) { @@ -1709,8 +1680,9 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void X86Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); - addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - - sizeof(StackReference<mirror::ArtMethod>))); + int adjust = frame_size - (spill_regs.size() * kFramePointerSize) - + sizeof(StackReference<mirror::ArtMethod>); + addl(ESP, Immediate(adjust)); for (size_t i = 0; i < spill_regs.size(); ++i) { x86::X86ManagedRegister spill = spill_regs.at(i).AsX86(); DCHECK(spill.IsCpuRegister()); @@ -1722,12 +1694,6 @@ void X86Assembler::RemoveFrame(size_t frame_size, void X86Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addl(ESP, Immediate(-adjust)); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += adjust; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); } void X86Assembler::DecreaseFrameSize(size_t adjust) { diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index bdf88435a4..f3675aeceb 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -205,7 +205,7 @@ class Address : public Operand { class X86Assembler FINAL : public Assembler { public: - explicit X86Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {} + explicit X86Assembler() {} virtual ~X86Assembler() {} /* @@ -457,6 +457,7 @@ class X86Assembler FINAL : public Assembler { X86Assembler* lock(); void cmpxchgl(const Address& address, Register reg); + void cmpxchg8b(const Address& address); void mfence(); @@ -476,6 +477,10 @@ class X86Assembler FINAL : public Assembler { lock()->cmpxchgl(address, reg); } + void LockCmpxchg8b(const Address& address) { + lock()->cmpxchg8b(address); + } + // // Misc. functionality // @@ -599,12 +604,6 @@ class X86Assembler FINAL : public Assembler { // and branch to a ExceptionSlowPath if it is. 
void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - void InitializeFrameDescriptionEntry() OVERRIDE; - void FinalizeFrameDescriptionEntry() OVERRIDE; - std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE { - return &cfi_info_; - } - private: inline void EmitUint8(uint8_t value); inline void EmitInt32(int32_t value); @@ -623,9 +622,6 @@ class X86Assembler FINAL : public Assembler { void EmitGenericShift(int rm, Register reg, const Immediate& imm); void EmitGenericShift(int rm, Register operand, Register shifter); - std::vector<uint8_t> cfi_info_; - uint32_t cfi_cfa_offset_, cfi_pc_; - DISALLOW_COPY_AND_ASSIGN(X86Assembler); }; diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index fccb510afb..dba3b6ba67 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -127,4 +127,49 @@ TEST_F(AssemblerX86Test, LoadLongConstant) { DriverStr(expected, "LoadLongConstant"); } +TEST_F(AssemblerX86Test, LockCmpxchgl) { + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12), + x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12), + x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12), + x86::Register(x86::EDI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EBP), 0), x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0), + x86::Register(x86::ESI)); + const char* expected = + "lock cmpxchgl %ESI, 0xc(%EDI,%EBX,4)\n" + "lock cmpxchgl %ESI, 0xc(%EDI,%ESI,4)\n" + "lock cmpxchgl %EDI, 0xc(%EDI,%ESI,4)\n" + "lock cmpxchgl %ESI, (%EBP)\n" + "lock cmpxchgl %ESI, (%EBP,%ESI,1)\n"; + + DriverStr(expected, "lock_cmpxchgl"); +} + +TEST_F(AssemblerX86Test, LockCmpxchg8b) { + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address(x86::Register(x86::EBP), 0)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0)); + const char* expected = + "lock cmpxchg8b 0xc(%EDI,%EBX,4)\n" + "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n" + "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n" + "lock cmpxchg8b (%EBP)\n" + "lock cmpxchg8b (%EBP,%ESI,1)\n"; + + DriverStr(expected, "lock_cmpxchg8b"); +} + } // namespace art diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h index 5d46ee25cd..09d2b4919d 100644 --- a/compiler/utils/x86/managed_register_x86.h +++ b/compiler/utils/x86/managed_register_x86.h @@ -88,14 +88,6 @@ const int kNumberOfAllocIds = kNumberOfCpuAllocIds + kNumberOfXmmAllocIds + // There is a one-to-one mapping between ManagedRegister and register id. class X86ManagedRegister : public ManagedRegister { public: - int DWARFRegId() const { - CHECK(IsCpuRegister()); - // For all the X86 registers we care about: - // EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - // DWARF register id is the same as id_. 
- return static_cast<int>(id_); - } - ByteRegister AsByteRegister() const { CHECK(IsCpuRegister()); CHECK_LT(AsCpuRegister(), ESP); // ESP, EBP, ESI and EDI cannot be encoded as byte registers. diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 9ad31c725c..3ba926236f 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -20,7 +20,6 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "memory_region.h" #include "thread.h" -#include "utils/dwarf_cfi.h" namespace art { namespace x86_64 { @@ -1625,6 +1624,14 @@ void X86_64Assembler::imull(CpuRegister reg) { } +void X86_64Assembler::imulq(CpuRegister reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(reg); + EmitUint8(0xF7); + EmitOperand(5, Operand(reg)); +} + + void X86_64Assembler::imull(const Address& address) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(address); @@ -1858,11 +1865,22 @@ X86_64Assembler* X86_64Assembler::lock() { void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(reg, address); EmitUint8(0x0F); EmitUint8(0xB1); EmitOperand(reg.LowBits(), address); } + +void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(reg, address); + EmitUint8(0x0F); + EmitUint8(0xB1); + EmitOperand(reg.LowBits(), address); +} + + void X86_64Assembler::mfence() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -2179,26 +2197,12 @@ void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const } } -void X86_64Assembler::InitializeFrameDescriptionEntry() { - WriteFDEHeader(&cfi_info_, true /* is_64bit */); -} - -void X86_64Assembler::FinalizeFrameDescriptionEntry() { - WriteFDEAddressRange(&cfi_info_, buffer_.Size(), true /* is_64bit */); - PadCFI(&cfi_info_); - WriteCFILength(&cfi_info_, true /* is_64bit */); -} - constexpr size_t kFramePointerSize = 8; void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& spill_regs, const ManagedRegisterEntrySpills& entry_spills) { - cfi_cfa_offset_ = kFramePointerSize; // Only return address on stack - cfi_pc_ = buffer_.Size(); // Nothing emitted yet - DCHECK_EQ(cfi_pc_, 0U); - - uint32_t reg_offset = 1; + DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet. CHECK_ALIGNED(frame_size, kStackAlignment); int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { @@ -2206,29 +2210,13 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, if (spill.IsCpuRegister()) { pushq(spill.AsCpuRegister()); gpr_count++; - - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += kFramePointerSize; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); - // DW_CFA_offset reg offset - reg_offset++; - DW_CFA_offset(&cfi_info_, spill.DWARFRegId(), reg_offset); } } - // return address then method on stack + // return address then method on stack. 
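+  // (Unlike the x86 version, the ArtMethod* slot is included in rest_of_frame
+  // rather than being pushed separately.)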
int64_t rest_of_frame = static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize /*return address*/; subq(CpuRegister(RSP), Immediate(rest_of_frame)); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += rest_of_frame; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); // spill xmms int64_t offset = rest_of_frame; @@ -2293,12 +2281,6 @@ void X86_64Assembler::RemoveFrame(size_t frame_size, void X86_64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust))); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += adjust; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); } void X86_64Assembler::DecreaseFrameSize(size_t adjust) { diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 39f781cb1c..d357a813e8 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -244,7 +244,7 @@ class Address : public Operand { class X86_64Assembler FINAL : public Assembler { public: - X86_64Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {} + X86_64Assembler() {} virtual ~X86_64Assembler() {} /* @@ -468,6 +468,7 @@ class X86_64Assembler FINAL : public Assembler { void imull(CpuRegister reg, const Immediate& imm); void imull(CpuRegister reg, const Address& address); + void imulq(CpuRegister src); void imulq(CpuRegister dst, CpuRegister src); void imulq(CpuRegister reg, const Immediate& imm); void imulq(CpuRegister reg, const Address& address); @@ -517,6 +518,7 @@ class X86_64Assembler FINAL : public Assembler { X86_64Assembler* lock(); void cmpxchgl(const Address& address, CpuRegister reg); + void cmpxchgq(const Address& address, CpuRegister reg); void mfence(); @@ -539,6 +541,10 @@ class X86_64Assembler FINAL : public Assembler { lock()->cmpxchgl(address, reg); } + void LockCmpxchgq(const Address& address, CpuRegister reg) { + lock()->cmpxchgq(address, reg); + } + // // Misc. functionality // @@ -663,12 +669,6 @@ class X86_64Assembler FINAL : public Assembler { // and branch to a ExceptionSlowPath if it is. 
void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - void InitializeFrameDescriptionEntry() OVERRIDE; - void FinalizeFrameDescriptionEntry() OVERRIDE; - std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE { - return &cfi_info_; - } - private: void EmitUint8(uint8_t value); void EmitInt32(int32_t value); @@ -714,9 +714,6 @@ class X86_64Assembler FINAL : public Assembler { void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src); void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand); - std::vector<uint8_t> cfi_info_; - uint32_t cfi_cfa_offset_, cfi_pc_; - DISALLOW_COPY_AND_ASSIGN(X86_64Assembler); }; diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index b90c142132..116190a832 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -315,6 +315,10 @@ TEST_F(AssemblerX86_64Test, AddlImm) { DriverStr(Repeatri(&x86_64::X86_64Assembler::addl, 4U, "add ${imm}, %{reg}"), "addli"); } +TEST_F(AssemblerX86_64Test, ImulqReg1) { + DriverStr(RepeatR(&x86_64::X86_64Assembler::imulq, "imulq %{reg}"), "imulq"); +} + TEST_F(AssemblerX86_64Test, ImulqRegs) { DriverStr(RepeatRR(&x86_64::X86_64Assembler::imulq, "imulq %{reg2}, %{reg1}"), "imulq"); } @@ -585,6 +589,56 @@ TEST_F(AssemblerX86_64Test, Xchgl) { // DriverStr(Repeatrr(&x86_64::X86_64Assembler::xchgl, "xchgl %{reg2}, %{reg1}"), "xchgl"); } +TEST_F(AssemblerX86_64Test, LockCmpxchgl) { + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::R8)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), + x86_64::CpuRegister(x86_64::RSI)); + const char* expected = + "lock cmpxchgl %ESI, 0xc(%RDI,%RBX,4)\n" + "lock cmpxchgl %ESI, 0xc(%RDI,%R9,4)\n" + "lock cmpxchgl %R8d, 0xc(%RDI,%R9,4)\n" + "lock cmpxchgl %ESI, (%R13)\n" + "lock cmpxchgl %ESI, (%R13,%R9,1)\n"; + + DriverStr(expected, "lock_cmpxchgl"); +} + +TEST_F(AssemblerX86_64Test, LockCmpxchgq) { + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::R8)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), + x86_64::CpuRegister(x86_64::RSI)); + const char* expected = + "lock cmpxchg %RSI, 
0xc(%RDI,%RBX,4)\n" + "lock cmpxchg %RSI, 0xc(%RDI,%R9,4)\n" + "lock cmpxchg %R8, 0xc(%RDI,%R9,4)\n" + "lock cmpxchg %RSI, (%R13)\n" + "lock cmpxchg %RSI, (%R13,%R9,1)\n"; + + DriverStr(expected, "lock_cmpxchg"); +} + TEST_F(AssemblerX86_64Test, Movl) { GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address( x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h index 3a96ad0b51..822659fffc 100644 --- a/compiler/utils/x86_64/managed_register_x86_64.h +++ b/compiler/utils/x86_64/managed_register_x86_64.h @@ -87,21 +87,6 @@ const int kNumberOfAllocIds = kNumberOfCpuAllocIds + kNumberOfXmmAllocIds + // There is a one-to-one mapping between ManagedRegister and register id. class X86_64ManagedRegister : public ManagedRegister { public: - int DWARFRegId() const { - CHECK(IsCpuRegister()); - switch (id_) { - case RAX: return 0; - case RDX: return 1; - case RCX: return 2; - case RBX: return 3; - case RSI: return 4; - case RDI: return 5; - case RBP: return 6; - case RSP: return 7; - default: return static_cast<int>(id_); // R8 ~ R15 - } - } - CpuRegister AsCpuRegister() const { CHECK(IsCpuRegister()); return CpuRegister(static_cast<Register>(id_)); diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index a1834e1e9a..ba0c0bdebd 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -942,7 +942,7 @@ DISASSEMBLER_ENTRY(cmp, opcode1 = "pextrw"; prefix[2] = 0; has_modrm = true; - store = true; + load = true; src_reg_file = SSE; immediate_bytes = 1; } else { diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 6e7b04fc93..af008347cd 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -97,6 +97,7 @@ enum LockLevel { kAllocTrackerLock, kDeoptimizationLock, kProfilerLock, + kJdwpShutdownLock, kJdwpEventListLock, kJdwpAttachLock, kJdwpStartLock, diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 6759c4d9c3..a909a1afbe 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -307,7 +307,6 @@ static JDWP::JdwpOptions gJdwpOptions; // Runtime JDWP state. static JDWP::JdwpState* gJdwpState = nullptr; static bool gDebuggerConnected; // debugger or DDMS is connected. -static bool gDisposed; // debugger called VirtualMachine.Dispose, so we should drop the connection. static bool gDdmThreadNotification = false; @@ -319,6 +318,7 @@ static Dbg::HpsgWhen gDdmNhsgWhen = Dbg::HPSG_WHEN_NEVER; static Dbg::HpsgWhat gDdmNhsgWhat; bool Dbg::gDebuggerActive = false; +bool Dbg::gDisposed = false; ObjectRegistry* Dbg::gRegistry = nullptr; // Recent allocation tracking. @@ -551,7 +551,7 @@ void Dbg::StopJdwp() { gJdwpState->PostVMDeath(); } // Prevent the JDWP thread from processing JDWP incoming packets after we close the connection. - Disposed(); + Dispose(); delete gJdwpState; gJdwpState = nullptr; delete gRegistry; @@ -599,14 +599,6 @@ void Dbg::Connected() { gDisposed = false; } -void Dbg::Disposed() { - gDisposed = true; -} - -bool Dbg::IsDisposed() { - return gDisposed; -} - bool Dbg::RequiresDeoptimization() { // We don't need deoptimization if everything runs with interpreter after // enabling -Xint mode. 
diff --git a/runtime/debugger.h b/runtime/debugger.h index 5898784c43..dd7f9c56fa 100644 --- a/runtime/debugger.h +++ b/runtime/debugger.h @@ -239,7 +239,9 @@ class Dbg { static void GoActive() LOCKS_EXCLUDED(Locks::breakpoint_lock_, Locks::deoptimization_lock_, Locks::mutator_lock_); static void Disconnected() LOCKS_EXCLUDED(Locks::deoptimization_lock_, Locks::mutator_lock_); - static void Disposed(); + static void Dispose() { + gDisposed = true; + } // Returns true if we're actually debugging with a real debugger, false if it's // just DDMS (or nothing at all). @@ -255,9 +257,12 @@ class Dbg { // Returns true if a method has any breakpoints. static bool MethodHasAnyBreakpoints(mirror::ArtMethod* method) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::breakpoint_lock_); + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + LOCKS_EXCLUDED(Locks::breakpoint_lock_); - static bool IsDisposed(); + static bool IsDisposed() { + return gDisposed; + } /* * Time, in milliseconds, since the last debugger activity. Does not @@ -756,6 +761,10 @@ class Dbg { // Indicates whether the debugger is making requests. static bool gDebuggerActive; + // Indicates whether we should drop the JDWP connection because the runtime is shutting down or the + // debugger called VirtualMachine.Dispose. + static bool gDisposed; + // The registry mapping objects to JDWP ids. static ObjectRegistry* gRegistry; diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc index bc5cf9b1ff..411ec43aab 100644 --- a/runtime/elf_file.cc +++ b/runtime/elf_file.cc @@ -1630,8 +1630,10 @@ static bool IsFDE(FDE64* frame) { return frame->CIE_pointer != 0; } -static bool FixupEHFrame(off_t base_address_delta, - uint8_t* eh_frame, size_t eh_frame_size) { +template <typename Elf_SOff> +static bool FixupEHFrame(Elf_SOff base_address_delta, uint8_t* eh_frame, size_t eh_frame_size) { + // TODO: Check the spec whether this is really data-dependent, or whether it's clear from the + // ELF file whether we should expect 32-bit or 64-bit.
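On the TODO above: the 32-bit/64-bit split here is a property of the DWARF data, not of the ELF class, so inspecting the bytes is the right approach. DWARF (v3 and later, section 7.4) defines an initial-length field: the value 0xffffffff selects the 64-bit DWARF format, with the real length following as a 64-bit value, while values below 0xfffffff0 are plain 32-bit lengths. That is exactly the marker the check below tests for. A small sketch of a conforming reader (hypothetical helper, not part of the patch):

#include <cstddef>
#include <cstdint>
#include <cstring>

struct InitialLength {
  uint64_t length;    // Length of the unit that follows.
  size_t field_size;  // 4 bytes for 32-bit DWARF, 12 bytes for 64-bit DWARF.
  bool is_dwarf64;
};

static InitialLength ReadInitialLength(const uint8_t* p) {
  uint32_t word;
  std::memcpy(&word, p, sizeof(word));  // memcpy avoids unaligned/aliasing pitfalls.
  if (word == 0xffffffffu) {
    uint64_t length;
    std::memcpy(&length, p + 4, sizeof(length));
    return {length, 12, true};
  }
  return {word, 4, false};
}

int main() {
  const uint8_t dwarf32[4] = {0x10, 0x00, 0x00, 0x00};  // Length 16, 32-bit format.
  return ReadInitialLength(dwarf32).is_dwarf64 ? 1 : 0;
}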
if (*(reinterpret_cast<uint32_t*>(eh_frame)) == 0xffffffff) { FDE64* last_frame = reinterpret_cast<FDE64*>(eh_frame + eh_frame_size); FDE64* frame = NextFDE(reinterpret_cast<FDE64*>(eh_frame)); @@ -1643,6 +1645,7 @@ static bool FixupEHFrame(off_t base_address_delta, } return true; } else { + CHECK(IsInt<32>(base_address_delta)); FDE32* last_frame = reinterpret_cast<FDE32*>(eh_frame + eh_frame_size); FDE32* frame = NextFDE(reinterpret_cast<FDE32*>(eh_frame)); for (; frame < last_frame; frame = NextFDE(frame)) { @@ -1772,7 +1775,9 @@ class DebugLineInstructionIterator FINAL { uint8_t* current_instruction_; }; -static bool FixupDebugLine(off_t base_offset_delta, DebugLineInstructionIterator* iter) { +template <typename Elf_SOff> +static bool FixupDebugLine(Elf_SOff base_offset_delta, DebugLineInstructionIterator* iter) { + CHECK(IsInt<32>(base_offset_delta)); for (; iter->GetInstruction(); iter->Next()) { if (iter->IsExtendedOpcode() && iter->GetOpcode() == dwarf::DW_LNE_set_address) { *reinterpret_cast<uint32_t*>(iter->GetArguments()) += base_offset_delta; @@ -2044,7 +2049,9 @@ class DebugInfoIterator { DebugTag* current_tag_; }; -static bool FixupDebugInfo(off_t base_address_delta, DebugInfoIterator* iter) { +template <typename Elf_SOff> +static bool FixupDebugInfo(Elf_SOff base_address_delta, DebugInfoIterator* iter) { + CHECK(IsInt<32>(base_address_delta)); do { if (iter->GetCurrentTag()->GetAttrSize(dwarf::DW_AT_low_pc) != sizeof(int32_t) || iter->GetCurrentTag()->GetAttrSize(dwarf::DW_AT_high_pc) != sizeof(int32_t)) { @@ -2066,7 +2073,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::FixupDebugSections(off_t base_address_delta) { + ::FixupDebugSections(typename std::make_signed<Elf_Off>::type base_address_delta) { const Elf_Shdr* debug_info = FindSectionByName(".debug_info"); const Elf_Shdr* debug_abbrev = FindSectionByName(".debug_abbrev"); const Elf_Shdr* eh_frame = FindSectionByName(".eh_frame"); @@ -2280,7 +2287,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::Fixup(uintptr_t base_address) { + ::Fixup(Elf_Addr base_address) { if (!FixupDynamic(base_address)) { LOG(WARNING) << "Failed to fixup .dynamic in " << file_->GetPath(); return false; @@ -2305,7 +2312,8 @@ bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, LOG(WARNING) << "Failed to fixup .rel.dyn in " << file_->GetPath(); return false; } - if (!FixupDebugSections(base_address)) { + static_assert(sizeof(Elf_Off) >= sizeof(base_address), "Potentially losing precision."); + if (!FixupDebugSections(static_cast<Elf_Off>(base_address))) { LOG(WARNING) << "Failed to fixup debug sections in " << file_->GetPath(); return false; } @@ -2317,7 +2325,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::FixupDynamic(uintptr_t base_address) { + ::FixupDynamic(Elf_Addr base_address) { for (Elf_Word i = 0; i < GetDynamicNum(); i++) { Elf_Dyn& elf_dyn = GetDynamic(i); Elf_Word d_tag = 
elf_dyn.d_tag; @@ -2341,7 +2349,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::FixupSectionHeaders(uintptr_t base_address) { + ::FixupSectionHeaders(Elf_Addr base_address) { for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) { Elf_Shdr* sh = GetSectionHeader(i); CHECK(sh != nullptr); @@ -2365,7 +2373,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::FixupProgramHeaders(uintptr_t base_address) { + ::FixupProgramHeaders(Elf_Addr base_address) { // TODO: ELFObjectFile doesn't give access to Elf_Phdr, so we do that ourselves for now. for (Elf_Word i = 0; i < GetProgramHeaderNum(); i++) { Elf_Phdr* ph = GetProgramHeader(i); @@ -2392,7 +2400,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::FixupSymbols(uintptr_t base_address, bool dynamic) { + ::FixupSymbols(Elf_Addr base_address, bool dynamic) { Elf_Word section_type = dynamic ? SHT_DYNSYM : SHT_SYMTAB; // TODO: Unfortunately, ELFObjectFile has protected symbol access, so use ElfFile Elf_Shdr* symbol_section = FindSectionByType(section_type); @@ -2422,7 +2430,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::FixupRelocations(uintptr_t base_address) { + ::FixupRelocations(Elf_Addr base_address) { for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) { Elf_Shdr* sh = GetSectionHeader(i); CHECK(sh != nullptr); @@ -2622,7 +2630,14 @@ bool ElfFile::Strip(File* file, std::string* error_msg) { return elf_file->elf32_->Strip(error_msg); } -bool ElfFile::Fixup(uintptr_t base_address) { +bool ElfFile::Fixup(uint64_t base_address) { + if (elf64_.get() != nullptr) { + return elf64_->Fixup(static_cast<Elf64_Addr>(base_address)); + } else { + DCHECK(elf32_.get() != nullptr); + CHECK(IsUint<32>(base_address)) << std::hex << base_address; + return elf32_->Fixup(static_cast<Elf32_Addr>(base_address)); + } - DELEGATE_TO_IMPL(Fixup, base_address); } diff --git a/runtime/elf_file.h b/runtime/elf_file.h index 41c54bce7b..286c2a638c 100644 --- a/runtime/elf_file.h +++ b/runtime/elf_file.h @@ -78,9 +78,9 @@ class ElfFile { // Fixup an ELF file so that the oat header will be loaded at oat_begin. // Returns true on success, false on failure. - static bool Fixup(File* file, uintptr_t oat_data_begin); + static bool Fixup(File* file, uint64_t oat_data_begin); - bool Fixup(uintptr_t base_address); + bool Fixup(uint64_t base_address); bool Is64Bit() const { return elf64_.get() != nullptr; diff --git a/runtime/elf_file_impl.h b/runtime/elf_file_impl.h index a70fa17868..16d3857086 100644 --- a/runtime/elf_file_impl.h +++ b/runtime/elf_file_impl.h @@ -19,6 +19,7 @@ #include <map> #include <memory> +#include <type_traits> #include <vector> // Explicitly include our own elf.h to avoid Linux and other dependencies.
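The new <type_traits> include above supports the signature change: FixupDebugSections now takes the signed counterpart of Elf_Off, so the 32-bit instantiation of ElfFileImpl receives an int32_t delta and the 64-bit one an int64_t, instead of the host-dependent off_t. A compile-time sketch of what the trait resolves to (illustrative local aliases, not the real elf.h typedefs):

#include <cstdint>
#include <type_traits>

// ELF offsets are unsigned: 32-bit files use a 32-bit Off, 64-bit files a
// 64-bit one (aliases local to this sketch).
using Sketch_Elf32_Off = uint32_t;
using Sketch_Elf64_Off = uint64_t;

// std::make_signed maps each to the matching signed delta type, which is what
// the new FixupDebugSections parameter type resolves to per instantiation.
static_assert(std::is_same<std::make_signed<Sketch_Elf32_Off>::type, int32_t>::value,
              "32-bit ELF gets an int32_t base-address delta");
static_assert(std::is_same<std::make_signed<Sketch_Elf64_Off>::type, int64_t>::value,
              "64-bit ELF gets an int64_t base-address delta");

int main() { return 0; }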
@@ -102,13 +103,13 @@ class ElfFileImpl { // executable is true at run time, false at compile time. bool Load(bool executable, std::string* error_msg); - bool Fixup(uintptr_t base_address); - bool FixupDynamic(uintptr_t base_address); - bool FixupSectionHeaders(uintptr_t base_address); - bool FixupProgramHeaders(uintptr_t base_address); - bool FixupSymbols(uintptr_t base_address, bool dynamic); - bool FixupRelocations(uintptr_t base_address); - bool FixupDebugSections(off_t base_address_delta); + bool Fixup(Elf_Addr base_address); + bool FixupDynamic(Elf_Addr base_address); + bool FixupSectionHeaders(Elf_Addr base_address); + bool FixupProgramHeaders(Elf_Addr base_address); + bool FixupSymbols(Elf_Addr base_address, bool dynamic); + bool FixupRelocations(Elf_Addr base_address); + bool FixupDebugSections(typename std::make_signed<Elf_Off>::type base_address_delta); bool Strip(std::string* error_msg); diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc index d6a6595127..cdb3e2af79 100644 --- a/runtime/hprof/hprof.cc +++ b/runtime/hprof/hprof.cc @@ -949,6 +949,10 @@ void Hprof::DumpHeapObject(mirror::Object* obj) { } void Hprof::DumpHeapClass(mirror::Class* klass) { + if (!klass->IsLoaded() && !klass->IsErroneous()) { + // Class is allocated but not yet loaded: we cannot access its fields or super class. + return; + } size_t sFieldCount = klass->NumStaticFields(); if (sFieldCount != 0) { int byteLength = sFieldCount * sizeof(JValue); // TODO bogus; fields are packed diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h index e16221c69a..31c9a0bb4e 100644 --- a/runtime/jdwp/jdwp.h +++ b/runtime/jdwp/jdwp.h @@ -403,6 +403,14 @@ struct JdwpState { // Used for VirtualMachine.Exit command handling. bool should_exit_; int exit_status_; + + // Used to synchronize runtime shutdown with the JDWP command handler thread. + // When the runtime shuts down, it needs to stop the JDWP command handler thread by closing the + // JDWP connection. However, if the JDWP thread is processing a command, it needs to wait + // for the command to finish so we can send its reply before closing the connection.
+ Mutex shutdown_lock_ ACQUIRED_AFTER(event_list_lock_); + ConditionVariable shutdown_cond_ GUARDED_BY(shutdown_lock_); + bool processing_request_ GUARDED_BY(shutdown_lock_); }; std::string DescribeField(const FieldId& field_id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc index 0d161bc100..d0ca214ee4 100644 --- a/runtime/jdwp/jdwp_handler.cc +++ b/runtime/jdwp/jdwp_handler.cc @@ -271,7 +271,7 @@ static JdwpError VM_IDSizes(JdwpState*, Request*, ExpandBuf* pReply) static JdwpError VM_Dispose(JdwpState*, Request*, ExpandBuf*) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - Dbg::Disposed(); + Dbg::Dispose(); return ERR_NONE; } diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc index e2b88a5e79..5b30f0cd8c 100644 --- a/runtime/jdwp/jdwp_main.cc +++ b/runtime/jdwp/jdwp_main.cc @@ -126,6 +126,7 @@ void JdwpNetStateBase::Close() { */ ssize_t JdwpNetStateBase::WritePacket(ExpandBuf* pReply, size_t length) { MutexLock mu(Thread::Current(), socket_lock_); + DCHECK(IsConnected()) << "Connection with debugger is closed"; DCHECK_LE(length, expandBufGetLength(pReply)); return TEMP_FAILURE_RETRY(write(clientSock, expandBufGetBuffer(pReply), length)); } @@ -140,6 +141,7 @@ ssize_t JdwpNetStateBase::WriteBufferedPacket(const std::vector<iovec>& iov) { ssize_t JdwpNetStateBase::WriteBufferedPacketLocked(const std::vector<iovec>& iov) { socket_lock_.AssertHeld(Thread::Current()); + DCHECK(IsConnected()) << "Connection with debugger is closed"; return TEMP_FAILURE_RETRY(writev(clientSock, &iov[0], iov.size())); } @@ -225,7 +227,10 @@ JdwpState::JdwpState(const JdwpOptions* options) jdwp_token_owner_thread_id_(0), ddm_is_active_(false), should_exit_(false), - exit_status_(0) { + exit_status_(0), + shutdown_lock_("JDWP shutdown lock", kJdwpShutdownLock), + shutdown_cond_("JDWP shutdown condition variable", shutdown_lock_), + processing_request_(false) { } /* @@ -338,10 +343,20 @@ void JdwpState::ResetState() { JdwpState::~JdwpState() { if (netState != nullptr) { /* - * Close down the network to inspire the thread to halt. + * Close down the network to inspire the thread to halt. If a request is being processed, + * we need to wait for it to finish first. */ - VLOG(jdwp) << "JDWP shutting down net..."; - netState->Shutdown(); + { + Thread* self = Thread::Current(); + MutexLock mu(self, shutdown_lock_); + while (processing_request_) { + VLOG(jdwp) << "JDWP command in progress: wait for it to finish ..."; + shutdown_cond_.Wait(self); + } + + VLOG(jdwp) << "JDWP shutting down net..."; + netState->Shutdown(); + } if (debug_thread_started_) { run = false; @@ -369,7 +384,13 @@ bool JdwpState::IsActive() { // Returns "false" if we encounter a connection-fatal error. 
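The destructor change above, together with the HandlePacket() change that follows, forms a classic condition-variable handshake: the command handler raises processing_request_ for the duration of each command, and the shutdown path blocks on shutdown_cond_ until the flag drops before closing the socket, which guarantees the reply goes out first. A minimal standalone sketch of the same pattern (hypothetical class, using standard-library primitives instead of ART's Mutex/ConditionVariable):

#include <condition_variable>
#include <mutex>

class ShutdownGate {
 public:
  void BeginRequest() {  // Command thread: a request is now in flight.
    std::lock_guard<std::mutex> lock(mu_);
    busy_ = true;
  }
  void EndRequest() {  // Command thread: reply sent, wake any waiter.
    {
      std::lock_guard<std::mutex> lock(mu_);
      busy_ = false;
    }
    cv_.notify_all();
  }
  void WaitUntilIdle() {  // Shutdown path: block until no request is in flight.
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return !busy_; });
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  bool busy_ = false;
};

int main() {
  ShutdownGate gate;
  gate.BeginRequest();
  gate.EndRequest();
  gate.WaitUntilIdle();  // Returns immediately here: the flag is clear.
  return 0;
}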
bool JdwpState::HandlePacket() { - JdwpNetStateBase* netStateBase = reinterpret_cast<JdwpNetStateBase*>(netState); + Thread* const self = Thread::Current(); + { + MutexLock mu(self, shutdown_lock_); + processing_request_ = true; + } + JdwpNetStateBase* netStateBase = netState; + CHECK(netStateBase != nullptr) << "Connection has been closed"; JDWP::Request request(netStateBase->input_buffer_, netStateBase->input_count_); ExpandBuf* pReply = expandBufAlloc(); @@ -388,6 +409,11 @@ bool JdwpState::HandlePacket() { } expandBufFree(pReply); netStateBase->ConsumeBytes(request.GetLength()); + { + MutexLock mu(self, shutdown_lock_); + processing_request_ = false; + shutdown_cond_.Broadcast(self); + } return true; } diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index c182a4d9ad..87ae64d1d4 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -297,22 +297,15 @@ static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jobject cookie return result; } -// Java: dalvik.system.DexFile.UP_TO_DATE -static const jbyte kUpToDate = 0; -// Java: dalvik.system.DexFile.DEXOPT_NEEDED -static const jbyte kPatchoatNeeded = 1; -// Java: dalvik.system.DexFile.PATCHOAT_NEEDED -static const jbyte kDexoptNeeded = 2; - -static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, +static jint GetDexOptNeeded(JNIEnv* env, const char* filename, const char* pkgname, const char* instruction_set, const jboolean defer) { if ((filename == nullptr) || !OS::FileExists(filename)) { - LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename << "' does not exist"; + LOG(ERROR) << "DexFile_getDexOptNeeded file '" << filename << "' does not exist"; ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException")); const char* message = (filename == nullptr) ? "<empty file name>" : filename; env->ThrowNew(fnfe.get(), message); - return kUpToDate; + return OatFileAssistant::kNoDexOptNeeded; } const InstructionSet target_instruction_set = GetInstructionSetFromString(instruction_set); @@ -330,7 +323,7 @@ static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, // Always treat elements of the bootclasspath as up-to-date. 
if (oat_file_assistant.IsInBootClassPath()) { - return kUpToDate; + return OatFileAssistant::kNoDexOptNeeded; } // TODO: Checking the profile should probably be done in the GetStatus() @@ -343,7 +336,7 @@ static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, if (!defer) { oat_file_assistant.CopyProfileFile(); } - return kDexoptNeeded; + return OatFileAssistant::kDex2OatNeeded; } else if (oat_file_assistant.ProfileExists() && !oat_file_assistant.OldProfileExists()) { if (!defer) { @@ -353,16 +346,10 @@ static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, } } - OatFileAssistant::Status status = oat_file_assistant.GetStatus(); - switch (status) { - case OatFileAssistant::kUpToDate: return kUpToDate; - case OatFileAssistant::kNeedsRelocation: return kPatchoatNeeded; - case OatFileAssistant::kOutOfDate: return kDexoptNeeded; - } - UNREACHABLE(); + return oat_file_assistant.GetDexOptNeeded(); } -static jbyte DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename, +static jint DexFile_getDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename, jstring javaPkgname, jstring javaInstructionSet, jboolean defer) { ScopedUtfChars filename(env, javaFilename); if (env->ExceptionCheck()) { @@ -376,25 +363,25 @@ static jbyte DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFil return 0; } - return IsDexOptNeededInternal(env, filename.c_str(), pkgname.c_str(), - instruction_set.c_str(), defer); + return GetDexOptNeeded(env, filename.c_str(), pkgname.c_str(), + instruction_set.c_str(), defer); } // public API, NULL pkgname static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) { const char* instruction_set = GetInstructionSetString(kRuntimeISA); ScopedUtfChars filename(env, javaFilename); - return kUpToDate != IsDexOptNeededInternal(env, filename.c_str(), nullptr /* pkgname */, - instruction_set, false /* defer */); + jint status = GetDexOptNeeded(env, filename.c_str(), nullptr /* pkgname */, + instruction_set, false /* defer */); + return (status != OatFileAssistant::kNoDexOptNeeded) ? JNI_TRUE : JNI_FALSE; } - static JNINativeMethod gMethods[] = { NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)V"), NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/Object;)Ljava/lang/Class;"), NATIVE_METHOD(DexFile, getClassNameList, "(Ljava/lang/Object;)[Ljava/lang/String;"), NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"), - NATIVE_METHOD(DexFile, isDexOptNeededInternal, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)B"), + NATIVE_METHOD(DexFile, getDexOptNeeded, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"), NATIVE_METHOD(DexFile, openDexFileNative, "(Ljava/lang/String;Ljava/lang/String;I)Ljava/lang/Object;"), }; diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc index d92f59bde6..e5c27b2430 100644 --- a/runtime/oat_file_assistant.cc +++ b/runtime/oat_file_assistant.cc @@ -142,31 +142,31 @@ bool OatFileAssistant::Lock(std::string* error_msg) { return true; } -OatFileAssistant::Status OatFileAssistant::GetStatus() { +OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded() { // TODO: If the profiling code is ever restored, it's worth considering // whether we should check to see if the profile is out of date here. - if (OdexFileIsOutOfDate()) { - // The DEX file is not pre-compiled. - // TODO: What if the oat file is not out of date? Could we relocate it - // from itself? 
- return OatFileIsUpToDate() ? kUpToDate : kOutOfDate; - } else { - // The DEX file is pre-compiled. If the oat file isn't up to date, we can - // patch the pre-compiled version rather than recompiling. - if (OatFileIsUpToDate() || OdexFileIsUpToDate()) { - return kUpToDate; - } else { - return kNeedsRelocation; - } + if (OatFileIsUpToDate() || OdexFileIsUpToDate()) { + return kNoDexOptNeeded; } + + if (OdexFileNeedsRelocation()) { + return kPatchOatNeeded; + } + + if (OatFileNeedsRelocation()) { + return kSelfPatchOatNeeded; + } + + return kDex2OatNeeded; } bool OatFileAssistant::MakeUpToDate(std::string* error_msg) { - switch (GetStatus()) { - case kUpToDate: return true; - case kNeedsRelocation: return RelocateOatFile(error_msg); - case kOutOfDate: return GenerateOatFile(error_msg); + switch (GetDexOptNeeded()) { + case kNoDexOptNeeded: return true; + case kDex2OatNeeded: return GenerateOatFile(error_msg); + case kPatchOatNeeded: return RelocateOatFile(OdexFileName(), error_msg); + case kSelfPatchOatNeeded: return RelocateOatFile(OatFileName(), error_msg); } UNREACHABLE(); } @@ -269,14 +269,14 @@ bool OatFileAssistant::OdexFileExists() { return GetOdexFile() != nullptr; } -OatFileAssistant::Status OatFileAssistant::OdexFileStatus() { +OatFileAssistant::OatStatus OatFileAssistant::OdexFileStatus() { if (OdexFileIsOutOfDate()) { - return kOutOfDate; + return kOatOutOfDate; } if (OdexFileIsUpToDate()) { - return kUpToDate; + return kOatUpToDate; } - return kNeedsRelocation; + return kOatNeedsRelocation; } bool OatFileAssistant::OdexFileIsOutOfDate() { @@ -293,7 +293,7 @@ bool OatFileAssistant::OdexFileIsOutOfDate() { } bool OatFileAssistant::OdexFileNeedsRelocation() { - return OdexFileStatus() == kNeedsRelocation; + return OdexFileStatus() == kOatNeedsRelocation; } bool OatFileAssistant::OdexFileIsUpToDate() { @@ -338,14 +338,14 @@ bool OatFileAssistant::OatFileExists() { return GetOatFile() != nullptr; } -OatFileAssistant::Status OatFileAssistant::OatFileStatus() { +OatFileAssistant::OatStatus OatFileAssistant::OatFileStatus() { if (OatFileIsOutOfDate()) { - return kOutOfDate; + return kOatOutOfDate; } if (OatFileIsUpToDate()) { - return kUpToDate; + return kOatUpToDate; } - return kNeedsRelocation; + return kOatNeedsRelocation; } bool OatFileAssistant::OatFileIsOutOfDate() { @@ -362,7 +362,7 @@ bool OatFileAssistant::OatFileIsOutOfDate() { } bool OatFileAssistant::OatFileNeedsRelocation() { - return OatFileStatus() == kNeedsRelocation; + return OatFileStatus() == kOatNeedsRelocation; } bool OatFileAssistant::OatFileIsUpToDate() { @@ -378,17 +378,17 @@ bool OatFileAssistant::OatFileIsUpToDate() { return cached_oat_file_is_up_to_date_; } -OatFileAssistant::Status OatFileAssistant::GivenOatFileStatus(const OatFile& file) { +OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& file) { // TODO: This could cause GivenOatFileIsOutOfDate to be called twice, which // is more work than we need to do. If performance becomes a concern, and // this method is actually called, this should be fixed. 
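Net effect of the rework above: GetDexOptNeeded() now names the cheapest action that yields up-to-date code, and MakeUpToDate() simply dispatches on it. A hypothetical out-of-runtime caller could follow the same shape (only the enum values below come from OatFileAssistant; the Run* helpers are invented for illustration):

#include <string>
#include "oat_file_assistant.h"  // ART header providing OatFileAssistant.

// Invented helpers, declarations only.
bool RunDex2Oat(std::string* error_msg);         // Full compile of the dex file.
bool RunPatchoatOnOdex(std::string* error_msg);  // Relocate the odex file.
bool RunPatchoatOnOat(std::string* error_msg);   // Relocate the oat file in place.

bool EnsureUpToDate(art::OatFileAssistant& assistant, std::string* error_msg) {
  switch (assistant.GetDexOptNeeded()) {
    case art::OatFileAssistant::kNoDexOptNeeded:     return true;  // Nothing to do.
    case art::OatFileAssistant::kDex2OatNeeded:      return RunDex2Oat(error_msg);
    case art::OatFileAssistant::kPatchOatNeeded:     return RunPatchoatOnOdex(error_msg);
    case art::OatFileAssistant::kSelfPatchOatNeeded: return RunPatchoatOnOat(error_msg);
  }
  return false;  // Unreachable; keeps -Wreturn-type quiet.
}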
if (GivenOatFileIsOutOfDate(file)) { - return kOutOfDate; + return kOatOutOfDate; } if (GivenOatFileIsUpToDate(file)) { - return kUpToDate; + return kOatUpToDate; } - return kNeedsRelocation; + return kOatNeedsRelocation; } bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) { @@ -451,7 +451,7 @@ bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) { } bool OatFileAssistant::GivenOatFileNeedsRelocation(const OatFile& file) { - return GivenOatFileStatus(file) == kNeedsRelocation; + return GivenOatFileStatus(file) == kOatNeedsRelocation; } bool OatFileAssistant::GivenOatFileIsUpToDate(const OatFile& file) { @@ -592,16 +592,17 @@ void OatFileAssistant::CopyProfileFile() { } } -bool OatFileAssistant::RelocateOatFile(std::string* error_msg) { +bool OatFileAssistant::RelocateOatFile(const std::string* input_file, + std::string* error_msg) { CHECK(error_msg != nullptr); - if (OdexFileName() == nullptr) { + if (input_file == nullptr) { *error_msg = "Patching of oat file for dex location " + std::string(dex_location_) - + " not attempted because the odex file name could not be determined."; + + " not attempted because the input file name could not be determined."; return false; } - const std::string& odex_file_name = *OdexFileName(); + const std::string& input_file_name = *input_file; if (OatFileName() == nullptr) { *error_msg = "Patching of oat file for dex location " @@ -628,7 +629,7 @@ bool OatFileAssistant::RelocateOatFile(std::string* error_msg) { std::vector<std::string> argv; argv.push_back(runtime->GetPatchoatExecutable()); argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(isa_))); - argv.push_back("--input-oat-file=" + odex_file_name); + argv.push_back("--input-oat-file=" + input_file_name); argv.push_back("--output-oat-file=" + oat_file_name); argv.push_back("--patched-image-location=" + image_info->location); diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h index f2abcf99d3..9e7c2efc45 100644 --- a/runtime/oat_file_assistant.h +++ b/runtime/oat_file_assistant.h @@ -43,20 +43,43 @@ namespace art { // be restored and tested, or removed. class OatFileAssistant { public: - enum Status { - // kOutOfDate - An oat file is said to be out of date if the file does not - // exist, or is out of date with respect to the dex file or boot image. - kOutOfDate, - - // kNeedsRelocation - An oat file is said to need relocation if the code - // is up to date, but not yet properly relocated for address space layout - // randomization (ASLR). In this case, the oat file is neither "out of - // date" nor "up to date". - kNeedsRelocation, - - // kUpToDate - An oat file is said to be up to date if it is not out of + enum DexOptNeeded { + // kNoDexOptNeeded - The code for this dex location is up to date and can + // be used as is. + // Matches Java: dalvik.system.DexFile.NO_DEXOPT_NEEDED = 0 + kNoDexOptNeeded = 0, + + // kDex2OatNeeded - In order to make the code for this dex location up to + // date, dex2oat must be run on the dex file. + // Matches Java: dalvik.system.DexFile.DEX2OAT_NEEDED = 1 + kDex2OatNeeded = 1, + + // kPatchOatNeeded - In order to make the code for this dex location up to + // date, patchoat must be run on the odex file. + // Matches Java: dalvik.system.DexFile.PATCHOAT_NEEDED = 2 + kPatchOatNeeded = 2, + + // kSelfPatchOatNeeded - In order to make the code for this dex location + // up to date, patchoat must be run on the oat file. 
+ // Matches Java: dalvik.system.DexFile.SELF_PATCHOAT_NEEDED = 3 + kSelfPatchOatNeeded = 3, + }; + + enum OatStatus { + // kOatOutOfDate - An oat file is said to be out of date if the file does + // not exist, or is out of date with respect to the dex file or boot + // image. + kOatOutOfDate, + + // kOatNeedsRelocation - An oat file is said to need relocation if the + // code is up to date, but not yet properly relocated for address space + // layout randomization (ASLR). In this case, the oat file is neither + // "out of date" nor "up to date". + kOatNeedsRelocation, + + // kOatUpToDate - An oat file is said to be up to date if it is not out of // date and has been properly relocated for the purposes of ASLR. - kUpToDate, + kOatUpToDate, }; // Constructs an OatFileAssistant object to assist the oat file @@ -67,7 +90,6 @@ class OatFileAssistant { // Typically the dex_location is the absolute path to the original, // un-optimized dex file. // - // // Note: Currently the dex_location must have an extension. // TODO: Relax this restriction? // @@ -121,8 +143,9 @@ class OatFileAssistant { // file. bool Lock(std::string* error_msg); - // Returns the overall compilation status for the given dex location. - Status GetStatus(); + // Return what action needs to be taken to produce up-to-date code for this + // dex location. + DexOptNeeded GetDexOptNeeded(); // Attempts to generate or relocate the oat file as needed to make it up to // date. @@ -164,7 +187,7 @@ class OatFileAssistant { // determined. const std::string* OdexFileName(); bool OdexFileExists(); - Status OdexFileStatus(); + OatStatus OdexFileStatus(); bool OdexFileIsOutOfDate(); bool OdexFileNeedsRelocation(); bool OdexFileIsUpToDate(); @@ -176,20 +199,18 @@ class OatFileAssistant { // the dex location. // // Notes: - // * To get the overall status of the compiled code for this dex_location, - // use the GetStatus() method, not the OatFileStatus() method. // * OatFileName may return null if the oat file name could not be // determined. const std::string* OatFileName(); bool OatFileExists(); - Status OatFileStatus(); + OatStatus OatFileStatus(); bool OatFileIsOutOfDate(); bool OatFileNeedsRelocation(); bool OatFileIsUpToDate(); // These methods return the status for a given opened oat file with respect // to the dex location. - Status GivenOatFileStatus(const OatFile& file); + OatStatus GivenOatFileStatus(const OatFile& file); bool GivenOatFileIsOutOfDate(const OatFile& file); bool GivenOatFileNeedsRelocation(const OatFile& file); bool GivenOatFileIsUpToDate(const OatFile& file); @@ -216,7 +237,7 @@ class OatFileAssistant { // Copy the current profile to the old profile location. void CopyProfileFile(); - // Generates the oat file by relocation from the odex file. + // Generates the oat file by relocation from the named input file. // This does not check the current status before attempting to relocate the // oat file. // Returns true on success. @@ -224,7 +245,7 @@ class OatFileAssistant { // // If there is a failure, the value of error_msg will be set to a string // describing why there was failure. error_msg must not be nullptr. - bool RelocateOatFile(std::string* error_msg); + bool RelocateOatFile(const std::string* input_file, std::string* error_msg); // Generate the oat file from the dex file. 
// This does not check the current status before attempting to generate the diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc index a1988244c7..d2362a210e 100644 --- a/runtime/oat_file_assistant_test.cc +++ b/runtime/oat_file_assistant_test.cc @@ -29,7 +29,9 @@ #include "common_runtime_test.h" #include "compiler_callbacks.h" #include "mem_map.h" +#include "mirror/art_field-inl.h" #include "os.h" +#include "scoped_thread_state_change.h" #include "thread-inl.h" #include "utils.h" @@ -267,42 +269,42 @@ static void GenerateOatForTest(const char* dex_location) { } // Case: We have a DEX file, but no OAT file for it. -// Expect: The oat file status is kOutOfDate. +// Expect: The status is kDex2OatNeeded. TEST_F(OatFileAssistantTest, DexNoOat) { std::string dex_location = GetScratchDir() + "/DexNoOat.jar"; Copy(GetDexSrc1(), dex_location); OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation()); EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.OdexFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OdexFileStatus()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation()); EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.OatFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OatFileStatus()); } // Case: We have no DEX file and no OAT file. -// Expect: Status is out of date. Loading should fail, but not crash. +// Expect: Status is kDex2OatNeeded. Loading should fail, but not crash. TEST_F(OatFileAssistantTest, NoDexNoOat) { std::string dex_location = GetScratchDir() + "/NoDexNoOat.jar"; OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); EXPECT_EQ(nullptr, oat_file.get()); } // Case: We have a DEX file and up-to-date OAT file for it. -// Expect: The oat file status is kUpToDate. +// Expect: The status is kNoDexOptNeeded. 
TEST_F(OatFileAssistantTest, OatUpToDate) { std::string dex_location = GetScratchDir() + "/OatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); @@ -310,7 +312,7 @@ TEST_F(OatFileAssistantTest, OatUpToDate) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); @@ -319,18 +321,20 @@ TEST_F(OatFileAssistantTest, OatUpToDate) { EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation()); EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate()); - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.OatFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus()); } // Case: We have a MultiDEX file and up-to-date OAT file for it. -// Expect: The oat file status is kUpToDate. +// Expect: The status is kNoDexOptNeeded and we load all dex files. TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) { std::string dex_location = GetScratchDir() + "/MultiDexOatUpToDate.jar"; Copy(GetMultiDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str()); - // Verify we can load both dex files. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); + + // Verify we can load both dex files. std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); ASSERT_TRUE(oat_file.get() != nullptr); EXPECT_TRUE(oat_file->IsExecutable()); @@ -341,7 +345,7 @@ TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) { // Case: We have a MultiDEX file and up-to-date OAT file for it with relative // encoded dex locations. -// Expect: The oat file status is kUpToDate. +// Expect: The oat file status is kNoDexOptNeeded. TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { std::string dex_location = GetScratchDir() + "/RelativeEncodedDexLocation.jar"; std::string oat_location = GetOdexDir() + "/RelativeEncodedDexLocation.oat"; @@ -370,8 +374,8 @@ TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { EXPECT_EQ(2u, dex_files.size()); } -// Case: We have a DEX file and out of date OAT file. -// Expect: The oat file status is kOutOfDate. +// Case: We have a DEX file and out-of-date OAT file. +// Expect: The status is kDex2OatNeeded. TEST_F(OatFileAssistantTest, OatOutOfDate) { std::string dex_location = GetScratchDir() + "/OatOutOfDate.jar"; @@ -382,7 +386,7 @@ TEST_F(OatFileAssistantTest, OatOutOfDate) { Copy(GetDexSrc2(), dex_location); OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); @@ -394,7 +398,7 @@ TEST_F(OatFileAssistantTest, OatOutOfDate) { } // Case: We have a DEX file and an ODEX file, but no OAT file. -// Expect: The oat file status is kNeedsRelocation. +// Expect: The status is kPatchOatNeeded. 
TEST_F(OatFileAssistantTest, DexOdexNoOat) { std::string dex_location = GetScratchDir() + "/DexOdexNoOat.jar"; std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex"; @@ -406,21 +410,20 @@ // Verify the status. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate()); EXPECT_TRUE(oat_file_assistant.OdexFileNeedsRelocation()); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.OdexFileNeedsRelocation()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate()); } // Case: We have a stripped DEX file and an ODEX file, but no OAT file. -// Expect: The oat file status is kNeedsRelocation. +// Expect: The status is kPatchOatNeeded. TEST_F(OatFileAssistantTest, StrippedDexOdexNoOat) { std::string dex_location = GetScratchDir() + "/StrippedDexOdexNoOat.jar"; std::string odex_location = GetOdexDir() + "/StrippedDexOdexNoOat.odex"; @@ -435,7 +438,7 @@ // Verify the status. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -449,7 +452,7 @@ std::string error_msg; ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg; - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -468,8 +471,8 @@ EXPECT_EQ(1u, dex_files.size()); } -// Case: We have a stripped DEX file, an ODEX file, and an out of date OAT file. -// Expect: The oat file status is kNeedsRelocation. +// Case: We have a stripped DEX file, an ODEX file, and an out-of-date OAT file. +// Expect: The status is kPatchOatNeeded. TEST_F(OatFileAssistantTest, StrippedDexOdexOat) { std::string dex_location = GetScratchDir() + "/StrippedDexOdexOat.jar"; std::string odex_location = GetOdexDir() + "/StrippedDexOdexOat.odex"; @@ -488,7 +491,7 @@ // Verify the status.
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -503,7 +506,7 @@ TEST_F(OatFileAssistantTest, StrippedDexOdexOat) { std::string error_msg; ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg; - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -524,9 +527,59 @@ TEST_F(OatFileAssistantTest, StrippedDexOdexOat) { EXPECT_EQ(1u, dex_files.size()); } +// Case: We have a DEX file, no ODEX file and an OAT file that needs +// relocation. +// Expect: The status is kSelfPatchOatNeeded. +TEST_F(OatFileAssistantTest, SelfRelocation) { + std::string dex_location = GetScratchDir() + "/SelfRelocation.jar"; + std::string oat_location = GetOdexDir() + "/SelfRelocation.oat"; + + // Create the dex and odex files + Copy(GetDexSrc1(), dex_location); + GenerateOdexForTest(dex_location, oat_location); + + OatFileAssistant oat_file_assistant(dex_location.c_str(), + oat_location.c_str(), kRuntimeISA, true); + + EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_FALSE(oat_file_assistant.OdexFileExists()); + EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); + EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation()); + EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate()); + EXPECT_TRUE(oat_file_assistant.OatFileExists()); + EXPECT_TRUE(oat_file_assistant.OatFileNeedsRelocation()); + EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate()); + EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate()); + + // Make the oat file up to date. + std::string error_msg; + ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg; + + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_FALSE(oat_file_assistant.OdexFileExists()); + EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); + EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation()); + EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate()); + EXPECT_TRUE(oat_file_assistant.OatFileExists()); + EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate()); + EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation()); + EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate()); + + std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); + ASSERT_TRUE(oat_file.get() != nullptr); + EXPECT_TRUE(oat_file->IsExecutable()); + std::vector<std::unique_ptr<const DexFile>> dex_files; + dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str()); + EXPECT_EQ(1u, dex_files.size()); +} + // Case: We have a DEX file, an ODEX file and an OAT file, where the ODEX and // OAT files both have patch delta of 0. -// Expect: It shouldn't crash. +// Expect: It shouldn't crash, and status is kPatchOatNeeded. 
TEST_F(OatFileAssistantTest, OdexOatOverlap) { std::string dex_location = GetScratchDir() + "/OdexOatOverlap.jar"; std::string odex_location = GetOdexDir() + "/OdexOatOverlap.odex"; @@ -544,7 +597,7 @@ TEST_F(OatFileAssistantTest, OdexOatOverlap) { OatFileAssistant oat_file_assistant(dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -564,7 +617,7 @@ TEST_F(OatFileAssistantTest, OdexOatOverlap) { } // Case: We have a DEX file and a PIC ODEX file, but no OAT file. -// Expect: The oat file status is kUpToDate, because PIC needs no relocation. +// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation. TEST_F(OatFileAssistantTest, DexPicOdexNoOat) { std::string dex_location = GetScratchDir() + "/DexPicOdexNoOat.jar"; std::string odex_location = GetOdexDir() + "/DexPicOdexNoOat.odex"; @@ -576,7 +629,7 @@ TEST_F(OatFileAssistantTest, DexPicOdexNoOat) { // Verify the status. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -661,7 +714,7 @@ TEST_F(OatFileAssistantTest, NonExsistentDexLocation) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); @@ -720,7 +773,7 @@ TEST_F(OatFileAssistantTest, NonAbsoluteDexLocation) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); @@ -737,7 +790,7 @@ TEST_F(OatFileAssistantTest, ShortDexLocation) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); @@ -751,14 +804,14 @@ TEST_F(OatFileAssistantTest, ShortDexLocation) { } // Case: Non-standard extension for dex file. -// Expect: The oat file status is kOutOfDate. +// Expect: The status is kDex2OatNeeded. 
TEST_F(OatFileAssistantTest, LongDexExtension) { std::string dex_location = GetScratchDir() + "/LongDexExtension.jarx"; Copy(GetDexSrc1(), dex_location); OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); @@ -895,6 +948,41 @@ TEST(OatFileAssistantUtilsTest, DexFilenameToOdexFilename) { "/foo/bar/baz_noext", kArm, &odex_file, &error_msg)); } +// Verify the dexopt status values from dalvik.system.DexFile +// match the OatFileAssistant::DexOptNeeded values. +TEST_F(OatFileAssistantTest, DexOptStatusValues) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + ClassLinker* linker = Runtime::Current()->GetClassLinker(); + Handle<mirror::Class> dexfile( + hs.NewHandle(linker->FindSystemClass(soa.Self(), "Ldalvik/system/DexFile;"))); + ASSERT_FALSE(dexfile.Get() == nullptr); + linker->EnsureInitialized(soa.Self(), dexfile, true, true); + + mirror::ArtField* no_dexopt_needed = mirror::Class::FindStaticField( + soa.Self(), dexfile, "NO_DEXOPT_NEEDED", "I"); + ASSERT_FALSE(no_dexopt_needed == nullptr); + EXPECT_EQ(no_dexopt_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, no_dexopt_needed->GetInt(dexfile.Get())); + + mirror::ArtField* dex2oat_needed = mirror::Class::FindStaticField( + soa.Self(), dexfile, "DEX2OAT_NEEDED", "I"); + ASSERT_FALSE(dex2oat_needed == nullptr); + EXPECT_EQ(dex2oat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, dex2oat_needed->GetInt(dexfile.Get())); + + mirror::ArtField* patchoat_needed = mirror::Class::FindStaticField( + soa.Self(), dexfile, "PATCHOAT_NEEDED", "I"); + ASSERT_FALSE(patchoat_needed == nullptr); + EXPECT_EQ(patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, patchoat_needed->GetInt(dexfile.Get())); + + mirror::ArtField* self_patchoat_needed = mirror::Class::FindStaticField( + soa.Self(), dexfile, "SELF_PATCHOAT_NEEDED", "I"); + ASSERT_FALSE(self_patchoat_needed == nullptr); + EXPECT_EQ(self_patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt); + EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, self_patchoat_needed->GetInt(dexfile.Get())); +} // TODO: More Tests: // * Test class linker falls back to unquickened dex for DexNoOat diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java index 3d0f074f94..708f61f028 100644 --- a/test/004-UnsafeTest/src/Main.java +++ b/test/004-UnsafeTest/src/Main.java @@ -104,6 +104,16 @@ public class Main { if (!unsafe.compareAndSwapInt(t, intOffset, 0, 1)) { System.out.println("Unexpectedly not succeeding compareAndSwap..."); } + + if (unsafe.compareAndSwapLong(t, longOffset, 0, 1)) { + System.out.println("Unexpectedly succeeding compareAndSwapLong..."); + } + if (!unsafe.compareAndSwapLong(t, longOffset, longValue, 0)) { + System.out.println("Unexpectedly not succeeding compareAndSwapLong..."); + } + if (!unsafe.compareAndSwapLong(t, longOffset, 0, 1)) { + System.out.println("Unexpectedly not succeeding compareAndSwapLong..."); + } } private static class TestClass { diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java index f0fe934ae9..6a6227cee5 100644 ---
a/test/107-int-math2/src/Main.java +++ b/test/107-int-math2/src/Main.java @@ -379,7 +379,7 @@ class Main extends IntMathBase { */ static int lit16Test(int x) { - int[] results = new int[8]; + int[] results = new int[10]; /* try to generate op-int/lit16" instructions */ results[0] = x + 1000; @@ -390,6 +390,9 @@ class Main extends IntMathBase { results[5] = x & 1000; results[6] = x | -1000; results[7] = x ^ -1000; + /* use a 16-bit constant that has its MSB (bit-15) set */ + results[8] = x / 32769; + results[9] = x / -32769; if (results[0] != 78777) { return 1; } if (results[1] != -76777) { return 2; } @@ -399,6 +402,8 @@ class Main extends IntMathBase { if (results[5] != 960) { return 6; } if (results[6] != -39) { return 7; } if (results[7] != -76855) { return 8; } + if (results[8] != 2) { return 9; } + if (results[9] != -2) { return 10; } return 0; }
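For the two new expected values: results[0] == 78777 implies the test calls lit16Test(77777), and integer division truncates toward zero, so 77777 / 32769 == 2 and 77777 / -32769 == -2. A quick C++ check of the same arithmetic (C++ and Java share the truncation rule for int division):

#include <cassert>

int main() {
  const int x = 77777;       // Implied by results[0] == x + 1000 == 78777.
  assert(x / 32769 == 2);    // 2 * 32769 == 65538 <= 77777 < 3 * 32769 == 98307.
  assert(x / -32769 == -2);  // Truncation toward zero, not flooring.
  return 0;
}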