Merge "ART: Add guards to the dex cache and its shortcuts"
diff --git a/Android.mk b/Android.mk
index 451edf1..15e8308 100644
--- a/Android.mk
+++ b/Android.mk
@@ -67,8 +67,13 @@
 	rm -f $(2ND_TARGET_OUT_INTERMEDIATES)/JAVA_LIBRARIES/*_intermediates/javalib.odex
 	rm -f $(2ND_TARGET_OUT_INTERMEDIATES)/APPS/*_intermediates/*.odex
 endif
+ifneq ($(TMPDIR),)
+	rm -rf $(TMPDIR)/$(USER)/test-*/dalvik-cache/*
+	rm -rf $(TMPDIR)/android-data/dalvik-cache/*
+else
 	rm -rf /tmp/$(USER)/test-*/dalvik-cache/*
 	rm -rf /tmp/android-data/dalvik-cache/*
+endif
 
 .PHONY: clean-oat-target
 clean-oat-target:
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index 250924a..5cf846f 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -38,9 +38,6 @@
                                                          uint32_t access_flags, uint32_t method_idx,
                                                          const art::DexFile& dex_file);
 
-// Hack for CFI CIE initialization
-extern std::vector<uint8_t>* X86CFIInitialization(bool is_x86_64);
-
 void QuickCompiler::Init() const {
   ArtInitQuickCompilerContext(GetCompilerDriver());
 }
@@ -126,17 +123,6 @@
   return mir_to_lir;
 }
 
-std::vector<uint8_t>* QuickCompiler::GetCallFrameInformationInitialization(
-    const CompilerDriver& driver) const {
-  if (driver.GetInstructionSet() == kX86) {
-    return X86CFIInitialization(false);
-  }
-  if (driver.GetInstructionSet() == kX86_64) {
-    return X86CFIInitialization(true);
-  }
-  return nullptr;
-}
-
 CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                             uint32_t access_flags,
                                             InvokeType invoke_type,
diff --git a/compiler/compilers.h b/compiler/compilers.h
index 2c231e1..151bf6f 100644
--- a/compiler/compilers.h
+++ b/compiler/compilers.h
@@ -56,17 +56,6 @@
 
   void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE {}
 
-  /*
-   * @brief Generate and return Dwarf CFI initialization, if supported by the
-   * backend.
-   * @param driver CompilerDriver for this compile.
-   * @returns nullptr if not supported by backend or a vector of bytes for CFI DWARF
-   * information.
-   * @note This is used for backtrace information in generated code.
-   */
-  std::vector<uint8_t>* GetCallFrameInformationInitialization(const CompilerDriver& driver) const
-      OVERRIDE;
-
  private:
   DISALLOW_COPY_AND_ASSIGN(QuickCompiler);
 };
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 2a51b49..9f60427 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -394,6 +394,18 @@
   return nullptr;
 }
 
+/* Return the class literal pool entry matching both type_idx and dex_file, or nullptr. */
+LIR* Mir2Lir::ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx) {
+  for (LIR* entry = data_target; entry != nullptr; entry = entry->next) {
+    // Compare the cheap integer index first; only then unwrap the stored dex file pointer.
+    const bool same_index = (static_cast<uint32_t>(entry->operands[0]) == type_idx);
+    if (same_index && UnwrapPointer(entry->operands[1]) == &dex_file) {
+      return entry;
+    }
+  }
+  return nullptr;
+}
+
 /*
  * The following are building blocks to insert constants into the pool or
  * instruction streams.
@@ -492,10 +504,13 @@
   data_lir = class_literal_list_;
   while (data_lir != NULL) {
     uint32_t target_method_idx = data_lir->operands[0];
+    const DexFile* class_dex_file =
+      reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
     cu_->compiler_driver->AddClassPatch(cu_->dex_file,
                                         cu_->class_def_idx,
                                         cu_->method_idx,
                                         target_method_idx,
+                                        class_dex_file,
                                         code_buffer_.size());
     const DexFile::TypeId& target_method_id = cu_->dex_file->GetTypeId(target_method_idx);
     // unique value based on target to ensure code deduplication works
@@ -1222,12 +1237,14 @@
   DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
 }
 
-void Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
+void Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx,
+                            SpecialTargetRegister symbolic_reg) {
   // Use the literal pool and a PC-relative load from a data word.
-  LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
+  LIR* data_target = ScanLiteralPoolClass(class_literal_list_, dex_file, type_idx);
   if (data_target == nullptr) {
     data_target = AddWordData(&class_literal_list_, type_idx);
   }
+  data_target->operands[1] = WrapPointer(const_cast<DexFile*>(&dex_file));
   // Loads a Class pointer, which is a reference as it lives in the heap.
   LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
   AppendLIR(load_pc_rel);
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 7abf3e7..dbceaff 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -53,7 +53,7 @@
     true,   // kIntrinsicRint
     true,   // kIntrinsicRoundFloat
     true,   // kIntrinsicRoundDouble
-    false,  // kIntrinsicGet
+    false,  // kIntrinsicReferenceGet
     false,  // kIntrinsicCharAt
     false,  // kIntrinsicCompareTo
     false,  // kIntrinsicIsEmptyOrLength
@@ -85,7 +85,7 @@
 COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicRint], Rint_must_be_static);
 COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicRoundFloat], RoundFloat_must_be_static);
 COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicRoundDouble], RoundDouble_must_be_static);
-COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicGet], Get_must_not_be_static);
+COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicReferenceGet], Get_must_not_be_static);
 COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicCharAt], CharAt_must_not_be_static);
 COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicCompareTo], CompareTo_must_not_be_static);
 COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicIsEmptyOrLength], IsEmptyOrLength_must_not_be_static);
@@ -169,7 +169,7 @@
     "floor",                 // kNameCacheFloor
     "rint",                  // kNameCacheRint
     "round",                 // kNameCacheRound
-    "get",                   // kNameCacheGet
+    "get",                   // kNameCacheReferenceGet
     "charAt",                // kNameCacheCharAt
     "compareTo",             // kNameCacheCompareTo
     "isEmpty",               // kNameCacheIsEmpty
@@ -339,7 +339,7 @@
     INTRINSIC(JavaLangMath,       Round, D_J, kIntrinsicRoundDouble, 0),
     INTRINSIC(JavaLangStrictMath, Round, D_J, kIntrinsicRoundDouble, 0),
 
-    INTRINSIC(JavaLangRefReference, Get, _Object, kIntrinsicGet, 0),
+    INTRINSIC(JavaLangRefReference, ReferenceGet, _Object, kIntrinsicReferenceGet, 0),
 
     INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
     INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
@@ -471,8 +471,8 @@
       return backend->GenInlinedRound(info, false /* is_double */);
     case kIntrinsicRoundDouble:
       return backend->GenInlinedRound(info, true /* is_double */);
-    case kIntrinsicGet:
-      return backend->GenInlinedGet(info);
+    case kIntrinsicReferenceGet:
+      return backend->GenInlinedReferenceGet(info);
     case kIntrinsicCharAt:
       return backend->GenInlinedCharAt(info);
     case kIntrinsicCompareTo:
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index 1bd3c48..b875e2b 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -145,7 +145,7 @@
       kNameCacheFloor,
       kNameCacheRint,
       kNameCacheRound,
-      kNameCacheGet,
+      kNameCacheReferenceGet,
       kNameCacheCharAt,
       kNameCacheCompareTo,
       kNameCacheIsEmpty,
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index f6c77fc..3f22913 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -361,7 +361,7 @@
                                    &direct_type_ptr, &is_finalizable)) {
       // The fast path.
       if (!use_direct_type_ptr) {
-        LoadClassType(type_idx, kArg0);
+        LoadClassType(*dex_file, type_idx, kArg0);
         CallRuntimeHelperRegMethodRegLocation(kQuickAllocArrayResolved, TargetReg(kArg0, kNotWide),
                                               rl_src, true);
       } else {
@@ -961,7 +961,7 @@
                                    !is_finalizable) {
       // The fast path.
       if (!use_direct_type_ptr) {
-        LoadClassType(type_idx, kArg0);
+        LoadClassType(*dex_file, type_idx, kArg0);
         if (!is_type_initialized) {
           CallRuntimeHelperRegMethod(kQuickAllocObjectResolved, TargetReg(kArg0, kRef), true);
         } else {
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index e70b0c5..3cfc9a6 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -25,10 +25,8 @@
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
 #include "mirror/object_array-inl.h"
-#include "mirror/reference-inl.h"
 #include "mirror/string.h"
 #include "mir_to_lir-inl.h"
-#include "scoped_thread_state_change.h"
 #include "x86/codegen_x86.h"
 
 namespace art {
@@ -1129,63 +1127,32 @@
   return res;
 }
 
-bool Mir2Lir::GenInlinedGet(CallInfo* info) {
+bool Mir2Lir::GenInlinedReferenceGet(CallInfo* info) {
   if (cu_->instruction_set == kMips) {
     // TODO - add Mips implementation
     return false;
   }
 
-  // the refrence class is stored in the image dex file which might not be the same as the cu's
-  // dex file. Query the reference class for the image dex file then reset to starting dex file
-  // in after loading class type.
-  uint16_t type_idx = 0;
-  const DexFile* ref_dex_file = nullptr;
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    type_idx = mirror::Reference::GetJavaLangRefReference()->GetDexTypeIndex();
-    ref_dex_file = mirror::Reference::GetJavaLangRefReference()->GetDexCache()->GetDexFile();
-  }
-  CHECK(LIKELY(ref_dex_file != nullptr));
-
-  // address is either static within the image file, or needs to be patched up after compilation.
-  bool unused_type_initialized;
   bool use_direct_type_ptr;
   uintptr_t direct_type_ptr;
-  bool is_finalizable;
-  const DexFile* old_dex = cu_->dex_file;
-  cu_->dex_file = ref_dex_file;
+  ClassReference ref;
+  if (!cu_->compiler_driver->CanEmbedReferenceTypeInCode(&ref,
+        &use_direct_type_ptr, &direct_type_ptr)) {
+    return false;
+  }
+
   RegStorage reg_class = TargetReg(kArg1, kRef);
   Clobber(reg_class);
   LockTemp(reg_class);
-  if (!cu_->compiler_driver->CanEmbedTypeInCode(*ref_dex_file, type_idx, &unused_type_initialized,
-                                                &use_direct_type_ptr, &direct_type_ptr,
-                                                &is_finalizable) || is_finalizable) {
-    cu_->dex_file = old_dex;
-    // address is not known and post-compile patch is not possible, cannot insert intrinsic.
-    return false;
-  }
   if (use_direct_type_ptr) {
     LoadConstant(reg_class, direct_type_ptr);
-  } else if (cu_->dex_file == old_dex) {
-    // TODO: Bug 16656190 If cu_->dex_file != old_dex the patching could retrieve the wrong class
-    // since the load class is indexed only by the type_idx. We should include which dex file a
-    // class is from in the LoadClassType LIR.
-    LoadClassType(type_idx, kArg1);
   } else {
-    cu_->dex_file = old_dex;
-    return false;
+    uint16_t type_idx = ref.first->GetClassDef(ref.second).class_idx_;
+    LoadClassType(*ref.first, type_idx, kArg1);
   }
-  cu_->dex_file = old_dex;
 
-  // get the offset for flags in reference class.
-  uint32_t slow_path_flag_offset = 0;
-  uint32_t disable_flag_offset = 0;
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    mirror::Class* reference_class = mirror::Reference::GetJavaLangRefReference();
-    slow_path_flag_offset = reference_class->GetSlowPathFlagOffset().Uint32Value();
-    disable_flag_offset = reference_class->GetDisableIntrinsicFlagOffset().Uint32Value();
-  }
+  uint32_t slow_path_flag_offset = cu_->compiler_driver->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = cu_->compiler_driver->GetReferenceDisableFlagOffset();
   CHECK(slow_path_flag_offset && disable_flag_offset &&
         (slow_path_flag_offset != disable_flag_offset));
 
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 0e6f36b..2221bb5 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -685,6 +685,7 @@
     LIR* ScanLiteralPool(LIR* data_target, int value, unsigned int delta);
     LIR* ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi);
     LIR* ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method);
+    LIR* ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx);
     LIR* AddWordData(LIR* *constant_list_p, int value);
     LIR* AddWideData(LIR* *constant_list_p, int val_lo, int val_hi);
     void ProcessSwitchTables();
@@ -958,7 +959,7 @@
      */
     RegLocation InlineTargetWide(CallInfo* info);
 
-    bool GenInlinedGet(CallInfo* info);
+    bool GenInlinedReferenceGet(CallInfo* info);
     virtual bool GenInlinedCharAt(CallInfo* info);
     bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty);
     virtual bool GenInlinedReverseBits(CallInfo* info, OpSize size);
@@ -1113,11 +1114,13 @@
 
     /*
      * @brief Load the Class* of a Dex Class type into the register.
+     * @param dex_file DexFile that contains the class type.
      * @param type How the method will be invoked.
      * @param register that will contain the code address.
      * @note register will be passed to TargetReg to get physical register.
      */
-    virtual void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
+    virtual void LoadClassType(const DexFile& dex_file, uint32_t type_idx,
+                               SpecialTargetRegister symbolic_reg);
 
     // Routines that work for the generic case, but may be overriden by target.
     /*
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index d74caae..d3ed48d 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -319,11 +319,13 @@
 
   /*
    * @brief Load the Class* of a Dex Class type into the register.
+   * @param dex_file DexFile that contains the class type.
    * @param type How the method will be invoked.
    * @param register that will contain the code address.
    * @note register will be passed to TargetReg to get physical register.
    */
-  void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) OVERRIDE;
+  void LoadClassType(const DexFile& dex_file, uint32_t type_idx,
+                     SpecialTargetRegister symbolic_reg) OVERRIDE;
 
   void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
 
@@ -355,12 +357,6 @@
   void InstallLiteralPools() OVERRIDE;
 
   /*
-   * @brief Generate the debug_frame CFI information.
-   * @returns pointer to vector containing CFE information
-   */
-  static std::vector<uint8_t>* ReturnCommonCallFrameInformation(bool is_x86_64);
-
-  /*
    * @brief Generate the debug_frame FDE information.
    * @returns pointer to vector containing CFE information
    */
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index bd2e0f3..69f3e67 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -971,19 +971,21 @@
   method_address_insns_.Insert(move);
 }
 
-void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
+void X86Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx,
+                               SpecialTargetRegister symbolic_reg) {
   /*
    * For x86, just generate a 32 bit move immediate instruction, that will be filled
    * in at 'link time'.  For now, put a unique value based on target to ensure that
    * code deduplication works.
    */
-  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
+  const DexFile::TypeId& id = dex_file.GetTypeId(type_idx);
   uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
 
   // Generate the move instruction with the unique pointer and save index and type.
   LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI,
                      TargetReg(symbolic_reg, kNotWide).GetReg(),
-                     static_cast<int>(ptr), type_idx);
+                     static_cast<int>(ptr), type_idx,
+                     WrapPointer(const_cast<DexFile*>(&dex_file)));
   AppendLIR(move);
   class_type_address_insns_.Insert(move);
 }
@@ -1068,12 +1070,16 @@
   for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
       LIR* p = class_type_address_insns_.Get(i);
       DCHECK_EQ(p->opcode, kX86Mov32RI);
+
+      const DexFile* class_dex_file =
+        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));
       uint32_t target_method_idx = p->operands[2];
 
       // The offset to patch is the last 4 bytes of the instruction.
       int patch_offset = p->offset + p->flags.size - 4;
       cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
-                                          cu_->method_idx, target_method_idx, patch_offset);
+                                          cu_->method_idx, target_method_idx, class_dex_file,
+                                          patch_offset);
   }
 
   // And now the PC-relative calls to methods.
@@ -1437,11 +1443,6 @@
   }
 }
 
-
-std::vector<uint8_t>* X86CFIInitialization(bool is_x86_64) {
-  return X86Mir2Lir::ReturnCommonCallFrameInformation(is_x86_64);
-}
-
 static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
   uint8_t buffer[12];
   uint8_t *ptr = EncodeUnsignedLeb128(buffer, value);
@@ -1458,84 +1459,6 @@
   }
 }
 
-std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation(bool is_x86_64) {
-  std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>;
-
-  // Length (will be filled in later in this routine).
-  PushWord(*cfi_info, 0);
-
-  // CIE id: always 0.
-  PushWord(*cfi_info, 0);
-
-  // Version: always 1.
-  cfi_info->push_back(0x01);
-
-  // Augmentation: 'zR\0'
-  cfi_info->push_back(0x7a);
-  cfi_info->push_back(0x52);
-  cfi_info->push_back(0x0);
-
-  // Code alignment: 1.
-  EncodeUnsignedLeb128(*cfi_info, 1);
-
-  // Data alignment.
-  if (is_x86_64) {
-    EncodeSignedLeb128(*cfi_info, -8);
-  } else {
-    EncodeSignedLeb128(*cfi_info, -4);
-  }
-
-  // Return address register.
-  if (is_x86_64) {
-    // R16(RIP)
-    cfi_info->push_back(0x10);
-  } else {
-    // R8(EIP)
-    cfi_info->push_back(0x08);
-  }
-
-  // Augmentation length: 1.
-  cfi_info->push_back(1);
-
-  // Augmentation data: 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4).
-  cfi_info->push_back(0x03);
-
-  // Initial instructions.
-  if (is_x86_64) {
-    // DW_CFA_def_cfa R7(RSP) 8.
-    cfi_info->push_back(0x0c);
-    cfi_info->push_back(0x07);
-    cfi_info->push_back(0x08);
-
-    // DW_CFA_offset R16(RIP) 1 (* -8).
-    cfi_info->push_back(0x90);
-    cfi_info->push_back(0x01);
-  } else {
-    // DW_CFA_def_cfa R4(ESP) 4.
-    cfi_info->push_back(0x0c);
-    cfi_info->push_back(0x04);
-    cfi_info->push_back(0x04);
-
-    // DW_CFA_offset R8(EIP) 1 (* -4).
-    cfi_info->push_back(0x88);
-    cfi_info->push_back(0x01);
-  }
-
-  // Padding to a multiple of 4
-  while ((cfi_info->size() & 3) != 0) {
-    // DW_CFA_nop is encoded as 0.
-    cfi_info->push_back(0);
-  }
-
-  // Set the length of the CIE inside the generated bytes.
-  uint32_t length = cfi_info->size() - 4;
-  (*cfi_info)[0] = length;
-  (*cfi_info)[1] = length >> 8;
-  (*cfi_info)[2] = length >> 16;
-  (*cfi_info)[3] = length >> 24;
-  return cfi_info;
-}
-
 static bool ARTRegIDToDWARFRegID(bool is_x86_64, int art_reg_id, int* dwarf_reg_id) {
   if (is_x86_64) {
     switch (art_reg_id) {
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 6b0cc50..1233a0d 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -353,7 +353,6 @@
       compiler_enable_auto_elf_loading_(NULL),
       compiler_get_method_code_addr_(NULL),
       support_boot_image_fixup_(instruction_set != kMips),
-      cfi_info_(nullptr),
       dedupe_code_("dedupe code"),
       dedupe_mapping_table_("dedupe mapping table"),
       dedupe_vmap_table_("dedupe vmap table"),
@@ -376,11 +375,6 @@
     CHECK(image_classes_.get() == nullptr);
   }
 
-  // Are we generating CFI information?
-  if (compiler_options->GetGenerateGDBInformation()) {
-    cfi_info_.reset(compiler_->GetCallFrameInformationInitialization(*this));
-  }
-
   // Read the profile file if one is provided.
   if (!profile_file.empty()) {
     profile_present_ = profile_file_.LoadFile(profile_file);
@@ -597,7 +591,7 @@
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    ResolveDexFile(class_loader, *dex_file, thread_pool, timings);
+    ResolveDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
 }
 
@@ -973,6 +967,43 @@
   }
 }
 
+// Checks whether the java.lang.ref.Reference class can be embedded in code; fills |ref| if so.
+bool CompilerDriver::CanEmbedReferenceTypeInCode(ClassReference* ref, bool* use_direct_ptr,
+                                                 uintptr_t* direct_type_ptr) {
+  CHECK(ref != nullptr);
+  CHECK(use_direct_ptr != nullptr);
+  CHECK(direct_type_ptr != nullptr);
+
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  bool type_initialized;
+  bool finalizable_ignored;
+  // Bail out unless the class is embeddable and has already been initialized.
+  const bool embeddable = CanEmbedTypeInCode(
+      *klass->GetDexCache()->GetDexFile(), klass->GetDexTypeIndex(), &type_initialized,
+      use_direct_ptr, direct_type_ptr, &finalizable_ignored);
+  if (!embeddable || !type_initialized) {
+    return false;
+  }
+  ref->first = &klass->GetDexFile();
+  ref->second = klass->GetDexClassDefIndex();
+  return true;
+}
+
+uint32_t CompilerDriver::GetReferenceSlowFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());  // Held while the Reference class is inspected.
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetSlowPathFlagOffset().Uint32Value();  // Caller CHECKs it is non-zero.
+}
+
+uint32_t CompilerDriver::GetReferenceDisableFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());  // Held while the Reference class is inspected.
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetDisableIntrinsicFlagOffset().Uint32Value();  // Caller CHECKs it is non-zero.
+}
+
 void CompilerDriver::ProcessedInstanceField(bool resolved) {
   if (!resolved) {
     stats_->UnresolvedInstanceField();
@@ -1344,12 +1375,14 @@
                                     uint16_t referrer_class_def_idx,
                                     uint32_t referrer_method_idx,
                                     uint32_t target_type_idx,
+                                    const DexFile* target_type_dex_file,
                                     size_t literal_offset) {
   MutexLock mu(Thread::Current(), compiled_methods_lock_);
   classes_to_patch_.push_back(new TypePatchInformation(dex_file,
                                                        referrer_class_def_idx,
                                                        referrer_method_idx,
                                                        target_type_idx,
+                                                       target_type_dex_file,
                                                        literal_offset));
 }
 
@@ -1361,12 +1394,14 @@
                              jobject class_loader,
                              CompilerDriver* compiler,
                              const DexFile* dex_file,
+                             const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool)
     : index_(0),
       class_linker_(class_linker),
       class_loader_(class_loader),
       compiler_(compiler),
       dex_file_(dex_file),
+      dex_files_(dex_files),
       thread_pool_(thread_pool) {}
 
   ClassLinker* GetClassLinker() const {
@@ -1388,6 +1423,10 @@
     return dex_file_;
   }
 
+  const std::vector<const DexFile*>& GetDexFiles() const {
+    return dex_files_;  // The reference captured at construction; no copy is made.
+  }
+
   void ForAll(size_t begin, size_t end, Callback callback, size_t work_units) {
     Thread* self = Thread::Current();
     self->AssertNoPendingException();
@@ -1445,11 +1484,24 @@
   const jobject class_loader_;
   CompilerDriver* const compiler_;
   const DexFile* const dex_file_;
+  const std::vector<const DexFile*>& dex_files_;
   ThreadPool* const thread_pool_;
 
   DISALLOW_COPY_AND_ASSIGN(ParallelCompilationManager);
 };
 
+static bool SkipClassCheckClassPath(const char* descriptor, const DexFile& dex_file,
+                                    const std::vector<const DexFile*>& classpath) {
+  DexFile::ClassPathEntry pair = DexFile::FindInClassPath(descriptor, classpath);
+  CHECK(pair.second != NULL);
+  const bool defined_elsewhere = (pair.first != &dex_file);  // Found in a different dex file.
+  if (defined_elsewhere) {
+    LOG(WARNING) << "Skipping class " << descriptor << " from " << dex_file.GetLocation()
+                 << " previously found in " << pair.first->GetLocation();
+  }
+  return defined_elsewhere;
+}
+
 // Return true if the class should be skipped during compilation.
 //
 // The first case where we skip is for redundant class definitions in
@@ -1458,20 +1510,23 @@
 // The second case where we skip is when an app bundles classes found
 // in the boot classpath. Since at runtime we will select the class from
 // the boot classpath, we ignore the one from the app.
+//
+// The third case is if the app itself has the class defined in multiple dex files. Then we skip
+// it if it is not the first occurrence.
 static bool SkipClass(ClassLinker* class_linker, jobject class_loader, const DexFile& dex_file,
+                      const std::vector<const DexFile*>& dex_files,
                       const DexFile::ClassDef& class_def) {
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
+
   if (class_loader == NULL) {
-    DexFile::ClassPathEntry pair = DexFile::FindInClassPath(descriptor, class_linker->GetBootClassPath());
-    CHECK(pair.second != NULL);
-    if (pair.first != &dex_file) {
-      LOG(WARNING) << "Skipping class " << descriptor << " from " << dex_file.GetLocation()
-                   << " previously found in " << pair.first->GetLocation();
-      return true;
-    }
-    return false;
+    return SkipClassCheckClassPath(descriptor, dex_file, class_linker->GetBootClassPath());
   }
-  return class_linker->IsInBootClassPath(descriptor);
+
+  if (class_linker->IsInBootClassPath(descriptor)) {
+    return true;
+  }
+
+  return SkipClassCheckClassPath(descriptor, dex_file, dex_files);
 }
 
 // A fast version of SkipClass above if the class pointer is available
@@ -1529,7 +1584,7 @@
   // definitions, since many of them many never be referenced by
   // generated code.
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
-  if (!SkipClass(class_linker, jclass_loader, dex_file, class_def)) {
+  if (!SkipClass(class_linker, jclass_loader, dex_file, manager->GetDexFiles(), class_def)) {
     ScopedObjectAccess soa(self);
     StackHandleScope<2> hs(soa.Self());
     Handle<mirror::ClassLoader> class_loader(
@@ -1636,13 +1691,15 @@
 }
 
 void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+                                    const std::vector<const DexFile*>& dex_files,
                                     ThreadPool* thread_pool, TimingLogger* timings) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   // TODO: we could resolve strings here, although the string table is largely filled with class
   //       and method names.
 
-  ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool);
+  ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
+                                     thread_pool);
   if (IsImage()) {
     // For images we resolve all types, such as array, whereas for applications just those with
     // classdefs are resolved by ResolveClassFieldsAndMethods.
@@ -1659,7 +1716,7 @@
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
-    VerifyDexFile(class_loader, *dex_file, thread_pool, timings);
+    VerifyDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
 }
 
@@ -1711,10 +1768,12 @@
 }
 
 void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+                                   const std::vector<const DexFile*>& dex_files,
                                    ThreadPool* thread_pool, TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool);
+  ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
+                                     thread_pool);
   context.ForAll(0, dex_file.NumClassDefs(), VerifyClass, thread_count_);
 }
 
@@ -1804,10 +1863,12 @@
 }
 
 void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file,
+                                       const std::vector<const DexFile*>& dex_files,
                                        ThreadPool* thread_pool, TimingLogger* timings) {
   TimingLogger::ScopedTiming t("InitializeNoClinit", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, thread_pool);
+  ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
+                                     thread_pool);
   size_t thread_count;
   if (IsImage()) {
     // TODO: remove this when transactional mode supports multithreading.
@@ -1828,7 +1889,7 @@
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
-    InitializeClasses(class_loader, *dex_file, thread_pool, timings);
+    InitializeClasses(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
 }
 
@@ -1837,7 +1898,7 @@
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
-    CompileDexFile(class_loader, *dex_file, thread_pool, timings);
+    CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
 }
 
@@ -1847,7 +1908,7 @@
   const DexFile& dex_file = *manager->GetDexFile();
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
   ClassLinker* class_linker = manager->GetClassLinker();
-  if (SkipClass(class_linker, jclass_loader, dex_file, class_def)) {
+  if (SkipClass(class_linker, jclass_loader, dex_file, manager->GetDexFiles(), class_def)) {
     return;
   }
   ClassReference ref(&dex_file, class_def_index);
@@ -1916,10 +1977,11 @@
 }
 
 void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file,
+                                    const std::vector<const DexFile*>& dex_files,
                                     ThreadPool* thread_pool, TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Compile Dex File", timings);
   ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
-                                     &dex_file, thread_pool);
+                                     &dex_file, dex_files, thread_pool);
   context.ForAll(0, dex_file.NumClassDefs(), CompilerDriver::CompileClass, thread_count_);
 }
 
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 6dae398..2a5cdb9 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -213,6 +213,12 @@
                           bool* is_type_initialized, bool* use_direct_type_ptr,
                           uintptr_t* direct_type_ptr, bool* out_is_finalizable);
 
+  // Query methods for the java.lang.ref.Reference class.
+  bool CanEmbedReferenceTypeInCode(ClassReference* ref,
+                                   bool* use_direct_type_ptr, uintptr_t* direct_type_ptr);
+  uint32_t GetReferenceSlowFlagOffset() const;
+  uint32_t GetReferenceDisableFlagOffset() const;
+
   // Get the DexCache for the
   mirror::DexCache* GetDexCache(const DexCompilationUnit* mUnit)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -356,6 +362,7 @@
                      uint16_t referrer_class_def_idx,
                      uint32_t referrer_method_idx,
                      uint32_t target_method_idx,
+                     const DexFile* target_dex_file,
                      size_t literal_offset)
       LOCKS_EXCLUDED(compiled_methods_lock_);
 
@@ -402,10 +409,6 @@
     return dump_passes_;
   }
 
-  bool DidIncludeDebugSymbols() const {
-    return compiler_options_->GetIncludeDebugSymbols();
-  }
-
   CumulativeLogger* GetTimingsLogger() const {
     return timings_logger_;
   }
@@ -549,6 +552,10 @@
 
   class TypePatchInformation : public PatchInformation {
    public:
+    // Dex file that GetTargetTypeIdx() indexes into; ElfPatcher resolves the type against
+    // this file rather than the one returned by GetDexFile().
+    const DexFile& GetTargetTypeDexFile() const { return *target_type_dex_file_; }
+
     uint32_t GetTargetTypeIdx() const {
       return target_type_idx_;
     }
@@ -565,13 +572,15 @@
                          uint16_t referrer_class_def_idx,
                          uint32_t referrer_method_idx,
                          uint32_t target_type_idx,
+                         const DexFile* target_type_dex_file,
                          size_t literal_offset)
         : PatchInformation(dex_file, referrer_class_def_idx,
                            referrer_method_idx, literal_offset),
-          target_type_idx_(target_type_idx) {
+          target_type_idx_(target_type_idx), target_type_dex_file_(target_type_dex_file) {
     }
 
     const uint32_t target_type_idx_;
+    const DexFile* target_type_dex_file_;
 
     friend class CompilerDriver;
     DISALLOW_COPY_AND_ASSIGN(TypePatchInformation);
@@ -599,14 +608,6 @@
   std::vector<uint8_t>* DeduplicateGCMap(const std::vector<uint8_t>& code);
   std::vector<uint8_t>* DeduplicateCFIInfo(const std::vector<uint8_t>* cfi_info);
 
-  /*
-   * @brief return the pointer to the Call Frame Information.
-   * @return pointer to call frame information for this compilation.
-   */
-  std::vector<uint8_t>* GetCallFrameInformation() const {
-    return cfi_info_.get();
-  }
-
   ProfileFile profile_file_;
   bool profile_present_;
 
@@ -658,12 +659,14 @@
                ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+                      const std::vector<const DexFile*>& dex_files,
                       ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
               ThreadPool* thread_pool, TimingLogger* timings);
   void VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+                     const std::vector<const DexFile*>& dex_files,
                      ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
@@ -671,6 +674,7 @@
                          ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void InitializeClasses(jobject class_loader, const DexFile& dex_file,
+                         const std::vector<const DexFile*>& dex_files,
                          ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_, compiled_classes_lock_);
 
@@ -681,6 +685,7 @@
   void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                ThreadPool* thread_pool, TimingLogger* timings);
   void CompileDexFile(jobject class_loader, const DexFile& dex_file,
+                      const std::vector<const DexFile*>& dex_files,
                       ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void CompileMethod(const DexFile::CodeItem* code_item, uint32_t access_flags,
@@ -766,9 +771,6 @@
 
   bool support_boot_image_fixup_;
 
-  // Call Frame Information, which might be generated to help stack tracebacks.
-  std::unique_ptr<std::vector<uint8_t>> cfi_info_;
-
   // DeDuplication data structures, these own the corresponding byte arrays.
   class DedupeHashFunc {
    public:
diff --git a/compiler/elf_patcher.cc b/compiler/elf_patcher.cc
index 137110f..9ae755d 100644
--- a/compiler/elf_patcher.cc
+++ b/compiler/elf_patcher.cc
@@ -99,11 +99,13 @@
 mirror::Class* ElfPatcher::GetTargetType(const CompilerDriver::TypePatchInformation* patch) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   StackHandleScope<2> hs(Thread::Current());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(patch->GetDexFile())));
-  mirror::Class* klass = class_linker->ResolveType(patch->GetDexFile(), patch->GetTargetTypeIdx(),
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
+          patch->GetTargetTypeDexFile())));
+  mirror::Class* klass = class_linker->ResolveType(patch->GetTargetTypeDexFile(),
+                                                   patch->GetTargetTypeIdx(),
                                                    dex_cache, NullHandle<mirror::ClassLoader>());
   CHECK(klass != NULL)
-    << patch->GetDexFile().GetLocation() << " " << patch->GetTargetTypeIdx();
+    << patch->GetTargetTypeDexFile().GetLocation() << " " << patch->GetTargetTypeIdx();
   CHECK(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx()) == klass)
     << patch->GetDexFile().GetLocation() << " " << patch->GetReferrerMethodIdx() << " "
     << PrettyClass(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx())) << " "
@@ -152,7 +154,7 @@
     }
     if (patch->IsType()) {
       const CompilerDriver::TypePatchInformation* tpatch = patch->AsType();
-      const DexFile::TypeId& id = tpatch->GetDexFile().GetTypeId(tpatch->GetTargetTypeIdx());
+      const DexFile::TypeId& id = tpatch->GetTargetTypeDexFile().GetTypeId(tpatch->GetTargetTypeIdx());
       uint32_t expected = reinterpret_cast<uintptr_t>(&id) & 0xFFFFFFFF;
       uint32_t actual = *patch_location;
       CHECK(actual == expected || actual == value) << "Patching type failed: " << std::hex
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 1fde12e..71f02d3 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -24,6 +24,7 @@
 #include "elf_utils.h"
 #include "file_output_stream.h"
 #include "globals.h"
+#include "leb128.h"
 #include "oat.h"
 #include "oat_writer.h"
 #include "utils.h"
@@ -38,6 +39,25 @@
   return ((binding) << 4) + ((type) & 0xf);
 }
 
+// Stores data little-endian into the four bytes starting at (*buf)[offset].
+static void UpdateWord(std::vector<uint8_t>* buf, int offset, int data) {
+  for (int byte = 0; byte < 4; ++byte) {
+    (*buf)[offset + byte] = data >> (8 * byte);
+  }
+}
+
+// Appends data to buf as four bytes, least significant byte first.
+static void PushWord(std::vector<uint8_t>* buf, int data) {
+  for (int shift = 0; shift < 32; shift += 8) {
+    buf->push_back((data >> shift) & 0xff);
+  }
+}
+
+static void PushHalf(std::vector<uint8_t>* buf, int data) {
+  buf->push_back(static_cast<uint8_t>(data));       // Low byte first (little-endian).
+  buf->push_back(static_cast<uint8_t>(data >> 8));  // Then bits 8..15.
+}
+
 bool ElfWriterQuick::ElfBuilder::Write() {
   // The basic layout of the elf file. Order may be different in final output.
   // +-------------------------+
@@ -822,37 +842,131 @@
   }
 }
 
+// Appends the unsigned LEB128 encoding of data to the end of dst.
+static void EncodeUnsignedLeb128(uint32_t data, std::vector<uint8_t>* dst) {
+  const size_t old_size = dst->size();
+  dst->resize(old_size + UnsignedLeb128Size(data));
+  uint8_t* const begin = &((*dst)[old_size]);
+  uint8_t* const end = EncodeUnsignedLeb128(begin, data);
+  DCHECK_EQ(static_cast<size_t>(end - begin), dst->size() - old_size);
+}
+
+// Appends the signed LEB128 encoding of data to the end of dst.
+static void EncodeSignedLeb128(int32_t data, std::vector<uint8_t>* dst) {
+  const size_t old_size = dst->size();
+  dst->resize(old_size + SignedLeb128Size(data));
+  uint8_t* const begin = &((*dst)[old_size]);
+  uint8_t* const end = EncodeSignedLeb128(begin, data);
+  DCHECK_EQ(static_cast<size_t>(end - begin), dst->size() - old_size);
+}
+
+// Builds the CIE that heads .eh_frame for x86 / x86-64; the caller owns the result.
+std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) {
+  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;
+  // Length (placeholder; back-patched at the end of this routine).
+  PushWord(cfi_info, 0);
+
+  // CIE id: always 0.
+  PushWord(cfi_info, 0);
+
+  // Version: always 1.
+  cfi_info->push_back(0x01);
+
+  // Augmentation: 'zR\0'
+  cfi_info->push_back(0x7a);
+  cfi_info->push_back(0x52);
+  cfi_info->push_back(0x0);
+
+  // Code alignment: 1.
+  EncodeUnsignedLeb128(1, cfi_info);
+
+  // Data alignment.
+  if (is_x86_64) {
+    EncodeSignedLeb128(-8, cfi_info);
+  } else {
+    EncodeSignedLeb128(-4, cfi_info);
+  }
+
+  // Return address register.
+  if (is_x86_64) {
+    // R16(RIP)
+    cfi_info->push_back(0x10);
+  } else {
+    // R8(EIP)
+    cfi_info->push_back(0x08);
+  }
+
+  // Augmentation length: 1.
+  cfi_info->push_back(1);
+
+  // Augmentation data: 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4).
+  cfi_info->push_back(0x03);
+
+  // Initial instructions.
+  if (is_x86_64) {
+    // DW_CFA_def_cfa R7(RSP) 8.
+    cfi_info->push_back(0x0c);
+    cfi_info->push_back(0x07);
+    cfi_info->push_back(0x08);
+
+    // DW_CFA_offset R16(RIP) 1 (* -8).
+    cfi_info->push_back(0x90);
+    cfi_info->push_back(0x01);
+  } else {
+    // DW_CFA_def_cfa R4(ESP) 4.
+    cfi_info->push_back(0x0c);
+    cfi_info->push_back(0x04);
+    cfi_info->push_back(0x04);
+
+    // DW_CFA_offset R8(EIP) 1 (* -4).
+    cfi_info->push_back(0x88);
+    cfi_info->push_back(0x01);
+  }
+
+  // Padding to a multiple of 4
+  while ((cfi_info->size() & 3) != 0) {
+    // DW_CFA_nop is encoded as 0.
+    cfi_info->push_back(0);
+  }
+
+  // Back-patch the length placeholder pushed at the start of this routine. The length
+  // field does not count its own four bytes, hence the "- 4". The file-local UpdateWord
+  // helper performs the little-endian store instead of four hand-written byte
+  // assignments.
+  const uint32_t length = cfi_info->size() - 4;
+  UpdateWord(cfi_info, 0, length);
+  return cfi_info;
+}
+
+// Returns a newly allocated CIE for the given instruction set, or nullptr when CFI
+// generation is not implemented for it. Only x86 and x86-64 are supported.
+std::vector<uint8_t>* ConstructCIEFrame(InstructionSet isa) {
+  if (isa == kX86) {
+    return ConstructCIEFrameX86(false);
+  }
+  if (isa == kX86_64) {
+    return ConstructCIEFrameX86(true);
+  }
+  // Not implemented.
+  return nullptr;
+}
+
 bool ElfWriterQuick::Write(OatWriter* oat_writer,
                            const std::vector<const DexFile*>& dex_files_unused,
                            const std::string& android_root_unused,
                            bool is_host_unused) {
-  const bool debug = false;
-  const bool add_symbols = oat_writer->DidAddSymbols();
+  constexpr bool debug = false;
   const OatHeader& oat_header = oat_writer->GetOatHeader();
   Elf32_Word oat_data_size = oat_header.GetExecutableOffset();
   uint32_t oat_exec_size = oat_writer->GetSize() - oat_data_size;
 
   ElfBuilder builder(oat_writer, elf_file_, compiler_driver_->GetInstructionSet(), 0,
-                     oat_data_size, oat_data_size, oat_exec_size, add_symbols, debug);
+                     oat_data_size, oat_data_size, oat_exec_size,
+                     compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols(),
+                     debug);
 
-  if (add_symbols) {
-    AddDebugSymbols(builder, oat_writer, debug);
-  }
-
-  bool generateDebugInformation = compiler_driver_->GetCallFrameInformation() != nullptr;
-  if (generateDebugInformation) {
-    ElfRawSectionBuilder debug_info(".debug_info",   SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    ElfRawSectionBuilder debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    ElfRawSectionBuilder debug_str(".debug_str",    SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    ElfRawSectionBuilder eh_frame(".eh_frame",  SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0);
-    eh_frame.SetBuffer(*compiler_driver_->GetCallFrameInformation());
-
-    FillInCFIInformation(oat_writer, debug_info.GetBuffer(),
-                         debug_abbrev.GetBuffer(), debug_str.GetBuffer());
-    builder.RegisterRawSection(debug_info);
-    builder.RegisterRawSection(debug_abbrev);
-    builder.RegisterRawSection(eh_frame);
-    builder.RegisterRawSection(debug_str);
+  if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
+    WriteDebugSymbols(builder, oat_writer);
   }
 
   if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) {
@@ -865,32 +979,62 @@
   return builder.Write();
 }
 
-void ElfWriterQuick::AddDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer, bool debug) {
+// Adds an ELF symbol for every compiled method recorded by the OatWriter. If a CIE can be
+// built for the target instruction set, also appends each method's FDE (when it has one)
+// after the CIE and registers the .debug_info, .debug_abbrev, .eh_frame and .debug_str
+// raw sections with the builder.
+void ElfWriterQuick::WriteDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer) {
+  std::unique_ptr<std::vector<uint8_t>> cfi_info(
+      ConstructCIEFrame(compiler_driver_->GetInstructionSet()));
+
+  // Iterate over the compiled methods.
   const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo();
   ElfSymtabBuilder* symtab = &builder.symtab_builder_;
   for (auto it = method_info.begin(); it != method_info.end(); ++it) {
     symtab->AddSymbol(it->method_name_, &builder.text_builder_, it->low_pc_, true,
                       it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
+
+    // Include CFI for compiled method, if possible.
+    if (cfi_info.get() != nullptr) {
+      DCHECK(it->compiled_method_ != nullptr);
+
+      // Copy in the FDE, if present
+      const std::vector<uint8_t>* fde = it->compiled_method_->GetCFIInfo();
+      if (fde != nullptr) {
+        // Copy the information into cfi_info and then fix the address in the new copy.
+        int cur_offset = cfi_info->size();
+        cfi_info->insert(cfi_info->end(), fde->begin(), fde->end());
+
+        // Set the 'CIE_pointer' field to cur_offset+4. The field is the second word of
+        // the copied FDE, and the CIE it points back to starts at offset 0 of cfi_info.
+        const uint32_t CIE_pointer = cur_offset + 4;
+        uint32_t offset_to_update = cur_offset + sizeof(uint32_t);
+        UpdateWord(cfi_info.get(), offset_to_update, CIE_pointer);
+
+        // Set the 'initial_location' field (the third word of the FDE) to address the
+        // start of the method.
+        offset_to_update = cur_offset + 2*sizeof(uint32_t);
+        const uint32_t quick_code_start = it->low_pc_;
+        UpdateWord(cfi_info.get(), offset_to_update, quick_code_start);
+      }
+    }
   }
-}
 
-static void UpdateWord(std::vector<uint8_t>*buf, int offset, int data) {
-  (*buf)[offset+0] = data;
-  (*buf)[offset+1] = data >> 8;
-  (*buf)[offset+2] = data >> 16;
-  (*buf)[offset+3] = data >> 24;
-}
+  if (cfi_info.get() != nullptr) {
+    // Now lay down the Elf sections.
+    ElfRawSectionBuilder debug_info(".debug_info",   SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    ElfRawSectionBuilder debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    ElfRawSectionBuilder debug_str(".debug_str",    SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    ElfRawSectionBuilder eh_frame(".eh_frame",  SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0);
+    eh_frame.SetBuffer(std::move(*cfi_info.get()));
 
-static void PushWord(std::vector<uint8_t>*buf, int data) {
-  buf->push_back(data & 0xff);
-  buf->push_back((data >> 8) & 0xff);
-  buf->push_back((data >> 16) & 0xff);
-  buf->push_back((data >> 24) & 0xff);
-}
-
-static void PushHalf(std::vector<uint8_t>*buf, int data) {
-  buf->push_back(data & 0xff);
-  buf->push_back((data >> 8) & 0xff);
+    FillInCFIInformation(oat_writer, debug_info.GetBuffer(), debug_abbrev.GetBuffer(),
+                         debug_str.GetBuffer());
+    builder.RegisterRawSection(debug_info);
+    builder.RegisterRawSection(debug_abbrev);
+    builder.RegisterRawSection(eh_frame);
+    builder.RegisterRawSection(debug_str);
+  }
 }
 
 void ElfWriterQuick::FillInCFIInformation(OatWriter* oat_writer,
diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h
index a0d36df..8cfe550 100644
--- a/compiler/elf_writer_quick.h
+++ b/compiler/elf_writer_quick.h
@@ -48,9 +48,7 @@
   ~ElfWriterQuick() {}
 
   class ElfBuilder;
-  void AddDebugSymbols(ElfBuilder& builder,
-                       OatWriter* oat_writer,
-                       bool debug);
+  void WriteDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer);
   void ReservePatchSpace(std::vector<uint8_t>* buffer, bool debug);
 
   class ElfSectionBuilder {
@@ -126,7 +124,7 @@
         : ElfSectionBuilder(sec_name, type, flags, link, info, align, entsize) {}
     ~ElfRawSectionBuilder() {}
     std::vector<uint8_t>* GetBuffer() { return &buf_; }
-    void SetBuffer(std::vector<uint8_t> buf) { buf_ = buf; }
+    void SetBuffer(std::vector<uint8_t>&& buf) { buf_ = std::move(buf); }  // Moves, no copy.
 
    protected:
     std::vector<uint8_t> buf_;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 9da59ab..1ba5d32 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -357,7 +357,6 @@
         uint32_t thumb_offset = compiled_method->CodeDelta();
         quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset;
 
-        bool force_debug_capture = false;
         bool deduped = false;
 
         // Deduplicate code arrays.
@@ -400,47 +399,22 @@
           offset_ += code_size;
         }
 
-        uint32_t quick_code_start = quick_code_offset - writer_->oat_header_->GetExecutableOffset();
-        std::vector<uint8_t>* cfi_info = writer_->compiler_driver_->GetCallFrameInformation();
-        if (cfi_info != nullptr) {
-          // Copy in the FDE, if present
-          const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo();
-          if (fde != nullptr) {
-            // Copy the information into cfi_info and then fix the address in the new copy.
-            int cur_offset = cfi_info->size();
-            cfi_info->insert(cfi_info->end(), fde->begin(), fde->end());
+        if (writer_->compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
+          // Record debug information for this function if we are doing that.
 
-            // Set the 'CIE_pointer' field to cur_offset+4.
-            uint32_t CIE_pointer = cur_offset + 4;
-            uint32_t offset_to_update = cur_offset + sizeof(uint32_t);
-            (*cfi_info)[offset_to_update+0] = CIE_pointer;
-            (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8;
-            (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16;
-            (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24;
-
-            // Set the 'initial_location' field to address the start of the method.
-            offset_to_update = cur_offset + 2*sizeof(uint32_t);
-            (*cfi_info)[offset_to_update+0] = quick_code_start;
-            (*cfi_info)[offset_to_update+1] = quick_code_start >> 8;
-            (*cfi_info)[offset_to_update+2] = quick_code_start >> 16;
-            (*cfi_info)[offset_to_update+3] = quick_code_start >> 24;
-            force_debug_capture = true;
-          }
-        }
-
-
-        if (writer_->compiler_driver_->DidIncludeDebugSymbols() || force_debug_capture) {
-          // Record debug information for this function if we are doing that or
-          // we have CFI and so need it.
           std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
           if (deduped) {
-            // TODO We should place the DEDUPED tag on the first instance of a
-            // deduplicated symbol so that it will show up in a debuggerd crash
-            // report.
+            // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol
+            // so that it will show up in a debuggerd crash report.
             name += " [ DEDUPED ]";
           }
-          writer_->method_info_.push_back(DebugInfo(name, quick_code_start,
-                                                    quick_code_start + code_size));
+
+          const uint32_t quick_code_start = quick_code_offset -
+              writer_->oat_header_->GetExecutableOffset();
+          writer_->method_info_.push_back(DebugInfo(name,
+                                                    quick_code_start,
+                                                    quick_code_start + code_size,
+                                                    compiled_method));
         }
       }
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 945048e..ef5fd6b 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -30,6 +30,7 @@
 namespace art {
 
 class BitVector;
+class CompiledMethod;
 class OutputStream;
 
 // OatHeader         variable length with count of D OatDexFiles
@@ -97,22 +98,21 @@
   ~OatWriter();
 
   struct DebugInfo {
-    DebugInfo(const std::string& method_name, uint32_t low_pc, uint32_t high_pc)
-      : method_name_(method_name), low_pc_(low_pc), high_pc_(high_pc) {
+    DebugInfo(const std::string& method_name, uint32_t low_pc, uint32_t high_pc,
+              CompiledMethod* compiled_method)
+      : method_name_(method_name), low_pc_(low_pc), high_pc_(high_pc),
+        compiled_method_(compiled_method) {
     }
-    std::string method_name_;
+    std::string method_name_;  // Note: this name is a pretty-printed name.
     uint32_t    low_pc_;
     uint32_t    high_pc_;
+    CompiledMethod* compiled_method_;  // Queried for the method's FDE via GetCFIInfo().
   };
 
   const std::vector<DebugInfo>& GetCFIMethodInfo() const {
     return method_info_;
   }
 
-  bool DidAddSymbols() const {
-    return compiler_driver_->DidIncludeDebugSymbols();
-  }
-
  private:
   // The DataAccess classes are helper classes that provide access to members related to
   // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 6cf5619..9972362 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -137,7 +137,17 @@
   }
 
   // On target, Cannot use /mnt/sdcard because it is mounted noexec, so use subdir of dalvik-cache
-  android_data = (IsHost() ? "/tmp/art-data-XXXXXX" : "/data/dalvik-cache/art-data-XXXXXX");
+  if (IsHost()) {
+    const char* tmpdir = getenv("TMPDIR");
+    if (tmpdir != nullptr && tmpdir[0] != 0) {
+      android_data = tmpdir;
+    } else {
+      android_data = "/tmp";
+    }
+  } else {
+    android_data = "/data/dalvik-cache";
+  }
+  android_data += "/art-data-XXXXXX";
   if (mkdtemp(&android_data[0]) == nullptr) {
     PLOG(FATAL) << "mkdtemp(\"" << &android_data[0] << "\") failed";
   }
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index af71c19..b874a74 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -37,6 +37,7 @@
 
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <const bool kAccessCheck>
+ALWAYS_INLINE
 static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
                                               mirror::ArtMethod* method,
                                               Thread* self, bool* slow_path) {
@@ -86,6 +87,7 @@
 }
 
 // TODO: Fix no thread safety analysis when annotalysis is smarter.
+ALWAYS_INLINE
 static inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
                                                                  Thread* self,
                                                                  bool* slow_path) {
@@ -116,6 +118,7 @@
 // check.
 // TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
 template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
                                                   mirror::ArtMethod* method,
                                                   Thread* self,
@@ -135,6 +138,7 @@
 // Given the context of a calling Method and a resolved class, create an instance.
 // TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
 template <bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
                                                           mirror::ArtMethod* method,
                                                           Thread* self,
@@ -157,6 +161,7 @@
 // Given the context of a calling Method and an initialized class, create an instance.
 // TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
 template <bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass,
                                                              mirror::ArtMethod* method,
                                                              Thread* self,
@@ -169,6 +174,7 @@
 
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <bool kAccessCheck>
+ALWAYS_INLINE
 static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
                                              mirror::ArtMethod* method,
                                              int32_t component_count,
@@ -205,6 +211,7 @@
 // check.
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
                                                 mirror::ArtMethod* method,
                                                 int32_t component_count,
@@ -227,6 +234,7 @@
 }
 
 template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass,
                                                         mirror::ArtMethod* method,
                                                         int32_t component_count,
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 1f2713a..7d4da18 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -25,11 +25,34 @@
 
 namespace art {
 
+static constexpr bool kUseTlabFastPath = true;
+
 #define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, suffix2, instrumented_bool, allocator_type) \
 extern "C" mirror::Object* artAllocObjectFromCode ##suffix##suffix2( \
     uint32_t type_idx, mirror::ArtMethod* method, Thread* self, \
     StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+    mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx); \
+    if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \
+      size_t byte_count = klass->GetObjectSize(); \
+      byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+      mirror::Object* obj; \
+      if (LIKELY(byte_count < self->TlabSize())) { \
+        obj = self->AllocTlab(byte_count); \
+        DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+        obj->SetClass(klass); \
+        if (kUseBakerOrBrooksReadBarrier) { \
+          if (kUseBrooksReadBarrier) { \
+            obj->SetReadBarrierPointer(obj); \
+          } \
+          obj->AssertReadBarrierPointer(); \
+        } \
+        QuasiAtomic::ThreadFenceForConstructor(); \
+        return obj; \
+      } \
+    } \
+  } \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCode<false, instrumented_bool>(type_idx, method, self, allocator_type); \
 } \
@@ -37,6 +60,26 @@
     mirror::Class* klass, mirror::ArtMethod* method, Thread* self, \
     StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+    if (LIKELY(klass->IsInitialized())) { \
+      size_t byte_count = klass->GetObjectSize(); \
+      byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+      mirror::Object* obj; \
+      if (LIKELY(byte_count < self->TlabSize())) { \
+        obj = self->AllocTlab(byte_count); \
+        DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+        obj->SetClass(klass); \
+        if (kUseBakerOrBrooksReadBarrier) { \
+          if (kUseBrooksReadBarrier) { \
+            obj->SetReadBarrierPointer(obj); \
+          } \
+          obj->AssertReadBarrierPointer(); \
+        } \
+        QuasiAtomic::ThreadFenceForConstructor(); \
+        return obj; \
+      } \
+    } \
+  } \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCodeResolved<instrumented_bool>(klass, method, self, allocator_type); \
 } \
@@ -44,6 +87,24 @@
     mirror::Class* klass, mirror::ArtMethod* method, Thread* self, \
     StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+    size_t byte_count = klass->GetObjectSize(); \
+    byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+    mirror::Object* obj; \
+    if (LIKELY(byte_count < self->TlabSize())) { \
+      obj = self->AllocTlab(byte_count); \
+      DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+      obj->SetClass(klass); \
+      if (kUseBakerOrBrooksReadBarrier) { \
+        if (kUseBrooksReadBarrier) { \
+          obj->SetReadBarrierPointer(obj); \
+        } \
+        obj->AssertReadBarrierPointer(); \
+      } \
+      QuasiAtomic::ThreadFenceForConstructor(); \
+      return obj; \
+    } \
+  } \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCodeInitialized<instrumented_bool>(klass, method, self, allocator_type); \
 } \
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 8ffadd5..a82392a 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -662,7 +662,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template <bool kGrow>
-  bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
+  ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
 
   // Returns true if the address passed in is within the address range of a continuous space.
   bool IsValidContinuousSpaceObjectAddress(const mirror::Object* obj) const
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 23b9aed..c4d51cb 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -53,7 +53,7 @@
   kIntrinsicRint,
   kIntrinsicRoundFloat,
   kIntrinsicRoundDouble,
-  kIntrinsicGet,
+  kIntrinsicReferenceGet,
   kIntrinsicCharAt,
   kIntrinsicCompareTo,
   kIntrinsicIsEmptyOrLength,
diff --git a/test/run-test b/test/run-test
index aef7c52..ca7e68c 100755
--- a/test/run-test
+++ b/test/run-test
@@ -33,7 +33,11 @@
 progdir=`pwd`
 prog="${progdir}"/`basename "${prog}"`
 test_dir="test-$$"
-tmp_dir="/tmp/$USER/${test_dir}"
+if [ -z "$TMPDIR" ]; then
+  tmp_dir="/tmp/$USER/${test_dir}"
+else
+  tmp_dir="${TMPDIR}/$USER/${test_dir}"
+fi
 
 export JAVA="java"
 export JAVAC="javac -g"