Merge "Add mark compact collector."
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 5466abd..590c767 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -190,7 +190,7 @@
       null_check_branch = nullptr;  // No null check.
     } else {
      // If the null-check fails it's handled by the slow-path to reduce exception-related meta-data.
-      if (Runtime::Current()->ExplicitNullChecks()) {
+      if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
@@ -261,7 +261,7 @@
       null_check_branch = nullptr;  // No null check.
     } else {
      // If the null-check fails it's handled by the slow-path to reduce exception-related meta-data.
-      if (Runtime::Current()->ExplicitNullChecks()) {
+      if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
@@ -356,13 +356,13 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-                            (static_cast<size_t>(frame_size_) <
-                            Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm);
   NewLIR0(kPseudoMethodEntry);
-  bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+  constexpr size_t kStackOverflowReservedUsableBytes = kArmStackOverflowReservedBytes -
+      Thread::kStackOverflowSignalReservedBytes;
+  bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes);
   if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       if (!large_frame) {
         /* Load stack limit */
         LockTemp(rs_r12);
@@ -381,7 +381,7 @@
       // This is done before the callee save instructions to avoid any possibility
       // of these overflowing.  This uses r12 and that's never saved in a callee
       // save.
-      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, kArmStackOverflowReservedBytes);
       Load32Disp(rs_r12, 0, rs_r12);
       MarkPossibleStackOverflowException();
     }
@@ -401,7 +401,7 @@
   const int spill_size = spill_count * 4;
   const int frame_size_without_spills = frame_size_ - spill_size;
   if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       class StackOverflowSlowPath : public LIRSlowPath {
        public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index c1ce03d..3f32c51 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -219,7 +219,8 @@
   kA64First = 0,
   kA64Adc3rrr = kA64First,  // adc [00011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
   kA64Add4RRdT,      // add [s001000100] imm_12[21-10] rn[9-5] rd[4-0].
-  kA64Add4rrro,      // add [00001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Add4rrro,      // add [00001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Add4RRre,      // add [00001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
   kA64Adr2xd,        // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0].
   kA64And3Rrl,       // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
   kA64And4rrro,      // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
@@ -328,7 +329,8 @@
   kA64Stxr3wrX,      // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0].
   kA64Stlxr3wrX,     // stlxr[11001000000] rs[20-16] [111111] rn[9-5] rt[4-0].
   kA64Sub4RRdT,      // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0].
-  kA64Sub4rrro,      // sub [s1001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Sub4rrro,      // sub [s1001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+  kA64Sub4RRre,      // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
   kA64Subs3rRd,      // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
   kA64Tst3rro,       // tst alias of "ands rzr, arg1, arg2, arg3".
   kA64Ubfm4rrdd,     // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index c5bd005..2a8da24 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -115,6 +115,10 @@
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE1,
                  "add", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Add4RRre), SF_VARIANTS(0x0b200000),
+                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16,
+                 kFmtExtend, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "add", "!0r, !1r, !2r!3e", kFixupNone),
     // Note: adr is binary, but declared as tertiary. The third argument is used while doing the
     //   fixups and contains information to identify the adr label.
     ENCODING_MAP(kA64Adr2xd, NO_VARIANTS(0x10000000),
@@ -558,6 +562,10 @@
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
                  "sub", "!0r, !1r, !2r!3o", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Sub4RRre), SF_VARIANTS(0x4b200000),
+                 kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16,
+                 kFmtExtend, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "sub", "!0r, !1r, !2r!3e", kFixupNone),
     ENCODING_MAP(WIDE(kA64Subs3rRd), SF_VARIANTS(0x71000000),
                  kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index f1748ef..1df576b 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -95,8 +95,7 @@
   tab_rec->anchor = switch_label;
 
   // Add displacement to base branch address and go!
-  // TODO(Arm64): generate "add x1, x1, w3, sxtw" rather than "add x1, x1, x3"?
-  OpRegRegRegShift(kOpAdd, r_base, r_base, As64BitReg(r_disp), ENCODE_NO_SHIFT);
+  OpRegRegRegExtend(kOpAdd, r_base, r_base, As64BitReg(r_disp), kA64Sxtw, 0U);
   NewLIR1(kA64Br1x, r_base.GetReg());
 
   // Loop exit label.
@@ -141,7 +140,6 @@
 
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  // TODO(Arm64): generate "ldr w3, [x1,w2,sxtw #2]" rather than "ldr w3, [x1,x2,lsl #2]"?
   LoadBaseIndexed(table_base, As64BitReg(key_reg), As64BitReg(disp_reg), 2, k32);
 
   // Get base branch address.
@@ -150,8 +148,7 @@
   tab_rec->anchor = switch_label;
 
   // Add displacement to base branch address and go!
-  // TODO(Arm64): generate "add x4, x4, w3, sxtw" rather than "add x4, x4, x3"?
-  OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), ENCODE_NO_SHIFT);
+  OpRegRegRegExtend(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), kA64Sxtw, 0U);
   NewLIR1(kA64Br1x, branch_reg.GetReg());
 
   // branch_over target here
@@ -213,7 +210,7 @@
     null_check_branch = nullptr;  // No null check.
   } else {
    // If the null-check fails it's handled by the slow-path to reduce exception-related meta-data.
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
     }
   }
@@ -261,7 +258,7 @@
     null_check_branch = nullptr;  // No null check.
   } else {
    // If the null-check fails it's handled by the slow-path to reduce exception-related meta-data.
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
     }
   }
@@ -337,19 +334,19 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-                              (static_cast<size_t>(frame_size_) <
-                              Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm64);
 
   NewLIR0(kPseudoMethodEntry);
 
-  const bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+  constexpr size_t kStackOverflowReservedUsableBytes = kArm64StackOverflowReservedBytes -
+        Thread::kStackOverflowSignalReservedBytes;
+  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
   const int spill_count = num_core_spills_ + num_fp_spills_;
   const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
   const int frame_size_without_spills = frame_size_ - spill_size;
 
   if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       if (!large_frame) {
         // Load stack limit
         LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9);
@@ -382,7 +379,7 @@
   }
 
   if (!skip_overflow_check) {
-    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       class StackOverflowSlowPath: public LIRSlowPath {
       public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) :
@@ -412,7 +409,7 @@
         // Branch to throw target if there is not enough room.
         OpRegRegImm(kOpSub, rs_x9, rs_rA64_SP, frame_size_without_spills);
         LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
-        LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x8, nullptr);
+        LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr);
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size));
         OpRegCopy(rs_rA64_SP, rs_x9);  // Establish stack after checks.
       } else {
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 0fa7f2b..f1270ec 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -241,6 +241,8 @@
     LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
     LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
                           int shift);
+    LIR* OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
+                           A64RegExtEncodings ext, uint8_t amount);
     LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
     LIR* OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
     static const ArmEncodingMap EncodingMap[kA64Last];
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index fba368a..06e1cda 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -1163,7 +1163,7 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 2254b8b..672aa88 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -99,7 +99,8 @@
 
   LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
   if (data_target == NULL) {
-    data_target = AddWordData(&literal_list_, value);
+    // Wide, as we need 8B alignment.
+    data_target = AddWideData(&literal_list_, value, 0);
   }
 
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
@@ -643,6 +644,44 @@
   }
 }
 
+LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
+                                     RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
+  ArmOpcode opcode = kA64Brk1d;
+
+  switch (op) {
+    case kOpAdd:
+      opcode = kA64Add4RRre;
+      break;
+    case kOpSub:
+      opcode = kA64Sub4RRre;
+      break;
+    default:
+      LOG(FATAL) << "Unimplemented opcode: " << op;
+      break;
+  }
+  ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
+
+  if (r_dest.Is64Bit()) {
+    CHECK(r_src1.Is64Bit());
+
+    // dest determines whether the op is wide or not. Up-convert src2 when necessary.
+    // Note: this is not according to aarch64 specifications, but our encoding.
+    if (!r_src2.Is64Bit()) {
+      r_src2 = As64BitReg(r_src2);
+    }
+  } else {
+    CHECK(!r_src1.Is64Bit());
+    CHECK(!r_src2.Is64Bit());
+  }
+
+  // Sanity checks.
+  //    1) Amount is in the range 0..4
+  CHECK_LE(amount, 4);
+
+  return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(),
+                 EncodeExtend(ext, amount));
+}
+
 LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
   return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
 }
@@ -660,6 +699,7 @@
   int32_t log_imm = -1;
   bool is_wide = r_dest.Is64Bit();
   ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+  int info = 0;
 
   switch (op) {
     case kOpLsl: {
@@ -692,7 +732,8 @@
         return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
       } else {
         log_imm = -1;
-        alt_opcode = (neg) ? kA64Add4rrro : kA64Sub4rrro;
+        alt_opcode = (neg) ? kA64Add4RRre : kA64Sub4RRre;
+        info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
       }
       break;
     // case kOpRsub:
@@ -734,8 +775,8 @@
   if (log_imm >= 0) {
     return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
   } else {
-    RegStorage r_scratch = AllocTemp();
-    if (IS_WIDE(wide)) {
+    RegStorage r_scratch;
+    if (is_wide) {
       r_scratch = AllocTempWide();
       LoadConstantWide(r_scratch, value);
     } else {
@@ -743,7 +784,7 @@
       LoadConstant(r_scratch, value);
     }
     if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
-      res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
+      res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
     else
       res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
     FreeTemp(r_scratch);
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 3b99421..e36b592 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -173,7 +173,7 @@
 
 /* Perform null-check on a register.  */
 LIR* Mir2Lir::GenNullCheck(RegStorage m_reg, int opt_flags) {
-  if (Runtime::Current()->ExplicitNullChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
     return GenExplicitNullCheck(m_reg, opt_flags);
   }
   return nullptr;
@@ -188,7 +188,7 @@
 }
 
 void Mir2Lir::MarkPossibleNullPointerException(int opt_flags) {
-  if (!Runtime::Current()->ExplicitNullChecks()) {
+  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
       return;
     }
@@ -197,13 +197,13 @@
 }
 
 void Mir2Lir::MarkPossibleStackOverflowException() {
-  if (!Runtime::Current()->ExplicitStackOverflowChecks()) {
+  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
     MarkSafepointPC(last_lir_insn_);
   }
 }
 
 void Mir2Lir::ForceImplicitNullCheck(RegStorage reg, int opt_flags) {
-  if (!Runtime::Current()->ExplicitNullChecks()) {
+  if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
       return;
     }
@@ -2171,7 +2171,7 @@
 
 /* Check if we need to check for pending suspend request */
 void Mir2Lir::GenSuspendTest(int opt_flags) {
-  if (Runtime::Current()->ExplicitSuspendChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetExplicitSuspendChecks()) {
     if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
       return;
     }
@@ -2191,7 +2191,7 @@
 
 /* Check if we need to check for pending suspend request */
 void Mir2Lir::GenSuspendTestAndBranch(int opt_flags, LIR* target) {
-  if (Runtime::Current()->ExplicitSuspendChecks()) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetExplicitSuspendChecks()) {
     if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
       OpUnconditionalBranch(target);
       return;
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 641579f..b3fac77 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -977,7 +977,7 @@
                            type, skip_this);
 
   if (pcrLabel) {
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
@@ -1204,7 +1204,7 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index c734202..e53105f 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -305,8 +305,7 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kMips);
   NewLIR0(kPseudoMethodEntry);
   RegStorage check_reg = AllocTemp();
   RegStorage new_sp = AllocTemp();
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index ca65432..f70087d 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -25,6 +25,7 @@
 #include "dex/backend.h"
 #include "dex/quick/resource_mask.h"
 #include "driver/compiler_driver.h"
+#include "instruction_set.h"
 #include "leb128.h"
 #include "safe_map.h"
 #include "utils/array_ref.h"
@@ -206,6 +207,36 @@
 #define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath))
 #define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath))
 
+// Size of a frame we consider definitely large. Anything larger than this always gets a
+// stack overflow check.
+static constexpr size_t kLargeFrameSize = 2 * KB;
+
+// Size of a frame that should be small. Any leaf method with a frame smaller than this
+// should run without a stack overflow check.
+// The constant is based on experience with framework code.
+static constexpr size_t kSmallFrameSize = 1 * KB;
+
+// Determine whether a frame is small or large, used in the decision on whether to elide a
+// stack overflow check on method entry.
+//
+// A frame is considered large when it is at least kLargeFrameSize, or at least a quarter
+// of the stack-overflow reserved space for the given instruction set.
+static constexpr bool IsLargeFrame(size_t size, InstructionSet isa) {
+  return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
+}
+
+// We want to ensure that on all systems kSmallFrameSize will lead to false in IsLargeFrame.
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm),
+               kSmallFrameSize_is_not_a_small_frame_arm);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm64),
+               kSmallFrameSize_is_not_a_small_frame_arm64);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kMips),
+               kSmallFrameSize_is_not_a_small_frame_mips);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86),
+               kSmallFrameSize_is_not_a_small_frame_x86);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86_64),
+               kSmallFrameSize_is_not_a_small_frame_x86_64);
+
 class Mir2Lir : public Backend {
   public:
     /*
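
A minimal standalone sketch (illustrative, not part of the patch) of how the IsLargeFrame() heuristic above works out numerically, assuming the per-ISA reserved-byte constants introduced in instruction_set.h later in this change. All names below are local to the sketch.

// Effective threshold: min(kLargeFrameSize, reserved / 4). With 16 KB (arm) and 32 KB
// (arm64) reserves, that is 2 KB on both, so the COMPILE_ASSERTs on kSmallFrameSize hold.
#include <cstddef>

namespace sketch {

constexpr size_t KB = 1024;
constexpr size_t kLargeFrameSize = 2 * KB;
constexpr size_t kSmallFrameSize = 1 * KB;
constexpr size_t kArmReservedBytes = 16 * KB;    // kArmStackOverflowReservedBytes
constexpr size_t kArm64ReservedBytes = 32 * KB;  // kArm64StackOverflowReservedBytes

// Same predicate as IsLargeFrame(), but taking the reserved size directly instead of an
// InstructionSet so the sketch stays self-contained.
constexpr bool IsLargeFrame(size_t frame_size, size_t reserved_bytes) {
  return frame_size >= kLargeFrameSize || frame_size >= reserved_bytes / 4;
}

static_assert(!IsLargeFrame(kSmallFrameSize, kArmReservedBytes), "1 KB leaf frame skips the check");
static_assert(IsLargeFrame(kLargeFrameSize, kArm64ReservedBytes), "2 KB frame keeps the check");

}  // namespace sketch
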
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index dd5dab2..28195ab 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -235,8 +235,8 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  const bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
+  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
+      !IsLargeFrame(frame_size_, Gen64Bit() ? kX86_64 : kX86);
   NewLIR0(kPseudoMethodEntry);
   /* Spill core callee saves */
   SpillCoreRegs();
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 92753e4..078dd5a 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -2202,7 +2202,7 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    if (Runtime::Current()->ExplicitNullChecks()) {
+    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 5d1c5da..fb3341b 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -53,7 +53,10 @@
     num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
     generate_gdb_information_(false),
     top_k_profile_threshold_(kDefaultTopKProfileThreshold),
-    include_debug_symbols_(kDefaultIncludeDebugSymbols)
+    include_debug_symbols_(kDefaultIncludeDebugSymbols),
+    explicit_null_checks_(true),
+    explicit_so_checks_(true),
+    explicit_suspend_checks_(true)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(false)
 #endif
@@ -67,7 +70,10 @@
                   size_t num_dex_methods_threshold,
                   bool generate_gdb_information,
                   double top_k_profile_threshold,
-                  bool include_debug_symbols
+                  bool include_debug_symbols,
+                  bool explicit_null_checks,
+                  bool explicit_so_checks,
+                  bool explicit_suspend_checks
 #ifdef ART_SEA_IR_MODE
                   , bool sea_ir_mode
 #endif
@@ -80,7 +86,10 @@
     num_dex_methods_threshold_(num_dex_methods_threshold),
     generate_gdb_information_(generate_gdb_information),
     top_k_profile_threshold_(top_k_profile_threshold),
-    include_debug_symbols_(include_debug_symbols)
+    include_debug_symbols_(include_debug_symbols),
+    explicit_null_checks_(explicit_null_checks),
+    explicit_so_checks_(explicit_so_checks),
+    explicit_suspend_checks_(explicit_suspend_checks)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(sea_ir_mode)
 #endif
@@ -147,6 +156,30 @@
     return include_debug_symbols_;
   }
 
+  bool GetExplicitNullChecks() const {
+    return explicit_null_checks_;
+  }
+
+  void SetExplicitNullChecks(bool new_val) {
+    explicit_null_checks_ = new_val;
+  }
+
+  bool GetExplicitStackOverflowChecks() const {
+    return explicit_so_checks_;
+  }
+
+  void SetExplicitStackOverflowChecks(bool new_val) {
+    explicit_so_checks_ = new_val;
+  }
+
+  bool GetExplicitSuspendChecks() const {
+    return explicit_suspend_checks_;
+  }
+
+  void SetExplicitSuspendChecks(bool new_val) {
+    explicit_suspend_checks_ = new_val;
+  }
+
 #ifdef ART_SEA_IR_MODE
   bool GetSeaIrMode();
 #endif
@@ -166,6 +199,9 @@
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
   bool include_debug_symbols_;
+  bool explicit_null_checks_;
+  bool explicit_so_checks_;
+  bool explicit_suspend_checks_;
 #ifdef ART_SEA_IR_MODE
   bool sea_ir_mode_;
 #endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index c3f2082..d7b34dc 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -744,6 +744,19 @@
   *parsed_value = value;
 }
 
+void CheckExplicitCheckOptions(InstructionSet isa, bool* explicit_null_checks,
+                               bool* explicit_so_checks, bool* explicit_suspend_checks) {
+  switch (isa) {
+    case kArm:
+      break;  // All checks implemented, leave as is.
+
+    default:  // No checks implemented, reset all to explicit checks.
+      *explicit_null_checks = true;
+      *explicit_so_checks = true;
+      *explicit_suspend_checks = true;
+  }
+}
+
 static int dex2oat(int argc, char** argv) {
 #if defined(__linux__) && defined(__arm__)
   int major, minor;
@@ -825,6 +838,11 @@
   bool watch_dog_enabled = !kIsTargetBuild;
   bool generate_gdb_information = kIsDebugBuild;
 
+  bool explicit_null_checks = true;
+  bool explicit_so_checks = true;
+  bool explicit_suspend_checks = true;
+  bool has_explicit_checks_options = false;
+
   for (int i = 0; i < argc; i++) {
     const StringPiece option(argv[i]);
     const bool log_options = false;
@@ -998,6 +1016,31 @@
     } else if (option.starts_with("--dump-cfg-passes=")) {
       std::string dump_passes = option.substr(strlen("--dump-cfg-passes=")).data();
       PassDriverMEOpts::SetDumpPassList(dump_passes);
+    } else if (option.starts_with("--implicit-checks=")) {
+      std::string checks = option.substr(strlen("--implicit-checks=")).data();
+      std::vector<std::string> checkvec;
+      Split(checks, ',', checkvec);
+      for (auto& str : checkvec) {
+        std::string val = Trim(str);
+        if (val == "none") {
+          explicit_null_checks = true;
+          explicit_so_checks = true;
+          explicit_suspend_checks = true;
+        } else if (val == "null") {
+          explicit_null_checks = false;
+        } else if (val == "suspend") {
+          explicit_suspend_checks = false;
+        } else if (val == "stack") {
+          explicit_so_checks = false;
+        } else if (val == "all") {
+          explicit_null_checks = false;
+          explicit_so_checks = false;
+          explicit_suspend_checks = false;
+        } else {
+          Usage("--implicit-checks passed non-recognized value %s", val.c_str());
+        }
+        has_explicit_checks_options = true;
+      }
     } else {
       Usage("Unknown argument %s", option.data());
     }
@@ -1126,6 +1169,9 @@
     Usage("Unknown --compiler-filter value %s", compiler_filter_string);
   }
 
+  CheckExplicitCheckOptions(instruction_set, &explicit_null_checks, &explicit_so_checks,
+                            &explicit_suspend_checks);
+
   CompilerOptions compiler_options(compiler_filter,
                                    huge_method_threshold,
                                    large_method_threshold,
@@ -1134,7 +1180,10 @@
                                    num_dex_methods_threshold,
                                    generate_gdb_information,
                                    top_k_profile_threshold,
-                                   include_debug_symbols
+                                   include_debug_symbols,
+                                   explicit_null_checks,
+                                   explicit_so_checks,
+                                   explicit_suspend_checks
 #ifdef ART_SEA_IR_MODE
                                    , compiler_options.sea_ir_ = true;
 #endif
@@ -1205,6 +1254,18 @@
     return EXIT_FAILURE;
   }
   std::unique_ptr<Dex2Oat> dex2oat(p_dex2oat);
+
+  // TODO: Not sure whether it's a good idea to allow anything else but the runtime option in
+  // this case at all, as we'll have to throw away produced code for a mismatch.
+  if (!has_explicit_checks_options) {
+    if (instruction_set == kRuntimeISA) {
+      Runtime* runtime = Runtime::Current();
+      compiler_options.SetExplicitNullChecks(runtime->ExplicitNullChecks());
+      compiler_options.SetExplicitStackOverflowChecks(runtime->ExplicitStackOverflowChecks());
+      compiler_options.SetExplicitSuspendChecks(runtime->ExplicitSuspendChecks());
+    }
+  }
+
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
   // give it away now so that we don't starve GC.
   Thread* self = Thread::Current();
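
For reference, the new dex2oat flag parsed above accepts a comma-separated list: --implicit-checks=null,stack turns off explicit null and stack-overflow checks (leaving suspend checks explicit), and --implicit-checks=none keeps everything explicit; CheckExplicitCheckOptions() then forces all checks back to explicit on ISAs other than arm, the only backend with full implicit-check support in this change. Below is an illustrative, self-contained restatement of the value-to-flag mapping; it is a sketch, not the patch's code.

#include <string>

struct ExplicitChecks {
  bool null_checks = true;
  bool so_checks = true;       // stack overflow
  bool suspend_checks = true;
};

// Returns false for an unrecognized value (dex2oat reports a Usage() error in that case).
bool ApplyImplicitChecksValue(const std::string& val, ExplicitChecks* checks) {
  if (val == "none") {
    checks->null_checks = checks->so_checks = checks->suspend_checks = true;
  } else if (val == "null") {
    checks->null_checks = false;
  } else if (val == "suspend") {
    checks->suspend_checks = false;
  } else if (val == "stack") {
    checks->so_checks = false;
  } else if (val == "all") {
    checks->null_checks = checks->so_checks = checks->suspend_checks = false;
  } else {
    return false;
  }
  return true;
}
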
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index f81e2f9..2a82129 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -22,6 +22,7 @@
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "instruction_set.h"
 #include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
 #include "thread.h"
@@ -59,7 +60,7 @@
   // get the method from the top of the stack.  However it's in r0.
   uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(*out_sp) - Thread::kStackOverflowReservedBytes);
+      reinterpret_cast<uint8_t*>(*out_sp) - kArmStackOverflowReservedBytes);
   if (overflow_addr == fault_addr) {
     *out_method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
   } else {
@@ -190,7 +191,7 @@
   VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
     ", fault_addr: " << fault_addr;
 
-  uintptr_t overflow_addr = sp - Thread::kStackOverflowReservedBytes;
+  uintptr_t overflow_addr = sp - kArmStackOverflowReservedBytes;
 
   Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
   CHECK_EQ(self, Thread::Current());
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 4ede453..2e60b93 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1615,14 +1615,14 @@
     // we would need to fully restore it. As there are a lot of callee-save registers, it seems
     // easier to have an extra small stack area.
 
-    str x19, [sp, #-16]!      // Save integer result.
+    str x0, [sp, #-16]!       // Save integer result.
     .cfi_adjust_cfa_offset 16
     str d0,  [sp, #8]         // Save floating-point result.
 
-    mov   x0, xSELF           // Pass Thread.
     add   x1, sp, #16         // Pass SP.
     mov   x2, x0              // Pass integer result.
     fmov  x3, d0              // Pass floating-point result.
+    mov   x0, xSELF           // Pass Thread.
     bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res, fpr_res)
 
     mov   x9, x0              // Return address from instrumentation call.
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index 67e7100..96eeb8d 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -22,6 +22,7 @@
 
 #include "base/logging.h"  // Logging is required for FATAL in the helper functions.
 #include "base/macros.h"
+#include "globals.h"       // For KB.
 
 namespace art {
 
@@ -36,6 +37,20 @@
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
+#if defined(__arm__)
+static constexpr InstructionSet kRuntimeISA = kArm;
+#elif defined(__aarch64__)
+static constexpr InstructionSet kRuntimeISA = kArm64;
+#elif defined(__mips__)
+static constexpr InstructionSet kRuntimeISA = kMips;
+#elif defined(__i386__)
+static constexpr InstructionSet kRuntimeISA = kX86;
+#elif defined(__x86_64__)
+static constexpr InstructionSet kRuntimeISA = kX86_64;
+#else
+static constexpr InstructionSet kRuntimeISA = kNone;
+#endif
+
 // Architecture-specific pointer sizes
 static constexpr size_t kArmPointerSize = 4;
 static constexpr size_t kArm64PointerSize = 8;
@@ -153,19 +168,33 @@
   }
 }
 
-#if defined(__arm__)
-static constexpr InstructionSet kRuntimeISA = kArm;
-#elif defined(__aarch64__)
-static constexpr InstructionSet kRuntimeISA = kArm64;
-#elif defined(__mips__)
-static constexpr InstructionSet kRuntimeISA = kMips;
-#elif defined(__i386__)
-static constexpr InstructionSet kRuntimeISA = kX86;
-#elif defined(__x86_64__)
-static constexpr InstructionSet kRuntimeISA = kX86_64;
-#else
-static constexpr InstructionSet kRuntimeISA = kNone;
-#endif
+static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
+static constexpr size_t kArmStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+
+// TODO: shrink reserved space, in particular for 64bit.
+
+// Worst case, we would need about 2.6x the x86_64 amount to cover the many additional
+// registers, but this value works rather well.
+static constexpr size_t kArm64StackOverflowReservedBytes = 32 * KB;
+// TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
+// test-art-host-run-test-interpreter-018-stack-overflow
+// test-art-host-run-test-interpreter-107-int-math2
+static constexpr size_t kX86StackOverflowReservedBytes = 24 * KB;
+static constexpr size_t kX86_64StackOverflowReservedBytes = 32 * KB;
+
+static constexpr size_t GetStackOverflowReservedBytes(InstructionSet isa) {
+  return (isa == kArm || isa == kThumb2) ? kArmStackOverflowReservedBytes :
+           isa == kArm64 ? kArm64StackOverflowReservedBytes :
+           isa == kMips ? kMipsStackOverflowReservedBytes :
+           isa == kX86 ? kX86StackOverflowReservedBytes :
+           isa == kX86_64 ? kX86_64StackOverflowReservedBytes :
+           isa == kNone ? (LOG(FATAL) << "kNone has no stack overflow size", 0) :
+           (LOG(FATAL) << "Unknown instruction set" << isa, 0);
+}
+
+static constexpr size_t kRuntimeStackOverflowReservedBytes =
+    GetStackOverflowReservedBytes(kRuntimeISA);
 
 enum InstructionFeatures {
   kHwDiv  = 0x1,              // Supports hardware divide.
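
The per-frontend kStackOverflowReservedUsableBytes values computed in call_arm.cc and call_arm64.cc above follow directly from these constants. A worked-numbers sketch (illustrative, not part of the patch), assuming Thread::kStackOverflowSignalReservedBytes stays at 2 KB as defined in thread.h:

#include <cstddef>

namespace sketch {

constexpr size_t KB = 1024;
constexpr size_t kSignalReservedBytes = 2 * KB;  // Thread::kStackOverflowSignalReservedBytes

constexpr size_t kArmUsable = 16 * KB - kSignalReservedBytes;    // arm:   14 KB
constexpr size_t kArm64Usable = 32 * KB - kSignalReservedBytes;  // arm64: 30 KB

static_assert(kArmUsable == 14 * KB, "arm usable overflow reserve");
static_assert(kArm64Usable == 30 * KB, "arm64 usable overflow reserve");

}  // namespace sketch
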
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 1242324..7cdd8f5 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -565,6 +565,10 @@
       if (!ParseDouble(option, ':', 0.0, 100.0, &profiler_options_.top_k_change_threshold_)) {
         return false;
       }
+    } else if (option == "-Xprofile-type:method") {
+      profiler_options_.profile_type_ = kProfilerMethod;
+    } else if (option == "-Xprofile-type:dexpc") {
+      profiler_options_.profile_type_ = kProfilerMethodAndDexPC;
     } else if (StartsWith(option, "-implicit-checks:")) {
       std::string checks;
       if (!ParseStringAfterChar(option, ':', &checks)) {
@@ -808,6 +812,7 @@
   UsageMessage(stream, "  -Xprofile-start-immediately\n");
   UsageMessage(stream, "  -Xprofile-top-k-threshold:doublevalue\n");
   UsageMessage(stream, "  -Xprofile-top-k-change-threshold:doublevalue\n");
+  UsageMessage(stream, "  -Xprofile-type:{method,dexpc}\n");
   UsageMessage(stream, "  -Xcompiler:filename\n");
   UsageMessage(stream, "  -Xcompiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 00bb501..2cd876a 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -63,7 +63,8 @@
 static void GetSample(Thread* thread, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   BackgroundMethodSamplingProfiler* profiler =
       reinterpret_cast<BackgroundMethodSamplingProfiler*>(arg);
-  mirror::ArtMethod* method = thread->GetCurrentMethod(nullptr);
+  uint32_t dex_pc;
+  mirror::ArtMethod* method = thread->GetCurrentMethod(&dex_pc);
   if (false && method == nullptr) {
     LOG(INFO) << "No current method available";
     std::ostringstream os;
@@ -71,7 +72,7 @@
     std::string data(os.str());
     LOG(INFO) << data;
   }
-  profiler->RecordMethod(method);
+  profiler->RecordMethod(method, dex_pc);
 }
 
 // A closure that is called by the thread checkpoint code.
@@ -244,7 +245,7 @@
   }
 
   // Read the previous profile.
-  profile_table_.ReadPrevious(fd);
+  profile_table_.ReadPrevious(fd, options_.GetProfileType());
 
   // Move back to the start of the file.
   lseek(fd, 0, SEEK_SET);
@@ -360,7 +361,7 @@
 
 // A method has been hit, record its invocation in the method map.
 // The mutator_lock must be held (shared) when this is called.
-void BackgroundMethodSamplingProfiler::RecordMethod(mirror::ArtMethod* method) {
+void BackgroundMethodSamplingProfiler::RecordMethod(mirror::ArtMethod* method, uint32_t dex_pc) {
   if (method == nullptr) {
     profile_table_.NullMethod();
     // Don't record a nullptr method.
@@ -393,7 +394,11 @@
 
   // Add to the profile table unless it is filtered out.
   if (!is_filtered) {
-    profile_table_.Put(method);
+    if (options_.GetProfileType() == kProfilerMethod) {
+      profile_table_.Put(method);
+    } else if (options_.GetProfileType() == kProfilerMethodAndDexPC) {
+      profile_table_.PutDexPC(method, dex_pc);
+    }
   }
 }
 
@@ -403,7 +408,7 @@
 }
 
 uint32_t BackgroundMethodSamplingProfiler::DumpProfile(std::ostream& os) {
-  return profile_table_.Write(os);
+  return profile_table_.Write(os, options_.GetProfileType());
 }
 
 // Profile Table.
@@ -414,19 +419,18 @@
     num_boot_methods_(0) {
   for (int i = 0; i < kHashSize; i++) {
     table[i] = nullptr;
+    dex_table[i] = nullptr;
   }
 }
 
 ProfileSampleResults::~ProfileSampleResults() {
-  for (int i = 0; i < kHashSize; i++) {
-     delete table[i];
-  }
+  Clear();
 }
 
 // Add a method to the profile table.  If it's the first time the method
 // has been seen, add it with count=1, otherwise increment the count.
 void ProfileSampleResults::Put(mirror::ArtMethod* method) {
-  lock_.Lock(Thread::Current());
+  MutexLock mu(Thread::Current(), lock_);
   uint32_t index = Hash(method);
   if (table[index] == nullptr) {
     table[index] = new Map();
@@ -438,11 +442,34 @@
     i->second++;
   }
   num_samples_++;
-  lock_.Unlock(Thread::Current());
+}
+
+// Add a method with dex pc to the profile table
+void ProfileSampleResults::PutDexPC(mirror::ArtMethod* method, uint32_t dex_pc) {
+  MutexLock mu(Thread::Current(), lock_);
+  uint32_t index = Hash(method);
+  if (dex_table[index] == nullptr) {
+    dex_table[index] = new MethodDexPCMap();
+  }
+  MethodDexPCMap::iterator i = dex_table[index]->find(method);
+  if (i == dex_table[index]->end()) {
+    DexPCCountMap* dex_pc_map = new DexPCCountMap();
+    (*dex_pc_map)[dex_pc] = 1;
+    (*dex_table[index])[method] = dex_pc_map;
+  } else {
+    DexPCCountMap* dex_pc_count = i->second;
+    DexPCCountMap::iterator dex_pc_i = dex_pc_count->find(dex_pc);
+    if (dex_pc_i == dex_pc_count->end()) {
+      (*dex_pc_count)[dex_pc] = 1;
+    } else {
+      dex_pc_i->second++;
+    }
+  }
+  num_samples_++;
 }
 
 // Write the profile table to the output stream.  Also merge with the previous profile.
-uint32_t ProfileSampleResults::Write(std::ostream &os) {
+uint32_t ProfileSampleResults::Write(std::ostream& os, ProfileDataType type) {
   ScopedObjectAccess soa(Thread::Current());
   num_samples_ += previous_num_samples_;
   num_null_methods_ += previous_num_null_methods_;
@@ -452,36 +479,101 @@
                  << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
   os << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_ << "\n";
   uint32_t num_methods = 0;
-  for (int i = 0 ; i < kHashSize; i++) {
-    Map *map = table[i];
-    if (map != nullptr) {
-      for (const auto &meth_iter : *map) {
-        mirror::ArtMethod *method = meth_iter.first;
-        std::string method_name = PrettyMethod(method);
+  if (type == kProfilerMethod) {
+    for (int i = 0 ; i < kHashSize; i++) {
+      Map *map = table[i];
+      if (map != nullptr) {
+        for (const auto &meth_iter : *map) {
+          mirror::ArtMethod *method = meth_iter.first;
+          std::string method_name = PrettyMethod(method);
 
-        const DexFile::CodeItem* codeitem = method->GetCodeItem();
-        uint32_t method_size = 0;
-        if (codeitem != nullptr) {
-          method_size = codeitem->insns_size_in_code_units_;
-        }
-        uint32_t count = meth_iter.second;
+          const DexFile::CodeItem* codeitem = method->GetCodeItem();
+          uint32_t method_size = 0;
+          if (codeitem != nullptr) {
+            method_size = codeitem->insns_size_in_code_units_;
+          }
+          uint32_t count = meth_iter.second;
 
-        // Merge this profile entry with one from a previous run (if present).  Also
-        // remove the previous entry.
-        PreviousProfile::iterator pi = previous_.find(method_name);
-        if (pi != previous_.end()) {
-          count += pi->second.count_;
-          previous_.erase(pi);
+          // Merge this profile entry with one from a previous run (if present).  Also
+          // remove the previous entry.
+          PreviousProfile::iterator pi = previous_.find(method_name);
+          if (pi != previous_.end()) {
+            count += pi->second.count_;
+            previous_.erase(pi);
+          }
+          os << StringPrintf("%s/%u/%u\n",  method_name.c_str(), count, method_size);
+          ++num_methods;
         }
-        os << StringPrintf("%s/%u/%u\n",  method_name.c_str(), count, method_size);
-        ++num_methods;
+      }
+    }
+  } else if (type == kProfilerMethodAndDexPC) {
+    for (int i = 0 ; i < kHashSize; i++) {
+      MethodDexPCMap *dex_map = dex_table[i];
+      if (dex_map != nullptr) {
+        for (const auto &dex_pc_iter : *dex_map) {
+          mirror::ArtMethod *method = dex_pc_iter.first;
+          std::string method_name = PrettyMethod(method);
+
+          const DexFile::CodeItem* codeitem = method->GetCodeItem();
+          uint32_t method_size = 0;
+          if (codeitem != nullptr) {
+            method_size = codeitem->insns_size_in_code_units_;
+          }
+          DexPCCountMap* dex_pc_map = dex_pc_iter.second;
+          uint32_t total_count = 0;
+          for (const auto &dex_pc_i : *dex_pc_map) {
+            total_count += dex_pc_i.second;
+          }
+
+          PreviousProfile::iterator pi = previous_.find(method_name);
+          if (pi != previous_.end()) {
+            total_count += pi->second.count_;
+            DexPCCountMap* previous_dex_pc_map = pi->second.dex_pc_map_;
+            if (previous_dex_pc_map != nullptr) {
+              for (const auto &dex_pc_i : *previous_dex_pc_map) {
+                uint32_t dex_pc = dex_pc_i.first;
+                uint32_t count = dex_pc_i.second;
+                DexPCCountMap::iterator di = dex_pc_map->find(dex_pc);
+                if (di == dex_pc_map->end()) {
+                  (*dex_pc_map)[dex_pc] = count;
+                } else {
+                  di->second += count;
+                }
+              }
+            }
+            delete previous_dex_pc_map;
+            previous_.erase(pi);
+          }
+          std::vector<std::string> dex_pc_count_vector;
+          for (const auto &dex_pc_i : *dex_pc_map) {
+            dex_pc_count_vector.push_back(StringPrintf("%u:%u", dex_pc_i.first, dex_pc_i.second));
+          }
+          // We write out profile data with dex pc information in the following format:
+          // "method/total_count/size/[pc_1:count_1,pc_2:count_2,...]".
+          os << StringPrintf("%s/%u/%u/[%s]\n", method_name.c_str(), total_count,
+              method_size, Join(dex_pc_count_vector, ',').c_str());
+          ++num_methods;
+        }
       }
     }
   }
 
   // Now we write out the remaining previous methods.
-  for (PreviousProfile::iterator pi = previous_.begin(); pi != previous_.end(); ++pi) {
-    os << StringPrintf("%s/%u/%u\n",  pi->first.c_str(), pi->second.count_, pi->second.method_size_);
+  for (const auto &pi : previous_) {
+    if (type == kProfilerMethod) {
+      os << StringPrintf("%s/%u/%u\n",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
+    } else if (type == kProfilerMethodAndDexPC) {
+      os << StringPrintf("%s/%u/%u/[",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
+      DexPCCountMap* previous_dex_pc_map = pi.second.dex_pc_map_;
+      if (previous_dex_pc_map != nullptr) {
+        std::vector<std::string> dex_pc_count_vector;
+        for (const auto &dex_pc_i : *previous_dex_pc_map) {
+          dex_pc_count_vector.push_back(StringPrintf("%u:%u", dex_pc_i.first, dex_pc_i.second));
+        }
+        os << Join(dex_pc_count_vector, ',');
+      }
+      os << "]\n";
+    }
     ++num_methods;
   }
   return num_methods;
@@ -492,8 +584,20 @@
   num_null_methods_ = 0;
   num_boot_methods_ = 0;
   for (int i = 0; i < kHashSize; i++) {
-     delete table[i];
-     table[i] = nullptr;
+    delete table[i];
+    table[i] = nullptr;
+    if (dex_table[i] != nullptr) {
+      for (auto &di : *dex_table[i]) {
+        delete di.second;
+        di.second = nullptr;
+      }
+    }
+    delete dex_table[i];
+    dex_table[i] = nullptr;
+  }
+  for (auto &pi : previous_) {
+    delete pi.second.dex_pc_map_;
+    pi.second.dex_pc_map_ = nullptr;
   }
   previous_.clear();
 }
@@ -520,7 +624,7 @@
   return true;
 }
 
-void ProfileSampleResults::ReadPrevious(int fd) {
+void ProfileSampleResults::ReadPrevious(int fd, ProfileDataType type) {
   // Reset counters.
   previous_num_samples_ = previous_num_null_methods_ = previous_num_boot_methods_ = 0;
 
@@ -540,21 +644,35 @@
   previous_num_null_methods_ = atoi(summary_info[1].c_str());
   previous_num_boot_methods_ = atoi(summary_info[2].c_str());
 
-  // Now read each line until the end of file.  Each line consists of 3 fields separated by /
+  // Now read each line until the end of file.  Each line consists of 3 or 4 fields separated by /
   while (true) {
     if (!ReadProfileLine(fd, line)) {
       break;
     }
     std::vector<std::string> info;
     Split(line, '/', info);
-    if (info.size() != 3) {
+    if (info.size() != 3 && info.size() != 4) {
       // Malformed.
       break;
     }
     std::string methodname = info[0];
-    uint32_t count = atoi(info[1].c_str());
+    uint32_t total_count = atoi(info[1].c_str());
     uint32_t size = atoi(info[2].c_str());
-    previous_[methodname] = PreviousValue(count, size);
+    DexPCCountMap* dex_pc_map = nullptr;
+    if (type == kProfilerMethodAndDexPC && info.size() == 4) {
+      dex_pc_map = new DexPCCountMap();
+      std::string dex_pc_counts_str = info[3].substr(1, info[3].size() - 2);
+      std::vector<std::string> dex_pc_count_pairs;
+      Split(dex_pc_counts_str, ',', dex_pc_count_pairs);
+      for (uint32_t i = 0; i < dex_pc_count_pairs.size(); ++i) {
+        std::vector<std::string> dex_pc_count;
+        Split(dex_pc_count_pairs[i], ':', dex_pc_count);
+        uint32_t dex_pc = atoi(dex_pc_count[0].c_str());
+        uint32_t count = atoi(dex_pc_count[1].c_str());
+        (*dex_pc_map)[dex_pc] = count;
+      }
+    }
+    previous_[methodname] = PreviousValue(total_count, size, dex_pc_map);
   }
 }
 
@@ -604,7 +722,7 @@
     }
     std::vector<std::string> info;
     Split(line, '/', info);
-    if (info.size() != 3) {
+    if (info.size() != 3 && info.size() != 4) {
       // Malformed.
       return false;
     }
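
To make the new on-disk format concrete: a dex-pc profile line written by Write() above has the shape method/total_count/size/[pc:count,...]. The snippet below is an illustrative, self-contained parse of one such line (the method name and numbers are made up) and mirrors what ReadPrevious() reconstructs into a DexPCCountMap.

#include <cstdint>
#include <map>
#include <sstream>
#include <string>
#include <vector>

static std::vector<std::string> SplitOn(const std::string& s, char sep) {
  std::vector<std::string> out;
  std::stringstream ss(s);
  std::string token;
  while (std::getline(ss, token, sep)) {
    out.push_back(token);
  }
  return out;
}

int main() {
  // Hypothetical entry: 42 samples over 128 code units, 40 at dex pc 5 and 2 at dex pc 17.
  std::string line = "void Foo.bar()/42/128/[5:40,17:2]";
  std::vector<std::string> info = SplitOn(line, '/');         // 4 fields, as ReadPrevious() expects
  std::map<uint32_t, uint32_t> dex_pc_map;
  std::string pairs = info[3].substr(1, info[3].size() - 2);  // strip '[' and ']'
  for (const std::string& pair : SplitOn(pairs, ',')) {
    std::vector<std::string> pc_count = SplitOn(pair, ':');
    dex_pc_map[std::stoul(pc_count[0])] = std::stoul(pc_count[1]);
  }
  return dex_pc_map.size() == 2 ? 0 : 1;                      // sanity check
}
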
diff --git a/runtime/profiler.h b/runtime/profiler.h
index 0b18dbb..396dd23 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -53,8 +53,9 @@
   ~ProfileSampleResults();
 
   void Put(mirror::ArtMethod* method);
-  uint32_t Write(std::ostream &os);
-  void ReadPrevious(int fd);
+  void PutDexPC(mirror::ArtMethod* method, uint32_t pc);
+  uint32_t Write(std::ostream &os, ProfileDataType type);
+  void ReadPrevious(int fd, ProfileDataType type);
   void Clear();
   uint32_t GetNumSamples() { return num_samples_; }
   void NullMethod() { ++num_null_methods_; }
@@ -68,15 +69,21 @@
  uint32_t num_null_methods_;    // Number of samples where we don't know the method.
   uint32_t num_boot_methods_;    // Number of samples in the boot path.
 
-  typedef std::map<mirror::ArtMethod*, uint32_t> Map;   // Map of method vs its count.
+  typedef std::map<mirror::ArtMethod*, uint32_t> Map;  // Map of method vs its count.
   Map *table[kHashSize];
 
+  typedef std::map<uint32_t, uint32_t> DexPCCountMap;  // Map of dex pc vs its count
+  // Map of method vs dex pc counts in the method.
+  typedef std::map<mirror::ArtMethod*, DexPCCountMap*> MethodDexPCMap;
+  MethodDexPCMap *dex_table[kHashSize];
+
   struct PreviousValue {
-    PreviousValue() : count_(0), method_size_(0) {}
-    PreviousValue(uint32_t count, uint32_t method_size)
-      : count_(count), method_size_(method_size) {}
+    PreviousValue() : count_(0), method_size_(0), dex_pc_map_(nullptr) {}
+    PreviousValue(uint32_t count, uint32_t method_size, DexPCCountMap* dex_pc_map)
+      : count_(count), method_size_(method_size), dex_pc_map_(dex_pc_map) {}
     uint32_t count_;
     uint32_t method_size_;
+    DexPCCountMap* dex_pc_map_;
   };
 
   typedef std::map<std::string, PreviousValue> PreviousProfile;
@@ -114,7 +121,7 @@
   static void Stop() LOCKS_EXCLUDED(Locks::profiler_lock_, wait_lock_);
   static void Shutdown() LOCKS_EXCLUDED(Locks::profiler_lock_);
 
-  void RecordMethod(mirror::ArtMethod *method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void RecordMethod(mirror::ArtMethod *method, uint32_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Barrier& GetBarrier() {
     return *profiler_barrier_;
diff --git a/runtime/profiler_options.h b/runtime/profiler_options.h
index 08e32cc..0b63003 100644
--- a/runtime/profiler_options.h
+++ b/runtime/profiler_options.h
@@ -22,6 +22,11 @@
 
 namespace art {
 
+enum ProfileDataType {
+  kProfilerMethod,          // Method only
+  kProfilerMethodAndDexPC,  // Method with Dex PC
+};
+
 class ProfilerOptions {
  public:
   static constexpr bool kDefaultEnabled = false;
@@ -32,6 +37,7 @@
   static constexpr bool kDefaultStartImmediately = false;
   static constexpr double kDefaultTopKThreshold = 90.0;
   static constexpr double kDefaultChangeInTopKThreshold = 10.0;
+  static constexpr ProfileDataType kDefaultProfileData = kProfilerMethod;
 
   ProfilerOptions() :
     enabled_(kDefaultEnabled),
@@ -41,7 +47,8 @@
     backoff_coefficient_(kDefaultBackoffCoefficient),
     start_immediately_(kDefaultStartImmediately),
     top_k_threshold_(kDefaultTopKThreshold),
-    top_k_change_threshold_(kDefaultChangeInTopKThreshold) {}
+    top_k_change_threshold_(kDefaultChangeInTopKThreshold),
+    profile_type_(kDefaultProfileData) {}
 
   ProfilerOptions(bool enabled,
                  uint32_t period_s,
@@ -50,7 +57,8 @@
                  double backoff_coefficient,
                  bool start_immediately,
                  double top_k_threshold,
-                 double top_k_change_threshold):
+                 double top_k_change_threshold,
+                 ProfileDataType profile_type):
     enabled_(enabled),
     period_s_(period_s),
     duration_s_(duration_s),
@@ -58,7 +66,8 @@
     backoff_coefficient_(backoff_coefficient),
     start_immediately_(start_immediately),
     top_k_threshold_(top_k_threshold),
-    top_k_change_threshold_(top_k_change_threshold) {}
+    top_k_change_threshold_(top_k_change_threshold),
+    profile_type_(profile_type) {}
 
   bool IsEnabled() const {
     return enabled_;
@@ -92,6 +101,10 @@
     return top_k_change_threshold_;
   }
 
+  ProfileDataType GetProfileType() const {
+    return profile_type_;
+  }
+
  private:
   friend std::ostream & operator<<(std::ostream &os, const ProfilerOptions& po) {
     os << "enabled=" << po.enabled_
@@ -101,7 +114,8 @@
        << ", backoff_coefficient=" << po.backoff_coefficient_
        << ", start_immediately=" << po.start_immediately_
        << ", top_k_threshold=" << po.top_k_threshold_
-       << ", top_k_change_threshold=" << po.top_k_change_threshold_;
+       << ", top_k_change_threshold=" << po.top_k_change_threshold_
+       << ", profile_type=" << po.profile_type_;
     return os;
   }
 
@@ -123,6 +137,8 @@
   double top_k_threshold_;
   // How much the top K% samples needs to change in order for the app to be recompiled.
   double top_k_change_threshold_;
+  // The type of profile data dumped to the disk.
+  ProfileDataType profile_type_;
 };
 
 }  // namespace art
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 717381c..8aa7ea1 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -421,6 +421,9 @@
     int fd = open(profile_output_filename_.c_str(), O_RDWR|O_CREAT|O_EXCL, 0660);
     if (fd >= 0) {
       close(fd);
+    } else if (errno != EEXIST) {
+      LOG(INFO) << "Failed to access the profile file. Profiler disabled.";
+      return true;
     }
     StartProfiler(profile_output_filename_.c_str());
   }
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6980530..3f8f4a3 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -220,7 +220,7 @@
     // It's likely that callers are trying to ensure they have at least a certain amount of
     // stack space, so we should add our reserved space on top of what they requested, rather
     // than implicitly take it away from them.
-    stack_size += Thread::kStackOverflowReservedBytes;
+    stack_size += kRuntimeStackOverflowReservedBytes;
   } else {
     // If we are going to use implicit stack checks, allocate space for the protected
     // region at the bottom of the stack.
@@ -489,7 +489,7 @@
   tlsPtr_.stack_begin = reinterpret_cast<byte*>(read_stack_base);
   tlsPtr_.stack_size = read_stack_size;
 
-  if (read_stack_size <= kStackOverflowReservedBytes) {
+  if (read_stack_size <= kRuntimeStackOverflowReservedBytes) {
     LOG(FATAL) << "Attempt to attach a thread with a too-small stack (" << read_stack_size
         << " bytes)";
   }
@@ -2200,7 +2200,7 @@
   if (tlsPtr_.stack_end == tlsPtr_.stack_begin) {
     // However, we seem to have already extended to use the full stack.
     LOG(ERROR) << "Need to increase kStackOverflowReservedBytes (currently "
-               << kStackOverflowReservedBytes << ")?";
+               << kRuntimeStackOverflowReservedBytes << ")?";
     DumpStack(LOG(ERROR));
     LOG(FATAL) << "Recursive stack overflow.";
   }
diff --git a/runtime/thread.h b/runtime/thread.h
index bff9b52..7cd86de 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -33,6 +33,7 @@
 #include "gc/allocator/rosalloc.h"
 #include "globals.h"
 #include "handle_scope.h"
+#include "instruction_set.h"
 #include "jvalue.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -94,28 +95,8 @@
 
 class Thread {
  public:
-  // Space to throw a StackOverflowError in.
-  // TODO: shrink reserved space, in particular for 64bit.
-#if defined(__x86_64__)
-  static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
-#elif defined(__aarch64__)
-  // Worst-case, we would need about 2.6x the amount of x86_64 for many more registers.
-  // But this one works rather well.
-  static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
-#elif defined(__i386__)
-  // TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
-  // test-art-host-run-test-interpreter-018-stack-overflow
-  // test-art-host-run-test-interpreter-107-int-math2
-  static constexpr size_t kStackOverflowReservedBytes = 24 * KB;
-#else
-  static constexpr size_t kStackOverflowReservedBytes = 16 * KB;
-#endif
   // How much of the reserved bytes is reserved for incoming signals.
   static constexpr size_t kStackOverflowSignalReservedBytes = 2 * KB;
-  // How much of the reserved bytes we may temporarily use during stack overflow checks as an
-  // optimization.
-  static constexpr size_t kStackOverflowReservedUsableBytes =
-      kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes;
 
   // For implicit overflow checks we reserve an extra piece of memory at the bottom
   // of the stack (lowest memory).  The higher portion of the memory
@@ -123,7 +104,7 @@
   // throwing the StackOverflow exception.
   static constexpr size_t kStackOverflowProtectedSize = 16 * KB;
   static constexpr size_t kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
-    kStackOverflowReservedBytes;
+      kRuntimeStackOverflowReservedBytes;
 
   // Creates a new native thread corresponding to the given managed peer.
   // Used to implement Thread.start.
@@ -585,7 +566,7 @@
       // overflow region.
       tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowImplicitCheckSize;
     } else {
-      tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowReservedBytes;
+      tlsPtr_.stack_end = tlsPtr_.stack_begin + kRuntimeStackOverflowReservedBytes;
     }
   }
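
With the per-ISA constants, the implicit-check size in thread.h above becomes target dependent. A small worked example (illustrative, not part of the patch), assuming an arm runtime where kRuntimeStackOverflowReservedBytes resolves to 16 KB:

#include <cstddef>

namespace sketch {

constexpr size_t KB = 1024;
constexpr size_t kStackOverflowProtectedSize = 16 * KB;  // as defined in thread.h
constexpr size_t kRuntimeReservedBytes = 16 * KB;        // GetStackOverflowReservedBytes(kArm)
constexpr size_t kImplicitCheckSize = kStackOverflowProtectedSize + kRuntimeReservedBytes;

static_assert(kImplicitCheckSize == 32 * KB, "arm implicit stack check size");

}  // namespace sketch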