Add implicit null and stack checks for x86

This adds the compiler and runtime changes for x86
implicit checks.  32-bit only.

Both host and target are supported.
By default, on the host, the implicit null pointer and stack
overflow checks are enabled.  The implicit suspend check is
implemented but not switched on.
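
For reference, the generated check sequences (as sketched in the
comments in call_x86.cc and fault_handler_x86.cc below) are roughly:

  test eax, [arg + 0]         ; implicit null check of the object in arg
  test eax, [esp - overflow]  ; implicit stack overflow check, before the frame is built
  mov  eax, fs:[suspend_trigger_offset]
  test eax, [eax]             ; implicit suspend check (not switched on by default)

Each faulting load is caught by the SIGSEGV fault handler, which
redirects execution to the matching quick entrypoint
(art_quick_throw_null_pointer_exception,
art_quick_throw_stack_overflow_from_signal, art_quick_test_suspend).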

Change-Id: I88a609e98d6bf32f283eaa4e6ec8bbf8dc1df78a
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 10cd1cc..a82d1f5 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -235,7 +235,7 @@
 
 .PHONY: $$(gtest_rule)
 $$(gtest_rule): $$(gtest_exe) $$(ART_GTEST_$(1)_HOST_DEPS) $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_TEST_HOST_GTEST_$(file)_DEX)) $$(gtest_deps)
-	$(hide) ($$(call ART_TEST_SKIP,$$@) && $$< && $$(call ART_TEST_PASSED,$$@)) \
+	$(hide) ($$(call ART_TEST_SKIP,$$@) && LD_PRELOAD=libsigchain$$(ART_HOST_SHLIB_EXTENSION) $$< && $$(call ART_TEST_PASSED,$$@)) \
 	  || $$(call ART_TEST_FAILED,$$@)
 
   ART_TEST_HOST_GTEST$$($(2)ART_PHONY_TEST_HOST_SUFFIX)_RULES += $$(gtest_rule)
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index dd87f4a..d1caf93 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -50,7 +50,7 @@
 
 IMPLICIT_CHECKS_arm := null,stack
 IMPLICIT_CHECKS_arm64 := none
-IMPLICIT_CHECKS_x86 := none
+IMPLICIT_CHECKS_x86 := null,stack
 IMPLICIT_CHECKS_x86_64 := none
 IMPLICIT_CHECKS_mips := none
 define create-core-oat-target-rules
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 7d75da9..01d6354 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -96,7 +96,7 @@
                               RegStorage r_src, OpSize size) OVERRIDE;
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE;
     LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                           int offset, int check_value, LIR* target) OVERRIDE;
+                           int offset, int check_value, LIR* target, LIR** compare) OVERRIDE;
 
     // Required for target - register utilities.
     RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index e8f5cb9..6be66a2 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -163,7 +163,8 @@
 
 LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg,
                                      RegStorage base_reg, int offset, int check_value,
-                                     LIR* target) {
+                                     LIR* target, LIR** compare) {
+  DCHECK(compare == nullptr);
   // It is possible that temp register is 64-bit. (ArgReg or RefReg)
   // Always compare 32-bit value no matter what temp_reg is.
   if (temp_reg.Is64Bit()) {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 5870d22..1ac4707 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1162,9 +1162,12 @@
 }
 
 LIR *Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                int offset, int check_value, LIR* target) {
+                                int offset, int check_value, LIR* target, LIR** compare) {
   // Handle this for architectures that can't compare to memory.
-  Load32Disp(base_reg, offset, temp_reg);
+  LIR* inst = Load32Disp(base_reg, offset, temp_reg);
+  if (compare != nullptr) {
+    *compare = inst;
+  }
   LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
   return branch;
 }
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index b31e9a2..0bb253c 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -195,6 +195,7 @@
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
       return;
     }
+    // Insert after last instruction.
     MarkSafepointPC(last_lir_insn_);
   }
 }
@@ -622,7 +623,7 @@
         LockTemp(r_tmp);
         LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
                                           mirror::Class::StatusOffset().Int32Value(),
-                                          mirror::Class::kStatusInitialized, NULL);
+                                          mirror::Class::kStatusInitialized, nullptr, nullptr);
         LIR* cont = NewLIR0(kPseudoTargetLabel);
 
         AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
@@ -715,7 +716,7 @@
         LockTemp(r_tmp);
         LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
                                           mirror::Class::StatusOffset().Int32Value(),
-                                          mirror::Class::kStatusInitialized, NULL);
+                                          mirror::Class::kStatusInitialized, nullptr, nullptr);
         LIR* cont = NewLIR0(kPseudoTargetLabel);
 
         AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 6c0dfe8..55b68e6 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -985,17 +985,31 @@
       *pcrLabel = GenExplicitNullCheck(TargetRefReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
+      if (!(cu_->disable_opt & (1 << kNullCheckElimination)) &&
+          (info->opt_flags & MIR_IGNORE_NULL_CHECK)) {
+        return call_state;
+      }
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
-      RegStorage tmp = AllocTemp();
-      Load32Disp(TargetRefReg(kArg1), 0, tmp);
-      MarkPossibleNullPointerException(info->opt_flags);
-      FreeTemp(tmp);
+      GenImplicitNullCheck(TargetReg(kArg1, false), info->opt_flags);
     }
   }
   return call_state;
 }
 
+// Default implementation of implicit null pointer check.
+// Overridden by arch-specific code as necessary.
+void Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
+  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
+    return;
+  }
+  RegStorage tmp = AllocTemp();
+  Load32Disp(reg, 0, tmp);
+  MarkPossibleNullPointerException(opt_flags);
+  FreeTemp(tmp);
+}
+
+
 /*
  * May have 0+ arguments (also used for jumbo).  Note that
  * source virtual registers may be in physical registers, so may
@@ -1212,12 +1226,13 @@
       *pcrLabel = GenExplicitNullCheck(TargetRefReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
+      if (!(cu_->disable_opt & (1 << kNullCheckElimination)) &&
+          (info->opt_flags & MIR_IGNORE_NULL_CHECK)) {
+        return call_state;
+      }
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
-      RegStorage tmp = AllocTemp();
-      Load32Disp(TargetRefReg(kArg1), 0, tmp);
-      MarkPossibleNullPointerException(info->opt_flags);
-      FreeTemp(tmp);
+      GenImplicitNullCheck(TargetReg(kArg1, false), info->opt_flags);
     }
   }
   return call_state;
@@ -1293,11 +1308,14 @@
       // On x86, we can compare to memory directly
       // Set up a launch pad to allow retry in case of bounds violation */
       if (rl_idx.is_const) {
+        LIR* comparison;
         range_check_branch = OpCmpMemImmBranch(
             kCondUlt, RegStorage::InvalidReg(), rl_obj.reg, count_offset,
-            mir_graph_->ConstantValue(rl_idx.orig_sreg), nullptr);
-      } else {
+            mir_graph_->ConstantValue(rl_idx.orig_sreg), nullptr, &comparison);
+        MarkPossibleNullPointerExceptionAfter(0, comparison);
+      } else {
         OpRegMem(kOpCmp, rl_idx.reg, rl_obj.reg, count_offset);
+        MarkPossibleNullPointerException(0);
         range_check_branch = OpCondBranch(kCondUge, nullptr);
       }
     }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4885501..e93c6e3 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -838,6 +838,7 @@
     LIR* GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, ThrowKind kind);
     LIR* GenNullCheck(RegStorage m_reg, int opt_flags);
     LIR* GenExplicitNullCheck(RegStorage m_reg, int opt_flags);
+    virtual void GenImplicitNullCheck(RegStorage reg, int opt_flags);
     void GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1,
                              RegLocation rl_src2, LIR* taken, LIR* fall_through);
     void GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src,
@@ -1147,10 +1148,12 @@
      * @param base_reg The register holding the base address.
      * @param offset The offset from the base.
      * @param check_value The immediate to compare to.
+     * @param target branch target (or nullptr)
+     * @param compare output parameter receiving the comparison LIR (or nullptr)
      * @returns The branch instruction that was generated.
      */
     virtual LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                   int offset, int check_value, LIR* target);
+                                   int offset, int check_value, LIR* target, LIR** compare);
 
     // Required for target - codegen helpers.
     virtual bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 8df5b6d..ebe3f0a 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -271,21 +271,22 @@
   { kX86Shrd64RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64RRI", "!0r,!1r,!2d" },
   { kX86Shrd64MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64MRI", "[!0r+!1d],!2r,!3d" },
 
-  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8RI", "!0r,!1d" },
-  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8MI", "[!0r+!1d],!2d" },
-  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16RI", "!0r,!1d" },
-  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16MI", "[!0r+!1d],!2d" },
-  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32RI", "!0r,!1d" },
-  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32MI", "[!0r+!1d],!2d" },
-  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8RI", "!0r,!1d" },
+  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8MI", "[!0r+!1d],!2d" },
+  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16RI", "!0r,!1d" },
+  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16MI", "[!0r+!1d],!2d" },
+  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32RI", "!0r,!1d" },
+  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32MI", "[!0r+!1d],!2d" },
+  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
   { kX86Test64RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64RI", "!0r,!1d" },
   { kX86Test64MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64MI", "[!0r+!1d],!2d" },
   { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" },
 
-  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,    0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" },
+  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,     0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" },
   { kX86Test64RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test64RR", "!0r,!1r" },
+  { kX86Test32RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RM", "!0r,[!1r+!1d]" },
 
 #define UNARY_ENCODING_MAP(opname, modrm, is_store, sets_ccodes, \
                            reg, reg_kind, reg_flags, \
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 9000514..916198d 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -222,15 +222,27 @@
   LockTemp(rs_rX86_ARG1);
   LockTemp(rs_rX86_ARG2);
 
-  /* Build frame, return address already on stack */
-  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
-
   /*
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
-      !IsLargeFrame(frame_size_, cu_->target64 ? kX86_64 : kX86);
+  InstructionSet isa = cu_->target64 ? kX86_64 : kX86;
+  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, isa);
+
+  // If we are doing an implicit stack overflow check, perform the load immediately
+  // before the stack pointer is decremented and anything is saved.
+  if (!skip_overflow_check && !Runtime::Current()->ExplicitStackOverflowChecks()) {
+    // Implicit stack overflow check.
+    // test eax,[esp + -overflow]
+    int overflow = GetStackOverflowReservedBytes(isa);
+    NewLIR3(kX86Test32RM, rs_rAX.GetReg(), rs_rX86_SP.GetReg(), -overflow);
+    MarkPossibleStackOverflowException();
+  }
+
+  /* Build frame, return address already on stack */
+  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ -
+                              GetInstructionSetPointerSize(cu_->instruction_set));
+
   NewLIR0(kPseudoMethodEntry);
   /* Spill core callee saves */
   SpillCoreRegs();
@@ -261,25 +273,27 @@
      private:
       const size_t sp_displace_;
     };
-    // TODO: for large frames we should do something like:
-    // spill ebp
-    // lea ebp, [esp + frame_size]
-    // cmp ebp, fs:[stack_end_]
-    // jcc stack_overflow_exception
-    // mov esp, ebp
-    // in case a signal comes in that's not using an alternate signal stack and the large frame may
-    // have moved us outside of the reserved area at the end of the stack.
-    // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
-    if (cu_->target64) {
-      OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>());
-    } else {
-      OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>());
-    }
-    LIR* branch = OpCondBranch(kCondUlt, nullptr);
-    AddSlowPath(
+    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+      // TODO: for large frames we should do something like:
+      // spill ebp
+      // lea ebp, [esp + frame_size]
+      // cmp ebp, fs:[stack_end_]
+      // jcc stack_overflow_exception
+      // mov esp, ebp
+      // in case a signal comes in that's not using an alternate signal stack and the large frame
+      // may have moved us outside of the reserved area at the end of the stack.
+      // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
+      if (cu_->target64) {
+        OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>());
+      } else {
+        OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>());
+      }
+      LIR* branch = OpCondBranch(kCondUlt, nullptr);
+      AddSlowPath(
         new(arena_)StackOverflowSlowPath(this, branch,
                                          frame_size_ -
                                          GetInstructionSetPointerSize(cu_->instruction_set)));
+    }
   }
 
   FlushIns(ArgLocs, rl_method);
@@ -318,4 +332,14 @@
   NewLIR0(kX86Ret);
 }
 
+void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
+  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
+    return;
+  }
+  // Implicit null pointer check.
+  // test eax,[arg1+0]
+  NewLIR3(kX86Test32RM, rs_rAX.GetReg(), reg.GetReg(), 0);
+  MarkPossibleNullPointerException(opt_flags);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index ff7b30e..7e77364 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -85,6 +85,7 @@
   LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
                             RegStorage r_src, OpSize size) OVERRIDE;
   void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+  void GenImplicitNullCheck(RegStorage reg, int opt_flags);
 
   // Required for target - register utilities.
   RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
@@ -796,9 +797,11 @@
    * @param base_reg The register holding the base address.
    * @param offset The offset from the base.
    * @param check_value The immediate to compare to.
+   * @param target branch target (or nullptr)
+   * @param compare output parameter receiving the comparison LIR (or nullptr)
    */
   LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                         int offset, int check_value, LIR* target);
+                         int offset, int check_value, LIR* target, LIR** compare);
 
   /*
    * Can this operation be using core registers without temporaries?
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index f1166f6..e8118dc 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1092,6 +1092,7 @@
   };
 
   OpRegMem(kOpCmp, index, array_base, len_offset);
+  MarkPossibleNullPointerException(0);
   LIR* branch = OpCondBranch(kCondUge, nullptr);
   AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                     index, array_base, len_offset));
@@ -1132,6 +1133,7 @@
   };
 
   NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
+  MarkPossibleNullPointerException(0);
   LIR* branch = OpCondBranch(kCondLs, nullptr);
   AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                     index, array_base, len_offset));
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index e81f505..6731b38 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -825,8 +825,10 @@
 }
 
 LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
-  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
-  return nullptr;
+  // First load the pointer in fs:[suspend-trigger] into eax
+  // Then use a test instruction to indirect via that address.
+  NewLIR2(kX86Mov32RT, rs_rAX.GetReg(),  Thread::ThreadSuspendTriggerOffset<4>().Int32Value());
+  return NewLIR3(kX86Test32RM, rs_rAX.GetReg(), rs_rAX.GetReg(), 0);
 }
 
 uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
@@ -1189,6 +1191,7 @@
   // Is the string non-NULL?
   LoadValueDirectFixed(rl_obj, rs_rDX);
   GenNullCheck(rs_rDX, info->opt_flags);
+  // uint32_t opt_flags = info->opt_flags;
   info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
 
   // Does the character fit in 16 bits?
@@ -1215,12 +1218,20 @@
   // Character is in EAX.
   // Object pointer is in EDX.
 
+  // Compute the number of words to search in to rCX.
+  Load32Disp(rs_rDX, count_offset, rs_rCX);
+
+  // Possible signal here due to null pointer dereference.
+  // Note that the signal handler will expect the top word of
+  // the stack to be the ArtMethod*.  If the PUSH edi instruction
+  // below is ahead of the load above then this will not be true
+  // and the signal handler will not work.
+  MarkPossibleNullPointerException(0);
+
   // We need to preserve EDI, but have no spare registers, so push it on the stack.
   // We have to remember that all stack addresses after this are offset by sizeof(EDI).
   NewLIR1(kX86Push32R, rs_rDI.GetReg());
 
-  // Compute the number of words to search in to rCX.
-  Load32Disp(rs_rDX, count_offset, rs_rCX);
   LIR *length_compare = nullptr;
   int start_value = 0;
   bool is_index_on_stack = false;
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 657160f..09fce91 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -684,9 +684,9 @@
     } else {
       DCHECK(!r_dest.IsFloat());  // Make sure we're not still using a pair here.
       if (r_base == r_dest.GetLow()) {
-        load2 = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
+        load = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
                         displacement + HIWORD_OFFSET);
-        load = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
+        load2 = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
       } else {
         load = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
         load2 = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
@@ -712,16 +712,16 @@
         if (r_dest.GetHigh() == r_index) {
           // We can't use either register for the first load.
           RegStorage temp = AllocTemp();
-          load2 = NewLIR5(opcode, temp.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load = NewLIR5(opcode, temp.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
                           displacement + HIWORD_OFFSET);
-          load = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load2 = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
                          displacement + LOWORD_OFFSET);
           OpRegCopy(r_dest.GetHigh(), temp);
           FreeTemp(temp);
         } else {
-          load2 = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
                           displacement + HIWORD_OFFSET);
-          load = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load2 = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
                          displacement + LOWORD_OFFSET);
         }
       } else {
@@ -744,6 +744,7 @@
     }
   }
 
+  // Always return first load generated as this might cause a fault if base is nullptr.
   return load;
 }
 
@@ -881,9 +882,12 @@
 }
 
 LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                   int offset, int check_value, LIR* target) {
-    NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg.GetReg(), offset,
-            check_value);
+                                   int offset, int check_value, LIR* target, LIR** compare) {
+    LIR* inst = NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg.GetReg(),
+            offset, check_value);
+    if (compare != nullptr) {
+        *compare = inst;
+    }
     LIR* branch = OpCondBranch(cond, target);
     return branch;
 }
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 2789923..1ec3d41 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -497,6 +497,7 @@
   UnaryOpcode(kX86Test, RI, MI, AI),
   kX86Test32RR,
   kX86Test64RR,
+  kX86Test32RM,
   UnaryOpcode(kX86Not, R, M, A),
   UnaryOpcode(kX86Neg, R, M, A),
   UnaryOpcode(kX86Mul,  DaR, DaM, DaA),
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 80e7724..8800d18 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -738,6 +738,7 @@
   switch (isa) {
     case kArm:
     case kThumb2:
+    case kX86:
       break;  // All checks implemented, leave as is.
 
     default:  // No checks implemented, reset all to explicit checks.
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 7f5cf0c..3774b32 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -412,6 +412,7 @@
     LOCAL_STATIC_LIBRARIES := libziparchive libz
   else # host
     LOCAL_STATIC_LIBRARIES += libcutils libziparchive-host libz libutils
+    LOCAL_SHARED_LIBRARIES += libsigchain
     LOCAL_LDLIBS += -ldl -lpthread
     ifeq ($$(HOST_OS),linux)
       LOCAL_LDLIBS += -lrt
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 2a82129..e22c56e 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -46,9 +46,10 @@
   return instr_size;
 }
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
-  struct ucontext *uc = (struct ucontext *)context;
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   *out_sp = static_cast<uintptr_t>(sc->arm_sp);
   VLOG(signals) << "sp: " << *out_sp;
@@ -114,7 +115,7 @@
   uint32_t checkinst1 = 0xf8d90000 + Thread::ThreadSuspendTriggerOffset<4>().Int32Value();
   uint16_t checkinst2 = 0x6800;
 
-  struct ucontext *uc = (struct ucontext *)context;
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uint8_t* ptr2 = reinterpret_cast<uint8_t*>(sc->arm_pc);
   uint8_t* ptr1 = ptr2 - 4;
@@ -178,7 +179,7 @@
 // to the overflow region below the protected region.
 
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
-  struct ucontext *uc = (struct ucontext *)context;
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
   VLOG(signals) << "sigcontext: " << std::hex << sc;
@@ -205,7 +206,7 @@
   }
 
   // We know this is a stack overflow.  We need to move the sp to the overflow region
-  // the exists below the protected region.  Determine the address of the next
+  // that exists below the protected region.  Determine the address of the next
   // available valid address below the protected region.
   uintptr_t prevsp = sp;
   sp = pregion;
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 74c3023..34eede6 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -29,7 +29,8 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
 }
 
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 1ecd7d9..5a64a69 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -29,7 +29,8 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
 }
 
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index 7c1980e..f62200a 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -21,6 +21,10 @@
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
+#include "thread.h"
+#include "thread-inl.h"
 
 
 //
@@ -29,19 +33,294 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_stack_overflow_from_signal();
+extern "C" void art_quick_test_suspend();
+
+// From the x86 disassembler...
+enum SegmentPrefix {
+  kCs = 0x2e,
+  kSs = 0x36,
+  kDs = 0x3e,
+  kEs = 0x26,
+  kFs = 0x64,
+  kGs = 0x65,
+};
+
+// Get the size of an instruction in bytes.
+static uint32_t GetInstructionSize(uint8_t* pc) {
+  uint8_t* instruction_start = pc;
+  bool have_prefixes = true;
+  bool two_byte = false;
+
+  // Skip all the prefixes.
+  do {
+    switch (*pc) {
+        // Group 1 - lock and repeat prefixes:
+      case 0xF0:
+      case 0xF2:
+      case 0xF3:
+        // Group 2 - segment override prefixes:
+      case kCs:
+      case kSs:
+      case kDs:
+      case kEs:
+      case kFs:
+      case kGs:
+        // Group 3 - operand size override:
+      case 0x66:
+        // Group 4 - address size override:
+      case 0x67:
+        break;
+      default:
+        have_prefixes = false;
+        break;
+    }
+    if (have_prefixes) {
+      pc++;
+    }
+  } while (have_prefixes);
+
+#if defined(__x86_64__)
+  // Skip REX prefix if present.
+  if (*pc >= 0x40 && *pc <= 0x4F) {
+    ++pc;
+  }
+#endif
+
+  // Check for known instructions.
+  uint32_t known_length = 0;
+  switch (*pc) {
+  case 0x83:                // cmp [r + v], b: 4 byte instruction
+    known_length = 4;
+    break;
+  }
+
+  if (known_length > 0) {
+    VLOG(signals) << "known instruction with length " << known_length;
+    return known_length;
+  }
+
+  // Unknown instruction, work out length.
+
+  // Work out if we have a ModR/M byte.
+  uint8_t opcode = *pc++;
+  if (opcode == 0xf) {
+    two_byte = true;
+    opcode = *pc++;
+  }
+
+  bool has_modrm = false;         // Is ModR/M byte present?
+  uint8_t hi = opcode >> 4;       // Opcode high nybble.
+  uint8_t lo = opcode & 0b1111;   // Opcode low nybble.
+
+  // From the Intel opcode tables.
+  if (two_byte) {
+    has_modrm = true;   // TODO: all of these?
+  } else if (hi < 4) {
+    has_modrm = lo < 4 || (lo >= 8 && lo <= 0xb);
+  } else if (hi == 6) {
+    has_modrm = lo == 3 || lo == 9 || lo == 0xb;
+  } else if (hi == 8) {
+    has_modrm = lo != 0xd;
+  } else if (hi == 0xc) {
+    has_modrm = lo == 1 || lo == 2 || lo == 6 || lo == 7;
+  } else if (hi == 0xd) {
+    has_modrm = lo < 4;
+  } else if (hi == 0xf) {
+    has_modrm = lo == 6 || lo == 7;
+  }
+
+  if (has_modrm) {
+    uint8_t modrm = *pc++;
+    uint8_t mod = (modrm >> 6) & 0b11;
+    uint8_t reg = (modrm >> 3) & 0b111;
+    switch (mod) {
+      case 0:
+        break;
+      case 1:
+        if (reg == 4) {
+          // SIB + 1 byte displacement.
+          pc += 2;
+        } else {
+          pc += 1;
+        }
+        break;
+      case 2:
+        // SIB + 4 byte displacement.
+        pc += 5;
+        break;
+      case 3:
+        break;
+    }
+  }
+
+  VLOG(signals) << "calculated X86 instruction size is " << (pc - instruction_start);
+  return pc - instruction_start;
+}
+
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  *out_sp = static_cast<uintptr_t>(uc->uc_mcontext.gregs[REG_ESP]);
+  VLOG(signals) << "sp: " << std::hex << *out_sp;
+  if (*out_sp == 0) {
+    return;
+  }
+
+  // In the case of a stack overflow, the stack is not valid and we can't
+  // get the method from the top of the stack.  However it's in EAX.
+  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(siginfo->si_addr);
+  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kX86));
+  if (overflow_addr == fault_addr) {
+    *out_method = reinterpret_cast<mirror::ArtMethod*>(uc->uc_mcontext.gregs[REG_EAX]);
+  } else {
+    // The method is at the top of the stack.
+    *out_method = reinterpret_cast<mirror::ArtMethod*>(reinterpret_cast<uintptr_t*>(*out_sp)[0]);
+  }
+
+  uint8_t* pc = reinterpret_cast<uint8_t*>(uc->uc_mcontext.gregs[REG_EIP]);
+  VLOG(signals) << HexDump(pc, 32, true, "PC ");
+
+  uint32_t instr_size = GetInstructionSize(pc);
+  *out_return_pc = reinterpret_cast<uintptr_t>(pc + instr_size);
 }
 
 bool NullPointerHandler::Action(int sig, siginfo_t* info, void* context) {
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  uint8_t* pc = reinterpret_cast<uint8_t*>(uc->uc_mcontext.gregs[REG_EIP]);
+  uint8_t* sp = reinterpret_cast<uint8_t*>(uc->uc_mcontext.gregs[REG_ESP]);
+
+  uint32_t instr_size = GetInstructionSize(pc);
+  // We need to arrange for the signal handler to return to the null pointer
+  // exception generator.  The return address must be the address of the
+  // next instruction (this instruction + instruction size).  The return address
+  // is on the stack at the top address of the current frame.
+
+  // Push the return address onto the stack.
+  uint32_t retaddr = reinterpret_cast<uint32_t>(pc + instr_size);
+  uint32_t* next_sp = reinterpret_cast<uint32_t*>(sp - 4);
+  *next_sp = retaddr;
+  uc->uc_mcontext.gregs[REG_ESP] = reinterpret_cast<uint32_t>(next_sp);
+
+  uc->uc_mcontext.gregs[REG_EIP] =
+        reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  VLOG(signals) << "Generating null pointer exception";
+  return true;
+}
+
+// A suspend check is done using the following instruction sequence:
+// 0xf720f1df:         648B058C000000      mov     eax, fs:[0x8c]  ; suspend_trigger
+// .. some intervening instructions.
+// 0xf720f1e6:                   8500      test    eax, [eax]
+
+// The offset from fs is Thread::ThreadSuspendTriggerOffset().
+// To check for a suspend check, we examine the instructions that caused
+// the fault.
+bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
+  // These are the instructions to check for.  The first one is the mov eax, fs:[xxx]
+  // where xxx is the offset of the suspend trigger.
+  uint32_t trigger = Thread::ThreadSuspendTriggerOffset<4>().Int32Value();
+
+  VLOG(signals) << "Checking for suspension point";
+  uint8_t checkinst1[] = {0x64, 0x8b, 0x05, static_cast<uint8_t>(trigger & 0xff),
+      static_cast<uint8_t>((trigger >> 8) & 0xff), 0, 0};
+  uint8_t checkinst2[] = {0x85, 0x00};
+
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  uint8_t* pc = reinterpret_cast<uint8_t*>(uc->uc_mcontext.gregs[REG_EIP]);
+  uint8_t* sp = reinterpret_cast<uint8_t*>(uc->uc_mcontext.gregs[REG_ESP]);
+
+  if (pc[0] != checkinst2[0] || pc[1] != checkinst2[1]) {
+    // Second instruction is not correct (test eax,[eax]).
+    VLOG(signals) << "Not a suspension point";
+    return false;
+  }
+
+  // The first instruction can be a little bit further up the stream due to load hoisting
+  // in the compiler.
+  uint8_t* limit = pc - 100;   // Compiler will hoist to a max of 20 instructions.
+  uint8_t* ptr = pc - sizeof(checkinst1);
+  bool found = false;
+  while (ptr > limit) {
+    if (memcmp(ptr, checkinst1, sizeof(checkinst1)) == 0) {
+      found = true;
+      break;
+    }
+    ptr -= 1;
+  }
+
+  if (found) {
+    VLOG(signals) << "suspend check match";
+
+    // We need to arrange for the signal handler to return to the suspend check
+    // entrypoint.  The return address must be the address of the
+    // next instruction (this instruction + 2).  The return address
+    // is on the stack at the top address of the current frame.
+
+    // Push the return address onto the stack.
+    uint32_t retaddr = reinterpret_cast<uint32_t>(pc + 2);
+    uint32_t* next_sp = reinterpret_cast<uint32_t*>(sp - 4);
+    *next_sp = retaddr;
+    uc->uc_mcontext.gregs[REG_ESP] = reinterpret_cast<uint32_t>(next_sp);
+
+    uc->uc_mcontext.gregs[REG_EIP] = reinterpret_cast<uintptr_t>(art_quick_test_suspend);
+
+    // Now remove the suspend trigger that caused this fault.
+    Thread::Current()->RemoveSuspendTrigger();
+    VLOG(signals) << "removed suspend trigger invoking test suspend";
+    return true;
+  }
+  VLOG(signals) << "Not a suspend check match, first instruction mismatch";
   return false;
 }
 
-bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
-}
+// The stack overflow check is done using the following instruction:
+// test eax, [esp+ -xxx]
+// where 'xxx' is the size of the overflow area.
+//
+// This is done before any frame is established in the method.  The return
+// address for the previous method is on the stack at ESP.
 
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  uintptr_t sp = static_cast<uintptr_t>(uc->uc_mcontext.gregs[REG_ESP]);
+
+  uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);
+  VLOG(signals) << "fault_addr: " << std::hex << fault_addr;
+  VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
+    ", fault_addr: " << fault_addr;
+
+  uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kX86);
+
+  Thread* self = Thread::Current();
+  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
+      Thread::kStackOverflowProtectedSize;
+
+  // Check that the fault address is the value expected for a stack overflow.
+  if (fault_addr != overflow_addr) {
+    VLOG(signals) << "Not a stack overflow";
+    return false;
+  }
+
+  // We know this is a stack overflow.  We need to move the sp to the overflow region
+  // that exists below the protected region.  Determine the address of the next
+  // available valid address below the protected region.
+  VLOG(signals) << "setting sp to overflow region at " << std::hex << pregion;
+
+  // Since the compiler puts the implicit overflow
+  // check before the callee save instructions, the SP is already pointing to
+  // the previous frame.
+
+  // Tell the stack overflow code where the new stack pointer should be.
+  uc->uc_mcontext.gregs[REG_EAX] = pregion;
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
+  uc->uc_mcontext.gregs[REG_EIP] = reinterpret_cast<uintptr_t>(
+    art_quick_throw_stack_overflow_from_signal);
+
+  return true;
 }
 }       // namespace art
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 24b9e46..68f46ad 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -173,6 +173,21 @@
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
 
+// On entry to this function, EAX contains the ESP value for the overflow region.
+DEFINE_FUNCTION art_quick_throw_stack_overflow_from_signal
+    // Here, the ESP is above the protected region.  We need to create a
+    // callee save frame and then move ESP down to the overflow region.
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov %esp, %ecx                // get current stack pointer
+    mov %eax, %esp                // move ESP to the overflow region.
+    PUSH ecx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
+    call PLT_SYMBOL(artThrowStackOverflowFromCode)    // artThrowStackOverflowFromCode(Thread*, SP)
+    int3                          // unreached
+END_FUNCTION art_quick_throw_stack_overflow_from_signal
+
     /*
      * Called by managed code, saves callee saves and then calls artThrowException
      * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
diff --git a/runtime/arch/x86_64/fault_handler_x86_64.cc b/runtime/arch/x86_64/fault_handler_x86_64.cc
index 233d3c7..88ae7f3 100644
--- a/runtime/arch/x86_64/fault_handler_x86_64.cc
+++ b/runtime/arch/x86_64/fault_handler_x86_64.cc
@@ -29,7 +29,8 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
 }
 
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 3112bc0..f99ce07 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -29,9 +29,7 @@
 #include "mirror/object-inl.h"
 #include "object_utils.h"
 #include "scoped_thread_state_change.h"
-#ifdef HAVE_ANDROID_OS
 #include "sigchain.h"
-#endif
 #include "verify_object-inl.h"
 
 namespace art {
@@ -47,6 +45,7 @@
 
 // Signal handler called on SIGSEGV.
 static void art_fault_handler(int sig, siginfo_t* info, void* context) {
+  // std::cout << "handling fault in ART handler\n";
   fault_manager.HandleFault(sig, info, context);
 }
 
@@ -55,9 +54,7 @@
 }
 
 FaultManager::~FaultManager() {
-#ifdef HAVE_ANDROID_OS
   UnclaimSignalChain(SIGSEGV);
-#endif
   sigaction(SIGSEGV, &oldaction_, nullptr);   // Restore old handler.
 }
 
@@ -72,11 +69,12 @@
 #endif
 
   // Set our signal handler now.
-  sigaction(SIGSEGV, &action, &oldaction_);
-#ifdef HAVE_ANDROID_OS
+  int e = sigaction(SIGSEGV, &action, &oldaction_);
+  if (e != 0) {
+    VLOG(signals) << "Failed to claim SEGV: " << strerror(errno);
+  }
   // Make sure our signal handler is called before any user handlers.
   ClaimSignalChain(SIGSEGV, &oldaction_);
-#endif
 }
 
 void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
@@ -84,8 +82,12 @@
   //
   // If malloc calls abort, it will be holding its lock.
   // If the handler tries to call malloc, it will deadlock.
+
+  // Also, there is only an 8K stack available here so logging can cause memory
+  // overwrite issues if you are unlucky.  If you want to enable logging and
+  // are getting crashes, allocate more space for the alternate signal stack.
   VLOG(signals) << "Handling fault";
-  if (IsInGeneratedCode(context, true)) {
+  if (IsInGeneratedCode(info, context, true)) {
     VLOG(signals) << "in generated code, looking for handler";
     for (const auto& handler : generated_code_handlers_) {
       VLOG(signals) << "invoking Action on handler " << handler;
@@ -101,11 +103,8 @@
   }
   art_sigsegv_fault();
 
-#ifdef HAVE_ANDROID_OS
+  // Pass this on to the next handler in the chain, or the default if none.
   InvokeUserSignalHandler(sig, info, context);
-#else
-  oldaction_.sa_sigaction(sig, info, context);
-#endif
 }
 
 void FaultManager::AddHandler(FaultHandler* handler, bool generated_code) {
@@ -132,7 +131,7 @@
 
 // This function is called within the signal handler.  It checks that
 // the mutator_lock is held (shared).  No annotalysis is done.
-bool FaultManager::IsInGeneratedCode(void* context, bool check_dex_pc) {
+bool FaultManager::IsInGeneratedCode(siginfo_t* siginfo, void* context, bool check_dex_pc) {
   // We can only be running Java code in the current thread if it
   // is in Runnable state.
   VLOG(signals) << "Checking for generated code";
@@ -161,7 +160,7 @@
 
   // Get the architecture specific method address and return address.  These
   // are in architecture specific files in arch/<arch>/fault_handler_<arch>.
-  GetMethodAndReturnPCAndSP(context, &method_obj, &return_pc, &sp);
+  GetMethodAndReturnPCAndSP(siginfo, context, &method_obj, &return_pc, &sp);
 
   // If we don't have a potential method, we're outta here.
   VLOG(signals) << "potential method: " << method_obj;
@@ -242,12 +241,12 @@
 
 bool JavaStackTraceHandler::Action(int sig, siginfo_t* siginfo, void* context) {
   // Make sure that we are in the generated code, but we may not have a dex pc.
-  if (manager_->IsInGeneratedCode(context, false)) {
+  if (manager_->IsInGeneratedCode(siginfo, context, false)) {
     LOG(ERROR) << "Dumping java stack trace for crash in generated code";
     mirror::ArtMethod* method = nullptr;
     uintptr_t return_pc = 0;
     uintptr_t sp = 0;
-    manager_->GetMethodAndReturnPCAndSP(context, &method, &return_pc, &sp);
+    manager_->GetMethodAndReturnPCAndSP(siginfo, context, &method, &return_pc, &sp);
     Thread* self = Thread::Current();
     // Inside of generated code, sp[0] is the method, so sp is the frame.
     StackReference<mirror::ArtMethod>* frame =
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 026f5b9..71c9977 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -43,9 +43,10 @@
   void HandleFault(int sig, siginfo_t* info, void* context);
   void AddHandler(FaultHandler* handler, bool generated_code);
   void RemoveHandler(FaultHandler* handler);
-  void GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+  void GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context, mirror::ArtMethod** out_method,
                                  uintptr_t* out_return_pc, uintptr_t* out_sp);
-  bool IsInGeneratedCode(void *context, bool check_dex_pc) NO_THREAD_SAFETY_ANALYSIS;
+  bool IsInGeneratedCode(siginfo_t* siginfo, void *context, bool check_dex_pc)
+                         NO_THREAD_SAFETY_ANALYSIS;
 
  private:
   std::vector<FaultHandler*> generated_code_handlers_;
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index e1e133f..333ba03 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -286,8 +286,8 @@
     }
   }
 #else
-  explicit_checks_ = kExplicitNullCheck | kExplicitSuspendCheck |
-    kExplicitStackOverflowCheck;
+  // Host.  Only suspend check is explicit by default.
+  explicit_checks_ = kExplicitSuspendCheck;
 #endif
 
   for (size_t i = 0; i < options.size(); ++i) {
@@ -305,6 +305,7 @@
       Exit(0);
     } else if (StartsWith(option, "-Xbootclasspath:")) {
       boot_class_path_string_ = option.substr(strlen("-Xbootclasspath:")).data();
+      LOG(INFO) << "setting boot class path to " << boot_class_path_string_;
     } else if (option == "-classpath" || option == "-cp") {
       // TODO: support -Djava.class.path
       i++;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index efa205e..6459a52 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -580,10 +580,41 @@
     GetInstrumentation()->ForceInterpretOnly();
   }
 
+  heap_ = new gc::Heap(options->heap_initial_size_,
+                       options->heap_growth_limit_,
+                       options->heap_min_free_,
+                       options->heap_max_free_,
+                       options->heap_target_utilization_,
+                       options->foreground_heap_growth_multiplier_,
+                       options->heap_maximum_size_,
+                       options->image_,
+                       options->image_isa_,
+                       options->collector_type_,
+                       options->background_collector_type_,
+                       options->parallel_gc_threads_,
+                       options->conc_gc_threads_,
+                       options->low_memory_mode_,
+                       options->long_pause_log_threshold_,
+                       options->long_gc_log_threshold_,
+                       options->ignore_max_footprint_,
+                       options->use_tlab_,
+                       options->verify_pre_gc_heap_,
+                       options->verify_pre_sweeping_heap_,
+                       options->verify_post_gc_heap_,
+                       options->verify_pre_gc_rosalloc_,
+                       options->verify_pre_sweeping_rosalloc_,
+                       options->verify_post_gc_rosalloc_);
+
+  dump_gc_performance_on_shutdown_ = options->dump_gc_performance_on_shutdown_;
+
+  BlockSignals();
+  InitPlatformSignalHandlers();
+
   bool implicit_checks_supported = false;
   switch (kRuntimeISA) {
     case kArm:
     case kThumb2:
+    case kX86:
       implicit_checks_supported = true;
       break;
     default:
@@ -615,36 +646,6 @@
     }
   }
 
-  heap_ = new gc::Heap(options->heap_initial_size_,
-                       options->heap_growth_limit_,
-                       options->heap_min_free_,
-                       options->heap_max_free_,
-                       options->heap_target_utilization_,
-                       options->foreground_heap_growth_multiplier_,
-                       options->heap_maximum_size_,
-                       options->image_,
-                       options->image_isa_,
-                       options->collector_type_,
-                       options->background_collector_type_,
-                       options->parallel_gc_threads_,
-                       options->conc_gc_threads_,
-                       options->low_memory_mode_,
-                       options->long_pause_log_threshold_,
-                       options->long_gc_log_threshold_,
-                       options->ignore_max_footprint_,
-                       options->use_tlab_,
-                       options->verify_pre_gc_heap_,
-                       options->verify_pre_sweeping_heap_,
-                       options->verify_post_gc_heap_,
-                       options->verify_pre_gc_rosalloc_,
-                       options->verify_pre_sweeping_rosalloc_,
-                       options->verify_post_gc_rosalloc_);
-
-  dump_gc_performance_on_shutdown_ = options->dump_gc_performance_on_shutdown_;
-
-  BlockSignals();
-  InitPlatformSignalHandlers();
-
   java_vm_ = new JavaVMExt(this, options.get());
 
   Thread::Startup();
diff --git a/runtime/thread.cc b/runtime/thread.cc
index d60fb49..4147dc2 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -233,47 +233,99 @@
   return stack_size;
 }
 
+// Global variable to prevent the compiler optimizing away the page reads for the stack.
+byte dont_optimize_this;
+
 // Install a protected region in the stack.  This is used to trigger a SIGSEGV if a stack
 // overflow is detected.  It is located right below the stack_end_.  Just below that
 // is the StackOverflow reserved region used when creating the StackOverflow
 // exception.
+//
+// There is a little complexity here that deserves a special mention.  When running on the
+// host (glibc), the process's main thread's stack is allocated with a special flag
+// to prevent memory being allocated when it's not needed.  This flag makes the
+// kernel only allocate memory for the stack by growing down in memory.  Because we
+// want to put an mprotected region far away from that at the stack top, we need
+// to make sure the pages for the stack are mapped in before we call mprotect.  We do
+// this by reading every page from the stack bottom (highest address) to the stack top.
+// We then madvise this away.
 void Thread::InstallImplicitProtection(bool is_main_stack) {
   byte* pregion = tlsPtr_.stack_end;
+  byte* stack_lowmem = tlsPtr_.stack_begin;
+  byte* stack_top = reinterpret_cast<byte*>(reinterpret_cast<uintptr_t>(&pregion) &
+      ~(kPageSize - 1));    // Page containing current top of stack.
 
+#ifndef HAVE_ANDROID_OS
+  bool running_on_host = true;
+#else
+  bool running_on_host = false;
+#endif
+
+  if (running_on_host) {
+    // On Host, we need to map in the main stack.  This must be done by reading from the
+    // current stack pointer downwards as the stack is mapped using VM_GROWSDOWN
+    // in the kernel.  Any access more than a page below the current SP will cause
+    // a segv.
+    if (is_main_stack) {
+      // First we need to unprotect the protected region because this may
+      // be called more than once for a particular stack and we will crash
+      // if we try to read the protected page.
+      mprotect(pregion - kStackOverflowProtectedSize, kStackOverflowProtectedSize, PROT_READ);
+
+      // Read every page from the high address to the low.
+      for (byte* p = stack_top; p > stack_lowmem; p -= kPageSize) {
+        dont_optimize_this = *p;
+      }
+    }
+  }
+
+  // Check and place a marker word at the lowest usable address in the stack.  This
+  // is used to prevent a double protection.
   constexpr uint32_t kMarker = 0xdadadada;
   uintptr_t *marker = reinterpret_cast<uintptr_t*>(pregion);
   if (*marker == kMarker) {
-    // The region has already been set up.
+    // The region has already been set up.  But on the main stack on the host we have
+    // removed the protected region in order to read the stack memory.  We need to put
+    // this back again.
+    if (is_main_stack && running_on_host) {
+      mprotect(pregion - kStackOverflowProtectedSize, kStackOverflowProtectedSize, PROT_NONE);
+      madvise(stack_lowmem, stack_top - stack_lowmem, MADV_DONTNEED);
+    }
     return;
   }
   // Add marker so that we can detect a second attempt to do this.
   *marker = kMarker;
 
-  pregion -= kStackOverflowProtectedSize;
-
-  // Touch the pages in the region to map them in.  Otherwise mprotect fails.  Only
-  // need to do this on the main stack.  We only need to touch one byte per page.
-  if (is_main_stack) {
-    byte* start = pregion;
-    byte* end = pregion + kStackOverflowProtectedSize;
-    while (start < end) {
-      *start = static_cast<byte>(0);
-      start += kPageSize;
+  if (!running_on_host) {
+    // Running on Android, stacks are mapped cleanly.  The protected region for the
+    // main stack just needs to be mapped in.  We do this by writing one byte per page.
+    for (byte* p = pregion - kStackOverflowProtectedSize;  p < pregion; p += kPageSize) {
+      *p = 0;
     }
   }
 
+  pregion -= kStackOverflowProtectedSize;
+
   VLOG(threads) << "installing stack protected region at " << std::hex <<
       static_cast<void*>(pregion) << " to " <<
       static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
 
+
   if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
     LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
         << strerror(errno);
   }
 
   // Tell the kernel that we won't be needing these pages any more.
+  // NB. madvise will probably write zeroes into the memory (on linux it does).
   if (is_main_stack) {
-    madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
+    if (running_on_host) {
+      // On the host, it's the whole stack (minus a page to prevent overwrite of stack top).
+      madvise(stack_lowmem, stack_top - stack_lowmem - kPageSize, MADV_DONTNEED);
+    } else {
+      // On Android, just the protected region.
+      madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
+    }
   }
 }
 
@@ -534,13 +586,17 @@
   // Install the protected region if we are doing implicit overflow checks.
   if (implicit_stack_check) {
     if (is_main_thread) {
-      // The main thread has a 16K protected region at the bottom.  We need
+      size_t guardsize;
+      pthread_attr_t attributes;
+      CHECK_PTHREAD_CALL(pthread_attr_init, (&attributes), "guard size query");
+      CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, &guardsize), "guard size query");
+      CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), "guard size query");
+      // The main thread might have a protected region at the bottom.  We need
       // to install our own region so we need to move the limits
       // of the stack to make room for it.
-      constexpr uint32_t kDelta = 16 * KB;
-      tlsPtr_.stack_begin += kDelta;
-      tlsPtr_.stack_end += kDelta;
-      tlsPtr_.stack_size -= kDelta;
+      tlsPtr_.stack_begin += guardsize;
+      tlsPtr_.stack_end += guardsize;
+      tlsPtr_.stack_size -= guardsize;
     }
     InstallImplicitProtection(is_main_thread);
   }
diff --git a/runtime/thread_linux.cc b/runtime/thread_linux.cc
index ee66ccc..518211b 100644
--- a/runtime/thread_linux.cc
+++ b/runtime/thread_linux.cc
@@ -35,8 +35,8 @@
 void Thread::SetUpAlternateSignalStack() {
   // Create and set an alternate signal stack.
   stack_t ss;
-  ss.ss_sp = new uint8_t[SIGSTKSZ];
-  ss.ss_size = SIGSTKSZ;
+  ss.ss_sp = new uint8_t[SIGSTKSZ * 2];   // NB. this is 16K.
+  ss.ss_size = SIGSTKSZ * 2;
   ss.ss_flags = 0;
   CHECK(ss.ss_sp != NULL);
   SigAltStack(&ss, NULL);
@@ -56,7 +56,7 @@
   // Tell the kernel to stop using it.
   ss.ss_sp = NULL;
   ss.ss_flags = SS_DISABLE;
-  ss.ss_size = SIGSTKSZ;  // Avoid ENOMEM failure with Mac OS' buggy libc.
+  ss.ss_size = SIGSTKSZ * 2;  // Avoid ENOMEM failure with Mac OS' buggy libc.
   SigAltStack(&ss, NULL);
 
   // Free it.
diff --git a/sigchainlib/Android.mk b/sigchainlib/Android.mk
index 8e25339..20c8cac 100644
--- a/sigchainlib/Android.mk
+++ b/sigchainlib/Android.mk
@@ -28,3 +28,16 @@
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.common_build.mk
 include $(BUILD_SHARED_LIBRARY)
+
+# Build host library.
+include $(CLEAR_VARS)
+LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+LOCAL_MODULE_TAGS := optional
+LOCAL_IS_HOST_MODULE := true
+LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
+LOCAL_SRC_FILES := sigchain.cc
+LOCAL_MODULE:= libsigchain
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+LOCAL_LDLIBS = -ldl
+LOCAL_MULTILIB := both
+include $(BUILD_HOST_SHARED_LIBRARY)
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index 26e7d31..73cc9eb 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -14,7 +14,13 @@
  * limitations under the License.
  */
 
+#ifdef HAVE_ANDROID_OS
 #include <android/log.h>
+#else
+#include <stdarg.h>
+#include <iostream>
+#endif
+
 #include <dlfcn.h>
 #include <signal.h>
 #include <stdio.h>
@@ -67,7 +73,11 @@
   va_list ap;
   va_start(ap, format);
   vsnprintf(buf, sizeof(buf), format, ap);
+#ifdef HAVE_ANDROID_OS
   __android_log_write(ANDROID_LOG_ERROR, "libsigchain", buf);
+#else
+  std::cout << buf << "\n";
+#endif
   va_end(ap);
 }
 
@@ -109,10 +119,16 @@
   if ((action.sa_flags & SA_SIGINFO) == 0) {
     if (action.sa_handler != NULL) {
       action.sa_handler(sig);
+    } else {
+      signal(sig, SIG_DFL);
+      raise(sig);
     }
   } else {
     if (action.sa_sigaction != NULL) {
       action.sa_sigaction(sig, info, context);
+    } else {
+      signal(sig, SIG_DFL);
+      raise(sig);
     }
   }
 }
diff --git a/test/Android.oat.mk b/test/Android.oat.mk
index fec2540..efd1793 100644
--- a/test/Android.oat.mk
+++ b/test/Android.oat.mk
@@ -203,6 +203,7 @@
 	ANDROID_ROOT=$(HOST_OUT) \
 	ANDROID_LOG_TAGS='*:d' \
 	LD_LIBRARY_PATH=$$($(2)ART_HOST_OUT_SHARED_LIBRARIES) \
+	LD_PRELOAD=libsigchain$$(ART_HOST_SHLIB_EXTENSION) \
 	$(HOST_OUT_EXECUTABLES)/dalvikvm$$($(2)ART_PHONY_TEST_HOST_SUFFIX) $(DALVIKVM_FLAGS) $(5) \
 	    -XXlib:libartd$(HOST_SHLIB_SUFFIX) -Ximage:$$(HOST_CORE_IMG_LOCATION) \
 	    -classpath $(ART_HOST_TEST_DIR)/android-data-$$@/oat-test-dex-$(1).jar \