AArch64: Implicit StackOverflow/NPE/Suspend checks.

This implements implicit stack overflow checks and implicit null pointer
exception checks for AArch64.  Suspend checks are implemented but not
switched on yet.
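
The implicit checks replace explicit limit tests with a memory access
that faults when the check would fail; the fault handler then inspects
the faulting state and redirects the PC to the matching exception
entrypoint.  For the stack overflow check, the prologue emitted in
call_arm64.cc probes below the stack pointer, roughly:

    sub x8, sp, #<reserved>   // GetStackOverflowReservedBytes(kArm64)
    ldr x8, [x8]              // faults if this falls in the protected region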

Change-Id: I2eb076f2c0c9d94793d5a898fea49cf409b4eb66
Signed-off-by: Stuart Monteith <stuart.monteith@arm.com>
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 6fa8a4a..f00555a 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -322,6 +322,11 @@
   LockTemp(rs_xIP0);
   LockTemp(rs_xIP1);
 
+  /* TUNING:
+   * Use AllocTemp() and reuse LR if possible to give us the freedom to adjust
+   * the number of temp registers.
+   */
+
   /*
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
@@ -339,16 +344,15 @@
       // Load stack limit
       LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP1);
     } else {
-      // TODO(Arm64) Implement implicit checks.
       // Implicit stack overflow check.
       // Generate a load from [sp, #-framesize].  If this is in the stack
       // redzone we will get a segmentation fault.
-      // Load32Disp(rs_wSP, -Thread::kStackOverflowReservedBytes, rs_wzr);
-      // MarkPossibleStackOverflowException();
-      //
+
       // TODO: If the frame size is small enough, is it possible to make this a pre-indexed load,
       //       so that we can avoid the following "sub sp" when spilling?
-      LOG(FATAL) << "Implicit stack overflow checks not implemented.";
+      OpRegRegImm(kOpSub, rs_x8, rs_sp, GetStackOverflowReservedBytes(kArm64));
+      LoadWordDisp(rs_x8, 0, rs_x8);
+      MarkPossibleStackOverflowException();
     }
   }
 
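
For reference, the pre-indexed form the TODO above alludes to would be roughly
(hypothetical, and only encodable when the frame size fits the signed 9-bit
pre-index immediate):

    ldr x8, [sp, #-<framesize>]!   // faults in the redzone, then writes back SP

which would fold the probe and the subsequent "sub sp" into one instruction.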
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 0437f30..ac3eb39 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1167,6 +1167,7 @@
   switch (instruction_set) {
     case kArm:
     case kThumb2:
+    case kArm64:
     case kX86:
       implicit_null_checks = true;
       implicit_so_checks = true;
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 34eede6..dc82cc2 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -21,7 +21,15 @@
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "registers_arm64.h"
+#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
+#include "thread.h"
+#include "thread-inl.h"
 
+extern "C" void art_quick_throw_stack_overflow_from_signal();
+extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_implicit_suspend();
 
 //
 // ARM64 specific fault handler functions.
@@ -32,17 +40,160 @@
 void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
                                              mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
+  struct ucontext *uc = reinterpret_cast<struct ucontext *>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  *out_sp = static_cast<uintptr_t>(sc->sp);
+  VLOG(signals) << "sp: " << *out_sp;
+  if (*out_sp == 0) {
+    return;
+  }
+
+  // In the case of a stack overflow, the stack is not valid and we can't
+  // get the method from the top of the stack.  However, it's still in x0.
+  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
+  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kArm64));
+  if (overflow_addr == fault_addr) {
+    *out_method = reinterpret_cast<mirror::ArtMethod*>(sc->regs[0]);
+  } else {
+    // The method is at the top of the stack.
+    *out_method = (reinterpret_cast<StackReference<mirror::ArtMethod>* >(*out_sp)[0]).AsMirrorPtr();
+  }
+
+  // Work out the return PC.  This will be the address of the instruction
+  // following the faulting ldr/str instruction.
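+  // A64 instructions are a fixed 4 bytes long, so the following instruction is
+  // always at PC + 4.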
+  VLOG(signals) << "pc: " << std::hex
+      << static_cast<void*>(reinterpret_cast<uint8_t*>(sc->pc));
+
+  *out_return_pc = sc->pc + 4;
 }
 
 bool NullPointerHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
+  // The code that looks for the catch location needs to know the value of the
+  // PC at the point of call.  For null checks we insert a GC map entry immediately
+  // after the load/store instruction that might cause the fault.
+
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+
+  sc->regs[30] = sc->pc + 4;      // LR needs to point to the GC map location.
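+  // The throw entrypoint builds a callee-save frame; exception delivery then uses
+  // this LR value to find the GC map entry for the faulting instruction.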
+
+  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  VLOG(signals) << "Generating null pointer exception";
+  return true;
 }
 
+// A suspend check is done using the following instruction sequence:
+//      0xf7223228: f9405640  ldr x0, [x18, #168]
+// .. some intervening instructions
+//      0xf7223230: f9400000  ldr x0, [x0]
+
+// The offset from x18 is Thread::ThreadSuspendTriggerOffset().
+// To recognize a suspend check, we examine the instruction at the faulting PC
+// and then search backwards for the trigger load (usually at PC-4, but it may
+// have been hoisted earlier).
 bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
+  // These are the instructions to check for.  The first one is the ldr x0, [x18, #xxx]
+  // where xxx is the offset of the suspend trigger.
+  uint32_t checkinst1 = 0xf9400240 | (Thread::ThreadSuspendTriggerOffset<8>().Int32Value() << 7);
+  uint32_t checkinst2 = 0xf9400000;
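+  // (checkinst1 relies on the A64 LDR (unsigned immediate, 64-bit) encoding: the
+  // scaled imm12 field starts at bit 10 and holds the byte offset divided by 8,
+  // so "offset << 7" places offset/8 into imm12.)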
+
+  struct ucontext *uc = reinterpret_cast<struct ucontext *>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  uint8_t* ptr2 = reinterpret_cast<uint8_t*>(sc->pc);
+  uint8_t* ptr1 = ptr2 - 4;
+  VLOG(signals) << "checking suspend";
+
+  uint32_t inst2 = *reinterpret_cast<uint32_t*>(ptr2);
+  VLOG(signals) << "inst2: " << std::hex << inst2 << " checkinst2: " << checkinst2;
+  if (inst2 != checkinst2) {
+    // Second instruction does not match; this is not our suspend check.
+    return false;
+  }
+
+  // The first instruction can be a little further up the stream due to load
+  // hoisting in the compiler.
+  uint8_t* limit = ptr1 - 80;   // Compiler will hoist to a max of 20 instructions.
+  bool found = false;
+  while (ptr1 > limit) {
+    uint32_t inst1 = *reinterpret_cast<uint32_t*>(ptr1);
+    VLOG(signals) << "inst1: " << std::hex << inst1 << " checkinst1: " << checkinst1;
+    if (inst1 == checkinst1) {
+      found = true;
+      break;
+    }
+    ptr1 -= 4;
+  }
+  if (found) {
+    VLOG(signals) << "suspend check match";
+    // This is a suspend check.  Arrange for the signal handler to return to
+    // art_quick_implicit_suspend.  Also set LR so that after the suspend check it
+    // will resume at the instruction following the check (current PC + 4).  PC
+    // points to the ldr x0, [x0] instruction (x0 will be 0, set by the trigger).
+
+    sc->regs[30] = sc->pc + 4;
+    sc->pc = reinterpret_cast<uintptr_t>(art_quick_implicit_suspend);
+
+    // Now remove the suspend trigger that caused this fault.
+    Thread::Current()->RemoveSuspendTrigger();
+    VLOG(signals) << "removed suspend trigger invoking test suspend";
+    return true;
+  }
   return false;
 }
 
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
+  struct ucontext *uc = reinterpret_cast<struct ucontext *>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
+  VLOG(signals) << "sigcontext: " << std::hex << sc;
+
+  uintptr_t sp = sc->sp;
+  VLOG(signals) << "sp: " << std::hex << sp;
+
+  uintptr_t fault_addr = sc->fault_address;
+  VLOG(signals) << "fault_addr: " << std::hex << fault_addr;
+  VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
+      ", fault_addr: " << fault_addr;
+
+  uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kArm64);
+
+  Thread* self = reinterpret_cast<Thread*>(sc->regs[art::arm64::TR]);
+  CHECK_EQ(self, Thread::Current());
+  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
+      Thread::kStackOverflowProtectedSize;
+
+  // Check that the fault address is the value expected for a stack overflow.
+  if (fault_addr != overflow_addr) {
+    VLOG(signals) << "Not a stack overflow";
+    return false;
+  }
+
+  // We know this is a stack overflow.  We need to move the sp to the overflow region
+  // that exists below the protected region.  Determine the next valid address
+  // below the protected region.
+  uintptr_t prevsp = sp;
+  sp = pregion;
+  VLOG(signals) << "setting sp to overflow region at " << std::hex << sp;
+
+  // Since the compiler puts the implicit overflow
+  // check before the callee save instructions, the SP is already pointing to
+  // the previous frame.
+  VLOG(signals) << "previous frame: " << std::hex << prevsp;
+
+  // Now establish the stack pointer for the signal return.
+  sc->sp = prevsp;
+
+  // Tell the stack overflow code where the new stack pointer should be.
+  sc->regs[art::arm64::IP0] = sp;      // aka x16
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
+  // The value of LR must be the same as it was when we entered the code that
+  // caused this fault.  This will be inserted into a callee save frame by
+  // the function to which this handler returns (art_quick_throw_stack_overflow_from_signal).
+  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow_from_signal);
+
+  // The kernel will now return to the address in sc->pc.
+  return true;
 }
 }       // namespace art
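
For context, the suspend trigger that makes the second load fault is managed by
the runtime.  A sketch of the mechanism (the exact Thread field and accessor
bodies are assumed here, not part of this patch):

    // Request a suspend check: the next load through the trigger faults.
    void Thread::TriggerSuspend() {
      tlsPtr_.suspend_trigger = nullptr;
    }

    // Acknowledge the request: point the trigger back at itself so loads
    // through it succeed again.
    void Thread::RemoveSuspendTrigger() {
      tlsPtr_.suspend_trigger = reinterpret_cast<uintptr_t*>(&tlsPtr_.suspend_trigger);
    }

SuspensionHandler::Action() above calls RemoveSuspendTrigger() once it has
matched the instruction pair.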
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index ba85d32..04be4a2 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -435,6 +435,31 @@
      */
 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
 
+  /*
+   * Throw a stack overflow exception from the signal handler.
+   * On entry:
+   * xSELF: thread
+   * SP: address of last known frame
+   * IP0: address of next valid SP below protected region in stack
+   *
+   * This is deceptively simple but hides some complexity.  It is called in the case of
+   * a stack overflow condition during implicit checks.  The signal handler has been
+   * called by the kernel due to a load from the protected stack region.  The handler
+   * works out the address of the previous frame and passes this in SP.  However there
+   * is a piece of memory somewhere below the current SP that is not accessible (the
+   * memory that caused the signal).  The signal handler works out the next
+   * accessible value of SP and passes this in x16/IP0.  This code then sets up the SP
+   * to be this new value and calls the code to create and throw the stack overflow
+   * exception.
+   */
+ENTRY art_quick_throw_stack_overflow_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov x0, xSELF                   // pass Thread::Current
+    mov x1, sp                      // pass SP
+    mov sp, xIP0                    // move SP down to below protected region.
+    b   artThrowStackOverflowFromCode                  // artThrowStackOverflowFromCode(Thread*, SP)
+END art_quick_throw_stack_overflow_from_signal
+
     /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
@@ -1323,6 +1348,14 @@
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
 END art_quick_test_suspend
 
+ENTRY art_quick_implicit_suspend
+    mov    x0, xSELF
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME          // save callee saves for stack crawl
+    mov    x1, sp
+    bl     artTestSuspendFromCode             // (Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+END art_quick_implicit_suspend
+
      /*
      * Called by managed code that is attempting to call a method on a proxy class. On entry
      * x0 holds the proxy method and x1 holds the receiver; The frame size of the invoked proxy
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 2c25c2c..fe877d5 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -620,6 +620,7 @@
     case kArm:
     case kThumb2:
     case kX86:
+    case kArm64:
       implicit_null_checks_ = true;
       implicit_so_checks_ = true;
       break;
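
These flags gate the installation of the fault handlers at startup.  A sketch
of how the registration ties back to the handler classes in this patch (the
surrounding runtime.cc code is assumed, not part of this change):

    if (implicit_null_checks_ || implicit_so_checks_ || implicit_suspend_checks_) {
      fault_manager.Init();
      // Each handler registers itself with the fault manager in its constructor.
      if (implicit_suspend_checks_) {
        new SuspensionHandler(&fault_manager);
      }
      if (implicit_so_checks_) {
        new StackOverflowHandler(&fault_manager);
      }
      if (implicit_null_checks_) {
        new NullPointerHandler(&fault_manager);
      }
    }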