Reland^2 "ART: Rewrite compiled code check in FaultHandler."

This reverts commit 0110e952e488bc41429f6f33f36e8884f41a26d8.

Reason for revert: Reland with a fix: Release the
mutator lock before requesting an empty checkpoint.
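
The fix is in FaultManager::RemoveGeneratedCodeRange(): when the caller
holds the mutator lock shared, the thread now transitions out of the
Runnable state (or releases the shared lock) before running the empty
checkpoint and re-acquires it afterwards. A simplified sketch of the new
logic (full code in runtime/fault_handler.cc below):

  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
    // No checkpoint needed: no other thread can be Runnable.
  } else {
    bool runnable = (self->GetState() == ThreadState::kRunnable);
    if (runnable) {
      self->TransitionFromRunnableToSuspended(ThreadState::kNative);
    } else {
      Locks::mutator_lock_->SharedUnlock(self);
    }
    // The mutator lock is not held while other threads run the empty checkpoint.
    runtime->GetThreadList()->RunEmptyCheckpoint();
    if (runnable) {
      self->TransitionFromSuspendedToRunnable();
    } else {
      Locks::mutator_lock_->SharedLock(self);
    }
  }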

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing --jit
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 38383823
Change-Id: Idf82d3b77465453b8e70b40e32af193f266b357b
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 7bd402f..974e056 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -45,76 +45,59 @@
   return instr_size;
 }
 
-void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo ATTRIBUTE_UNUSED,
-                                             void* context,
-                                             ArtMethod** out_method,
-                                             uintptr_t* out_return_pc,
-                                             uintptr_t* out_sp,
-                                             bool* out_is_stack_overflow) {
+uintptr_t FaultManager::GetFaultPc(siginfo_t* siginfo ATTRIBUTE_UNUSED, void* context) {
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
-  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  *out_sp = static_cast<uintptr_t>(sc->arm_sp);
-  VLOG(signals) << "sp: " << std::hex << *out_sp;
-  if (*out_sp == 0) {
-    return;
+  struct sigcontext* sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  if (sc->arm_sp == 0) {
+    VLOG(signals) << "Missing SP";
+    return 0u;
   }
+  return sc->arm_pc;
+}
 
-  // In the case of a stack overflow, the stack is not valid and we can't
-  // get the method from the top of the stack.  However it's in r0.
-  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
-  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(InstructionSet::kArm));
-  if (overflow_addr == fault_addr) {
-    *out_method = reinterpret_cast<ArtMethod*>(sc->arm_r0);
-    *out_is_stack_overflow = true;
-  } else {
-    // The method is at the top of the stack.
-    *out_method = reinterpret_cast<ArtMethod*>(reinterpret_cast<uintptr_t*>(*out_sp)[0]);
-    *out_is_stack_overflow = false;
-  }
-
-  // Work out the return PC.  This will be the address of the instruction
-  // following the faulting ldr/str instruction.  This is in thumb mode so
-  // the instruction might be a 16 or 32 bit one.  Also, the GC map always
-  // has the bottom bit of the PC set so we also need to set that.
-
-  // Need to work out the size of the instruction that caused the exception.
-  uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-  VLOG(signals) << "pc: " << std::hex << static_cast<void*>(ptr);
-
-  if (ptr == nullptr) {
-    // Somebody jumped to 0x0. Definitely not ours, and will definitely segfault below.
-    *out_method = nullptr;
-    return;
-  }
-
-  uint32_t instr_size = GetInstructionSize(ptr);
-
-  *out_return_pc = (sc->arm_pc + instr_size) | 1;
+uintptr_t FaultManager::GetFaultSp(void* context) {
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext* sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  return sc->arm_sp;
 }
 
 bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
-  if (!IsValidImplicitCheck(info)) {
+  uintptr_t fault_address = reinterpret_cast<uintptr_t>(info->si_addr);
+  if (!IsValidFaultAddress(fault_address)) {
     return false;
   }
-  // The code that looks for the catch location needs to know the value of the
-  // ARM PC at the point of call.  For Null checks we insert a GC map that is immediately after
-  // the load/store instruction that might cause the fault.  However the mapping table has
-  // the low bits set for thumb mode so we need to set the bottom bit for the LR
-  // register in order to find the mapping.
+
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext* sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  ArtMethod** sp = reinterpret_cast<ArtMethod**>(sc->arm_sp);
+  if (!IsValidMethod(*sp)) {
+    return false;
+  }
+
+  // For null checks in compiled code we insert a stack map that is immediately
+  // after the load/store instruction that might cause the fault and we need to
+  // pass the return PC to the handler. For null checks in Nterp, we similarly
+  // need the return PC to recognize that this was a null check in Nterp, so
+  // that the handler can get the needed data from the Nterp frame.
+
+  // Note: Currently, Nterp is compiled to the A32 instruction set and managed
+  // code is compiled to the T32 instruction set.
+  // To find the stack map for compiled code, we need to set the bottom bit in
+  // the return PC indicating T32 just like we would if we were going to return
+  // to that PC (though we're going to jump to the exception handler instead).
 
   // Need to work out the size of the instruction that caused the exception.
-  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
-  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
   bool in_thumb_mode = sc->arm_cpsr & (1 << 5);
   uint32_t instr_size = in_thumb_mode ? GetInstructionSize(ptr) : 4;
-  uintptr_t gc_map_location = (sc->arm_pc + instr_size) | (in_thumb_mode ? 1 : 0);
+  uintptr_t return_pc = (sc->arm_pc + instr_size) | (in_thumb_mode ? 1 : 0);
 
-  // Push the gc map location to the stack and pass the fault address in LR.
+  // Push the return PC to the stack and pass the fault address in LR.
   sc->arm_sp -= sizeof(uintptr_t);
-  *reinterpret_cast<uintptr_t*>(sc->arm_sp) = gc_map_location;
-  sc->arm_lr = reinterpret_cast<uintptr_t>(info->si_addr);
+  *reinterpret_cast<uintptr_t*>(sc->arm_sp) = return_pc;
+  sc->arm_lr = fault_address;
+
+  // Arrange for the signal handler to return to the NPE entrypoint.
   sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   // Make sure the thumb bit is set as the handler is in thumb mode.
   sc->arm_cpsr = sc->arm_cpsr | (1 << 5);
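
As an illustration of the return PC computation above (hypothetical
addresses): if the faulting T32 load at arm_pc 0x71f0 is a 4-byte
instruction, the handler pushes (0x71f0 + 4) | 1 = 0x71f5, with the low
bit marking the T32 state so that the stack map lookup matches; an A32
(Nterp) fault at the same arm_pc would push 0x71f4 with the low bit clear.
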
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index a5becf6..9634492 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -38,66 +38,56 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo,
-                                             void* context,
-                                             ArtMethod** out_method,
-                                             uintptr_t* out_return_pc,
-                                             uintptr_t* out_sp,
-                                             bool* out_is_stack_overflow) {
-  struct ucontext *uc = reinterpret_cast<struct ucontext *>(context);
-  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-
+uintptr_t FaultManager::GetFaultPc(siginfo_t* siginfo, void* context) {
   // SEGV_MTEAERR (Async MTE fault) is delivered at an arbitrary point after the actual fault.
   // Register contents, including PC and SP, are unrelated to the fault and can only confuse ART
   // signal handlers.
   if (siginfo->si_signo == SIGSEGV && siginfo->si_code == SEGV_MTEAERR) {
-    return;
+    VLOG(signals) << "Async MTE fault";
+    return 0u;
   }
 
-  *out_sp = static_cast<uintptr_t>(sc->sp);
-  VLOG(signals) << "sp: " << *out_sp;
-  if (*out_sp == 0) {
-    return;
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext* sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  if (sc->sp == 0) {
+    VLOG(signals) << "Missing SP";
+    return 0u;
   }
+  return sc->pc;
+}
 
-  // In the case of a stack overflow, the stack is not valid and we can't
-  // get the method from the top of the stack.  However it's in x0.
-  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
-  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(InstructionSet::kArm64));
-  if (overflow_addr == fault_addr) {
-    *out_method = reinterpret_cast<ArtMethod*>(sc->regs[0]);
-    *out_is_stack_overflow = true;
-  } else {
-    // The method is at the top of the stack.
-    *out_method = *reinterpret_cast<ArtMethod**>(*out_sp);
-    *out_is_stack_overflow = false;
-  }
-
-  // Work out the return PC.  This will be the address of the instruction
-  // following the faulting ldr/str instruction.
-  VLOG(signals) << "pc: " << std::hex
-      << static_cast<void*>(reinterpret_cast<uint8_t*>(sc->pc));
-
-  *out_return_pc = sc->pc + 4;
+uintptr_t FaultManager::GetFaultSp(void* context) {
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext* sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  return sc->sp;
 }
 
 bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
-  if (!IsValidImplicitCheck(info)) {
+  uintptr_t fault_address = reinterpret_cast<uintptr_t>(info->si_addr);
+  if (!IsValidFaultAddress(fault_address)) {
     return false;
   }
-  // The code that looks for the catch location needs to know the value of the
-  // PC at the point of call.  For Null checks we insert a GC map that is immediately after
-  // the load/store instruction that might cause the fault.
 
-  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
-  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  // For null checks in compiled code we insert a stack map that is immediately
+  // after the load/store instruction that might cause the fault and we need to
+  // pass the return PC to the handler. For null checks in Nterp, we similarly
+  // need the return PC to recognize that this was a null check in Nterp, so
+  // that the handler can get the needed data from the Nterp frame.
 
-  // Push the gc map location to the stack and pass the fault address in LR.
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext* sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  ArtMethod** sp = reinterpret_cast<ArtMethod**>(sc->sp);
+  uintptr_t return_pc = sc->pc + 4u;
+  if (!IsValidMethod(*sp) || !IsValidReturnPc(sp, return_pc)) {
+    return false;
+  }
+
+  // Push the return PC to the stack and pass the fault address in LR.
   sc->sp -= sizeof(uintptr_t);
-  *reinterpret_cast<uintptr_t*>(sc->sp) = sc->pc + 4;
-  sc->regs[30] = reinterpret_cast<uintptr_t>(info->si_addr);
+  *reinterpret_cast<uintptr_t*>(sc->sp) = return_pc;
+  sc->regs[30] = fault_address;
 
+  // Arrange for the signal handler to return to the NPE entrypoint.
   sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   VLOG(signals) << "Generating null pointer exception";
   return true;
@@ -112,12 +102,11 @@
   constexpr uint32_t checkinst =
       0xf9400000 | (kSuspendCheckRegister << 5) | (kSuspendCheckRegister << 0);
 
-  struct ucontext *uc = reinterpret_cast<struct ucontext *>(context);
-  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  VLOG(signals) << "checking suspend";
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext* sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
   uint32_t inst = *reinterpret_cast<uint32_t*>(sc->pc);
-  VLOG(signals) << "inst: " << std::hex << inst << " checkinst: " << checkinst;
+  VLOG(signals) << "checking suspend; inst: " << std::hex << inst << " checkinst: " << checkinst;
   if (inst != checkinst) {
     // The instruction is not good, not ours.
     return false;
@@ -141,8 +130,8 @@
 
 bool StackOverflowHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
                                   void* context) {
-  struct ucontext *uc = reinterpret_cast<struct ucontext *>(context);
-  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext* sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
   VLOG(signals) << "sigcontext: " << std::hex << sc;
 
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index 3a08ec5..c485f0d 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -25,6 +25,7 @@
 #include "base/logging.h"  // For VLOG.
 #include "base/macros.h"
 #include "base/safe_copy.h"
+#include "oat_quick_method_header.h"
 #include "runtime_globals.h"
 #include "thread-current-inl.h"
 
@@ -77,30 +78,18 @@
 
 // Get the size of an instruction in bytes.
 // Return 0 if the instruction is not handled.
-static uint32_t GetInstructionSize(const uint8_t* pc) {
-  // Don't segfault if pc points to garbage.
-  char buf[15];  // x86/x86-64 have a maximum instruction length of 15 bytes.
-  ssize_t bytes = SafeCopy(buf, pc, sizeof(buf));
-
-  if (bytes == 0) {
-    // Nothing was readable.
-    return 0;
-  }
-
-  if (bytes == -1) {
-    // SafeCopy not supported, assume that the entire range is readable.
-    bytes = 16;
-  } else {
-    pc = reinterpret_cast<uint8_t*>(buf);
-  }
-
-#define INCREMENT_PC()          \
-  do {                          \
-    pc++;                       \
-    if (pc - startpc > bytes) { \
-      return 0;                 \
-    }                           \
+static uint32_t GetInstructionSize(const uint8_t* pc, size_t bytes) {
+#define FETCH_OR_SKIP_BYTE(assignment)  \
+  do {                                  \
+    if (bytes == 0u) {                  \
+      return 0u;                        \
+    }                                   \
+    (assignment);                       \
+    ++pc;                               \
+    --bytes;                            \
   } while (0)
+#define FETCH_BYTE(var) FETCH_OR_SKIP_BYTE((var) = *pc)
+#define SKIP_BYTE() FETCH_OR_SKIP_BYTE((void)0)
 
 #if defined(__x86_64)
   const bool x86_64 = true;
@@ -110,8 +99,8 @@
 
   const uint8_t* startpc = pc;
 
-  uint8_t opcode = *pc;
-  INCREMENT_PC();
+  uint8_t opcode;
+  FETCH_BYTE(opcode);
   uint8_t modrm;
   bool has_modrm = false;
   bool two_byte = false;
@@ -143,8 +132,7 @@
 
       // Group 4
       case 0x67:
-        opcode = *pc;
-        INCREMENT_PC();
+        FETCH_BYTE(opcode);
         prefix_present = true;
         break;
     }
@@ -154,15 +142,13 @@
   }
 
   if (x86_64 && opcode >= 0x40 && opcode <= 0x4f) {
-    opcode = *pc;
-    INCREMENT_PC();
+    FETCH_BYTE(opcode);
   }
 
   if (opcode == 0x0f) {
     // Two byte opcode
     two_byte = true;
-    opcode = *pc;
-    INCREMENT_PC();
+    FETCH_BYTE(opcode);
   }
 
   bool unhandled_instruction = false;
@@ -175,8 +161,7 @@
       case 0xb7:
       case 0xbe:        // movsx
       case 0xbf:
-        modrm = *pc;
-        INCREMENT_PC();
+        FETCH_BYTE(modrm);
         has_modrm = true;
         break;
       default:
@@ -195,32 +180,28 @@
       case 0x3c:
       case 0x3d:
       case 0x85:        // test.
-        modrm = *pc;
-        INCREMENT_PC();
+        FETCH_BYTE(modrm);
         has_modrm = true;
         break;
 
       case 0x80:        // group 1, byte immediate.
       case 0x83:
       case 0xc6:
-        modrm = *pc;
-        INCREMENT_PC();
+        FETCH_BYTE(modrm);
         has_modrm = true;
         immediate_size = 1;
         break;
 
       case 0x81:        // group 1, word immediate.
       case 0xc7:        // mov
-        modrm = *pc;
-        INCREMENT_PC();
+        FETCH_BYTE(modrm);
         has_modrm = true;
         immediate_size = operand_size_prefix ? 2 : 4;
         break;
 
       case 0xf6:
       case 0xf7:
-        modrm = *pc;
-        INCREMENT_PC();
+        FETCH_BYTE(modrm);
         has_modrm = true;
         switch ((modrm >> 3) & 7) {  // Extract "reg/opcode" from "modr/m".
           case 0:  // test
@@ -255,7 +236,7 @@
 
     // Check for SIB.
     if (mod != 3U /* 0b11 */ && (modrm & 7U /* 0b111 */) == 4) {
-      INCREMENT_PC();     // SIB
+      SKIP_BYTE();  // SIB
     }
 
     switch (mod) {
@@ -271,86 +252,79 @@
   pc += displacement_size + immediate_size;
 
   VLOG(signals) << "x86 instruction length calculated as " << (pc - startpc);
-  if (pc - startpc > bytes) {
-    return 0;
-  }
   return pc - startpc;
+
+#undef SKIP_BYTE
+#undef FETCH_BYTE
+#undef FETCH_OR_SKIP_BYTE
 }
 
-void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
-                                             ArtMethod** out_method,
-                                             uintptr_t* out_return_pc,
-                                             uintptr_t* out_sp,
-                                             bool* out_is_stack_overflow) {
+uintptr_t FaultManager::GetFaultPc(siginfo_t* siginfo ATTRIBUTE_UNUSED, void* context) {
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
-  *out_sp = static_cast<uintptr_t>(uc->CTX_ESP);
-  VLOG(signals) << "sp: " << std::hex << *out_sp;
-  if (*out_sp == 0) {
-    return;
+  if (uc->CTX_ESP == 0) {
+    VLOG(signals) << "Missing SP";
+    return 0u;
   }
+  return uc->CTX_EIP;
+}
 
-  // In the case of a stack overflow, the stack is not valid and we can't
-  // get the method from the top of the stack.  However it's in EAX(x86)/RDI(x86_64).
-  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(siginfo->si_addr);
-  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
-#if defined(__x86_64__)
-      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(InstructionSet::kX86_64));
-#else
-      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(InstructionSet::kX86));
-#endif
-  if (overflow_addr == fault_addr) {
-    *out_method = reinterpret_cast<ArtMethod*>(uc->CTX_METHOD);
-    *out_is_stack_overflow = true;
-  } else {
-    // The method is at the top of the stack.
-    *out_method = *reinterpret_cast<ArtMethod**>(*out_sp);
-    *out_is_stack_overflow = false;
-  }
-
-  uint8_t* pc = reinterpret_cast<uint8_t*>(uc->CTX_EIP);
-  VLOG(signals) << HexDump(pc, 32, true, "PC ");
-
-  if (pc == nullptr) {
-    // Somebody jumped to 0x0. Definitely not ours, and will definitely segfault below.
-    *out_method = nullptr;
-    return;
-  }
-
-  uint32_t instr_size = GetInstructionSize(pc);
-  if (instr_size == 0) {
-    // Unknown instruction, tell caller it's not ours.
-    *out_method = nullptr;
-    return;
-  }
-  *out_return_pc = reinterpret_cast<uintptr_t>(pc + instr_size);
+uintptr_t FaultManager::GetFaultSp(void* context) {
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  return uc->CTX_ESP;
 }
 
 bool NullPointerHandler::Action(int, siginfo_t* sig, void* context) {
-  if (!IsValidImplicitCheck(sig)) {
-    return false;
-  }
-  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
-  uint8_t* pc = reinterpret_cast<uint8_t*>(uc->CTX_EIP);
-  uint8_t* sp = reinterpret_cast<uint8_t*>(uc->CTX_ESP);
-
-  uint32_t instr_size = GetInstructionSize(pc);
-  if (instr_size == 0) {
-    // Unknown instruction, can't really happen.
+  uintptr_t fault_address = reinterpret_cast<uintptr_t>(sig->si_addr);
+  if (!IsValidFaultAddress(fault_address)) {
     return false;
   }
 
-  // We need to arrange for the signal handler to return to the null pointer
-  // exception generator.  The return address must be the address of the
-  // next instruction (this instruction + instruction size).  The return address
-  // is on the stack at the top address of the current frame.
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  ArtMethod** sp = reinterpret_cast<ArtMethod**>(uc->CTX_ESP);
+  ArtMethod* method = *sp;
+  if (!IsValidMethod(method)) {
+    return false;
+  }
 
-  // Push the return address and fault address onto the stack.
-  uintptr_t retaddr = reinterpret_cast<uintptr_t>(pc + instr_size);
-  uintptr_t* next_sp = reinterpret_cast<uintptr_t*>(sp - 2 * sizeof(uintptr_t));
-  next_sp[1] = retaddr;
-  next_sp[0] = reinterpret_cast<uintptr_t>(sig->si_addr);
+  // For null checks in compiled code we insert a stack map that is immediately
+  // after the load/store instruction that might cause the fault and we need to
+  // pass the return PC to the handler. For null checks in Nterp, we similarly
+  // need the return PC to recognize that this was a null check in Nterp, so
+  // that the handler can get the needed data from the Nterp frame.
+
+  // Note: Allowing nested faults if `IsValidMethod()` returned a false positive.
+  // Note: The `ArtMethod::GetOatQuickMethodHeader()` can acquire locks, which is
+  // essentially unsafe in a signal handler, but we allow that here just like in
+  // `NullPointerHandler::IsValidReturnPc()`. For more details see comments there.
+  uintptr_t pc = uc->CTX_EIP;
+  const OatQuickMethodHeader* method_header = method->GetOatQuickMethodHeader(pc);
+  if (method_header == nullptr) {
+    VLOG(signals) << "No method header.";
+    return false;
+  }
+  const uint8_t* pc_ptr = reinterpret_cast<const uint8_t*>(pc);
+  size_t offset = pc_ptr - method_header->GetCode();
+  size_t code_size = method_header->GetCodeSize();
+  CHECK_LT(offset, code_size);
+  size_t max_instr_size = code_size - offset;
+  uint32_t instr_size = GetInstructionSize(pc_ptr, max_instr_size);
+  if (instr_size == 0u) {
+    // Unknown instruction (can't really happen) or not enough bytes until end of method code.
+    return false;
+  }
+
+  uintptr_t return_pc = reinterpret_cast<uintptr_t>(pc + instr_size);
+  if (!IsValidReturnPc(sp, return_pc)) {
+    return false;
+  }
+
+  // Push the return PC and fault address onto the stack.
+  uintptr_t* next_sp = reinterpret_cast<uintptr_t*>(sp) - 2;
+  next_sp[1] = return_pc;
+  next_sp[0] = fault_address;
   uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
+  // Arrange for the signal handler to return to the NPE entrypoint.
   uc->CTX_EIP = reinterpret_cast<uintptr_t>(
       art_quick_throw_null_pointer_exception_from_signal);
   VLOG(signals) << "Generating null pointer exception";
@@ -385,7 +359,7 @@
 #endif
   uint8_t checkinst2[] = {0x85, 0x00};
 
-  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   uint8_t* pc = reinterpret_cast<uint8_t*>(uc->CTX_EIP);
   uint8_t* sp = reinterpret_cast<uint8_t*>(uc->CTX_ESP);
 
@@ -441,7 +415,7 @@
 // address for the previous method is on the stack at ESP.
 
 bool StackOverflowHandler::Action(int, siginfo_t* info, void* context) {
-  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   uintptr_t sp = static_cast<uintptr_t>(uc->CTX_ESP);
 
   uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);
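
A condensed view of the bounds check the x86/x86-64 null pointer handler
now performs before decoding the faulting instruction (names as in the
change above; this replaces the old SafeCopy() of a fixed 15-byte window):

  const OatQuickMethodHeader* method_header = method->GetOatQuickMethodHeader(pc);
  const uint8_t* pc_ptr = reinterpret_cast<const uint8_t*>(pc);
  size_t max_instr_size = method_header->GetCodeSize() - (pc_ptr - method_header->GetCode());
  uint32_t instr_size = GetInstructionSize(pc_ptr, max_instr_size);
  if (instr_size == 0u) {
    return false;  // Unknown instruction or not enough bytes before the end of the code.
  }
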
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 8f247ec..ed87669 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3919,7 +3919,8 @@
   std::string dex_file_location = dex_file.GetLocation();
   // The following paths checks don't work on preopt when using boot dex files, where the dex
   // cache location is the one on device, and the dex_file's location is the one on host.
-  if (!(Runtime::Current()->IsAotCompiler() && class_loader == nullptr && !kIsTargetBuild)) {
+  Runtime* runtime = Runtime::Current();
+  if (!(runtime->IsAotCompiler() && class_loader == nullptr && !kIsTargetBuild)) {
     CHECK_GE(dex_file_location.length(), dex_cache_length)
         << dex_cache_location << " " << dex_file.GetLocation();
     const std::string dex_file_suffix = dex_file_location.substr(
@@ -3931,7 +3932,7 @@
   }
 
   // Check if we need to initialize OatFile data (.data.bimg.rel.ro and .bss
-  // sections) needed for code execution.
+  // sections) needed for code execution and register the oat code range.
   const OatFile* oat_file =
       (dex_file.GetOatDexFile() != nullptr) ? dex_file.GetOatDexFile()->GetOatFile() : nullptr;
   bool initialize_oat_file_data = (oat_file != nullptr) && oat_file->IsExecutable();
@@ -3947,6 +3948,13 @@
   }
   if (initialize_oat_file_data) {
     oat_file->InitializeRelocations();
+    // Notify the fault handler about the new executable code range if needed.
+    size_t exec_offset = oat_file->GetOatHeader().GetExecutableOffset();
+    DCHECK_LE(exec_offset, oat_file->Size());
+    size_t exec_size = oat_file->Size() - exec_offset;
+    if (exec_size != 0u) {
+      runtime->AddGeneratedCodeRange(oat_file->Begin() + exec_offset, exec_size);
+    }
   }
 
   // Let hiddenapi assign a domain to the newly registered dex file.
@@ -10330,16 +10338,23 @@
       }
     }
   }
+  std::set<const OatFile*> unregistered_oat_files;
   if (!to_delete.empty()) {
     JavaVMExt* vm = self->GetJniEnv()->GetVm();
     WriterMutexLock mu(self, *Locks::dex_lock_);
     for (auto it = dex_caches_.begin(), end = dex_caches_.end(); it != end; ) {
+      const DexFile* dex_file = it->first;
       const DexCacheData& data = it->second;
       if (self->DecodeJObject(data.weak_root) == nullptr) {
         DCHECK(to_delete.end() != std::find_if(
             to_delete.begin(),
             to_delete.end(),
             [&](const ClassLoaderData& cld) { return cld.class_table == data.class_table; }));
+        if (dex_file->GetOatDexFile() != nullptr &&
+            dex_file->GetOatDexFile()->GetOatFile() != nullptr &&
+            dex_file->GetOatDexFile()->GetOatFile()->IsExecutable()) {
+          unregistered_oat_files.insert(dex_file->GetOatDexFile()->GetOatFile());
+        }
         vm->DeleteWeakGlobalRef(self, data.weak_root);
         it = dex_caches_.erase(it);
       } else {
@@ -10347,10 +10362,24 @@
       }
     }
   }
-  ScopedDebugDisallowReadBarriers sddrb(self);
-  for (ClassLoaderData& data : to_delete) {
-    // CHA unloading analysis and SingleImplementaion cleanups are required.
-    DeleteClassLoader(self, data, /*cleanup_cha=*/ true);
+  {
+    ScopedDebugDisallowReadBarriers sddrb(self);
+    for (ClassLoaderData& data : to_delete) {
+      // CHA unloading analysis and SingleImplementation cleanups are required.
+      DeleteClassLoader(self, data, /*cleanup_cha=*/ true);
+    }
+  }
+  if (!unregistered_oat_files.empty()) {
+    for (const OatFile* oat_file : unregistered_oat_files) {
+      // Notify the fault handler about removal of the executable code range if needed.
+      DCHECK(oat_file->IsExecutable());
+      size_t exec_offset = oat_file->GetOatHeader().GetExecutableOffset();
+      DCHECK_LE(exec_offset, oat_file->Size());
+      size_t exec_size = oat_file->Size() - exec_offset;
+      if (exec_size != 0u) {
+        Runtime::Current()->RemoveGeneratedCodeRange(oat_file->Begin() + exec_offset, exec_size);
+      }
+    }
   }
 }
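
Both the registration path above (DexFile registration) and the removal
path (class loader unloading) compute the executable tail of the oat file
the same way; condensed from the change:

  size_t exec_offset = oat_file->GetOatHeader().GetExecutableOffset();
  size_t exec_size = oat_file->Size() - exec_offset;  // Executable tail; may be empty.
  if (exec_size != 0u) {
    // Added via Runtime::AddGeneratedCodeRange() when the first dex file backed by this
    // oat file is registered; removed via RemoveGeneratedCodeRange() when the owning
    // class loader is unloaded.
    runtime->AddGeneratedCodeRange(oat_file->Begin() + exec_offset, exec_size);
  }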
 
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index c6940fa..cf7369a 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -16,16 +16,17 @@
 
 #include "fault_handler.h"
 
+#include <atomic>
 #include <string.h>
 #include <sys/mman.h>
 #include <sys/ucontext.h>
 
 #include "art_method-inl.h"
 #include "base/logging.h"  // For VLOG
+#include "base/membarrier.h"
 #include "base/safe_copy.h"
 #include "base/stl_util.h"
 #include "dex/dex_file_types.h"
-#include "gc/space/bump_pointer_space.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "mirror/class.h"
@@ -52,103 +53,16 @@
   return fault_manager.HandleFault(sig, info, context);
 }
 
-#if defined(__linux__)
+struct FaultManager::GeneratedCodeRange {
+  std::atomic<GeneratedCodeRange*> next;
+  const void* start;
+  size_t size;
+};
 
-// Change to verify the safe implementations against the original ones.
-constexpr bool kVerifySafeImpls = false;
-
-// Provide implementations of ArtMethod::GetDeclaringClass and VerifyClassClass that use SafeCopy
-// to safely dereference pointers which are potentially garbage.
-// Only available on Linux due to availability of SafeCopy.
-
-static mirror::Class* SafeGetDeclaringClass(ArtMethod* method)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  if (gUseUserfaultfd) {
-    // Avoid SafeCopy on userfaultfd updated memory ranges as kernel-space
-    // userfaults are not allowed, which can otherwise happen if compaction is
-    // simultaneously going on.
-    Runtime* runtime = Runtime::Current();
-    DCHECK_NE(runtime->GetHeap()->MarkCompactCollector(), nullptr);
-    GcVisitedArenaPool* pool = static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
-    if (pool->Contains(method)) {
-      return method->GetDeclaringClassUnchecked<kWithoutReadBarrier>().Ptr();
-    }
-  }
-
-  char* method_declaring_class =
-      reinterpret_cast<char*>(method) + ArtMethod::DeclaringClassOffset().SizeValue();
-  // ArtMethod::declaring_class_ is a GcRoot<mirror::Class>.
-  // Read it out into as a CompressedReference directly for simplicity's sake.
-  mirror::CompressedReference<mirror::Class> cls;
-  ssize_t rc = SafeCopy(&cls, method_declaring_class, sizeof(cls));
-  CHECK_NE(-1, rc);
-
-  if (kVerifySafeImpls) {
-    ObjPtr<mirror::Class> actual_class = method->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
-    CHECK_EQ(actual_class, cls.AsMirrorPtr());
-  }
-
-  if (rc != sizeof(cls)) {
-    return nullptr;
-  }
-
-  return cls.AsMirrorPtr();
-}
-
-static mirror::Class* SafeGetClass(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) {
-  if (gUseUserfaultfd) {
-    // Avoid SafeCopy on userfaultfd updated memory ranges as kernel-space
-    // userfaults are not allowed, which can otherwise happen if compaction is
-    // simultaneously going on.
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    DCHECK_NE(heap->MarkCompactCollector(), nullptr);
-    if (heap->GetBumpPointerSpace()->Contains(obj)) {
-      return obj->GetClass();
-    }
-  }
-
-  char* obj_cls = reinterpret_cast<char*>(obj) + mirror::Object::ClassOffset().SizeValue();
-  mirror::HeapReference<mirror::Class> cls;
-  ssize_t rc = SafeCopy(&cls, obj_cls, sizeof(cls));
-  CHECK_NE(-1, rc);
-
-  if (kVerifySafeImpls) {
-    mirror::Class* actual_class = obj->GetClass<kVerifyNone>();
-    CHECK_EQ(actual_class, cls.AsMirrorPtr());
-  }
-
-  if (rc != sizeof(cls)) {
-    return nullptr;
-  }
-
-  return cls.AsMirrorPtr();
-}
-
-static bool SafeVerifyClassClass(mirror::Class* cls) REQUIRES_SHARED(Locks::mutator_lock_) {
-  mirror::Class* c_c = SafeGetClass(cls);
-  bool result = c_c != nullptr && c_c == SafeGetClass(c_c);
-
-  if (kVerifySafeImpls) {
-    CHECK_EQ(VerifyClassClass(cls), result);
-  }
-
-  return result;
-}
-
-#else
-
-static mirror::Class* SafeGetDeclaringClass(ArtMethod* method_obj)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  return method_obj->GetDeclaringClassUnchecked<kWithoutReadBarrier>().Ptr();
-}
-
-static bool SafeVerifyClassClass(mirror::Class* cls) REQUIRES_SHARED(Locks::mutator_lock_) {
-  return VerifyClassClass(cls);
-}
-#endif
-
-
-FaultManager::FaultManager() : initialized_(false) {
+FaultManager::FaultManager()
+    : generated_code_ranges_lock_("FaultHandler generated code ranges lock",
+                                  LockLevel::kGenericBottomLock),
+      initialized_(false) {
   sigaction(SIGSEGV, nullptr, &oldaction_);
 }
 
@@ -172,6 +86,14 @@
   };
 
   AddSpecialSignalHandlerFn(SIGSEGV, &sa);
+
+  // Notify the kernel that we intend to use a specific `membarrier()` command.
+  int result = art::membarrier(MembarrierCommand::kRegisterPrivateExpedited);
+  if (result != 0) {
+    LOG(WARNING) << "FaultHandler: MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED failed: "
+                 << errno << " " << strerror(errno);
+  }
+
   initialized_ = true;
 }
 
@@ -189,6 +111,20 @@
     // Free all handlers.
     STLDeleteElements(&generated_code_handlers_);
     STLDeleteElements(&other_handlers_);
+
+    // Delete remaining code ranges if any (such as nterp code or oat code from
+    // oat files that have not been unloaded, including boot image oat files).
+    GeneratedCodeRange* range;
+    {
+      MutexLock lock(Thread::Current(), generated_code_ranges_lock_);
+      range = generated_code_ranges_.load(std::memory_order_acquire);
+      generated_code_ranges_.store(nullptr, std::memory_order_release);
+    }
+    while (range != nullptr) {
+      GeneratedCodeRange* next_range = range->next.load(std::memory_order_relaxed);
+      delete range;
+      range = next_range;
+    }
   }
 }
 
@@ -243,7 +179,7 @@
   raise(SIGSEGV);
 #endif
 
-  if (IsInGeneratedCode(info, context, true)) {
+  if (IsInGeneratedCode(info, context)) {
     VLOG(signals) << "in generated code, looking for handler";
     for (const auto& handler : generated_code_handlers_) {
       VLOG(signals) << "invoking Action on handler " << handler;
@@ -290,37 +226,110 @@
   LOG(FATAL) << "Attempted to remove non existent handler " << handler;
 }
 
-static bool IsKnownPc(uintptr_t pc, ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) {
-  // Check whether the pc is within nterp range.
-  if (OatQuickMethodHeader::IsNterpPc(pc)) {
-    return true;
+void FaultManager::AddGeneratedCodeRange(const void* start, size_t size) {
+  GeneratedCodeRange* new_range = new GeneratedCodeRange{nullptr, start, size};
+  {
+    MutexLock lock(Thread::Current(), generated_code_ranges_lock_);
+    GeneratedCodeRange* old_head = generated_code_ranges_.load(std::memory_order_relaxed);
+    new_range->next.store(old_head, std::memory_order_relaxed);
+    generated_code_ranges_.store(new_range, std::memory_order_release);
   }
 
-  // Check whether the pc is in the JIT code cache.
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  if (jit != nullptr && jit->GetCodeCache()->ContainsPc(reinterpret_cast<const void*>(pc))) {
-    return true;
-  }
-
-  if (method->IsObsolete()) {
-    // Obsolete methods never happen on AOT code.
-    return false;
-  }
-
-  // Note: at this point, we trust it's truly an ArtMethod we found at the bottom of the stack,
-  // and we can find its oat file through it.
-  const OatDexFile* oat_dex_file = method->GetDeclaringClass()->GetDexFile().GetOatDexFile();
-  if (oat_dex_file != nullptr &&
-      oat_dex_file->GetOatFile()->Contains(reinterpret_cast<const void*>(pc))) {
-    return true;
-  }
-
-  return false;
+  // The above release operation on `generated_code_ranges_` with an acquire operation
+  // on the same atomic object in `IsInGeneratedCode()` ensures the correct memory
+  // visibility for the contents of `*new_range` for any thread that loads the value
+  // written above (or a value written by a release sequence headed by that write).
+  //
+  // However, we also need to ensure that any thread that encounters a segmentation
+  // fault in the provided range shall actually see the written value. For JIT code
+  // cache and nterp, the registration happens while the process is single-threaded
+  // but the synchronization is more complicated for code in oat files.
+  //
+  // Threads that load classes register dex files under the `Locks::dex_lock_` and
+  // the first one to register a dex file with a given oat file shall add the oat
+  // code range; the memory visibility for these threads is guaranteed by the lock.
+  // However, a thread that did not try to load a class with oat code can execute the
+  // code if a direct or indirect reference to such a class escapes from one of the
+  // threads that loaded it. Use `membarrier()` for memory visibility in this case.
+  art::membarrier(MembarrierCommand::kPrivateExpedited);
 }
 
-// This function is called within the signal handler.  It checks that
-// the mutator_lock is held (shared).  No annotalysis is done.
-bool FaultManager::IsInGeneratedCode(siginfo_t* siginfo, void* context, bool check_dex_pc) {
+void FaultManager::RemoveGeneratedCodeRange(const void* start, size_t size) {
+  Thread* self = Thread::Current();
+  GeneratedCodeRange* range = nullptr;
+  {
+    MutexLock lock(self, generated_code_ranges_lock_);
+    std::atomic<GeneratedCodeRange*>* before = &generated_code_ranges_;
+    range = before->load(std::memory_order_relaxed);
+    while (range != nullptr && range->start != start) {
+      before = &range->next;
+      range = before->load(std::memory_order_relaxed);
+    }
+    if (range != nullptr) {
+      GeneratedCodeRange* next = range->next.load(std::memory_order_relaxed);
+      if (before == &generated_code_ranges_) {
+        // Relaxed store directly to `generated_code_ranges_` would not satisfy
+        // conditions for a release sequence, so we need to use store-release.
+        before->store(next, std::memory_order_release);
+      } else {
+        // In the middle of the list, we can use a relaxed store as we're not
+        // publishing any newly written memory to potential reader threads.
+        // Whether they see the removed node or not is unimportant as we should
+        // not execute that code anymore. We're keeping the `next` link of the
+        // removed node, so that concurrent walk can use it to reach remaining
+        // retained nodes, if any.
+        before->store(next, std::memory_order_relaxed);
+      }
+    }
+  }
+  CHECK(range != nullptr);
+  DCHECK_EQ(range->start, start);
+  CHECK_EQ(range->size, size);
+
+  Runtime* runtime = Runtime::Current();
+  CHECK(runtime != nullptr);
+  if (runtime->IsStarted() && runtime->GetThreadList() != nullptr) {
+    // Run a checkpoint before deleting the range to ensure that no thread holds a
+    // pointer to the removed range while walking the list in `IsInGeneratedCode()`.
+    // That walk is guarded by checking that the thread is `Runnable`, so any walk
+    // started before the removal shall have finished by the time the checkpoint runs.
+    // The checkpoint also ensures the correct memory visibility of the `next` links,
+    // so the thread shall not see the removed range during any later walk.
+
+    // This function is currently called in different mutex and thread states.
+    // Semi-space GC performs the cleanup during its `MarkingPhase()` while holding
+    // the mutator exclusively, so we do not need a checkpoint. All other GCs perform
+    // the cleanup in their `ReclaimPhase()` while holding the mutator lock as shared
+    // and it's safe to release and re-acquire the mutator lock. Despite holding the
+    // mutator lock as shared, the thread is not always marked as `Runnable`.
+    // TODO: Clean up state transitions in different GC implementations. b/259440389
+    if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+      // We do not need a checkpoint because no other thread is Runnable.
+    } else {
+      DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
+      // Use explicit state transitions or unlock/lock.
+      bool runnable = (self->GetState() == ThreadState::kRunnable);
+      if (runnable) {
+        self->TransitionFromRunnableToSuspended(ThreadState::kNative);
+      } else {
+        Locks::mutator_lock_->SharedUnlock(self);
+      }
+      DCHECK(!Locks::mutator_lock_->IsSharedHeld(self));
+      runtime->GetThreadList()->RunEmptyCheckpoint();
+      if (runnable) {
+        self->TransitionFromSuspendedToRunnable();
+      } else {
+        Locks::mutator_lock_->SharedLock(self);
+      }
+    }
+  }
+  delete range;
+}
+
+// This function is called within the signal handler. It checks that the thread
+// is `Runnable`, the `mutator_lock_` is held (shared) and the fault PC is in one
+// of the registered generated code ranges. No annotalysis is done.
+bool FaultManager::IsInGeneratedCode(siginfo_t* siginfo, void* context) {
   // We can only be running Java code in the current thread if it
   // is in Runnable state.
   VLOG(signals) << "Checking for generated code";
@@ -343,76 +352,29 @@
     return false;
   }
 
-  ArtMethod* method_obj = nullptr;
-  uintptr_t return_pc = 0;
-  uintptr_t sp = 0;
-  bool is_stack_overflow = false;
-
-  // Get the architecture specific method address and return address.  These
-  // are in architecture specific files in arch/<arch>/fault_handler_<arch>.
-  GetMethodAndReturnPcAndSp(siginfo, context, &method_obj, &return_pc, &sp, &is_stack_overflow);
-
-  // If we don't have a potential method, we're outta here.
-  VLOG(signals) << "potential method: " << method_obj;
-  // TODO: Check linear alloc and image.
-  DCHECK_ALIGNED(ArtMethod::Size(kRuntimePointerSize), sizeof(void*))
-      << "ArtMethod is not pointer aligned";
-  if (method_obj == nullptr || !IsAligned<sizeof(void*)>(method_obj)) {
-    VLOG(signals) << "no method";
+  uintptr_t fault_pc = GetFaultPc(siginfo, context);
+  if (fault_pc == 0u) {
+    VLOG(signals) << "no fault PC";
     return false;
   }
 
-  // Verify that the potential method is indeed a method.
-  // TODO: check the GC maps to make sure it's an object.
-  // Check that the class pointer inside the object is not null and is aligned.
-  // No read barrier because method_obj may not be a real object.
-  mirror::Class* cls = SafeGetDeclaringClass(method_obj);
-  if (cls == nullptr) {
-    VLOG(signals) << "not a class";
-    return false;
+  // Walk over the list of registered code ranges.
+  GeneratedCodeRange* range = generated_code_ranges_.load(std::memory_order_acquire);
+  while (range != nullptr) {
+    if (fault_pc - reinterpret_cast<uintptr_t>(range->start) < range->size) {
+      return true;
+    }
+    // We may or may not see ranges that were concurrently removed, depending
+    // on when the relaxed writes of the `next` links become visible. However,
+    // even if we're currently at a node that is being removed, we shall visit
+    // all remaining ranges that are not being removed as the removed nodes
+    // retain the `next` link at the time of removal (which may lead to other
+    // removed nodes before reaching remaining retained nodes, if any). Correct
+    // memory visibility of `start` and `size` fields of the visited ranges is
+    // ensured by the release and acquire operations on `generated_code_ranges_`.
+    range = range->next.load(std::memory_order_relaxed);
   }
-
-  if (!IsAligned<kObjectAlignment>(cls)) {
-    VLOG(signals) << "not aligned";
-    return false;
-  }
-
-  if (!SafeVerifyClassClass(cls)) {
-    VLOG(signals) << "not a class class";
-    return false;
-  }
-
-  if (!IsKnownPc(return_pc, method_obj)) {
-    VLOG(signals) << "PC not in Java code";
-    return false;
-  }
-
-  const OatQuickMethodHeader* method_header = method_obj->GetOatQuickMethodHeader(return_pc);
-
-  if (method_header == nullptr) {
-    VLOG(signals) << "no compiled code";
-    return false;
-  }
-
-  // We can be certain that this is a method now.  Check if we have a GC map
-  // at the return PC address.
-  if (true || kIsDebugBuild) {
-    VLOG(signals) << "looking for dex pc for return pc " << std::hex << return_pc;
-    uint32_t sought_offset = return_pc -
-        reinterpret_cast<uintptr_t>(method_header->GetEntryPoint());
-    VLOG(signals) << "pc offset: " << std::hex << sought_offset;
-  }
-  uint32_t dexpc = dex::kDexNoIndex;
-  if (is_stack_overflow) {
-    // If it's an implicit stack overflow check, the frame is not setup, so we
-    // just infer the dex PC as zero.
-    dexpc = 0;
-  } else {
-    CHECK_EQ(*reinterpret_cast<ArtMethod**>(sp), method_obj);
-    dexpc = method_header->ToDexPc(reinterpret_cast<ArtMethod**>(sp), return_pc, false);
-  }
-  VLOG(signals) << "dexpc: " << dexpc;
-  return !check_dex_pc || dexpc != dex::kDexNoIndex;
+  return false;
 }
 
 FaultHandler::FaultHandler(FaultManager* manager) : manager_(manager) {
@@ -425,6 +387,76 @@
   manager_->AddHandler(this, true);
 }
 
+bool NullPointerHandler::IsValidMethod(ArtMethod* method) {
+  // At this point we know that the thread is `Runnable` and the PC is in one of
+  // the registered code ranges. The `method` was read from the top of the stack
+  // and should really point to an actual `ArtMethod`, unless we're crashing during
+  // prologue or epilogue, or somehow managed to jump to the compiled code by some
+  // unexpected path, other than method invoke or exception delivery. We do a few
+  // quick checks without guarding from another fault.
+  VLOG(signals) << "potential method: " << method;
+
+  static_assert(IsAligned<sizeof(void*)>(ArtMethod::Size(kRuntimePointerSize)));
+  if (method == nullptr || !IsAligned<sizeof(void*)>(method)) {
+    VLOG(signals) << ((method == nullptr) ? "null method" : "unaligned method");
+    return false;
+  }
+
+  // Check that the presumed method actually points to a class. Read barriers
+  // are not needed (and would be undesirable in a signal handler) when reading
+  // a chain of constant references to get to a non-movable `Class.class` object.
+
+  // Note: Allowing nested faults. Checking that the method is in one of the
+  // `LinearAlloc` spaces, or that objects we look at are in the `Heap` would be
+  // slow and require locking a mutex, which is undesirable in a signal handler.
+  // (Though we could register valid ranges similarly to the generated code ranges.)
+
+  mirror::Object* klass =
+      method->GetDeclaringClassAddressWithoutBarrier()->AsMirrorPtr();
+  if (klass == nullptr || !IsAligned<kObjectAlignment>(klass)) {
+    VLOG(signals) << ((klass == nullptr) ? "null class" : "unaligned class");
+    return false;
+  }
+
+  mirror::Class* class_class = klass->GetClass<kVerifyNone, kWithoutReadBarrier>();
+  if (class_class == nullptr || !IsAligned<kObjectAlignment>(class_class)) {
+    VLOG(signals) << ((class_class == nullptr) ? "null class_class" : "unaligned class_class");
+    return false;
+  }
+
+  if (class_class != class_class->GetClass<kVerifyNone, kWithoutReadBarrier>()) {
+    VLOG(signals) << "invalid class_class";
+    return false;
+  }
+
+  return true;
+}
+
+bool NullPointerHandler::IsValidReturnPc(ArtMethod** sp, uintptr_t return_pc) {
+  // Check if we can associate a dex PC with the return PC, whether from Nterp,
+  // or with an existing stack map entry for a compiled method.
+  // Note: Allowing nested faults if `IsValidMethod()` returned a false positive.
+  // Note: The `ArtMethod::GetOatQuickMethodHeader()` can acquire locks (at least
+  // `Locks::jit_lock_`) and if the thread already held such a lock, the signal
+  // handler would deadlock. However, if a thread is holding one of the locks
+  // below the mutator lock, the PC should be somewhere in ART code and should
+  // not match any registered generated code range, so such a deadlock is
+  // unlikely. If it happens anyway, the worst case is that an internal ART crash
+  // would be reported as ANR.
+  ArtMethod* method = *sp;
+  const OatQuickMethodHeader* method_header = method->GetOatQuickMethodHeader(return_pc);
+  if (method_header == nullptr) {
+    VLOG(signals) << "No method header.";
+    return false;
+  }
+  VLOG(signals) << "looking for dex pc for return pc 0x" << std::hex << return_pc
+                << " pc offset: 0x" << std::hex
+                << (return_pc - reinterpret_cast<uintptr_t>(method_header->GetEntryPoint()));
+  uint32_t dexpc = method_header->ToDexPc(reinterpret_cast<ArtMethod**>(sp), return_pc, false);
+  VLOG(signals) << "dexpc: " << dexpc;
+  return dexpc != dex::kDexNoIndex;
+}
+
 //
 // Suspension fault handler
 //
@@ -448,17 +480,13 @@
 
 bool JavaStackTraceHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* siginfo, void* context) {
   // Make sure that we are in the generated code, but we may not have a dex pc.
-  bool in_generated_code = manager_->IsInGeneratedCode(siginfo, context, false);
+  bool in_generated_code = manager_->IsInGeneratedCode(siginfo, context);
   if (in_generated_code) {
     LOG(ERROR) << "Dumping java stack trace for crash in generated code";
-    ArtMethod* method = nullptr;
-    uintptr_t return_pc = 0;
-    uintptr_t sp = 0;
-    bool is_stack_overflow = false;
     Thread* self = Thread::Current();
 
-    manager_->GetMethodAndReturnPcAndSp(
-        siginfo, context, &method, &return_pc, &sp, &is_stack_overflow);
+    uintptr_t sp = FaultManager::GetFaultSp(context);
+    CHECK_NE(sp, 0u);  // Otherwise we should not have reached this handler.
     // Inside of generated code, sp[0] is the method, so sp is the frame.
     self->SetTopOfStack(reinterpret_cast<ArtMethod**>(sp));
     self->DumpJavaStack(LOG_STREAM(ERROR));
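
The FaultManager::GeneratedCodeRange list above is the core data structure
of this change: writers serialize on generated_code_ranges_lock_ and
publish the list head with a release store, while the signal-handler
reader walks the list lock-free with an acquire load of the head and
relaxed loads of the next links. A minimal standalone model of that
publication scheme (plain std::mutex standing in for ART's Mutex;
illustrative only, not the real code):

  #include <atomic>
  #include <cstddef>
  #include <cstdint>
  #include <mutex>

  struct Range {
    std::atomic<Range*> next;
    const void* start;
    size_t size;
  };

  std::mutex ranges_lock;                    // Guards writers only.
  std::atomic<Range*> ranges_head{nullptr};  // Readers start here with load-acquire.

  void AddRange(const void* start, size_t size) {
    Range* range = new Range{nullptr, start, size};
    std::lock_guard<std::mutex> lock(ranges_lock);
    range->next.store(ranges_head.load(std::memory_order_relaxed), std::memory_order_relaxed);
    ranges_head.store(range, std::memory_order_release);  // Publishes *range to readers.
  }

  // Reader side; in ART this runs in the signal handler and must not take locks.
  bool ContainsPc(uintptr_t pc) {
    for (Range* range = ranges_head.load(std::memory_order_acquire);
         range != nullptr;
         range = range->next.load(std::memory_order_relaxed)) {
      if (pc - reinterpret_cast<uintptr_t>(range->start) < range->size) {
        return true;
      }
    }
    return false;
  }

The real code additionally issues membarrier(PRIVATE_EXPEDITED) after
publishing an oat code range, so that threads which did not synchronize
via Locks::dex_lock_ still observe the new node.
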
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 8b89c22..2186a13 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -21,9 +21,11 @@
 #include <signal.h>
 #include <stdint.h>
 
+#include <atomic>
 #include <vector>
 
 #include "base/locks.h"  // For annotalysis.
+#include "base/mutex.h"
 #include "runtime_globals.h"  // For CanDoImplicitNullCheckOn.
 
 namespace art {
@@ -51,25 +53,34 @@
   void AddHandler(FaultHandler* handler, bool generated_code);
   void RemoveHandler(FaultHandler* handler);
 
-  // Note that the following two functions are called in the context of a signal handler.
-  // The IsInGeneratedCode() function checks that the mutator lock is held before it
-  // calls GetMethodAndReturnPCAndSP().
-  // TODO: think about adding lock assertions and fake lock and unlock functions.
-  void GetMethodAndReturnPcAndSp(siginfo_t* siginfo,
-                                 void* context,
-                                 ArtMethod** out_method,
-                                 uintptr_t* out_return_pc,
-                                 uintptr_t* out_sp,
-                                 bool* out_is_stack_overflow)
-                                 NO_THREAD_SAFETY_ANALYSIS;
-  bool IsInGeneratedCode(siginfo_t* siginfo, void *context, bool check_dex_pc)
-                         NO_THREAD_SAFETY_ANALYSIS;
+  void AddGeneratedCodeRange(const void* start, size_t size);
+  void RemoveGeneratedCodeRange(const void* start, size_t size)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Retrieves fault PC from architecture-dependent `context`, returns 0 on failure.
+  // Called in the context of a signal handler.
+  static uintptr_t GetFaultPc(siginfo_t* siginfo, void* context);
+
+  // Retrieves SP from architecture-dependent `context`.
+  // Called in the context of a signal handler.
+  static uintptr_t GetFaultSp(void* context);
+
+  // Checks if the fault happened while running generated code.
+  // Called in the context of a signal handler.
+  bool IsInGeneratedCode(siginfo_t* siginfo, void* context) NO_THREAD_SAFETY_ANALYSIS;
 
  private:
+  struct GeneratedCodeRange;
+
   // The HandleFaultByOtherHandlers function is only called by HandleFault function for generated code.
   bool HandleFaultByOtherHandlers(int sig, siginfo_t* info, void* context)
                                   NO_THREAD_SAFETY_ANALYSIS;
 
+  // Note: The lock guards modifications of the ranges but the function `IsInGeneratedCode()`
+  // walks the list in the context of a signal handler without holding the lock.
+  Mutex generated_code_ranges_lock_;
+  std::atomic<GeneratedCodeRange*> generated_code_ranges_ GUARDED_BY(generated_code_ranges_lock_);
+
   std::vector<FaultHandler*> generated_code_handlers_;
   std::vector<FaultHandler*> other_handlers_;
   struct sigaction oldaction_;
@@ -98,17 +109,29 @@
  public:
   explicit NullPointerHandler(FaultManager* manager);
 
-  bool Action(int sig, siginfo_t* siginfo, void* context) override;
-
-  static bool IsValidImplicitCheck(siginfo_t* siginfo) {
-    // Our implicit NPE checks always limit the range to a page.
-    // Note that the runtime will do more exhaustive checks (that we cannot
-    // reasonably do in signal processing code) based on the dex instruction
-    // faulting.
-    return CanDoImplicitNullCheckOn(reinterpret_cast<uintptr_t>(siginfo->si_addr));
-  }
+  // NO_THREAD_SAFETY_ANALYSIS: Called after the fault manager determined that
+  // the thread is `Runnable` and holds the mutator lock (shared) but without
+  // telling annotalysis that we actually hold the lock.
+  bool Action(int sig, siginfo_t* siginfo, void* context) override
+      NO_THREAD_SAFETY_ANALYSIS;
 
  private:
+  // Helper functions for checking whether the signal can be interpreted
+  // as an implicit NPE check. Note that the runtime will do more exhaustive
+  // checks (that we cannot reasonably do in signal processing code) based
+  // on the dex instruction faulting.
+
+  static bool IsValidFaultAddress(uintptr_t fault_address) {
+    // Our implicit NPE checks always limit the range to a page.
+    return CanDoImplicitNullCheckOn(fault_address);
+  }
+
+  static bool IsValidMethod(ArtMethod* method)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  static bool IsValidReturnPc(ArtMethod** sp, uintptr_t return_pc)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   DISALLOW_COPY_AND_ASSIGN(NullPointerHandler);
 };
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 4910b51..1ab1c77 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -208,9 +208,10 @@
     }
   }
 
-  size_t initial_capacity = Runtime::Current()->GetJITOptions()->GetCodeCacheInitialCapacity();
+  Runtime* runtime = Runtime::Current();
+  size_t initial_capacity = runtime->GetJITOptions()->GetCodeCacheInitialCapacity();
   // Check whether the provided max capacity in options is below 1GB.
-  size_t max_capacity = Runtime::Current()->GetJITOptions()->GetCodeCacheMaxCapacity();
+  size_t max_capacity = runtime->GetJITOptions()->GetCodeCacheMaxCapacity();
   // We need to have 32 bit offsets from method headers in code cache which point to things
   // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work.
   // Ensure we're below 1 GB to be safe.
@@ -232,6 +233,11 @@
     return nullptr;
   }
 
+  if (region.HasCodeMapping()) {
+    const MemMap* exec_pages = region.GetExecPages();
+    runtime->AddGeneratedCodeRange(exec_pages->Begin(), exec_pages->Size());
+  }
+
   std::unique_ptr<JitCodeCache> jit_code_cache(new JitCodeCache());
   if (is_zygote) {
     // Zygote should never collect code to share the memory with the children.
@@ -266,7 +272,16 @@
       histogram_profiling_info_memory_use_("Memory used for profiling info", 16) {
 }
 
-JitCodeCache::~JitCodeCache() {}
+JitCodeCache::~JitCodeCache() {
+  if (private_region_.HasCodeMapping()) {
+    const MemMap* exec_pages = private_region_.GetExecPages();
+    Runtime::Current()->RemoveGeneratedCodeRange(exec_pages->Begin(), exec_pages->Size());
+  }
+  if (shared_region_.HasCodeMapping()) {
+    const MemMap* exec_pages = shared_region_.GetExecPages();
+    Runtime::Current()->RemoveGeneratedCodeRange(exec_pages->Begin(), exec_pages->Size());
+  }
+}
 
 bool JitCodeCache::PrivateRegionContainsPc(const void* ptr) const {
   return private_region_.IsInExecSpace(ptr);
@@ -1888,7 +1903,8 @@
   // We do this now and not in Jit::PostForkChildAction, as system server calls
   // JitCodeCache::PostForkChildAction first, and then does some code loading
   // that may result in new JIT tasks that we want to keep.
-  ThreadPool* pool = Runtime::Current()->GetJit()->GetThreadPool();
+  Runtime* runtime = Runtime::Current();
+  ThreadPool* pool = runtime->GetJit()->GetThreadPool();
   if (pool != nullptr) {
     pool->RemoveAllTasks(self);
   }
@@ -1899,7 +1915,7 @@
   // to write to them.
   shared_region_.ResetWritableMappings();
 
-  if (is_zygote || Runtime::Current()->IsSafeMode()) {
+  if (is_zygote || runtime->IsSafeMode()) {
     // Don't create a private region for a child zygote. Regions are usually map shared
     // (to satisfy dual-view), and we don't want children of a child zygote to inherit it.
     return;
@@ -1914,8 +1930,8 @@
   histogram_code_memory_use_.Reset();
   histogram_profiling_info_memory_use_.Reset();
 
-  size_t initial_capacity = Runtime::Current()->GetJITOptions()->GetCodeCacheInitialCapacity();
-  size_t max_capacity = Runtime::Current()->GetJITOptions()->GetCodeCacheMaxCapacity();
+  size_t initial_capacity = runtime->GetJITOptions()->GetCodeCacheInitialCapacity();
+  size_t max_capacity = runtime->GetJITOptions()->GetCodeCacheMaxCapacity();
   std::string error_msg;
   if (!private_region_.Initialize(initial_capacity,
                                   max_capacity,
@@ -1924,6 +1940,10 @@
                                   &error_msg)) {
     LOG(WARNING) << "Could not create private region after zygote fork: " << error_msg;
   }
+  if (private_region_.HasCodeMapping()) {
+    const MemMap* exec_pages = private_region_.GetExecPages();
+    runtime->AddGeneratedCodeRange(exec_pages->Begin(), exec_pages->Size());
+  }
 }
 
 JitMemoryRegion* JitCodeCache::GetCurrentRegion() {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index f60d016..1fc434e 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -844,6 +844,18 @@
   return runtime != nullptr && runtime->IsStarted() && !runtime->IsShuttingDownLocked();
 }
 
+void Runtime::AddGeneratedCodeRange(const void* start, size_t size) {
+  if (HandlesSignalsInCompiledCode()) {
+    fault_manager.AddGeneratedCodeRange(start, size);
+  }
+}
+
+void Runtime::RemoveGeneratedCodeRange(const void* start, size_t size) {
+  if (HandlesSignalsInCompiledCode()) {
+    fault_manager.RemoveGeneratedCodeRange(start, size);
+  }
+}
+
 bool Runtime::Create(RuntimeArgumentMap&& runtime_options) {
   // TODO: acquire a static mutex on Runtime to avoid racing.
   if (Runtime::instance_ != nullptr) {
@@ -988,7 +1000,6 @@
     if (!jit::Jit::LoadCompilerLibrary(&error_msg)) {
       LOG(WARNING) << "Failed to load JIT compiler with error " << error_msg;
     }
-    CreateJitCodeCache(/*rwx_memory_allowed=*/true);
     CreateJit();
 #ifdef ADDRESS_SANITIZER
     // (b/238730394): In older implementations of sanitizer + glibc there is a race between
@@ -1562,6 +1573,8 @@
         << (core_platform_api_policy_ == hiddenapi::EnforcementPolicy::kEnabled ? "true" : "false");
   }
 
+  // Dex2Oat's Runtime does not need the signal chain or the fault handler
+  // and it passes the `NoSigChain` option to `Runtime` to indicate this.
   no_sig_chain_ = runtime_options.Exists(Opt::NoSigChain);
   force_native_bridge_ = runtime_options.Exists(Opt::ForceNativeBridge);
 
@@ -1753,31 +1766,34 @@
       break;
   }
 
-  if (!no_sig_chain_) {
-    // Dex2Oat's Runtime does not need the signal chain or the fault handler.
-    if (implicit_null_checks_ || implicit_so_checks_ || implicit_suspend_checks_) {
-      fault_manager.Init();
+  if (HandlesSignalsInCompiledCode()) {
+    fault_manager.Init();
 
-      // These need to be in a specific order.  The null point check handler must be
-      // after the suspend check and stack overflow check handlers.
-      //
-      // Note: the instances attach themselves to the fault manager and are handled by it. The
-      //       manager will delete the instance on Shutdown().
-      if (implicit_suspend_checks_) {
-        new SuspensionHandler(&fault_manager);
-      }
+    // These need to be in a specific order.  The null pointer check handler must be
+    // after the suspend check and stack overflow check handlers.
+    //
+    // Note: the instances attach themselves to the fault manager and are handled by it. The
+    //       manager will delete the instance on Shutdown().
+    if (implicit_suspend_checks_) {
+      new SuspensionHandler(&fault_manager);
+    }
 
-      if (implicit_so_checks_) {
-        new StackOverflowHandler(&fault_manager);
-      }
+    if (implicit_so_checks_) {
+      new StackOverflowHandler(&fault_manager);
+    }
 
-      if (implicit_null_checks_) {
-        new NullPointerHandler(&fault_manager);
-      }
+    if (implicit_null_checks_) {
+      new NullPointerHandler(&fault_manager);
+    }
 
-      if (kEnableJavaStackTraceHandler) {
-        new JavaStackTraceHandler(&fault_manager);
-      }
+    if (kEnableJavaStackTraceHandler) {
+      new JavaStackTraceHandler(&fault_manager);
+    }
+
+    if (interpreter::CanRuntimeUseNterp()) {
+      // Nterp code can use signal handling just like the compiled managed code.
+      OatQuickMethodHeader* nterp_header = OatQuickMethodHeader::NterpMethodHeader;
+      fault_manager.AddGeneratedCodeRange(nterp_header->GetCode(), nterp_header->GetCodeSize());
     }
   }
 
@@ -3003,7 +3019,9 @@
   }
 }
 
-void Runtime::CreateJitCodeCache(bool rwx_memory_allowed) {
+void Runtime::CreateJit() {
+  DCHECK(jit_code_cache_ == nullptr);
+  DCHECK(jit_ == nullptr);
   if (kIsDebugBuild && GetInstrumentation()->IsForcedInterpretOnly()) {
     DCHECK(!jit_options_->UseJitCompilation());
   }
@@ -3012,28 +3030,19 @@
     return;
   }
 
+  if (IsSafeMode()) {
+    LOG(INFO) << "Not creating JIT because of SafeMode.";
+    return;
+  }
+
   std::string error_msg;
   bool profiling_only = !jit_options_->UseJitCompilation();
   jit_code_cache_.reset(jit::JitCodeCache::Create(profiling_only,
-                                                  rwx_memory_allowed,
+                                                  /*rwx_memory_allowed=*/ true,
                                                   IsZygote(),
                                                   &error_msg));
   if (jit_code_cache_.get() == nullptr) {
     LOG(WARNING) << "Failed to create JIT Code Cache: " << error_msg;
-  }
-}
-
-void Runtime::CreateJit() {
-  DCHECK(jit_ == nullptr);
-  if (jit_code_cache_.get() == nullptr) {
-    if (!IsSafeMode()) {
-      LOG(WARNING) << "Missing code cache, cannot create JIT.";
-    }
-    return;
-  }
-  if (IsSafeMode()) {
-    LOG(INFO) << "Not creating JIT because of SafeMode.";
-    jit_code_cache_.reset();
     return;
   }
 
diff --git a/runtime/runtime.h b/runtime/runtime.h
index c3f1a70..a0a36b9 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -1155,6 +1155,10 @@
     return no_sig_chain_;
   }
 
+  void AddGeneratedCodeRange(const void* start, size_t size);
+  void RemoveGeneratedCodeRange(const void* start, size_t size)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Trigger a flag reload from system properties or device congfigs.
   //
   // Should only be called from runtime init and zygote post fork as
@@ -1188,6 +1192,11 @@
 
   Runtime();
 
+  bool HandlesSignalsInCompiledCode() const {
+    return !no_sig_chain_ &&
+           (implicit_null_checks_ || implicit_so_checks_ || implicit_suspend_checks_);
+  }
+
   void BlockSignals();
 
   bool Init(RuntimeArgumentMap&& runtime_options)