-rw-r--r-- | runtime/art_method.cc | 4
-rw-r--r-- | runtime/common_throws.cc | 25
-rw-r--r-- | runtime/common_throws.h | 2
-rw-r--r-- | runtime/entrypoints/quick/quick_throw_entrypoints.cc | 5
-rw-r--r-- | runtime/gc/collector/mark_compact-inl.h | 8
-rw-r--r-- | runtime/gc/collector/mark_compact.cc | 8
-rw-r--r-- | runtime/gc/heap.cc | 17
-rw-r--r-- | runtime/interpreter/interpreter.cc | 6
-rw-r--r-- | runtime/interpreter/interpreter_common.cc | 2
-rw-r--r-- | runtime/jit/jit.cc | 2
-rw-r--r-- | runtime/reflection.cc | 20
-rw-r--r-- | runtime/thread-inl.h | 46
-rw-r--r-- | runtime/thread.cc | 112
-rw-r--r-- | runtime/thread.h | 92
14 files changed, 251 insertions, 98 deletions
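Most of the change below converts bare stack-limit checks (comparing __builtin_frame_address(0) against Thread::GetStackEnd()) into checks that are explicit about which stack they mean, via GetStackEnd<kNativeStackType>() and ThrowStackOverflowError<kNativeStackType>(). A minimal standalone sketch of that call-site pattern follows; StackType and kNativeStackType mirror the diff, while stack_end_for(), g_native_stack_end and NativeStackHasRoom() are illustrative stand-ins, not ART's API.

    // Standalone sketch of the call-site pattern used throughout this change.
    // StackType/kNativeStackType mirror the diff; stack_end_for(),
    // g_native_stack_end and NativeStackHasRoom() are illustrative only.
    #include <cstdint>
    #include <cstdio>

    enum class StackType { kHardware, kSimulated };
    constexpr StackType kNativeStackType = StackType::kHardware;

    // Pretend per-thread bookkeeping: lowest safely-addressable byte of the stack.
    static uint8_t* g_native_stack_end = nullptr;

    template <StackType kType>
    uint8_t* stack_end_for() {
      // Only the hardware stack is modeled; a kSimulated variant would read the
      // simulator-owned stack buffer instead.
      static_assert(kType == StackType::kHardware, "only the hardware stack is modeled");
      return g_native_stack_end;
    }

    bool NativeStackHasRoom() {
      // __builtin_frame_address(0) is the current native frame, so it must be
      // compared against the native stack end, never the quick/simulated one.
      void* frame = __builtin_frame_address(0);
      return static_cast<uint8_t*>(frame) >= stack_end_for<kNativeStackType>();
    }

    int main() {
      // With no limit configured (nullptr end), any frame address passes.
      std::printf("native stack has room: %d\n", NativeStackHasRoom() ? 1 : 0);
      return 0;
    }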
diff --git a/runtime/art_method.cc b/runtime/art_method.cc index d4bed097ec..5c48575672 100644 --- a/runtime/art_method.cc +++ b/runtime/art_method.cc @@ -364,8 +364,8 @@ uint32_t ArtMethod::FindCatchBlock(Handle<mirror::Class> exception_type, NO_STACK_PROTECTOR void ArtMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result, const char* shorty) { - if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) { - ThrowStackOverflowError(self); + if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd<kNativeStackType>())) { + ThrowStackOverflowError<kNativeStackType>(self); return; } diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc index ec43f69b19..e1cabbb574 100644 --- a/runtime/common_throws.cc +++ b/runtime/common_throws.cc @@ -696,13 +696,23 @@ void ThrowSecurityException(const char* fmt, ...) { // Stack overflow. +template <StackType stack_type> void ThrowStackOverflowError(Thread* self) { - if (self->IsHandlingStackOverflow()) { + if (self->IsHandlingStackOverflow<stack_type>()) { LOG(ERROR) << "Recursive stack overflow."; // We don't fail here because SetStackEndForStackOverflow will print better diagnostics. } - self->SetStackEndForStackOverflow(); // Allow space on the stack for constructor to execute. + // Allow space on the stack for constructor to execute. + self->SetStackEndForStackOverflow<stack_type>(); + + // Remove the stack overflow protection if it is set up. + bool implicit_stack_check = Runtime::Current()->GetImplicitStackOverflowChecks(); + if (implicit_stack_check) { + if (!self->UnprotectStack<stack_type>()) { + LOG(ERROR) << "Unable to remove stack protection for stack overflow"; + } + } // Avoid running Java code for exception initialization. // TODO: Checks to make this a bit less brittle. @@ -713,7 +723,7 @@ void ThrowStackOverflowError(Thread* self) { // with larger stack sizes (e.g., ASAN). auto create_and_throw = [self]() REQUIRES_SHARED(Locks::mutator_lock_) NO_INLINE { std::string msg("stack size "); - msg += PrettySize(self->GetStackSize()); + msg += PrettySize(self->GetUsableStackSize<stack_type>()); ScopedObjectAccessUnchecked soa(self); StackHandleScope<1u> hs(self); @@ -791,14 +801,17 @@ void ThrowStackOverflowError(Thread* self) { create_and_throw(); CHECK(self->IsExceptionPending()); - self->ResetDefaultStackEnd(); // Return to default stack size. + self->ResetDefaultStackEnd<stack_type>(); // Return to default stack size. // And restore protection if implicit checks are on. - if (Runtime::Current()->GetImplicitStackOverflowChecks()) { - self->ProtectStack(); + if (implicit_stack_check) { + self->ProtectStack<stack_type>(); } } +// Explicit instantiations to keep this definition separate to the declaration. +template void ThrowStackOverflowError<StackType::kHardware>(Thread* self); + // StringIndexOutOfBoundsException void ThrowStringIndexOutOfBoundsException(int index, int length) { diff --git a/runtime/common_throws.h b/runtime/common_throws.h index 9b5da327f2..9def38d1ca 100644 --- a/runtime/common_throws.h +++ b/runtime/common_throws.h @@ -22,6 +22,7 @@ #include "base/locks.h" #include "base/macros.h" #include "obj_ptr.h" +#include "thread.h" namespace art HIDDEN { namespace mirror { @@ -253,6 +254,7 @@ void ThrowSecurityException(const char* fmt, ...) // Stack overflow. 
+template <StackType stack_type> void ThrowStackOverflowError(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR; // StringIndexOutOfBoundsException diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc index 3ca95eabe4..a3ac287236 100644 --- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc @@ -123,7 +123,10 @@ extern "C" Context* artThrowStringBoundsFromCode(int index, int length, Thread* extern "C" Context* artThrowStackOverflowFromCode(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); - ThrowStackOverflowError(self); + // Throw a stack overflow error for the quick stack. This is needed to throw stack overflow + // errors on the simulated stack, which is used for quick code when building for the simulator. + // See kQuickStackType for more details. + ThrowStackOverflowError<kQuickStackType>(self); std::unique_ptr<Context> context = self->QuickDeliverException(); DCHECK(context != nullptr); return context.release(); diff --git a/runtime/gc/collector/mark_compact-inl.h b/runtime/gc/collector/mark_compact-inl.h index 05dc65a961..d840223720 100644 --- a/runtime/gc/collector/mark_compact-inl.h +++ b/runtime/gc/collector/mark_compact-inl.h @@ -20,6 +20,7 @@ #include "gc/space/bump_pointer_space.h" #include "mark_compact.h" #include "mirror/object-inl.h" +#include "thread-inl.h" namespace art HIDDEN { namespace gc { @@ -256,8 +257,11 @@ inline bool MarkCompact::VerifyRootSingleUpdate(void* root, } Thread* self = Thread::Current(); if (UNLIKELY(stack_low_addr == nullptr)) { - stack_low_addr = self->GetStackEnd(); - stack_high_addr = reinterpret_cast<char*>(stack_low_addr) + self->GetStackSize(); + // TODO(Simulator): Test that this should not operate on the simulated stack when the + // simulator supports mark compact. + stack_low_addr = self->GetStackEnd<kNativeStackType>(); + stack_high_addr = reinterpret_cast<char*>(stack_low_addr) + + self->GetUsableStackSize<kNativeStackType>(); } if (std::less<void*>{}(root, stack_low_addr) || std::greater<void*>{}(root, stack_high_addr)) { bool inserted; diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index 618625ec61..e6ba007e5e 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -2658,9 +2658,11 @@ void MarkCompact::CompactionPause() { non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap(); if (kIsDebugBuild) { DCHECK_EQ(thread_running_gc_, Thread::Current()); - stack_low_addr_ = thread_running_gc_->GetStackEnd(); - stack_high_addr_ = - reinterpret_cast<char*>(stack_low_addr_) + thread_running_gc_->GetStackSize(); + // TODO(Simulator): Test that this should not operate on the simulated stack when the simulator + // supports mark compact. 
+ stack_low_addr_ = thread_running_gc_->GetStackEnd<kNativeStackType>(); + stack_high_addr_ = reinterpret_cast<char*>(stack_low_addr_) + + thread_running_gc_->GetUsableStackSize<kNativeStackType>(); } { TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings()); diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 8f3189f6e5..9bed9833b7 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -1500,8 +1500,10 @@ std::string Heap::DumpSpaceNameFromAddress(const void* addr) const { void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) { // If we're in a stack overflow, do not create a new exception. It would require running the - // constructor, which will of course still be in a stack overflow. - if (self->IsHandlingStackOverflow()) { + // constructor, which will of course still be in a stack overflow. Note: we only care if the + // native stack has overflowed. If the simulated stack overflows, it is still possible that the + // native stack has room to create a new exception. + if (self->IsHandlingStackOverflow<kNativeStackType>()) { self->SetException( Runtime::Current()->GetPreAllocatedOutOfMemoryErrorWhenHandlingStackOverflow()); return; @@ -2794,9 +2796,12 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, // This would likely cause a deadlock if we acted on a suspension request. // TODO: We really want to assert that we don't transition to kRunnable. ScopedAssertNoThreadSuspension scoped_assert("Performing GC"); - if (self->IsHandlingStackOverflow()) { + if (self->IsHandlingStackOverflow<kNativeStackType>()) { // If we are throwing a stack overflow error we probably don't have enough remaining stack - // space to run the GC. + // space to run the GC. Note: we only care if the native stack has overflowed. If the + // simulated stack overflows it is still possible that the native stack has room to run the + // GC. + // Count this as a GC in case someone is waiting for it to complete. gcs_completed_.fetch_add(1, std::memory_order_release); return collector::kGcTypeNone; @@ -3975,8 +3980,10 @@ class Heap::ConcurrentGCTask : public HeapTask { static bool CanAddHeapTask(Thread* self) REQUIRES(!Locks::runtime_shutdown_lock_) { Runtime* runtime = Runtime::Current(); + // We only care if the native stack has overflowed. If the simulated stack overflows, it is still + // possible that the native stack has room to add a heap task. 
return runtime != nullptr && runtime->IsFinishedStarting() && !runtime->IsShuttingDown(self) && - !self->IsHandlingStackOverflow(); + !self->IsHandlingStackOverflow<kNativeStackType>(); } bool Heap::RequestConcurrentGC(Thread* self, diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc index eeeb300a92..59388e7c96 100644 --- a/runtime/interpreter/interpreter.cc +++ b/runtime/interpreter/interpreter.cc @@ -352,7 +352,7 @@ void EnterInterpreterFromInvoke(Thread* self, DCHECK_EQ(self, Thread::Current()); bool implicit_check = Runtime::Current()->GetImplicitStackOverflowChecks(); if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) { - ThrowStackOverflowError(self); + ThrowStackOverflowError<kNativeStackType>(self); return; } @@ -570,7 +570,7 @@ JValue EnterInterpreterFromEntryPoint(Thread* self, const CodeItemDataAccessor& DCHECK_EQ(self, Thread::Current()); bool implicit_check = Runtime::Current()->GetImplicitStackOverflowChecks(); if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) { - ThrowStackOverflowError(self); + ThrowStackOverflowError<kNativeStackType>(self); return JValue(); } @@ -588,7 +588,7 @@ void ArtInterpreterToInterpreterBridge(Thread* self, JValue* result) { bool implicit_check = Runtime::Current()->GetImplicitStackOverflowChecks(); if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) { - ThrowStackOverflowError(self); + ThrowStackOverflowError<kNativeStackType>(self); return; } diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc index f530fa245d..8921a4ab7f 100644 --- a/runtime/interpreter/interpreter_common.cc +++ b/runtime/interpreter/interpreter_common.cc @@ -62,7 +62,7 @@ bool CheckStackOverflow(Thread* self, size_t frame_size) bool implicit_check = Runtime::Current()->GetImplicitStackOverflowChecks(); uint8_t* stack_end = self->GetStackEndForInterpreter(implicit_check); if (UNLIKELY(__builtin_frame_address(0) < stack_end + frame_size)) { - ThrowStackOverflowError(self); + ThrowStackOverflowError<kNativeStackType>(self); return false; } return true; diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc index 567f07adf0..46a1a4aa73 100644 --- a/runtime/jit/jit.cc +++ b/runtime/jit/jit.cc @@ -452,7 +452,7 @@ bool Jit::MaybeDoOnStackReplacement(Thread* thread, return false; } - if (UNLIKELY(__builtin_frame_address(0) < thread->GetStackEnd())) { + if (UNLIKELY(__builtin_frame_address(0) < thread->GetStackEnd<kNativeStackType>())) { // Don't attempt to do an OSR if we are close to the stack limit. Since // the interpreter frames are still on stack, OSR has the potential // to stack overflow even for a simple loop. diff --git a/runtime/reflection.cc b/runtime/reflection.cc index 5c394b136a..8ebbeddcb1 100644 --- a/runtime/reflection.cc +++ b/runtime/reflection.cc @@ -532,8 +532,8 @@ JValue InvokeWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa, // We want to make sure that the stack is not within a small distance from the // protected region in case we are calling into a leaf function whose stack // check has been elided. 
- if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd())) { - ThrowStackOverflowError(soa.Self()); + if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd<kNativeStackType>())) { + ThrowStackOverflowError<kNativeStackType>(soa.Self()); return JValue(); } bool is_string_init = method->IsStringConstructor(); @@ -574,8 +574,8 @@ JValue InvokeWithJValues(const ScopedObjectAccessAlreadyRunnable& soa, // We want to make sure that the stack is not within a small distance from the // protected region in case we are calling into a leaf function whose stack // check has been elided. - if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd())) { - ThrowStackOverflowError(soa.Self()); + if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd<kNativeStackType>())) { + ThrowStackOverflowError<kNativeStackType>(soa.Self()); return JValue(); } bool is_string_init = method->IsStringConstructor(); @@ -615,8 +615,8 @@ JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccessAlreadyRunnab // We want to make sure that the stack is not within a small distance from the // protected region in case we are calling into a leaf function whose stack // check has been elided. - if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd())) { - ThrowStackOverflowError(soa.Self()); + if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd<kNativeStackType>())) { + ThrowStackOverflowError<kNativeStackType>(soa.Self()); return JValue(); } ObjPtr<mirror::Object> receiver = soa.Decode<mirror::Object>(obj); @@ -658,8 +658,8 @@ JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccessAlreadyRunnab // We want to make sure that the stack is not within a small distance from the // protected region in case we are calling into a leaf function whose stack // check has been elided. - if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd())) { - ThrowStackOverflowError(soa.Self()); + if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd<kNativeStackType>())) { + ThrowStackOverflowError<kNativeStackType>(soa.Self()); return JValue(); } @@ -702,7 +702,7 @@ jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject javaM // check has been elided. if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEndForInterpreter(true))) { - ThrowStackOverflowError(soa.Self()); + ThrowStackOverflowError<kNativeStackType>(soa.Self()); return nullptr; } @@ -797,7 +797,7 @@ void InvokeConstructor(const ScopedObjectAccessAlreadyRunnable& soa, // protected region in case we are calling into a leaf function whose stack // check has been elided. 
if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEndForInterpreter(true))) { - ThrowStackOverflowError(soa.Self()); + ThrowStackOverflowError<kNativeStackType>(soa.Self()); return; } diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h index 66771230fe..180d28b5eb 100644 --- a/runtime/thread-inl.h +++ b/runtime/thread-inl.h @@ -572,8 +572,33 @@ inline ShadowFrame* Thread::PopShadowFrame() { return tlsPtr_.managed_stack.PopShadowFrame(); } +template <> +inline uint8_t* Thread::GetStackEnd<StackType::kHardware>() const { + return tlsPtr_.stack_end; +} +template <> +inline void Thread::SetStackEnd<StackType::kHardware>(uint8_t* new_stack_end) { + tlsPtr_.stack_end = new_stack_end; +} +template <> +inline uint8_t* Thread::GetStackBegin<StackType::kHardware>() const { + return tlsPtr_.stack_begin; +} +template <> +inline void Thread::SetStackBegin<StackType::kHardware>(uint8_t* new_stack_begin) { + tlsPtr_.stack_begin = new_stack_begin; +} +template <> +inline size_t Thread::GetStackSize<StackType::kHardware>() const { + return tlsPtr_.stack_size; +} +template <> +inline void Thread::SetStackSize<StackType::kHardware>(size_t new_stack_size) { + tlsPtr_.stack_size = new_stack_size; +} + inline uint8_t* Thread::GetStackEndForInterpreter(bool implicit_overflow_check) const { - uint8_t* end = tlsPtr_.stack_end + (implicit_overflow_check + uint8_t* end = GetStackEnd<kNativeStackType>() + (implicit_overflow_check ? GetStackOverflowReservedBytes(kRuntimeISA) : 0); if (kIsDebugBuild) { @@ -586,10 +611,27 @@ inline uint8_t* Thread::GetStackEndForInterpreter(bool implicit_overflow_check) return end; } +template <StackType stack_type> inline void Thread::ResetDefaultStackEnd() { // Our stacks grow down, so we want stack_end_ to be near there, but reserving enough room // to throw a StackOverflowError. - tlsPtr_.stack_end = tlsPtr_.stack_begin + GetStackOverflowReservedBytes(kRuntimeISA); + SetStackEnd<stack_type>( + GetStackBegin<stack_type>() + GetStackOverflowReservedBytes(kRuntimeISA)); +} + +template <StackType stack_type> +inline void Thread::SetStackEndForStackOverflow() + REQUIRES_SHARED(Locks::mutator_lock_) { + // During stack overflow we allow use of the full stack. + if (GetStackEnd<stack_type>() == GetStackBegin<stack_type>()) { + // However, we seem to have already extended to use the full stack. + LOG(ERROR) << "Need to increase kStackOverflowReservedBytes (currently " + << GetStackOverflowReservedBytes(kRuntimeISA) << ")?"; + DumpStack(LOG_STREAM(ERROR)); + LOG(FATAL) << "Recursive stack overflow."; + } + + SetStackEnd<stack_type>(GetStackBegin<stack_type>()); } inline void Thread::NotifyOnThreadExit(ThreadExitFlag* tef) { diff --git a/runtime/thread.cc b/runtime/thread.cc index dcbe1382e0..dca08959f4 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -622,7 +622,7 @@ void* Thread::CreateCallback(void* arg) { // while threads are being born). CHECK(!runtime->IsShuttingDownLocked()); // Note: given that the JNIEnv is created in the parent thread, the only failure point here is - // a mess in InitStackHwm. We do not have a reasonable way to recover from that, so abort + // a mess in InitStack. We do not have a reasonable way to recover from that, so abort // the runtime in such a case. In case this ever changes, we need to make sure here to // delete the tmp_jni_env, as we own it at this point. 
CHECK(self->Init(runtime->GetThreadList(), runtime->GetJavaVM(), self->tlsPtr_.tmp_jni_env)); @@ -729,9 +729,8 @@ static size_t FixStackSize(size_t stack_size) { return stack_size; } -// Return the nearest page-aligned address below the current stack top. -NO_INLINE -static uint8_t* FindStackTop() { +template <> +NO_INLINE uint8_t* Thread::FindStackTop<StackType::kHardware>() { return reinterpret_cast<uint8_t*>( AlignDown(__builtin_frame_address(0), gPageSize)); } @@ -739,16 +738,17 @@ static uint8_t* FindStackTop() { // Install a protected region in the stack. This is used to trigger a SIGSEGV if a stack // overflow is detected. It is located right below the stack_begin_. ATTRIBUTE_NO_SANITIZE_ADDRESS +template <StackType stack_type> void Thread::InstallImplicitProtection() { - uint8_t* pregion = tlsPtr_.stack_begin - GetStackOverflowProtectedSize(); + uint8_t* pregion = GetStackBegin<stack_type>() - GetStackOverflowProtectedSize(); // Page containing current top of stack. - uint8_t* stack_top = FindStackTop(); + uint8_t* stack_top = FindStackTop<stack_type>(); // Try to directly protect the stack. VLOG(threads) << "installing stack protected region at " << std::hex << static_cast<void*>(pregion) << " to " << static_cast<void*>(pregion + GetStackOverflowProtectedSize() - 1); - if (ProtectStack(/* fatal_on_error= */ false)) { + if (ProtectStack<stack_type>(/* fatal_on_error= */ false)) { // Tell the kernel that we won't be needing these pages any more. // NB. madvise will probably write zeroes into the memory (on linux it does). size_t unwanted_size = @@ -778,7 +778,7 @@ void Thread::InstallImplicitProtection() { // (Defensively) first remove the protection on the protected region as we'll want to read // and write it. Ignore errors. - UnprotectStack(); + UnprotectStack<stack_type>(); VLOG(threads) << "Need to map in stack for thread at " << std::hex << static_cast<void*>(pregion); @@ -821,7 +821,7 @@ void Thread::InstallImplicitProtection() { static_cast<void*>(pregion + GetStackOverflowProtectedSize() - 1); // Protect the bottom of the stack to prevent read/write to it. - ProtectStack(/* fatal_on_error= */ true); + ProtectStack<stack_type>(/* fatal_on_error= */ true); // Tell the kernel that we won't be needing these pages any more. // NB. madvise will probably write zeroes into the memory (on linux it does). @@ -948,6 +948,11 @@ void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_siz } } +static void GetThreadStack(pthread_t thread, + void** stack_base, + size_t* stack_size, + size_t* guard_size); + bool Thread::Init(ThreadList* thread_list, JavaVMExt* java_vm, JNIEnvExt* jni_env_ext) { // This function does all the initialization that must be run by the native thread it applies to. 
// (When we create a new thread from managed code, we allocate the Thread* in Thread::Create so @@ -963,7 +968,14 @@ bool Thread::Init(ThreadList* thread_list, JavaVMExt* java_vm, JNIEnvExt* jni_en ScopedTrace trace("Thread::Init"); SetUpAlternateSignalStack(); - if (!InitStackHwm()) { + + void* read_stack_base = nullptr; + size_t read_stack_size = 0; + size_t read_guard_size = 0; + GetThreadStack(tlsPtr_.pthread_self, &read_stack_base, &read_stack_size, &read_guard_size); + if (!InitStack<kNativeStackType>(reinterpret_cast<uint8_t*>(read_stack_base), + read_stack_size, + read_guard_size)) { return false; } InitCpu(); @@ -1331,15 +1343,12 @@ static void GetThreadStack(pthread_t thread, #endif } -bool Thread::InitStackHwm() { - ScopedTrace trace("InitStackHwm"); - void* read_stack_base; - size_t read_stack_size; - size_t read_guard_size; - GetThreadStack(tlsPtr_.pthread_self, &read_stack_base, &read_stack_size, &read_guard_size); +template <StackType stack_type> +bool Thread::InitStack(uint8_t* read_stack_base, size_t read_stack_size, size_t read_guard_size) { + ScopedTrace trace("InitStack"); - tlsPtr_.stack_begin = reinterpret_cast<uint8_t*>(read_stack_base); - tlsPtr_.stack_size = read_stack_size; + SetStackBegin<stack_type>(read_stack_base); + SetStackSize<stack_type>(read_stack_size); // The minimum stack size we can cope with is the protected region size + stack overflow check // region size + some memory for normal stack usage. @@ -1372,8 +1381,16 @@ bool Thread::InitStackHwm() { return false; } + std::string stack_type_str = ""; + if constexpr (stack_type == kNativeStackType) { + stack_type_str = "Native"; + } else if constexpr (stack_type == kQuickStackType) { + stack_type_str = "Quick"; + } + // This is included in the SIGQUIT output, but it's useful here for thread debugging. - VLOG(threads) << StringPrintf("Native stack is at %p (%s with %s guard)", + VLOG(threads) << StringPrintf("%s stack is at %p (%s with %s guard)", + stack_type_str.c_str(), read_stack_base, PrettySize(read_stack_size).c_str(), PrettySize(read_guard_size).c_str()); @@ -1384,7 +1401,7 @@ bool Thread::InitStackHwm() { bool implicit_stack_check = runtime->GetImplicitStackOverflowChecks() && !runtime->IsAotCompiler(); - ResetDefaultStackEnd(); + ResetDefaultStackEnd<stack_type>(); // Install the protected region if we are doing implicit overflow checks. if (implicit_stack_check) { @@ -1392,15 +1409,18 @@ bool Thread::InitStackHwm() { // to install our own region so we need to move the limits // of the stack to make room for it. - tlsPtr_.stack_begin += read_guard_size + GetStackOverflowProtectedSize(); - tlsPtr_.stack_end += read_guard_size + GetStackOverflowProtectedSize(); - tlsPtr_.stack_size -= read_guard_size + GetStackOverflowProtectedSize(); + SetStackBegin<stack_type>(GetStackBegin<stack_type>() + read_guard_size + + GetStackOverflowProtectedSize()); + SetStackEnd<stack_type>(GetStackEnd<stack_type>() + read_guard_size + + GetStackOverflowProtectedSize()); + SetStackSize<stack_type>(GetStackSize<stack_type>() - (read_guard_size + + GetStackOverflowProtectedSize())); - InstallImplicitProtection(); + InstallImplicitProtection<stack_type>(); } // Consistency check. 
- CHECK_GT(FindStackTop(), reinterpret_cast<void*>(tlsPtr_.stack_end)); + CHECK_GT(FindStackTop<stack_type>(), reinterpret_cast<void*>(GetStackEnd<stack_type>())); return true; } @@ -2115,9 +2135,10 @@ void Thread::DumpState(std::ostream& os, const Thread* thread, pid_t tid) { << " core=" << task_cpu << " HZ=" << sysconf(_SC_CLK_TCK) << "\n"; if (thread != nullptr) { - os << " | stack=" << reinterpret_cast<void*>(thread->tlsPtr_.stack_begin) << "-" - << reinterpret_cast<void*>(thread->tlsPtr_.stack_end) << " stackSize=" - << PrettySize(thread->tlsPtr_.stack_size) << "\n"; + // TODO(Simulator): Also dump the simulated stack if one exists. + os << " | stack=" << reinterpret_cast<void*>(thread->GetStackBegin<kNativeStackType>()) + << "-" << reinterpret_cast<void*>(thread->GetStackEnd<kNativeStackType>()) + << " stackSize=" << PrettySize(thread->GetStackSize<kNativeStackType>()) << "\n"; // Dump the held mutexes. os << " | held mutexes="; for (size_t i = 0; i < kLockLevelCount; ++i) { @@ -2804,12 +2825,17 @@ class JniTransitionReferenceVisitor : public StackVisitor { bool found_; }; +bool Thread::IsRawObjOnQuickStack(uint8_t* raw_obj) const { + return (static_cast<size_t>(raw_obj - GetStackBegin<kQuickStackType>()) < + GetStackSize<kQuickStackType>()); +} + bool Thread::IsJniTransitionReference(jobject obj) const { DCHECK(obj != nullptr); // We need a non-const pointer for stack walk even if we're not modifying the thread state. Thread* thread = const_cast<Thread*>(this); uint8_t* raw_obj = reinterpret_cast<uint8_t*>(obj); - if (static_cast<size_t>(raw_obj - tlsPtr_.stack_begin) < tlsPtr_.stack_size) { + if (IsRawObjOnQuickStack(raw_obj)) { JniTransitionReferenceVisitor</*kPointsToStack=*/ true> visitor(thread, raw_obj); visitor.WalkStack(); return visitor.Found(); @@ -4622,28 +4648,6 @@ void Thread::VerifyStackImpl() { } } -// Set the stack end to that to be used during a stack overflow -void Thread::SetStackEndForStackOverflow() { - // During stack overflow we allow use of the full stack. - if (tlsPtr_.stack_end == tlsPtr_.stack_begin) { - // However, we seem to have already extended to use the full stack. - LOG(ERROR) << "Need to increase kStackOverflowReservedBytes (currently " - << GetStackOverflowReservedBytes(kRuntimeISA) << ")?"; - DumpStack(LOG_STREAM(ERROR)); - LOG(FATAL) << "Recursive stack overflow."; - } - - tlsPtr_.stack_end = tlsPtr_.stack_begin; - - // Remove the stack overflow protection if is it set up. 
- bool implicit_stack_check = Runtime::Current()->GetImplicitStackOverflowChecks(); - if (implicit_stack_check) { - if (!UnprotectStack()) { - LOG(ERROR) << "Unable to remove stack protection for stack overflow"; - } - } -} - void Thread::SetTlab(uint8_t* start, uint8_t* end, uint8_t* limit) { DCHECK_LE(start, end); DCHECK_LE(end, limit); @@ -4691,8 +4695,9 @@ std::ostream& operator<<(std::ostream& os, const Thread& thread) { return os; } +template <StackType stack_type> bool Thread::ProtectStack(bool fatal_on_error) { - void* pregion = tlsPtr_.stack_begin - GetStackOverflowProtectedSize(); + void* pregion = GetStackBegin<stack_type>() - GetStackOverflowProtectedSize(); VLOG(threads) << "Protecting stack at " << pregion; if (mprotect(pregion, GetStackOverflowProtectedSize(), PROT_NONE) == -1) { if (fatal_on_error) { @@ -4707,8 +4712,9 @@ bool Thread::ProtectStack(bool fatal_on_error) { return true; } +template <StackType stack_type> bool Thread::UnprotectStack() { - void* pregion = tlsPtr_.stack_begin - GetStackOverflowProtectedSize(); + void* pregion = GetStackBegin<stack_type>() - GetStackOverflowProtectedSize(); VLOG(threads) << "Unprotecting stack at " << pregion; return mprotect(pregion, GetStackOverflowProtectedSize(), PROT_READ|PROT_WRITE) == 0; } diff --git a/runtime/thread.h b/runtime/thread.h index da9a70d8b1..96958f37b4 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -198,6 +198,45 @@ enum class WeakRefAccessState : int32_t { kDisabled }; +// ART uses two types of ABI/code: quick and native. +// +// Quick code includes: +// - The code that ART compiles to, e.g: Java/dex code compiled to Arm64. +// - Quick assembly entrypoints. +// +// Native code includes: +// - Interpreter. +// - GC. +// - JNI. +// - Runtime methods, i.e.: all ART C++ code. +// +// In regular (non-simulator) mode, both native and quick code are of the same ISA and will operate +// on the hardware stack. The hardware stack is allocated by the kernel to ART and grows down in +// memory. +// +// In simulator mode, native and quick code use different ISA's and will use different stacks. +// Native code will use the hardware stack while quick code will use the simulated stack. The +// simulated stack is a simple buffer in the native heap owned by the Simulator class. +// +// The StackType enum reflects the underlying type of stack in use by any given function while two +// constexpr StackTypes (kNativeStackType and kQuickStackType) indicate which type of stack is used +// for native and quick code. Whenever possible kNativeStackType and kQuickStackType should be used +// instead of using the StackType directly. +enum class StackType { + kHardware, + kSimulated +}; + +// The type of stack used when executing native code, i.e.: runtime helpers, interpreter, JNI, etc. +// This stack is the native machine's call stack and so should be used when comparing against +// values returned from builtin functions such as __builtin_frame_address. +static constexpr StackType kNativeStackType = StackType::kHardware; + +// The type of stack used when executing quick code, i.e.: compiled dex code and quick entrypoints. +// For simulator builds this is the kSimulated stack and for non-simulator builds this is the +// kHardware stack. +static constexpr StackType kQuickStackType = StackType::kHardware; + // See Thread.tlsPtr_.active_suspend1_barriers below for explanation. struct WrappedSuspend1Barrier { // TODO(b/23668816): At least weaken CHECKs to DCHECKs once the bug is fixed. 
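The StackType documentation added above explains why native and quick code may run on different stacks. The accessors the diff declares in thread.h (GetStackEnd, SetStackEnd, GetStackBegin, and so on) are then given explicit specializations for StackType::kHardware in thread-inl.h. A rough sketch of that specialization pattern, using a cut-down stand-in class rather than ART's Thread, is:

    // Sketch of the explicit-specialization pattern used for the stack accessors.
    // MyThread and its fields are stand-ins for Thread and Thread::tlsPtr_.
    #include <cstdint>

    enum class StackType { kHardware, kSimulated };
    constexpr StackType kNativeStackType = StackType::kHardware;
    constexpr StackType kQuickStackType = StackType::kHardware;  // kSimulated on simulator builds.

    class MyThread {
     public:
      template <StackType kType> uint8_t* GetStackEnd() const;
      template <StackType kType> void SetStackEnd(uint8_t* new_end);

      template <StackType kType>
      bool IsHandlingStackOverflow() const {
        // As in the diff: overflow handling is signalled by letting the stack end
        // drop all the way down to the stack begin.
        return GetStackEnd<kType>() == stack_begin_;
      }

     private:
      uint8_t* stack_begin_ = nullptr;
      uint8_t* stack_end_ = nullptr;
    };

    // Only the kHardware specializations exist; they map onto the thread's own
    // fields. A kSimulated specialization would consult the simulator's stack.
    template <>
    inline uint8_t* MyThread::GetStackEnd<StackType::kHardware>() const {
      return stack_end_;
    }
    template <>
    inline void MyThread::SetStackEnd<StackType::kHardware>(uint8_t* new_end) {
      stack_end_ = new_end;
    }

    int main() {
      MyThread t;
      t.SetStackEnd<kNativeStackType>(nullptr);
      // Both stacks are the hardware stack here, so the quick-stack query sees it too.
      return t.IsHandlingStackOverflow<kQuickStackType>() ? 0 : 1;
    }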
@@ -1109,24 +1148,29 @@ class EXPORT Thread { } // Size of stack less any space reserved for stack overflow - size_t GetStackSize() const { - return tlsPtr_.stack_size - (tlsPtr_.stack_end - tlsPtr_.stack_begin); + template <StackType stack_type> + size_t GetUsableStackSize() const { + return GetStackSize<stack_type>() - static_cast<size_t>( + GetStackEnd<stack_type>() - GetStackBegin<stack_type>()); } - ALWAYS_INLINE uint8_t* GetStackEndForInterpreter(bool implicit_overflow_check) const; + template <StackType stack_type> + ALWAYS_INLINE uint8_t* GetStackEnd() const; - uint8_t* GetStackEnd() const { - return tlsPtr_.stack_end; - } + ALWAYS_INLINE uint8_t* GetStackEndForInterpreter(bool implicit_overflow_check) const; // Set the stack end to that to be used during a stack overflow - void SetStackEndForStackOverflow() REQUIRES_SHARED(Locks::mutator_lock_); + template <StackType stack_type> + ALWAYS_INLINE void SetStackEndForStackOverflow() + REQUIRES_SHARED(Locks::mutator_lock_); // Set the stack end to that to be used during regular execution + template <StackType stack_type> ALWAYS_INLINE void ResetDefaultStackEnd(); + template <StackType stack_type> bool IsHandlingStackOverflow() const { - return tlsPtr_.stack_end == tlsPtr_.stack_begin; + return GetStackEnd<stack_type>() == GetStackBegin<stack_type>(); } template<PointerSize pointer_size> @@ -1170,6 +1214,9 @@ class EXPORT Thread { ManagedStack::TopShadowFrameOffset()); } + // Is the given object on the quick stack? + bool IsRawObjOnQuickStack(uint8_t* raw_obj) const; + // Is the given obj in one of this thread's JNI transition frames? bool IsJniTransitionReference(jobject obj) const REQUIRES_SHARED(Locks::mutator_lock_); @@ -1499,7 +1546,9 @@ class EXPORT Thread { tlsPtr_.rosalloc_runs[index] = run; } + template <StackType stack_type> bool ProtectStack(bool fatal_on_error = true); + template <StackType stack_type> bool UnprotectStack(); uint32_t DecrementForceInterpreterCount() REQUIRES(Locks::thread_list_lock_) { @@ -1762,7 +1811,8 @@ class EXPORT Thread { void InitTlsEntryPoints(); void InitTid(); void InitPthreadKeySelf(); - bool InitStackHwm(); + template <StackType stack_type> + bool InitStack(uint8_t* read_stack_base, size_t read_stack_size, size_t read_guard_size); void SetUpAlternateSignalStack(); void TearDownAlternateSignalStack(); @@ -1825,7 +1875,12 @@ class EXPORT Thread { REQUIRES_SHARED(Locks::mutator_lock_); void RunEmptyCheckpoint(); + // Return the nearest page-aligned address below the current stack top. + template <StackType> + NO_INLINE uint8_t* FindStackTop(); + // Install the protected region for implicit stack checks. + template <StackType> void InstallImplicitProtection(); template <bool kPrecise> @@ -1835,6 +1890,22 @@ class EXPORT Thread { void SetCachedThreadName(const char* name); + // Helper functions to get/set the tls stack pointer variables. + template <StackType stack_type> + ALWAYS_INLINE void SetStackEnd(uint8_t* new_stack_end); + + template <StackType stack_type> + ALWAYS_INLINE uint8_t* GetStackBegin() const; + + template <StackType stack_type> + ALWAYS_INLINE void SetStackBegin(uint8_t* new_stack_begin); + + template <StackType stack_type> + ALWAYS_INLINE size_t GetStackSize() const; + + template <StackType stack_type> + ALWAYS_INLINE void SetStackSize(size_t new_stack_size); + // Helper class for manipulating the 32 bits of atomically changed state and flags. class StateAndFlags { public: @@ -2173,6 +2244,7 @@ class EXPORT Thread { // The end of this thread's stack. 
This is the lowest safely-addressable address on the stack. // We leave extra space so there's room for the code that throws StackOverflowError. + // Note: do not use directly, instead use GetStackEnd/SetStackEnd template function instead. uint8_t* stack_end; // The top of the managed stack often manipulated directly by compiler generated code. @@ -2204,9 +2276,11 @@ class EXPORT Thread { jobject jpeer; // The "lowest addressable byte" of the stack. + // Note: do not use directly, instead use GetStackBegin/SetStackBegin template function instead. uint8_t* stack_begin; // Size of the stack. + // Note: do not use directly, instead use GetStackSize/SetStackSize template function instead. size_t stack_size; // Sampling profiler and AOT verification cannot happen on the same run, so we share |
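For reference, the new InitStack<kNativeStackType>() is fed the stack base, size and guard size that the (now forward-declared) GetThreadStack() helper reads from pthreads. A small Linux-only sketch of that query, assuming glibc/Bionic's pthread_getattr_np rather than showing ART's actual helper, is:

    // Linux-only sketch: query the current thread's stack base/size/guard size,
    // roughly the values GetThreadStack() would pass into InitStack<kNativeStackType>().
    // Not ART's implementation; pthread_getattr_np is a glibc/Bionic extension.
    #include <pthread.h>
    #include <cstddef>
    #include <cstdio>

    int main() {
      pthread_attr_t attr;
      if (pthread_getattr_np(pthread_self(), &attr) != 0) {
        return 1;
      }

      void* stack_base = nullptr;  // lowest address of the stack mapping
      size_t stack_size = 0;
      size_t guard_size = 0;
      pthread_attr_getstack(&attr, &stack_base, &stack_size);
      pthread_attr_getguardsize(&attr, &guard_size);
      pthread_attr_destroy(&attr);

      std::printf("stack base=%p size=%zu guard=%zu\n", stack_base, stack_size, guard_size);
      return 0;
    }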
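Likewise, ProtectStack<stack_type>() and UnprotectStack<stack_type>() toggle an mprotect'd region just below stack_begin so that an overflow faults early. The following standalone POSIX sketch illustrates only that guard-region idea on an anonymous mapping; it is not ART code, and the fault handler that turns the SIGSEGV into a StackOverflowError is omitted:

    // Standalone POSIX sketch of the guard-region idea behind ProtectStack()/
    // UnprotectStack(): a PROT_NONE page below the usable stack turns an overflow
    // into a SIGSEGV that a fault handler can convert into a StackOverflowError.
    // This guards a heap mapping, not a real thread stack, and omits the handler.
    #include <sys/mman.h>
    #include <unistd.h>
    #include <cstddef>
    #include <cstdio>

    int main() {
      const size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
      const size_t size = 16 * page;  // pretend "stack": one guard page + usable pages

      void* base = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (base == MAP_FAILED) {
        std::perror("mmap");
        return 1;
      }

      // "ProtectStack": make the lowest page inaccessible, like the region that
      // InstallImplicitProtection() sets up just below stack_begin.
      if (mprotect(base, page, PROT_NONE) != 0) {
        std::perror("mprotect(PROT_NONE)");
        return 1;
      }
      std::printf("guard region installed at %p\n", base);

      // "UnprotectStack": done while throwing StackOverflowError so the reserved
      // part of the stack becomes usable again for building the exception.
      if (mprotect(base, page, PROT_READ | PROT_WRITE) != 0) {
        std::perror("mprotect(PROT_READ|PROT_WRITE)");
        return 1;
      }
      std::printf("guard region removed\n");

      munmap(base, size);
      return 0;
    }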