Diffstat (limited to 'runtime/thread.cc')
-rw-r--r--  runtime/thread.cc  173
1 file changed, 120 insertions, 53 deletions
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 9fa158d5e0..8e6da74e5e 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -76,6 +76,8 @@ namespace art {
 bool Thread::is_started_ = false;
 pthread_key_t Thread::pthread_key_self_;
 ConditionVariable* Thread::resume_cond_ = nullptr;
+const size_t Thread::kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
+    GetStackOverflowReservedBytes(kRuntimeISA);
 
 static const char* kThreadNameDuringStartup = "<native thread without managed peer>";
 
@@ -219,7 +221,7 @@ static size_t FixStackSize(size_t stack_size) {
     // It's likely that callers are trying to ensure they have at least a certain amount of
     // stack space, so we should add our reserved space on top of what they requested, rather
     // than implicitly take it away from them.
-    stack_size += kRuntimeStackOverflowReservedBytes;
+    stack_size += GetStackOverflowReservedBytes(kRuntimeISA);
   } else {
     // If we are going to use implicit stack checks, allocate space for the protected
     // region at the bottom of the stack.
@@ -232,47 +234,95 @@ static size_t FixStackSize(size_t stack_size) {
   return stack_size;
 }
 
+// Global variable to prevent the compiler optimizing away the page reads for the stack.
+byte dont_optimize_this;
+
 // Install a protected region in the stack.  This is used to trigger a SIGSEGV if a stack
 // overflow is detected.  It is located right below the stack_end_.  Just below that
 // is the StackOverflow reserved region used when creating the StackOverflow
 // exception.
+//
+// There is a little complexity here that deserves a special mention.  When running on the
+// host (glibc), the process's main thread's stack is allocated with a special flag
+// to prevent memory being allocated when it's not needed.  This flag makes the
+// kernel only allocate memory for the stack by growing down in memory.  Because we
+// want to put an mprotected region far away from that at the stack top, we need
+// to make sure the pages for the stack are mapped in before we call mprotect.  We do
+// this by reading every page from the stack bottom (highest address) to the stack top.
+// We then madvise this away.
 void Thread::InstallImplicitProtection(bool is_main_stack) {
   byte* pregion = tlsPtr_.stack_end;
+  byte* stack_lowmem = tlsPtr_.stack_begin;
+  byte* stack_top = reinterpret_cast<byte*>(reinterpret_cast<uintptr_t>(&pregion) &
+      ~(kPageSize - 1));    // Page containing current top of stack.
+
+  const bool running_on_intel = (kRuntimeISA == kX86) || (kRuntimeISA == kX86_64);
+
+  if (running_on_intel) {
+    // On Intel, we need to map in the main stack.  This must be done by reading from the
+    // current stack pointer downwards as the stack is mapped using VM_GROWSDOWN
+    // in the kernel.  Any access more than a page below the current SP will cause
+    // a segv.
+    if (is_main_stack) {
+      // First we need to unprotect the protected region because this may
+      // be called more than once for a particular stack and we will crash
+      // if we try to read the protected page.
+      mprotect(pregion - kStackOverflowProtectedSize, kStackOverflowProtectedSize, PROT_READ);
+
+      // Read every page from the high address to the low.
+      for (byte* p = stack_top; p > stack_lowmem; p -= kPageSize) {
+        dont_optimize_this = *p;
+      }
+    }
+  }
+
+  // Check and place a marker word at the lowest usable address in the stack.  This
+  // is used to prevent a double protection.
   constexpr uint32_t kMarker = 0xdadadada;
   uintptr_t *marker = reinterpret_cast<uintptr_t*>(pregion);
   if (*marker == kMarker) {
-    // The region has already been set up.
+    // The region has already been set up.  But on the main stack on the host we have
+    // removed the protected region in order to read the stack memory.  We need to put
+    // this back again.
+    if (is_main_stack && running_on_intel) {
+      mprotect(pregion - kStackOverflowProtectedSize, kStackOverflowProtectedSize, PROT_NONE);
+      madvise(stack_lowmem, stack_top - stack_lowmem, MADV_DONTNEED);
+    }
     return;
   }
   // Add marker so that we can detect a second attempt to do this.
   *marker = kMarker;
 
-  pregion -= kStackOverflowProtectedSize;
-
-  // Touch the pages in the region to map them in.  Otherwise mprotect fails.  Only
-  // need to do this on the main stack.  We only need to touch one byte per page.
-  if (is_main_stack) {
-    byte* start = pregion;
-    byte* end = pregion + kStackOverflowProtectedSize;
-    while (start < end) {
-      *start = static_cast<byte>(0);
-      start += kPageSize;
+  if (!running_on_intel) {
+    // Running on !Intel, stacks are mapped cleanly.  The protected region for the
+    // main stack just needs to be mapped in.  We do this by writing one byte per page.
+    for (byte* p = pregion - kStackOverflowProtectedSize; p < pregion; p += kPageSize) {
+      *p = 0;
     }
   }
 
+  pregion -= kStackOverflowProtectedSize;
+
   VLOG(threads) << "installing stack protected region at " << std::hex <<
       static_cast<void*>(pregion) << " to " <<
       static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
 
   if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
     LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
-        << strerror(errno);
+        << strerror(errno) << kStackOverflowProtectedSize;
   }
 
   // Tell the kernel that we won't be needing these pages any more.
+  // NB. madvise will probably write zeroes into the memory (on linux it does).
   if (is_main_stack) {
-    madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
+    if (running_on_intel) {
+      // On the host, it's the whole stack (minus a page to prevent overwrite of stack top).
+      madvise(stack_lowmem, stack_top - stack_lowmem - kPageSize, MADV_DONTNEED);
+    } else {
+      // On Android, just the protected region.
+      madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
+    }
   }
 }
 
@@ -488,7 +538,7 @@ void Thread::InitStackHwm() {
   tlsPtr_.stack_begin = reinterpret_cast<byte*>(read_stack_base);
   tlsPtr_.stack_size = read_stack_size;
 
-  if (read_stack_size <= kRuntimeStackOverflowReservedBytes) {
+  if (read_stack_size <= GetStackOverflowReservedBytes(kRuntimeISA)) {
     LOG(FATAL) << "Attempt to attach a thread with a too-small stack (" << read_stack_size
                << " bytes)";
   }
@@ -533,13 +583,17 @@ void Thread::InitStackHwm() {
   // Install the protected region if we are doing implicit overflow checks.
   if (implicit_stack_check) {
     if (is_main_thread) {
-      // The main thread has a 16K protected region at the bottom.  We need
+      size_t guardsize;
+      pthread_attr_t attributes;
+      CHECK_PTHREAD_CALL(pthread_attr_init, (&attributes), "guard size query");
+      CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, &guardsize), "guard size query");
+      CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), "guard size query");
+      // The main thread might have protected region at the bottom.  We need
       // to install our own region so we need to move the limits
       // of the stack to make room for it.
-      constexpr uint32_t kDelta = 16 * KB;
-      tlsPtr_.stack_begin += kDelta;
-      tlsPtr_.stack_end += kDelta;
-      tlsPtr_.stack_size -= kDelta;
+      tlsPtr_.stack_begin += guardsize;
+      tlsPtr_.stack_end += guardsize;
+      tlsPtr_.stack_size -= guardsize;
     }
     InstallImplicitProtection(is_main_thread);
   }
@@ -1086,7 +1140,7 @@ void Thread::AssertNoPendingExceptionForNewException(const char* msg) const {
   if (UNLIKELY(IsExceptionPending())) {
     ScopedObjectAccess soa(Thread::Current());
     mirror::Throwable* exception = GetException(nullptr);
-    LOG(FATAL) << "Throwing new exception " << msg << " with unexpected pending exception: "
+    LOG(FATAL) << "Throwing new exception '" << msg << "' with unexpected pending exception: "
                << exception->Dump();
   }
 }
@@ -1109,6 +1163,21 @@ void Thread::Destroy() {
   Thread* self = this;
   DCHECK_EQ(self, Thread::Current());
 
+  if (tlsPtr_.jni_env != nullptr) {
+    // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
+    tlsPtr_.jni_env->monitors.VisitRoots(MonitorExitVisitor, self, 0, kRootVMInternal);
+    // Release locally held global references which releasing may require the mutator lock.
+    if (tlsPtr_.jpeer != nullptr) {
+      // If pthread_create fails we don't have a jni env here.
+      tlsPtr_.jni_env->DeleteGlobalRef(tlsPtr_.jpeer);
+      tlsPtr_.jpeer = nullptr;
+    }
+    if (tlsPtr_.class_loader_override != nullptr) {
+      tlsPtr_.jni_env->DeleteGlobalRef(tlsPtr_.class_loader_override);
+      tlsPtr_.class_loader_override = nullptr;
+    }
+  }
+
   if (tlsPtr_.opeer != nullptr) {
     ScopedObjectAccess soa(self);
     // We may need to call user-supplied managed code, do this before final clean-up.
@@ -1136,22 +1205,16 @@ void Thread::Destroy() {
       ObjectLock<mirror::Object> locker(self, h_obj);
       locker.NotifyAll();
     }
+    tlsPtr_.opeer = nullptr;
   }
 
-  // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
-  if (tlsPtr_.jni_env != nullptr) {
-    tlsPtr_.jni_env->monitors.VisitRoots(MonitorExitVisitor, self, 0, kRootVMInternal);
-  }
+  Runtime::Current()->GetHeap()->RevokeThreadLocalBuffers(this);
 }
 
 Thread::~Thread() {
-  if (tlsPtr_.jni_env != nullptr && tlsPtr_.jpeer != nullptr) {
-    // If pthread_create fails we don't have a jni env here.
-    tlsPtr_.jni_env->DeleteGlobalRef(tlsPtr_.jpeer);
-    tlsPtr_.jpeer = nullptr;
-  }
-  tlsPtr_.opeer = nullptr;
-
+  CHECK(tlsPtr_.class_loader_override == nullptr);
+  CHECK(tlsPtr_.jpeer == nullptr);
+  CHECK(tlsPtr_.opeer == nullptr);
   bool initialized = (tlsPtr_.jni_env != nullptr);  // Did Thread::Init run?
   if (initialized) {
     delete tlsPtr_.jni_env;
@@ -1183,7 +1246,7 @@ Thread::~Thread() {
   delete tlsPtr_.name;
   delete tlsPtr_.stack_trace_sample;
 
-  Runtime::Current()->GetHeap()->RevokeThreadLocalBuffers(this);
+  Runtime::Current()->GetHeap()->AssertThreadLocalBuffersAreRevoked(this);
 
   TearDownAlternateSignalStack();
 }
@@ -1293,11 +1356,10 @@ mirror::Object* Thread::DecodeJObject(jobject obj) const {
       result = kInvalidIndirectRefObject;
     }
   } else if (kind == kGlobal) {
-    JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
-    result = vm->globals.SynchronizedGet(const_cast<Thread*>(this), &vm->globals_lock, ref);
+    result = tlsPtr_.jni_env->vm->DecodeGlobal(const_cast<Thread*>(this), ref);
   } else {
     DCHECK_EQ(kind, kWeakGlobal);
-    result = Runtime::Current()->GetJavaVM()->DecodeWeakGlobal(const_cast<Thread*>(this), ref);
+    result = tlsPtr_.jni_env->vm->DecodeWeakGlobal(const_cast<Thread*>(this), ref);
     if (result == kClearedJniWeakGlobal) {
       // This is a special case where it's okay to return nullptr.
       return nullptr;
@@ -1305,7 +1367,8 @@ mirror::Object* Thread::DecodeJObject(jobject obj) const {
   }
 
   if (UNLIKELY(result == nullptr)) {
-    JniAbortF(nullptr, "use of deleted %s %p", ToStr<IndirectRefKind>(kind).c_str(), obj);
+    tlsPtr_.jni_env->vm->JniAbortF(nullptr, "use of deleted %s %p",
+                                   ToStr<IndirectRefKind>(kind).c_str(), obj);
   }
   return result;
 }
@@ -1345,6 +1408,13 @@ void Thread::NotifyLocked(Thread* self) {
   }
 }
 
+void Thread::SetClassLoaderOverride(jobject class_loader_override) {
+  if (tlsPtr_.class_loader_override != nullptr) {
+    GetJniEnv()->DeleteGlobalRef(tlsPtr_.class_loader_override);
+  }
+  tlsPtr_.class_loader_override = GetJniEnv()->NewGlobalRef(class_loader_override);
+}
+
 class CountStackDepthVisitor : public StackVisitor {
  public:
   explicit CountStackDepthVisitor(Thread* thread)
@@ -1879,6 +1949,8 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) {
   QUICK_ENTRY_POINT_INFO(pThrowNoSuchMethod)
   QUICK_ENTRY_POINT_INFO(pThrowNullPointer)
   QUICK_ENTRY_POINT_INFO(pThrowStackOverflow)
+  QUICK_ENTRY_POINT_INFO(pA64Load)
+  QUICK_ENTRY_POINT_INFO(pA64Store)
 #undef QUICK_ENTRY_POINT_INFO
 
   os << offset;
@@ -1916,10 +1988,13 @@ Context* Thread::GetLongJumpContext() {
   return result;
 }
 
+// Note: this visitor may return with a method set, but dex_pc_ being DexFile:kDexNoIndex. This is
+// so we don't abort in a special situation (thinlocked monitor) when dumping the Java stack.
 struct CurrentMethodVisitor FINAL : public StackVisitor {
-  CurrentMethodVisitor(Thread* thread, Context* context)
+  CurrentMethodVisitor(Thread* thread, Context* context, bool abort_on_error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, context), this_object_(nullptr), method_(nullptr), dex_pc_(0) {}
+      : StackVisitor(thread, context), this_object_(nullptr), method_(nullptr), dex_pc_(0),
+        abort_on_error_(abort_on_error) {}
   bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
     if (m->IsRuntimeMethod()) {
@@ -1930,16 +2005,17 @@ struct CurrentMethodVisitor FINAL : public StackVisitor {
       this_object_ = GetThisObject();
     }
     method_ = m;
-    dex_pc_ = GetDexPc();
+    dex_pc_ = GetDexPc(abort_on_error_);
     return false;
   }
   mirror::Object* this_object_;
   mirror::ArtMethod* method_;
   uint32_t dex_pc_;
+  const bool abort_on_error_;
 };
 
-mirror::ArtMethod* Thread::GetCurrentMethod(uint32_t* dex_pc) const {
-  CurrentMethodVisitor visitor(const_cast<Thread*>(this), nullptr);
+mirror::ArtMethod* Thread::GetCurrentMethod(uint32_t* dex_pc, bool abort_on_error) const {
+  CurrentMethodVisitor visitor(const_cast<Thread*>(this), nullptr, abort_on_error);
   visitor.WalkStack(false);
   if (dex_pc != nullptr) {
     *dex_pc = visitor.dex_pc_;
@@ -1949,7 +2025,7 @@ mirror::ArtMethod* Thread::GetCurrentMethod(uint32_t* dex_pc) const {
 
 ThrowLocation Thread::GetCurrentLocationForThrow() {
   Context* context = GetLongJumpContext();
-  CurrentMethodVisitor visitor(this, context);
+  CurrentMethodVisitor visitor(this, context, true);
   visitor.WalkStack(false);
   ReleaseLongJumpContext(context);
   return ThrowLocation(visitor.this_object_, visitor.method_, visitor.dex_pc_);
@@ -2113,11 +2189,6 @@ class RootCallbackVisitor {
   const uint32_t tid_;
 };
 
-void Thread::SetClassLoaderOverride(mirror::ClassLoader* class_loader_override) {
-  VerifyObject(class_loader_override);
-  tlsPtr_.class_loader_override = class_loader_override;
-}
-
 void Thread::VisitRoots(RootCallback* visitor, void* arg) {
   uint32_t thread_id = GetThreadId();
   if (tlsPtr_.opeer != nullptr) {
@@ -2127,10 +2198,6 @@ void Thread::VisitRoots(RootCallback* visitor, void* arg) {
     visitor(reinterpret_cast<mirror::Object**>(&tlsPtr_.exception), arg, thread_id, kRootNativeStack);
   }
   tlsPtr_.throw_location.VisitRoots(visitor, arg);
-  if (tlsPtr_.class_loader_override != nullptr) {
-    visitor(reinterpret_cast<mirror::Object**>(&tlsPtr_.class_loader_override), arg, thread_id,
-            kRootNativeStack);
-  }
   if (tlsPtr_.monitor_enter_object != nullptr) {
     visitor(&tlsPtr_.monitor_enter_object, arg, thread_id, kRootNativeStack);
   }
@@ -2193,7 +2260,7 @@ void Thread::SetStackEndForStackOverflow() {
   if (tlsPtr_.stack_end == tlsPtr_.stack_begin) {
     // However, we seem to have already extended to use the full stack.
     LOG(ERROR) << "Need to increase kStackOverflowReservedBytes (currently "
-               << kRuntimeStackOverflowReservedBytes << ")?";
+               << GetStackOverflowReservedBytes(kRuntimeISA) << ")?";
     DumpStack(LOG(ERROR));
     LOG(FATAL) << "Recursive stack overflow.";
   }
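
The heart of this change is the implicit stack-overflow check: a PROT_NONE guard region sits just below stack_end_, so an overflowing frame faults instead of silently corrupting memory, and the runtime turns that fault into a StackOverflowError. The standalone sketch below is not ART code; the region sizes, the plain signal handler, and the _exit on detection are illustrative assumptions. It shows the same mechanism in isolation: map a stand-in stack, touch its pages so they are resident, protect the low end, and field the resulting SIGSEGV.

// Illustrative sketch only (not ART): guard-page based overflow detection.
#include <sys/mman.h>
#include <signal.h>
#include <unistd.h>
#include <cstdio>
#include <cstring>

namespace {

unsigned char* guard_begin = nullptr;  // Lowest address of the protected region.
size_t guard_size = 0;

// SIGSEGV handler: if the faulting address is inside the guard region, report a
// simulated stack overflow and exit; otherwise fall back to the default action.
void SegvHandler(int sig, siginfo_t* info, void*) {
  unsigned char* addr = static_cast<unsigned char*>(info->si_addr);
  if (addr >= guard_begin && addr < guard_begin + guard_size) {
    const char msg[] = "fault in guard region: simulated stack overflow\n";
    write(STDERR_FILENO, msg, sizeof(msg) - 1);
    _exit(0);
  }
  signal(sig, SIG_DFL);
  raise(sig);
}

}  // namespace

int main() {
  const size_t page_size = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  const size_t stack_size = 64 * page_size;  // Stand-in "stack" (illustrative size).
  guard_size = 4 * page_size;                // Stand-in protected region (illustrative size).

  // Anonymous mapping standing in for a thread stack; low addresses are the "bottom".
  void* stack = mmap(nullptr, stack_size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (stack == MAP_FAILED) {
    perror("mmap");
    return 1;
  }

  // Touch one byte per page so the pages are mapped in, mirroring the page-touching passes
  // in the patch (not strictly required for this anonymous mapping).
  for (size_t off = 0; off < stack_size; off += page_size) {
    static_cast<unsigned char*>(stack)[off] = 0;
  }

  // Protect the lowest pages: any access there now raises SIGSEGV.
  guard_begin = static_cast<unsigned char*>(stack);
  if (mprotect(guard_begin, guard_size, PROT_NONE) == -1) {
    perror("mprotect");
    return 1;
  }

  // Turn faults in the guard region into a recognizable "overflow" report.
  struct sigaction sa;
  memset(&sa, 0, sizeof(sa));
  sa.sa_sigaction = SegvHandler;
  sa.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &sa, nullptr);

  // Simulate an overflow by touching the protected low end of the region.
  guard_begin[0] = 1;  // Faults; handled in SegvHandler.
  return 0;
}

In ART the equivalent fault is taken on a real thread stack and dispatched to the pThrowStackOverflow entrypoint rather than a toy handler, but the mprotect/madvise sequence in the patch serves the same purpose as the steps above.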