ART: Speed up stack guard page install
Under normal conditions, only the main thread's stack is not fully
mapped in. Reading each page adds a lot of overhead that we should
try to avoid.

Rewrite to first try a (non-fatal) protect. If it succeeds, we are
done. Otherwise, map in the stack and try again.
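For illustration, the new flow is roughly the following (a minimal
standalone sketch, not the ART code itself; the helper names, bounds
and parameters here are assumptions):

    #include <sys/mman.h>
    #include <cstdint>

    // Hypothetical stand-in for Thread::ProtectStack(/*fatal_on_error*/ false):
    // returns true iff the guard region could be made inaccessible.
    static bool TryProtect(uint8_t* pregion, size_t guard_size) {
      return mprotect(pregion, guard_size, PROT_NONE) == 0;
    }

    static void InstallGuard(uint8_t* stack_begin, uint8_t* stack_top,
                             size_t guard_size, size_t page_size) {
      uint8_t* pregion = stack_begin - guard_size;
      // Fast path: the stack is already mapped in (every thread but main).
      if (TryProtect(pregion, guard_size)) {
        return;
      }
      // Slow path: fault the stack in by reading each page from the high
      // end of the stack down to the guard region, then protect again.
      volatile uint8_t dont_optimize_this;
      for (uint8_t* p = stack_top; p >= pregion; p -= page_size) {
        dont_optimize_this = *p;
      }
      TryProtect(pregion, guard_size);  // The real code aborts on failure here.
    }

The point of the fast path is that mprotect on an already-mapped stack
succeeds immediately, so the page-by-page read is only paid by threads
with an unmapped VM_GROWSDOWN stack.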
Bug: 27718174
Change-Id: I16b214567585ed2f09970f618ccdec7eed219fd3
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6b8c0c2..2176444 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -511,23 +511,8 @@
return stack_size;
}
-// Global variable to prevent the compiler optimizing away the page reads for the stack.
-uint8_t dont_optimize_this;
-
// Install a protected region in the stack. This is used to trigger a SIGSEGV if a stack
// overflow is detected. It is located right below the stack_begin_.
-//
-// There is a little complexity here that deserves a special mention. On some
-// architectures, the stack created using a VM_GROWSDOWN flag
-// to prevent memory being allocated when it's not needed. This flag makes the
-// kernel only allocate memory for the stack by growing down in memory. Because we
-// want to put an mprotected region far away from that at the stack top, we need
-// to make sure the pages for the stack are mapped in before we call mprotect. We do
-// this by reading every page from the stack bottom (highest address) to the stack top.
-// We then madvise this away.
-
-// AddressSanitizer does not like the part of this functions that reads every stack page.
-// Looks a lot like an out-of-bounds access.
ATTRIBUTE_NO_SANITIZE_ADDRESS
void Thread::InstallImplicitProtection() {
uint8_t* pregion = tlsPtr_.stack_begin - kStackOverflowProtectedSize;
@@ -535,27 +520,57 @@
uint8_t* stack_top = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(&stack_himem) &
~(kPageSize - 1)); // Page containing current top of stack.
- // First remove the protection on the protected region as will want to read and
- // write it. This may fail (on the first attempt when the stack is not mapped)
- // but we ignore that.
+ // Try to directly protect the stack.
+ VLOG(threads) << "installing stack protected region at " << std::hex <<
+ static_cast<void*>(pregion) << " to " <<
+ static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
+ if (ProtectStack(/* fatal_on_error */ false)) {
+ // Tell the kernel that we won't be needing these pages any more.
+ // NB. madvise will probably write zeroes into the memory (on linux it does).
+ uint32_t unwanted_size = stack_top - pregion - kPageSize;
+ madvise(pregion, unwanted_size, MADV_DONTNEED);
+ return;
+ }
+
+ // There is a little complexity here that deserves a special mention. On some
+ // architectures, the stack is created using a VM_GROWSDOWN flag
+ // to prevent memory being allocated when it's not needed. This flag makes the
+ // kernel only allocate memory for the stack by growing down in memory. Because we
+ // want to put an mprotected region far away from that at the stack top, we need
+ // to make sure the pages for the stack are mapped in before we call mprotect.
+ //
+ // The failed mprotect in ProtectStack is an indication of a thread with VM_GROWSDOWN
+ // with a non-mapped stack (usually only the main thread).
+ //
+ // We map in the stack by reading every page from the stack bottom (highest address)
+ // to the stack top. (We then madvise this away.) This must be done by reading from the
+ // current stack pointer downwards. Any access more than a page below the current SP
+ // might cause a segv.
+ // TODO: This comment may be out of date. It seems possible to speed this up. As
+ // this is normally done once in the zygote on startup, ignore for now.
+ //
+ // AddressSanitizer does not like the part of this function that reads every stack page.
+ // Looks a lot like an out-of-bounds access.
+
+ // (Defensively) first remove the protection on the protected region as we will want to read
+ // and write it. Ignore errors.
UnprotectStack();
- // Map in the stack. This must be done by reading from the
- // current stack pointer downwards as the stack may be mapped using VM_GROWSDOWN
- // in the kernel. Any access more than a page below the current SP might cause
- // a segv.
+ VLOG(threads) << "Need to map in stack for thread at " << std::hex <<
+ static_cast<void*>(pregion);
// Read every page from the high address to the low.
+ volatile uint8_t dont_optimize_this;
for (uint8_t* p = stack_top; p >= pregion; p -= kPageSize) {
dont_optimize_this = *p;
}
- VLOG(threads) << "installing stack protected region at " << std::hex <<
+ VLOG(threads) << "(again) installing stack protected region at " << std::hex <<
static_cast<void*>(pregion) << " to " <<
static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
// Protect the bottom of the stack to prevent read/write to it.
- ProtectStack();
+ ProtectStack(/* fatal_on_error */ true);
// Tell the kernel that we won't be needing these pages any more.
// NB. madvise will probably write zeroes into the memory (on linux it does).
@@ -2945,14 +2960,18 @@
return os;
}
-void Thread::ProtectStack() {
+bool Thread::ProtectStack(bool fatal_on_error) {
void* pregion = tlsPtr_.stack_begin - kStackOverflowProtectedSize;
VLOG(threads) << "Protecting stack at " << pregion;
if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
- LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. "
- "Reason: "
- << strerror(errno) << " size: " << kStackOverflowProtectedSize;
+ if (fatal_on_error) {
+ LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. "
+ "Reason: "
+ << strerror(errno) << " size: " << kStackOverflowProtectedSize;
+ }
+ return false;
}
+ return true;
}
bool Thread::UnprotectStack() {
diff --git a/runtime/thread.h b/runtime/thread.h
index 234750c..3123c71 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1038,7 +1038,7 @@
tlsPtr_.rosalloc_runs[index] = run;
}
- void ProtectStack();
+ bool ProtectStack(bool fatal_on_error = true);
bool UnprotectStack();
void SetMterpDefaultIBase(void* ibase) {