ART: Target-dependent stack overflow, less check elision
Move the per-target stack overflow reserved sizes from thread.h
into instruction_set.h and make sure the compiler uses them.
Rework the decision on when to elide stack overflow checks:
especially with large interpreter stack frames, it is not a good
idea to elide checks when the frame size gets anywhere near the
reserved size. Checks are now enforced whenever the frame size is
>= 2KB or at least a quarter of the target's reserved space, while
leaf methods with frames of 1KB and below still elide the check
(thresholds chosen from experience).
Bug: 15728765
Change-Id: I016bfd3d8218170cbccbd123ed5e2203db167c06
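
To illustrate the new rule outside of the ART sources, here is a minimal
standalone C++ sketch. The constants and per-target values mirror the ones
introduced in this change (kLargeFrameSize, kSmallFrameSize,
GetStackOverflowReservedBytes, IsLargeFrame); the enum, assertions and
main() below are illustrative only and are not part of the patch.

    // Standalone sketch; mirrors the elision rule added in this change.
    #include <cstddef>
    #include <cstdio>

    namespace sketch {

    constexpr std::size_t KB = 1024;

    // A frame at or above this size always gets a stack overflow check.
    constexpr std::size_t kLargeFrameSize = 2 * KB;
    // A leaf frame at or below this size must stay eligible for elision.
    constexpr std::size_t kSmallFrameSize = 1 * KB;

    enum InstructionSet { kArm, kArm64, kMips, kX86, kX86_64 };

    // Per-target reserved overflow space, mirroring the values moved into
    // instruction_set.h below.
    constexpr std::size_t GetStackOverflowReservedBytes(InstructionSet isa) {
      return (isa == kArm64 || isa == kX86_64) ? 32 * KB
           : (isa == kX86)                     ? 24 * KB
                                               : 16 * KB;  // kArm, kMips
    }

    // A frame is "large" (check never elided) when it reaches 2 KB or a
    // quarter of the target's reserved overflow space.
    constexpr bool IsLargeFrame(std::size_t size, InstructionSet isa) {
      return size >= kLargeFrameSize ||
             size >= GetStackOverflowReservedBytes(isa) / 4;
    }

    // 1 KB leaf frames must remain eligible for elision on every target.
    static_assert(!IsLargeFrame(kSmallFrameSize, kArm), "arm");
    static_assert(!IsLargeFrame(kSmallFrameSize, kX86_64), "x86_64");

    }  // namespace sketch

    int main() {
      using namespace sketch;
      // A 3 KB leaf frame keeps its check; a 1 KB leaf frame may elide it.
      std::printf("%d\n", IsLargeFrame(3 * KB, kArm));  // prints 1
      std::printf("%d\n", IsLargeFrame(1 * KB, kArm));  // prints 0
    }
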
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 5466abd..2bdf3e4 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -356,11 +356,11 @@
* We can safely skip the stack overflow check if we're
* a leaf *and* our frame size < fudge factor.
*/
- bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
- (static_cast<size_t>(frame_size_) <
- Thread::kStackOverflowReservedBytes));
+ bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm);
NewLIR0(kPseudoMethodEntry);
- bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+ constexpr size_t kStackOverflowReservedUsableBytes = kArmStackOverflowReservedBytes -
+ Thread::kStackOverflowSignalReservedBytes;
+ bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes);
if (!skip_overflow_check) {
if (Runtime::Current()->ExplicitStackOverflowChecks()) {
if (!large_frame) {
@@ -381,7 +381,7 @@
// This is done before the callee save instructions to avoid any possibility
// of these overflowing. This uses r12 and that's never saved in a callee
// save.
- OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+ OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, kArmStackOverflowReservedBytes);
Load32Disp(rs_r12, 0, rs_r12);
MarkPossibleStackOverflowException();
}
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index f1748ef..35263ea 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -337,13 +337,13 @@
* We can safely skip the stack overflow check if we're
* a leaf *and* our frame size < fudge factor.
*/
- bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
- (static_cast<size_t>(frame_size_) <
- Thread::kStackOverflowReservedBytes));
+ bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm64);
NewLIR0(kPseudoMethodEntry);
- const bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+ constexpr size_t kStackOverflowReservedUsableBytes = kArm64StackOverflowReservedBytes -
+ Thread::kStackOverflowSignalReservedBytes;
+ const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
const int spill_count = num_core_spills_ + num_fp_spills_;
const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf; // SP 16 byte alignment.
const int frame_size_without_spills = frame_size_ - spill_size;
@@ -412,7 +412,7 @@
// Branch to throw target if there is not enough room.
OpRegRegImm(kOpSub, rs_x9, rs_rA64_SP, frame_size_without_spills);
LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
- LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x8, nullptr);
+ LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr);
AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size));
OpRegCopy(rs_rA64_SP, rs_x9); // Establish stack after checks.
} else {
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index c734202..e53105f 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -305,8 +305,7 @@
* We can safely skip the stack overflow check if we're
* a leaf *and* our frame size < fudge factor.
*/
- bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
- (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
+ bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kMips);
NewLIR0(kPseudoMethodEntry);
RegStorage check_reg = AllocTemp();
RegStorage new_sp = AllocTemp();
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index ca65432..f70087d 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -25,6 +25,7 @@
#include "dex/backend.h"
#include "dex/quick/resource_mask.h"
#include "driver/compiler_driver.h"
+#include "instruction_set.h"
#include "leb128.h"
#include "safe_map.h"
#include "utils/array_ref.h"
@@ -206,6 +207,36 @@
#define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath))
#define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath))
+// Size of a frame that we definitely consider large. Any frame at or above this size gets
+// a stack overflow check.
+static constexpr size_t kLargeFrameSize = 2 * KB;
+
+// Size of a frame that we consider small. A leaf method with a frame smaller than this should
+// run without a stack overflow check.
+// The constant is from experience with frameworks code.
+static constexpr size_t kSmallFrameSize = 1 * KB;
+
+// Determine whether a frame is small or large, used in the decision on whether to elide a
+// stack overflow check on method entry.
+//
+// A frame is considered large when it is at or above kLargeFrameSize, or at least a quarter
+// of the target's reserved overflow space.
+static constexpr bool IsLargeFrame(size_t size, InstructionSet isa) {
+ return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
+}
+
+// We want to ensure that on all systems kSmallFrameSize will lead to false in IsLargeFrame.
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm),
+ kSmallFrameSize_is_not_a_small_frame_arm);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm64),
+ kSmallFrameSize_is_not_a_small_frame_arm64);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kMips),
+ kSmallFrameSize_is_not_a_small_frame_mips);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86),
+ kSmallFrameSize_is_not_a_small_frame_x86);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86_64),
+ kSmallFrameSize_is_not_a_small_frame_x86_64);
+
class Mir2Lir : public Backend {
public:
/*
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index dd5dab2..28195ab 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -235,8 +235,8 @@
* We can safely skip the stack overflow check if we're
* a leaf *and* our frame size < fudge factor.
*/
- const bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
- (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
+ const bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
+ !IsLargeFrame(frame_size_, Gen64Bit() ? kX86_64 : kX86);
NewLIR0(kPseudoMethodEntry);
/* Spill core callee saves */
SpillCoreRegs();
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index f81e2f9..2a82129 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -22,6 +22,7 @@
#include "globals.h"
#include "base/logging.h"
#include "base/hex_dump.h"
+#include "instruction_set.h"
#include "mirror/art_method.h"
#include "mirror/art_method-inl.h"
#include "thread.h"
@@ -59,7 +60,7 @@
// get the method from the top of the stack. However it's in r0.
uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
- reinterpret_cast<uint8_t*>(*out_sp) - Thread::kStackOverflowReservedBytes);
+ reinterpret_cast<uint8_t*>(*out_sp) - kArmStackOverflowReservedBytes);
if (overflow_addr == fault_addr) {
*out_method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
} else {
@@ -190,7 +191,7 @@
VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
", fault_addr: " << fault_addr;
- uintptr_t overflow_addr = sp - Thread::kStackOverflowReservedBytes;
+ uintptr_t overflow_addr = sp - kArmStackOverflowReservedBytes;
Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
CHECK_EQ(self, Thread::Current());
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index 67e7100..96eeb8d 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -22,6 +22,7 @@
#include "base/logging.h" // Logging is required for FATAL in the helper functions.
#include "base/macros.h"
+#include "globals.h" // For KB.
namespace art {
@@ -36,6 +37,20 @@
};
std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
+#if defined(__arm__)
+static constexpr InstructionSet kRuntimeISA = kArm;
+#elif defined(__aarch64__)
+static constexpr InstructionSet kRuntimeISA = kArm64;
+#elif defined(__mips__)
+static constexpr InstructionSet kRuntimeISA = kMips;
+#elif defined(__i386__)
+static constexpr InstructionSet kRuntimeISA = kX86;
+#elif defined(__x86_64__)
+static constexpr InstructionSet kRuntimeISA = kX86_64;
+#else
+static constexpr InstructionSet kRuntimeISA = kNone;
+#endif
+
// Architecture-specific pointer sizes
static constexpr size_t kArmPointerSize = 4;
static constexpr size_t kArm64PointerSize = 8;
@@ -153,19 +168,33 @@
}
}
-#if defined(__arm__)
-static constexpr InstructionSet kRuntimeISA = kArm;
-#elif defined(__aarch64__)
-static constexpr InstructionSet kRuntimeISA = kArm64;
-#elif defined(__mips__)
-static constexpr InstructionSet kRuntimeISA = kMips;
-#elif defined(__i386__)
-static constexpr InstructionSet kRuntimeISA = kX86;
-#elif defined(__x86_64__)
-static constexpr InstructionSet kRuntimeISA = kX86_64;
-#else
-static constexpr InstructionSet kRuntimeISA = kNone;
-#endif
+static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
+static constexpr size_t kArmStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+
+// TODO: shrink reserved space, in particular for 64bit.
+
+// Worst case, arm64 would need about 2.6x the x86_64 amount to cover its many more registers,
+// but this value works well in practice.
+static constexpr size_t kArm64StackOverflowReservedBytes = 32 * KB;
+// TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
+// test-art-host-run-test-interpreter-018-stack-overflow
+// test-art-host-run-test-interpreter-107-int-math2
+static constexpr size_t kX86StackOverflowReservedBytes = 24 * KB;
+static constexpr size_t kX86_64StackOverflowReservedBytes = 32 * KB;
+
+static constexpr size_t GetStackOverflowReservedBytes(InstructionSet isa) {
+ return (isa == kArm || isa == kThumb2) ? kArmStackOverflowReservedBytes :
+ isa == kArm64 ? kArm64StackOverflowReservedBytes :
+ isa == kMips ? kMipsStackOverflowReservedBytes :
+ isa == kX86 ? kX86StackOverflowReservedBytes :
+ isa == kX86_64 ? kX86_64StackOverflowReservedBytes :
+ isa == kNone ? (LOG(FATAL) << "kNone has no stack overflow size", 0) :
+ (LOG(FATAL) << "Unknown instruction set " << isa, 0);
+}
+
+static constexpr size_t kRuntimeStackOverflowReservedBytes =
+ GetStackOverflowReservedBytes(kRuntimeISA);
enum InstructionFeatures {
kHwDiv = 0x1, // Supports hardware divide.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6980530..3f8f4a3 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -220,7 +220,7 @@
// It's likely that callers are trying to ensure they have at least a certain amount of
// stack space, so we should add our reserved space on top of what they requested, rather
// than implicitly take it away from them.
- stack_size += Thread::kStackOverflowReservedBytes;
+ stack_size += kRuntimeStackOverflowReservedBytes;
} else {
// If we are going to use implicit stack checks, allocate space for the protected
// region at the bottom of the stack.
@@ -489,7 +489,7 @@
tlsPtr_.stack_begin = reinterpret_cast<byte*>(read_stack_base);
tlsPtr_.stack_size = read_stack_size;
- if (read_stack_size <= kStackOverflowReservedBytes) {
+ if (read_stack_size <= kRuntimeStackOverflowReservedBytes) {
LOG(FATAL) << "Attempt to attach a thread with a too-small stack (" << read_stack_size
<< " bytes)";
}
@@ -2200,7 +2200,7 @@
if (tlsPtr_.stack_end == tlsPtr_.stack_begin) {
// However, we seem to have already extended to use the full stack.
LOG(ERROR) << "Need to increase kStackOverflowReservedBytes (currently "
- << kStackOverflowReservedBytes << ")?";
+ << kRuntimeStackOverflowReservedBytes << ")?";
DumpStack(LOG(ERROR));
LOG(FATAL) << "Recursive stack overflow.";
}
diff --git a/runtime/thread.h b/runtime/thread.h
index bff9b52..7cd86de 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -33,6 +33,7 @@
#include "gc/allocator/rosalloc.h"
#include "globals.h"
#include "handle_scope.h"
+#include "instruction_set.h"
#include "jvalue.h"
#include "object_callbacks.h"
#include "offsets.h"
@@ -94,28 +95,8 @@
class Thread {
public:
- // Space to throw a StackOverflowError in.
- // TODO: shrink reserved space, in particular for 64bit.
-#if defined(__x86_64__)
- static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
-#elif defined(__aarch64__)
- // Worst-case, we would need about 2.6x the amount of x86_64 for many more registers.
- // But this one works rather well.
- static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
-#elif defined(__i386__)
- // TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
- // test-art-host-run-test-interpreter-018-stack-overflow
- // test-art-host-run-test-interpreter-107-int-math2
- static constexpr size_t kStackOverflowReservedBytes = 24 * KB;
-#else
- static constexpr size_t kStackOverflowReservedBytes = 16 * KB;
-#endif
// How much of the reserved bytes is reserved for incoming signals.
static constexpr size_t kStackOverflowSignalReservedBytes = 2 * KB;
- // How much of the reserved bytes we may temporarily use during stack overflow checks as an
- // optimization.
- static constexpr size_t kStackOverflowReservedUsableBytes =
- kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes;
// For implicit overflow checks we reserve an extra piece of memory at the bottom
// of the stack (lowest memory). The higher portion of the memory
@@ -123,7 +104,7 @@
// throwing the StackOverflow exception.
static constexpr size_t kStackOverflowProtectedSize = 16 * KB;
static constexpr size_t kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
- kStackOverflowReservedBytes;
+ kRuntimeStackOverflowReservedBytes;
// Creates a new native thread corresponding to the given managed peer.
// Used to implement Thread.start.
@@ -585,7 +566,7 @@
// overflow region.
tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowImplicitCheckSize;
} else {
- tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowReservedBytes;
+ tlsPtr_.stack_end = tlsPtr_.stack_begin + kRuntimeStackOverflowReservedBytes;
}
}