Optimize stack overflow handling.
We now subtract the frame size directly from the stack pointer for
methods whose frame is smaller than the reserved stack overflow region
(Thread::kStackOverflowReservedUsableBytes); larger frames still
compute the new stack pointer in a temporary and only commit it after
the check passes. Also changed the code to use slow paths instead of
launchpads.
Delete kStackOverflow launchpad since it is no longer needed.
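
For illustration, a minimal C++ sketch of the two explicit-check cases
(hypothetical names; the real change below emits LIR through Mir2Lir
rather than manipulating SP directly):

    #include <cstddef>
    #include <cstdint>

    // Returns true if the frame fits; false means the slow path must
    // undo the SP adjustment (if any) and throw StackOverflowError.
    bool EnterFrame(uintptr_t* sp, uintptr_t stack_limit,
                    size_t frame_size, size_t reserved_bytes) {
      if (frame_size <= reserved_bytes) {
        // Small frame: subtract first, compare after. Even if SP dips
        // below the limit, the reserved region can still absorb an
        // incoming signal before the slow path throws.
        *sp -= frame_size;
        return *sp >= stack_limit;
      }
      // Large frame: compute the new SP in a temporary (LR on ARM) and
      // commit it only after the check passes, so SP never points past
      // the reserved region.
      uintptr_t candidate = *sp - frame_size;
      if (candidate < stack_limit) {
        return false;
      }
      *sp = candidate;
      return true;
    }
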
ARM optimizations:
One fewer move per stack overflow check (when the stack overflow fault
handler is not used): small frames now subtract directly into SP
instead of computing the new value in LR and copying it back. In the
slow path, use ldr pc to jump to the throw entrypoint instead of
ldr r12 followed by a branch through r12.
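
In terms of the codegen calls used in the diff, the slow-path tail now
ends with a single load into the pc (the old two-instruction sequence
is described in comments only, reconstructed from the text above, not
copied from the removed launchpad code):

    // Before (roughly): ldr r12, [rSELF, #pThrowStackOverflow],
    // then a branch through r12 (two instructions).
    // After: one load of the entrypoint address directly into the pc
    // (assumes codegen and target are in thumb2 mode):
    m2l_->LoadWordDisp(rARM_SELF, func_offset.Int32Value(), rARM_PC);
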
Code size (boot.oat):
Before: 58405348
After: 57803236
TODO: X86 doesn't handle the large-frame case. This could cause an
incoming signal to go past the end of the stack (unlikely, however).
Change-Id: Ie3a5635cd6fb09de27960e1f8cee45bfae38fb33
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index bba3d40..94f0ca4 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -358,23 +358,60 @@
*/
NewLIR1(kThumb2VPushCS, num_fp_spills_);
}
+
+ // TODO: 64 bit will be different code.
+ const int frame_size_without_spills = frame_size_ - spill_count * 4;
if (!skip_overflow_check) {
if (Runtime::Current()->ExplicitStackOverflowChecks()) {
- OpRegRegImm(kOpSub, rARM_LR, rARM_SP, frame_size_ - (spill_count * 4));
- GenRegRegCheck(kCondUlt, rARM_LR, r12, kThrowStackOverflow);
- OpRegCopy(rARM_SP, rARM_LR); // Establish stack
+ class StackOverflowSlowPath : public LIRSlowPath {
+ public:
+ StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
+ : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr),
+ sp_displace_(sp_displace) {
+ }
+ void Compile() OVERRIDE {
+ m2l_->ResetRegPool();
+ m2l_->ResetDefTracking();
+ GenerateTargetLabel();
+ if (restore_lr_) {
+ m2l_->LoadWordDisp(kArmRegSP, sp_displace_ - 4, kArmRegLR);
+ }
+ m2l_->OpRegImm(kOpAdd, kArmRegSP, sp_displace_);
+ m2l_->ClobberCallerSave();
+ ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pThrowStackOverflow);
+ // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
+ // codegen and target are in thumb2 mode.
+ m2l_->LoadWordDisp(rARM_SELF, func_offset.Int32Value(), rARM_PC);
+ }
+
+ private:
+ const bool restore_lr_;
+ const size_t sp_displace_;
+ };
+ if (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes) {
+ OpRegRegImm(kOpSub, rARM_LR, rARM_SP, frame_size_without_spills);
+ LIR* branch = OpCmpBranch(kCondUlt, rARM_LR, r12, nullptr);
+ // Need to restore LR since we used it as a temp.
+ AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true,
+ frame_size_without_spills));
+ OpRegCopy(rARM_SP, rARM_LR); // Establish stack
+ } else {
+ // If the frame is small enough we are guaranteed to have enough space that remains to
+ // handle signals on the user stack.
+ OpRegRegImm(kOpSub, rARM_SP, rARM_SP, frame_size_without_spills);
+ LIR* branch = OpCmpBranch(kCondUlt, rARM_SP, r12, nullptr);
+ AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
+ }
} else {
// Implicit stack overflow check.
// Generate a load from [sp, #-framesize]. If this is in the stack
// redzone we will get a segmentation fault.
- uint32_t full_frame_size = frame_size_ - (spill_count * 4);
-
- OpRegImm(kOpSub, rARM_SP, full_frame_size);
+ OpRegImm(kOpSub, rARM_SP, frame_size_without_spills);
LoadWordDisp(rARM_SP, 0, rARM_LR);
MarkPossibleStackOverflowException();
}
} else {
- OpRegImm(kOpSub, rARM_SP, frame_size_ - (spill_count * 4));
+ OpRegImm(kOpSub, rARM_SP, frame_size_without_spills);
}
FlushIns(ArgLocs, rl_method);