Add stack overflow checks and NPE checks to the optimizing compiler.

The graph builder inserts an HNullCheck on the receiver of instance invokes
and keeps its result live across the argument setup with an HTemporary. The
code generators emit a stack overflow check in the frame entry unless the
method is a leaf with a small frame: ARM and x86 use an implicit probe load
into the reserved region below SP, while x86_64 uses an explicit comparison
against Thread::StackEndOffset with a slow path that unwinds the frame and
jumps to pThrowStackOverflow.

Change-Id: I59e97448bf29778769b79b51ee4ea43f43493d96
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index f594129..1f0b361 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -34,6 +34,37 @@
 
 namespace art {
 
+/**
+ * Helper class to add HTemporary instructions. This class is used when
+ * converting a DEX instruction to multiple HInstructions, and when those
+ * instructions do not die at the following instruction but instead live
+ * across multiple instructions.
+ */
+class Temporaries : public ValueObject {
+ public:
+  Temporaries(HGraph* graph, size_t count) : graph_(graph), count_(count), index_(0) {
+    graph_->UpdateNumberOfTemporaries(count_);
+  }
+
+  void Add(HInstruction* instruction) {
+    // We currently only support vreg size temps.
+    DCHECK(instruction->GetType() != Primitive::kPrimLong
+           && instruction->GetType() != Primitive::kPrimDouble);
+    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_++);
+    instruction->GetBlock()->AddInstruction(temp);
+    DCHECK(temp->GetPrevious() == instruction);
+  }
+
+ private:
+  HGraph* const graph_;
+
+  // The total number of temporaries that will be used.
+  const size_t count_;
+
+  // Current index in the temporary stack, updated by `Add`.
+  size_t index_;
+};
+
 static bool IsTypeSupported(Primitive::Type type) {
   return type != Primitive::kPrimFloat && type != Primitive::kPrimDouble;
 }
@@ -308,9 +339,13 @@
       arena_, number_of_arguments, return_type, dex_offset, method_idx);
 
   size_t start_index = 0;
+  Temporaries temps(graph_, is_instance_call ? 1 : 0);
   if (is_instance_call) {
     HInstruction* arg = LoadLocal(is_range ? register_index : args[0], Primitive::kPrimNot);
-    invoke->SetArgumentAt(0, arg);
+    HNullCheck* null_check = new (arena_) HNullCheck(arg, dex_offset);
+    current_block_->AddInstruction(null_check);
+    temps.Add(null_check);
+    invoke->SetArgumentAt(0, null_check);
     start_index = 1;
   }
 
@@ -343,37 +378,6 @@
   return true;
 }
 
-/**
- * Helper class to add HTemporary instructions. This class is used when
- * converting a DEX instruction to multiple HInstruction, and where those
- * instructions do not die at the following instruction, but instead spans
- * multiple instructions.
- */
-class Temporaries : public ValueObject {
- public:
-  Temporaries(HGraph* graph, size_t count) : graph_(graph), count_(count), index_(0) {
-    graph_->UpdateNumberOfTemporaries(count_);
-  }
-
-  void Add(HInstruction* instruction) {
-    // We currently only support vreg size temps.
-    DCHECK(instruction->GetType() != Primitive::kPrimLong
-           && instruction->GetType() != Primitive::kPrimDouble);
-    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_++);
-    instruction->GetBlock()->AddInstruction(temp);
-    DCHECK(temp->GetPrevious() == instruction);
-  }
-
- private:
-  HGraph* const graph_;
-
-  // The total number of temporaries that will be used.
-  const size_t count_;
-
-  // Current index in the temporary stack, updated by `Add`.
-  size_t index_;
-};
-
 bool HGraphBuilder::BuildFieldAccess(const Instruction& instruction,
                                      uint32_t dex_offset,
                                      bool is_put) {
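
Note on the builder change above: the HNullCheck result has to stay live
while the remaining invoke arguments are loaded, which is longer than the
single-instruction lifetime the rest of the builder assumes, so the
Temporaries helper pins it to a temp slot. Below is a minimal standalone
sketch of the reserve-then-Add slot discipline, using stand-in Graph and
Temporaries types rather than the real HGraph/HInstruction classes (the
std::max semantics of UpdateNumberOfTemporaries is an assumption here):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    // Stand-in for HGraph: only tracks the largest number of temp
    // slots any one DEX instruction has asked for.
    struct Graph {
      size_t number_of_temporaries = 0;
      void UpdateNumberOfTemporaries(size_t count) {
        number_of_temporaries = std::max(number_of_temporaries, count);
      }
    };

    // Mirrors the shape of the Temporaries helper added to builder.cc:
    // reserve `count` slots up front, then hand out indices in order.
    class Temporaries {
     public:
      Temporaries(Graph* graph, size_t count)
          : graph_(graph), count_(count), index_(0) {
        graph_->UpdateNumberOfTemporaries(count_);
      }

      size_t Add() {
        assert(index_ < count_);  // Never exceed the reservation.
        return index_++;
      }

     private:
      Graph* const graph_;
      const size_t count_;
      size_t index_;
    };

    int main() {
      Graph graph;
      // is_instance_call ? 1 : 0 -- one slot for the checked receiver.
      Temporaries temps(&graph, 1);
      size_t receiver_slot = temps.Add();
      assert(receiver_slot == 0);
      assert(graph.number_of_temporaries == 1);
      return 0;
    }
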
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index e0db0f1..c0964e6 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -37,6 +37,9 @@
   block_labels_.SetSize(blocks.Size());
 
   DCHECK_EQ(frame_size_, kUninitializedFrameSize);
+  // The baseline compiler does not do graph analysis prior to generating
+  // code.
+  MarkNotLeaf();
   ComputeFrameSize(GetGraph()->GetMaximumNumberOfOutVRegs()
                    + GetGraph()->GetNumberOfLocalVRegs()
                    + GetGraph()->GetNumberOfTemporaries()
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 18e3e5a..936ca28 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -131,6 +131,14 @@
   void BuildNativeGCMap(
       std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
 
+  bool IsLeafMethod() const {
+    return is_leaf_;
+  }
+
+  void MarkNotLeaf() {
+    is_leaf_ = false;
+  }
+
  protected:
   CodeGenerator(HGraph* graph, size_t number_of_registers)
       : frame_size_(kUninitializedFrameSize),
@@ -138,7 +146,8 @@
         block_labels_(graph->GetArena(), 0),
         pc_infos_(graph->GetArena(), 32),
         slow_paths_(graph->GetArena(), 8),
-        blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)) {}
+        blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)),
+        is_leaf_(true) {}
   ~CodeGenerator() {}
 
   // Register allocation logic.
@@ -171,6 +180,10 @@
   // Temporary data structure used when doing register allocation.
   bool* const blocked_registers_;
 
+  // True as long as no instruction requiring a runtime or method call
+  // has been generated; cleared by MarkNotLeaf().
+  bool is_leaf_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
 };
 
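The leaf-tracking protocol added here is intentionally simple, since the
baseline compiler has no analysis pass: is_leaf_ starts out true, any
visitor that emits a call clears it with MarkNotLeaf(), and
GenerateFrameEntry consults IsLeafMethod() to decide whether the stack
overflow check can be skipped. That is also why code_generator.cc above
calls MarkNotLeaf() unconditionally: the baseline compiler emits the frame
entry before visiting the instructions that could prove leafness. A
compilable sketch with a stand-in CodeGen class; NeedsOverflowCheck is a
hypothetical helper, the real back ends inline the condition as
skip_overflow_check:

    #include <cassert>

    // Stand-in for CodeGenerator; models only the leaf-tracking state.
    class CodeGen {
     public:
      bool IsLeafMethod() const { return is_leaf_; }
      void MarkNotLeaf() { is_leaf_ = false; }

      // Hypothetical helper: the real generators inline this as
      //   skip_overflow_check = IsLeafMethod() && !IsLargeFrame(...)
      bool NeedsOverflowCheck(bool large_frame) const {
        return !IsLeafMethod() || large_frame;
      }

     private:
      bool is_leaf_ = true;  // Optimistic: leaf until a call is emitted.
    };

    int main() {
      CodeGen cg;
      assert(!cg.NeedsOverflowCheck(/*large_frame=*/false));  // skip
      assert(cg.NeedsOverflowCheck(/*large_frame=*/true));    // check
      cg.MarkNotLeaf();  // what VisitInvokeStatic/VisitNewInstance do
      assert(cg.NeedsOverflowCheck(/*large_frame=*/false));   // check
      return 0;
    }
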
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 93e7367..90ec6cf 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -24,6 +24,7 @@
 #include "utils/assembler.h"
 #include "utils/arm/assembler_arm.h"
 #include "utils/arm/managed_register_arm.h"
+#include "utils/stack_checks.h"
 
 namespace art {
 
@@ -33,6 +34,11 @@
 
 namespace arm {
 
+static constexpr bool kExplicitStackOverflowCheck = false;
+
+static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
+static constexpr int kCurrentMethodStackOffset = 0;
+
 #define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())->
 
 class NullCheckSlowPathARM : public SlowPathCode {
@@ -52,6 +58,20 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM);
 };
 
+class StackOverflowCheckSlowPathARM : public SlowPathCode {
+ public:
+  StackOverflowCheckSlowPathARM() {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ LoadFromOffset(kLoadWord, PC, TR,
+        QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pThrowStackOverflow).Int32Value());
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM);
+};
+
 #undef __
 #define __ reinterpret_cast<ArmAssembler*>(GetAssembler())->
 
@@ -83,9 +103,6 @@
   return EQ;        // Unreachable.
 }
 
-static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
-static constexpr int kCurrentMethodStackOffset = 0;
-
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << ArmManagedRegister::FromCoreRegister(Register(reg));
 }
@@ -207,6 +224,22 @@
         codegen_(codegen) {}
 
 void CodeGeneratorARM::GenerateFrameEntry() {
+  bool skip_overflow_check = IsLeafMethod() && !IsLargeFrame(GetFrameSize(), InstructionSet::kArm);
+  if (!skip_overflow_check) {
+    if (kExplicitStackOverflowCheck) {
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM();
+      AddSlowPath(slow_path);
+
+      __ LoadFromOffset(kLoadWord, IP, TR, Thread::StackEndOffset<kArmWordSize>().Int32Value());
+      __ cmp(SP, ShifterOperand(IP));
+      __ b(slow_path->GetEntryLabel(), CC);
+    } else {
+      __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
+      __ ldr(IP, Address(IP, 0));
+      RecordPcInfo(0);
+    }
+  }
+
   core_spill_mask_ |= (1 << LR | 1 << R6 | 1 << R7);
   __ PushList(1 << LR | 1 << R6 | 1 << R7);
 
@@ -720,6 +753,7 @@
 }
 
 void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
   locations->AddTemp(ArmCoreLocation(R0));
 
@@ -785,6 +819,7 @@
   __ blx(LR);
 
   codegen_->RecordPcInfo(invoke->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
 }
 
 void LocationsBuilderARM::VisitAdd(HAdd* add) {
@@ -923,6 +958,7 @@
 };
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(ArmCoreLocation(calling_convention.GetRegisterAt(0)));
@@ -941,6 +977,7 @@
   __ blx(LR);
 
   codegen_->RecordPcInfo(instruction->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
 }
 
 void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) {
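
The ARM entry keeps both strategies behind kExplicitStackOverflowCheck and
runs the chosen check before the PushList that sets up the frame. The
implicit variant loads from GetStackOverflowReservedBytes(kArm) below SP;
when the thread is out of stack that address should fall in the protected
region at the stack end, the load faults, and the runtime's fault handler
can throw StackOverflowError at the dex pc recorded by RecordPcInfo(0).
The explicit variant compares SP with the stack end cached in the Thread
object and branches to the slow path on unsigned lower (the CC condition).
A sketch of the two predicates with plain integers standing in for
registers; kReservedBytes and the addresses in main are assumed values for
illustration:

    #include <cassert>
    #include <cstdint>

    // Assumed value for illustration; the real constant is
    // GetStackOverflowReservedBytes(kArm) from utils/stack_checks.h.
    constexpr uintptr_t kReservedBytes = 8 * 1024;

    // Implicit check: probe-read kReservedBytes below SP. Out of stack
    // means the probe lands in the guard region and faults; the fault
    // handler throws using the recorded pc.
    bool ImplicitProbeFaults(uintptr_t sp, uintptr_t guard_end) {
      return sp - kReservedBytes < guard_end;
    }

    // Explicit check: cmp SP, stack_end; b slow_path, CC (unsigned <).
    bool ExplicitCheckFails(uintptr_t sp, uintptr_t stack_end) {
      return sp < stack_end;
    }

    int main() {
      const uintptr_t stack_end = 0x100000;  // assumed stack-end address
      assert(ImplicitProbeFaults(stack_end + kReservedBytes - 1, stack_end));
      assert(!ImplicitProbeFaults(stack_end + kReservedBytes, stack_end));
      assert(ExplicitCheckFails(stack_end - 1, stack_end));
      assert(!ExplicitCheckFails(stack_end, stack_end));
      return 0;
    }
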
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c44b761..f1eb372 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -17,6 +17,7 @@
 #include "code_generator_x86.h"
 #include "gc/accounting/card_table.h"
 #include "utils/assembler.h"
+#include "utils/stack_checks.h"
 #include "utils/x86/assembler_x86.h"
 #include "utils/x86/managed_register_x86.h"
 
@@ -33,6 +34,11 @@
 
 namespace x86 {
 
+static constexpr bool kExplicitStackOverflowCheck = false;
+
+static constexpr int kNumberOfPushedRegistersAtEntry = 1;
+static constexpr int kCurrentMethodStackOffset = 0;
+
 #define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())->
 
 class NullCheckSlowPathX86 : public SlowPathCode {
@@ -50,6 +56,21 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
 };
 
+class StackOverflowCheckSlowPathX86 : public SlowPathCode {
+ public:
+  StackOverflowCheckSlowPathX86() {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ addl(ESP,
+            Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
+    __ fs()->jmp(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowStackOverflow)));
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86);
+};
+
 #undef __
 #define __ reinterpret_cast<X86Assembler*>(GetAssembler())->
 
@@ -67,9 +88,6 @@
   return kEqual;
 }
 
-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
-static constexpr int kCurrentMethodStackOffset = 0;
-
 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << X86ManagedRegister::FromCpuRegister(Register(reg));
 }
@@ -186,6 +204,20 @@
 
   // The return PC has already been pushed on the stack.
   __ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
+
+  bool skip_overflow_check = IsLeafMethod() && !IsLargeFrame(GetFrameSize(), InstructionSet::kX86);
+  if (!skip_overflow_check) {
+    if (kExplicitStackOverflowCheck) {
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86();
+      AddSlowPath(slow_path);
+
+      __ fs()->cmpl(ESP, Address::Absolute(Thread::StackEndOffset<kX86WordSize>()));
+      __ j(kLess, slow_path->GetEntryLabel());
+    } else {
+      __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86))));
+    }
+  }
+
   __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
 }
 
@@ -682,6 +714,7 @@
 }
 
 void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
   locations->AddTemp(X86CpuLocation(EAX));
 
@@ -739,6 +772,7 @@
   // (temp + offset_of_quick_compiled_code)()
   __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
 
+  DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke->GetDexPc());
 }
 
@@ -889,6 +923,7 @@
 }
 
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetOut(X86CpuLocation(EAX));
   InvokeRuntimeCallingConvention calling_convention;
@@ -906,6 +941,7 @@
       Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocObjectWithAccessCheck)));
 
   codegen_->RecordPcInfo(instruction->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
 }
 
 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
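
The x86 implicit probe, testl EAX, [ESP - reserved], sits after the subl
that allocates the frame. test only reads its memory operand to set EFLAGS
and writes no register, so EAX is not actually clobbered and any register
name would do; the point is the load itself, which faults in the guard
region when the stack is exhausted. In C++ terms the probe is a discarded
volatile read (Probe below is an illustrative name, not an ART function):

    #include <cstdint>

    // Illustrative stand-in for: testl EAX, [ESP - reserved_bytes].
    // The read sets flags or faults, but writes no register.
    void Probe(const volatile uint8_t* below_sp) {
      (void)*below_sp;  // faults iff the address is in the guard region
    }

    int main() {
      uint8_t local = 0;
      Probe(&local);  // probing a live stack address is harmless
      return 0;
    }
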
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 9c12771..b7c2ad8 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -126,7 +126,7 @@
 class CodeGeneratorX86 : public CodeGenerator {
  public:
   explicit CodeGeneratorX86(HGraph* graph);
-  virtual ~CodeGeneratorX86() { }
+  virtual ~CodeGeneratorX86() {}
 
   virtual void GenerateFrameEntry() OVERRIDE;
   virtual void GenerateFrameExit() OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index d20dff0..641e8e1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -23,6 +23,7 @@
 #include "mirror/object_reference.h"
 #include "thread.h"
 #include "utils/assembler.h"
+#include "utils/stack_checks.h"
 #include "utils/x86_64/assembler_x86_64.h"
 #include "utils/x86_64/managed_register_x86_64.h"
 
@@ -34,6 +35,15 @@
 
 namespace x86_64 {
 
+static constexpr bool kExplicitStackOverflowCheck = true;
+
+// Some x86_64 instructions require a register to be available as a temp.
+static constexpr Register TMP = R11;
+
+static constexpr int kNumberOfPushedRegistersAtEntry = 1;
+static constexpr int kCurrentMethodStackOffset = 0;
+
 #define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
 
 class NullCheckSlowPathX86_64 : public SlowPathCode {
@@ -42,7 +52,8 @@
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
-    __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowNullPointer), true));
+    __ gs()->call(
+        Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowNullPointer), true));
     codegen->RecordPcInfo(dex_pc_);
   }
 
@@ -51,6 +62,22 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
 };
 
+class StackOverflowCheckSlowPathX86_64 : public SlowPathCode {
+ public:
+  StackOverflowCheckSlowPathX86_64() {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ addq(CpuRegister(RSP),
+            Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+    __ gs()->jmp(
+        Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowStackOverflow), true));
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86_64);
+};
+
 #undef __
 #define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())->
 
@@ -68,12 +95,6 @@
   return kEqual;
 }
 
-// Some x86_64 instructions require a register to be available as temp.
-static constexpr Register TMP = R11;
-
-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
-static constexpr int kCurrentMethodStackOffset = 0;
-
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << X86_64ManagedRegister::FromCpuRegister(Register(reg));
 }
@@ -148,7 +169,26 @@
   core_spill_mask_ |= (1 << kFakeReturnRegister);
 
   // The return PC has already been pushed on the stack.
-  __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+  __ subq(CpuRegister(RSP),
+          Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+
+  bool skip_overflow_check = IsLeafMethod()
+      && !IsLargeFrame(GetFrameSize(), InstructionSet::kX86_64);
+
+  if (!skip_overflow_check) {
+    if (kExplicitStackOverflowCheck) {
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64();
+      AddSlowPath(slow_path);
+
+      __ gs()->cmpq(CpuRegister(RSP),
+                    Address::Absolute(Thread::StackEndOffset<kX86_64WordSize>(), true));
+      __ j(kLess, slow_path->GetEntryLabel());
+    } else {
+      __ testq(CpuRegister(RAX), Address(
+          CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
+    }
+  }
+
   __ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
 }
 
@@ -619,6 +659,7 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeStatic(HInvokeStatic* invoke) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
   locations->AddTemp(X86_64CpuLocation(RDI));
 
@@ -673,6 +714,7 @@
   // (temp + offset_of_quick_compiled_code)()
   __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
 
+  DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke->GetDexPc());
 }
 
@@ -809,6 +851,7 @@
 }
 
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetOut(X86_64CpuLocation(RAX));
   instruction->SetLocations(locations);
@@ -822,6 +865,7 @@
   __ gs()->call(Address::Absolute(
       QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocObjectWithAccessCheck), true));
 
+  DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(instruction->GetDexPc());
 }
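
x86_64 is the one back end that enables the explicit check
(kExplicitStackOverflowCheck = true). Its slow path first re-adds
GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize,
undoing the frame allocation, and only then tail-jumps to
pThrowStackOverflow, presumably so the throw entrypoint does not push even
deeper into the exhausted stack; the x86 slow path mirrors this with an
addl/jmp pair. The arithmetic, with an assumed frame size (FrameAdjustment
is an illustrative helper, not ART code):

    #include <cassert>
    #include <cstddef>

    constexpr size_t kX86_64WordSize = 8;
    constexpr size_t kNumberOfPushedRegistersAtEntry = 1;  // return PC

    // The amount subtracted from RSP at frame entry and added back by
    // StackOverflowCheckSlowPathX86_64 before jumping to the entrypoint.
    size_t FrameAdjustment(size_t frame_size) {
      return frame_size - kNumberOfPushedRegistersAtEntry * kX86_64WordSize;
    }

    int main() {
      const size_t frame_size = 64;  // assumed value for illustration
      size_t rsp = 0x8000;
      rsp -= FrameAdjustment(frame_size);  // GenerateFrameEntry: subq
      rsp += FrameAdjustment(frame_size);  // slow path: addq, frame gone
      assert(rsp == 0x8000);
      return 0;
    }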