Tweak inlining heuristics.

go/lem driven:

Performance:
Richards +41%
CaffeineMethod +43%
ReversiBench: +52%
Towers: +73%
Tak: +85%

Memory use: 7% less memory
CompileTime: 14% increase
CodeSize: 8% increase

Last three measures are now more acceptable given we JIT.

Change-Id: Ic4aa6535d2b76cf3545ef00e9b2ae32330f10745
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index e6acab4..38c2927 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -52,7 +52,7 @@
   static const bool kDefaultGenerateDebugInfo = kIsDebugBuild;
   static const bool kDefaultIncludePatchInformation = false;
   static const size_t kDefaultInlineDepthLimit = 3;
-  static const size_t kDefaultInlineMaxCodeUnits = 20;
+  static const size_t kDefaultInlineMaxCodeUnits = 32;
   static constexpr size_t kUnsetInlineDepthLimit = -1;
   static constexpr size_t kUnsetInlineMaxCodeUnits = -1;
 
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index db11709..6d5f6eb 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -42,7 +42,14 @@
 
 namespace art {
 
-static constexpr size_t kMaximumNumberOfHInstructions = 12;
+static constexpr size_t kMaximumNumberOfHInstructions = 32;
+
+// Limit the number of dex registers that we accumulate while inlining
+// to avoid creating large amount of nested environments.
+static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
+
+// Avoid inlining within a huge method due to memory pressure.
+static constexpr size_t kMaximumCodeUnitSize = 4096;
 
 void HInliner::Run() {
   const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
@@ -50,6 +57,9 @@
       || (compiler_options.GetInlineMaxCodeUnits() == 0)) {
     return;
   }
+  if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) {
+    return;
+  }
   if (graph_->IsDebuggable()) {
     // For simplicity, we currently never inline when the graph is debuggable. This avoids
     // doing some logic in the runtime to discover if a method could have been inlined.
@@ -577,6 +587,7 @@
                      compiler_driver_,
                      handles_,
                      stats_,
+                     total_number_of_dex_registers_ + code_item->registers_size_,
                      depth_ + 1);
     inliner.Run();
     number_of_instructions_budget += inliner.number_of_inlined_instructions_;
@@ -608,6 +619,10 @@
   HReversePostOrderIterator it(*callee_graph);
   it.Advance();  // Past the entry block, it does not contain instructions that prevent inlining.
   size_t number_of_instructions = 0;
+
+  bool can_inline_environment =
+      total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters;
+
   for (; !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (block->IsLoopHeader()) {
@@ -621,10 +636,17 @@
          instr_it.Advance()) {
       if (number_of_instructions++ ==  number_of_instructions_budget) {
         VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
-                       << " could not be inlined because it is too big.";
+                       << " is not inlined because its caller has reached"
+                       << " its instruction budget limit.";
         return false;
       }
       HInstruction* current = instr_it.Current();
+      if (!can_inline_environment && current->NeedsEnvironment()) {
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+                       << " is not inlined because its caller has reached"
+                       << " its environment budget limit.";
+        return false;
+      }
 
       if (current->IsInvokeInterface()) {
         // Disable inlining of interface calls. The cost in case of entering the
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 7b9fb73..8de510e 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -40,13 +40,15 @@
            CompilerDriver* compiler_driver,
            StackHandleScopeCollection* handles,
            OptimizingCompilerStats* stats,
-           size_t depth = 0)
+           size_t total_number_of_dex_registers,
+           size_t depth)
       : HOptimization(outer_graph, kInlinerPassName, stats),
         outermost_graph_(outermost_graph),
         outer_compilation_unit_(outer_compilation_unit),
         caller_compilation_unit_(caller_compilation_unit),
         codegen_(codegen),
         compiler_driver_(compiler_driver),
+        total_number_of_dex_registers_(total_number_of_dex_registers),
         depth_(depth),
         number_of_inlined_instructions_(0),
         handles_(handles) {}
@@ -88,6 +90,7 @@
   const DexCompilationUnit& caller_compilation_unit_;
   CodeGenerator* const codegen_;
   CompilerDriver* const compiler_driver_;
+  const size_t total_number_of_dex_registers_;
   const size_t depth_;
   size_t number_of_inlined_instructions_;
   StackHandleScopeCollection* const handles_;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index ba43518..37127bb 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -426,8 +426,18 @@
   if (!should_inline) {
     return;
   }
+  size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
   HInliner* inliner = new (graph->GetArena()) HInliner(
-      graph, graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
+      graph,
+      graph,
+      codegen,
+      dex_compilation_unit,
+      dex_compilation_unit,
+      driver,
+      handles,
+      stats,
+      number_of_dex_registers,
+      /* depth */ 0);
   HOptimization* optimizations[] = { inliner };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);