Tweak inlining heuristics.
go/lem driven:
Performance:
Richards +41%
CaffeineMethod +43%
ReversiBench: +52%
Towers: +73%
Tak: +85%
Memory use: 7% less memory
CompileTime: 14% increase
CodeSize: 8% increase
Last three measures are now more acceptable given we JIT.
Change-Id: Ic4aa6535d2b76cf3545ef00e9b2ae32330f10745
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index e6acab4..38c2927 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -52,7 +52,7 @@
static const bool kDefaultGenerateDebugInfo = kIsDebugBuild;
static const bool kDefaultIncludePatchInformation = false;
static const size_t kDefaultInlineDepthLimit = 3;
- static const size_t kDefaultInlineMaxCodeUnits = 20;
+ static const size_t kDefaultInlineMaxCodeUnits = 32;
static constexpr size_t kUnsetInlineDepthLimit = -1;
static constexpr size_t kUnsetInlineMaxCodeUnits = -1;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index db11709..6d5f6eb 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -42,7 +42,14 @@
namespace art {
-static constexpr size_t kMaximumNumberOfHInstructions = 12;
+static constexpr size_t kMaximumNumberOfHInstructions = 32;
+
+// Limit the number of dex registers that we accumulate while inlining
+// to avoid creating large amount of nested environments.
+static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
+
+// Avoid inlining within a huge method due to memory pressure.
+static constexpr size_t kMaximumCodeUnitSize = 4096;
void HInliner::Run() {
const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
@@ -50,6 +57,9 @@
|| (compiler_options.GetInlineMaxCodeUnits() == 0)) {
return;
}
+ if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) {
+ return;
+ }
if (graph_->IsDebuggable()) {
// For simplicity, we currently never inline when the graph is debuggable. This avoids
// doing some logic in the runtime to discover if a method could have been inlined.
@@ -577,6 +587,7 @@
compiler_driver_,
handles_,
stats_,
+ total_number_of_dex_registers_ + code_item->registers_size_,
depth_ + 1);
inliner.Run();
number_of_instructions_budget += inliner.number_of_inlined_instructions_;
@@ -608,6 +619,10 @@
HReversePostOrderIterator it(*callee_graph);
it.Advance(); // Past the entry block, it does not contain instructions that prevent inlining.
size_t number_of_instructions = 0;
+
+ bool can_inline_environment =
+ total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters;
+
for (; !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
if (block->IsLoopHeader()) {
@@ -621,10 +636,17 @@
instr_it.Advance()) {
if (number_of_instructions++ == number_of_instructions_budget) {
VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
- << " could not be inlined because it is too big.";
+ << " is not inlined because its caller has reached"
+ << " its instruction budget limit.";
return false;
}
HInstruction* current = instr_it.Current();
+ if (!can_inline_environment && current->NeedsEnvironment()) {
+ VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ << " is not inlined because its caller has reached"
+ << " its environment budget limit.";
+ return false;
+ }
if (current->IsInvokeInterface()) {
// Disable inlining of interface calls. The cost in case of entering the
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 7b9fb73..8de510e 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -40,13 +40,15 @@
CompilerDriver* compiler_driver,
StackHandleScopeCollection* handles,
OptimizingCompilerStats* stats,
- size_t depth = 0)
+ size_t total_number_of_dex_registers,
+ size_t depth)
: HOptimization(outer_graph, kInlinerPassName, stats),
outermost_graph_(outermost_graph),
outer_compilation_unit_(outer_compilation_unit),
caller_compilation_unit_(caller_compilation_unit),
codegen_(codegen),
compiler_driver_(compiler_driver),
+ total_number_of_dex_registers_(total_number_of_dex_registers),
depth_(depth),
number_of_inlined_instructions_(0),
handles_(handles) {}
@@ -88,6 +90,7 @@
const DexCompilationUnit& caller_compilation_unit_;
CodeGenerator* const codegen_;
CompilerDriver* const compiler_driver_;
+ const size_t total_number_of_dex_registers_;
const size_t depth_;
size_t number_of_inlined_instructions_;
StackHandleScopeCollection* const handles_;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index ba43518..37127bb 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -426,8 +426,18 @@
if (!should_inline) {
return;
}
+ size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
HInliner* inliner = new (graph->GetArena()) HInliner(
- graph, graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
+ graph,
+ graph,
+ codegen,
+ dex_compilation_unit,
+ dex_compilation_unit,
+ driver,
+ handles,
+ stats,
+ number_of_dex_registers,
+ /* depth */ 0);
HOptimization* optimizations[] = { inliner };
RunOptimizations(optimizations, arraysize(optimizations), pass_observer);