Fix compilation statistics collection in inliner.
Stats from the callee graph builder were not merged into the main
stats, and stats for callee graph optimizations were counted
even when the callee graph was eventually rejected.
Allocate the callee graph statistics on the arena.
Measured compilation of a big app using heaptrack:
bytes allocated in total (ignoring deallocations): 3.77GB -> 3.37GB
calls to allocation functions: 10650510 -> 8203129
Test: testrunner.py --host
Test: Stats change in the expected direction for an app.
Bug: 34053922
Change-Id: I605280d262b86af14b847acf3bb6dc077b749cc0
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 8c73f1d..3e34090 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -1272,12 +1272,19 @@
caller_instruction_counter);
callee_graph->SetArtMethod(resolved_method);
- // When they are needed, allocate `inline_stats` on the heap instead
+ // When they are needed, allocate `inline_stats_` on the Arena instead
// of on the stack, as Clang might produce a stack frame too large
// for this function, that would not fit the requirements of the
// `-Wframe-larger-than` option.
- std::unique_ptr<OptimizingCompilerStats> inline_stats =
- (stats_ == nullptr) ? nullptr : MakeUnique<OptimizingCompilerStats>();
+ if (stats_ != nullptr) {
+ // Reuse one object for all inline attempts from this caller to keep Arena memory usage low.
+ if (inline_stats_ == nullptr) {
+ void* storage = graph_->GetArena()->Alloc<OptimizingCompilerStats>(kArenaAllocMisc);
+ inline_stats_ = new (storage) OptimizingCompilerStats;
+ } else {
+ inline_stats_->Reset();
+ }
+ }
HGraphBuilder builder(callee_graph,
&dex_compilation_unit,
&outer_compilation_unit_,
@@ -1285,7 +1292,7 @@
*code_item,
compiler_driver_,
codegen_,
- inline_stats.get(),
+ inline_stats_,
resolved_method->GetQuickenedInfo(class_linker->GetImagePointerSize()),
dex_cache,
handles_);
@@ -1468,6 +1475,11 @@
DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId())
<< "No instructions can be added to the inner graph during inlining into the outer graph";
+ if (stats_ != nullptr) {
+ DCHECK(inline_stats_ != nullptr);
+ inline_stats_->AddTo(stats_);
+ }
+
return true;
}
@@ -1476,11 +1488,11 @@
const DexCompilationUnit& dex_compilation_unit) {
// Note: if the outermost_graph_ is being compiled OSR, we should not run any
// optimization that could lead to a HDeoptimize. The following optimizations do not.
- HDeadCodeElimination dce(callee_graph, stats_, "dead_code_elimination$inliner");
+ HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
HConstantFolding fold(callee_graph, "constant_folding$inliner");
HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
- InstructionSimplifier simplify(callee_graph, stats_);
- IntrinsicsRecognizer intrinsics(callee_graph, stats_);
+ InstructionSimplifier simplify(callee_graph, inline_stats_);
+ IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
HOptimization* optimizations[] = {
&intrinsics,
@@ -1504,7 +1516,7 @@
dex_compilation_unit,
compiler_driver_,
handles_,
- stats_,
+ inline_stats_,
total_number_of_dex_registers_ + code_item->registers_size_,
depth_ + 1);
inliner.Run();
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 11aacab..75d025a 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -51,7 +51,8 @@
total_number_of_dex_registers_(total_number_of_dex_registers),
depth_(depth),
number_of_inlined_instructions_(0),
- handles_(handles) {}
+ handles_(handles),
+ inline_stats_(nullptr) {}
void Run() OVERRIDE;
@@ -218,6 +219,10 @@
size_t number_of_inlined_instructions_;
VariableSizedHandleScope* const handles_;
+ // Used to record stats about optimizations on the inlined graph.
+ // If the inlining is successful, these stats are merged to the caller graph's stats.
+ OptimizingCompilerStats* inline_stats_;
+
DISALLOW_COPY_AND_ASSIGN(HInliner);
};
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 203b1ec..f7f6a14 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
#define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
+#include <atomic>
#include <iomanip>
#include <string>
#include <type_traits>
@@ -74,7 +75,7 @@
public:
OptimizingCompilerStats() {}
- void RecordStat(MethodCompilationStat stat, size_t count = 1) {
+ void RecordStat(MethodCompilationStat stat, uint32_t count = 1) {
compile_stats_[stat] += count;
}
@@ -93,7 +94,7 @@
<< " methods: " << std::fixed << std::setprecision(2)
<< compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled.";
- for (int i = 0; i < kLastStat; i++) {
+ for (size_t i = 0; i < kLastStat; i++) {
if (compile_stats_[i] != 0) {
LOG(INFO) << PrintMethodCompilationStat(static_cast<MethodCompilationStat>(i)) << ": "
<< compile_stats_[i];
@@ -102,6 +103,21 @@
}
}
+ void AddTo(OptimizingCompilerStats* other_stats) {
+ for (size_t i = 0; i != kLastStat; ++i) {
+ uint32_t count = compile_stats_[i];
+ if (count != 0) {
+ other_stats->RecordStat(static_cast<MethodCompilationStat>(i), count);
+ }
+ }
+ }
+
+ void Reset() {
+ for (size_t i = 0; i != kLastStat; ++i) {
+ compile_stats_[i] = 0u;
+ }
+ }
+
private:
std::string PrintMethodCompilationStat(MethodCompilationStat stat) const {
std::string name;
@@ -156,7 +172,7 @@
return "OptStat#" + name;
}
- AtomicInteger compile_stats_[kLastStat];
+ std::atomic<uint32_t> compile_stats_[kLastStat];
DISALLOW_COPY_AND_ASSIGN(OptimizingCompilerStats);
};