From 1e065a54845da12541572f4f149e6ab0dcd20180 Mon Sep 17 00:00:00 2001 From: Igor Murashkin Date: Wed, 9 Aug 2017 13:20:34 -0700 Subject: optimizing: Refactor statistics to use OptimizingCompilerStats helper Remove all copies of 'MaybeRecordStat', replacing them with a single OptimizingCompilerStats::MaybeRecordStat helper. Change-Id: I83b96b41439dccece3eee2e159b18c95336ea933 --- compiler/optimizing/optimizing_compiler.cc | 66 ++++++++++++++++++------------ 1 file changed, 40 insertions(+), 26 deletions(-) (limited to 'compiler/optimizing/optimizing_compiler.cc') diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 76a243f793..70bbc382b4 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -329,12 +329,6 @@ class OptimizingCompiler FINAL : public Compiler { void UnInit() const OVERRIDE; - void MaybeRecordStat(MethodCompilationStat compilation_stat) const { - if (compilation_stats_.get() != nullptr) { - compilation_stats_->RecordStat(compilation_stat); - } - } - bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, @@ -512,7 +506,8 @@ static HOptimization* BuildOptimization( } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) { CHECK(most_recent_side_effects != nullptr); CHECK(most_recent_lsa != nullptr); - return new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa); + return + new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa, stats); } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) { return new (arena) SideEffectsAnalysis(graph); } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) { @@ -778,7 +773,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction); HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction); LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph); - LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa); + LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa, stats); HSharpening* sharpening = new (arena) HSharpening( graph, codegen, dex_compilation_unit, driver, handles); InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( @@ -894,7 +889,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, ArtMethod* method, bool osr, VariableSizedHandleScope* handles) const { - MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); @@ -904,12 +900,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, // Do not attempt to compile on architectures we do not support. if (!IsInstructionSetSupported(instruction_set)) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa); + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledUnsupportedIsa); return nullptr; } if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological); + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledPathological); return nullptr; } @@ -919,7 +917,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions(); if ((compiler_options.GetCompilerFilter() == CompilerFilter::kSpace) && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledSpaceFilter); + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledSpaceFilter); return nullptr; } @@ -966,7 +965,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, compiler_driver->GetCompilerOptions(), compilation_stats_.get())); if (codegen.get() == nullptr) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledNoCodegen); + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledNoCodegen); return nullptr; } codegen->GetAssembler()->cfi().SetEnabled( @@ -995,17 +995,25 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, GraphAnalysisResult result = builder.BuildGraph(); if (result != kAnalysisSuccess) { switch (result) { - case kAnalysisSkipped: - MaybeRecordStat(MethodCompilationStat::kNotCompiledSkipped); + case kAnalysisSkipped: { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledSkipped); + } break; - case kAnalysisInvalidBytecode: - MaybeRecordStat(MethodCompilationStat::kNotCompiledInvalidBytecode); + case kAnalysisInvalidBytecode: { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledInvalidBytecode); + } break; - case kAnalysisFailThrowCatchLoop: - MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop); + case kAnalysisFailThrowCatchLoop: { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledThrowCatchLoop); + } break; - case kAnalysisFailAmbiguousArrayOp: - MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp); + case kAnalysisFailAmbiguousArrayOp: { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledAmbiguousArrayOp); + } break; case kAnalysisSuccess: UNREACHABLE(); @@ -1024,7 +1032,10 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, RegisterAllocator::Strategy regalloc_strategy = compiler_options.GetRegisterAllocationStrategy(); - AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy); + AllocateRegisters(graph, + codegen.get(), + &pass_observer, + regalloc_strategy); codegen->Compile(code_allocator); pass_observer.DumpDisassembly(); @@ -1072,7 +1083,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, &handles)); } if (codegen.get() != nullptr) { - MaybeRecordStat(MethodCompilationStat::kCompiled); + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kCompiled); method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item); if (kArenaAllocatorCountAllocations) { @@ -1083,11 +1095,13 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, } } } else { + MethodCompilationStat method_stat; if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime); + method_stat = MethodCompilationStat::kNotCompiledVerifyAtRuntime; } else { - MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError); + method_stat = MethodCompilationStat::kNotCompiledVerificationError; } + MaybeRecordStat(compilation_stats_.get(), method_stat); } if (kIsDebugBuild && @@ -1221,7 +1235,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, if (stack_map_data == nullptr || roots_data == nullptr) { return false; } - MaybeRecordStat(MethodCompilationStat::kCompiled); + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiled); codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), MemoryRegion(method_info_data, method_info_size), *code_item); -- cgit v1.2.3-59-g8ed1b From 6ef45677305048c2bf0600f1c4b98a11b2cfaffb Mon Sep 17 00:00:00 2001 From: Igor Murashkin Date: Tue, 8 Aug 2017 13:59:55 -0700 Subject: optimizing: Add statistics for # of constructor fences added/removed Statistics are attributed as follows: Added because: * HNewInstances requires a HConstructorFence following it. * HReturn requires a HConstructorFence (for final fields) preceding it. Removed because: * Optimized in Load-Store-Elimination. * Optimized in Prepare-For-Register-Allocation. Test: art/test.py Bug: 36656456 Change-Id: Ic119441c5151a5a840fc6532b411340e2d68e5eb --- compiler/optimizing/instruction_builder.cc | 6 ++++++ compiler/optimizing/load_store_elimination.cc | 17 ++++++++++++----- compiler/optimizing/nodes.cc | 8 +++++++- compiler/optimizing/nodes.h | 16 +++++++++++++--- compiler/optimizing/optimizing_compiler.cc | 8 +++++--- compiler/optimizing/optimizing_compiler_stats.h | 9 +++++++++ compiler/optimizing/prepare_for_register_allocation.cc | 4 +++- compiler/optimizing/prepare_for_register_allocation.h | 6 +++++- 8 files changed, 60 insertions(+), 14 deletions(-) (limited to 'compiler/optimizing/optimizing_compiler.cc') diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index b66883f6ad..ca3b191cb0 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -664,6 +664,9 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, DCHECK(fence_target != nullptr); AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_)); + MaybeRecordStat( + compilation_stats_, + MethodCompilationStat::kConstructorFenceGeneratedFinal); } AppendInstruction(new (arena_) HReturnVoid(dex_pc)); } else { @@ -1034,6 +1037,9 @@ void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* alloc HConstructorFence* ctor_fence = new (arena_) HConstructorFence(allocation, allocation->GetDexPc(), arena_); AppendInstruction(ctor_fence); + MaybeRecordStat( + compilation_stats_, + MethodCompilationStat::kConstructorFenceGeneratedNew); } static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class) diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index fddda3d8c0..98b859210c 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -40,8 +40,9 @@ class LSEVisitor : public HGraphVisitor { public: LSEVisitor(HGraph* graph, const HeapLocationCollector& heap_locations_collector, - const SideEffectsAnalysis& side_effects) - : HGraphVisitor(graph), + const SideEffectsAnalysis& side_effects, + OptimizingCompilerStats* stats) + : HGraphVisitor(graph, stats), heap_location_collector_(heap_locations_collector), side_effects_(side_effects), heap_values_for_(graph->GetBlocks().size(), @@ -100,7 +101,10 @@ class LSEVisitor : public HGraphVisitor { // * - Constructor fences (they never escape this thread). // * - Allocations (if they are unused). for (HInstruction* new_instance : singleton_new_instances_) { - HConstructorFence::RemoveConstructorFences(new_instance); + size_t removed = HConstructorFence::RemoveConstructorFences(new_instance); + MaybeRecordStat(stats_, + MethodCompilationStat::kConstructorFenceRemovedLSE, + removed); if (!new_instance->HasNonEnvironmentUses()) { new_instance->RemoveEnvironmentUsers(); @@ -108,7 +112,10 @@ class LSEVisitor : public HGraphVisitor { } } for (HInstruction* new_array : singleton_new_arrays_) { - HConstructorFence::RemoveConstructorFences(new_array); + size_t removed = HConstructorFence::RemoveConstructorFences(new_array); + MaybeRecordStat(stats_, + MethodCompilationStat::kConstructorFenceRemovedLSE, + removed); if (!new_array->HasNonEnvironmentUses()) { new_array->RemoveEnvironmentUsers(); @@ -663,7 +670,7 @@ void LoadStoreElimination::Run() { return; } - LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_); + LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_, stats_); for (HBasicBlock* block : graph_->GetReversePostOrder()) { lse_visitor.VisitBasicBlock(block); } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 3a1864b2ae..8644f676e8 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1198,11 +1198,14 @@ void HVariableInputSizeInstruction::RemoveAllInputs() { DCHECK_EQ(0u, InputCount()); } -void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { +size_t HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { DCHECK(instruction->GetBlock() != nullptr); // Removing constructor fences only makes sense for instructions with an object return type. DCHECK_EQ(Primitive::kPrimNot, instruction->GetType()); + // Return how many instructions were removed for statistic purposes. + size_t remove_count = 0; + // Efficient implementation that simultaneously (in one pass): // * Scans the uses list for all constructor fences. // * Deletes that constructor fence from the uses list of `instruction`. @@ -1250,6 +1253,7 @@ void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { // is removed. if (ctor_fence->InputCount() == 0u) { ctor_fence->GetBlock()->RemoveInstruction(ctor_fence); + ++remove_count; } } } @@ -1263,6 +1267,8 @@ void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { } CHECK(instruction->GetBlock() != nullptr); } + + return remove_count; } HInstruction* HConstructorFence::GetAssociatedAllocation() { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index e4431422b2..29be8acc0d 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -6630,7 +6630,9 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction { // This must *not* be called during/after prepare_for_register_allocation, // because that removes all the inputs to the fences but the fence is actually // still considered live. - static void RemoveConstructorFences(HInstruction* instruction); + // + // Returns how many HConstructorFence instructions were removed from graph. + static size_t RemoveConstructorFences(HInstruction* instruction); // Check if this constructor fence is protecting // an HNewInstance or HNewArray that is also the immediate @@ -6878,9 +6880,13 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { namespace art { +class OptimizingCompilerStats; + class HGraphVisitor : public ValueObject { public: - explicit HGraphVisitor(HGraph* graph) : graph_(graph) {} + explicit HGraphVisitor(HGraph* graph, OptimizingCompilerStats* stats = nullptr) + : stats_(stats), + graph_(graph) {} virtual ~HGraphVisitor() {} virtual void VisitInstruction(HInstruction* instruction ATTRIBUTE_UNUSED) {} @@ -6902,6 +6908,9 @@ class HGraphVisitor : public ValueObject { #undef DECLARE_VISIT_INSTRUCTION + protected: + OptimizingCompilerStats* stats_; + private: HGraph* const graph_; @@ -6910,7 +6919,8 @@ class HGraphVisitor : public ValueObject { class HGraphDelegateVisitor : public HGraphVisitor { public: - explicit HGraphDelegateVisitor(HGraph* graph) : HGraphVisitor(graph) {} + explicit HGraphDelegateVisitor(HGraph* graph, OptimizingCompilerStats* stats = nullptr) + : HGraphVisitor(graph, stats) {} virtual ~HGraphDelegateVisitor() {} // Visit functions that delegate to to super class. diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 70bbc382b4..435ca1cad4 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -711,11 +711,12 @@ NO_INLINE // Avoid increasing caller's frame size by large stack-allocated obje static void AllocateRegisters(HGraph* graph, CodeGenerator* codegen, PassObserver* pass_observer, - RegisterAllocator::Strategy strategy) { + RegisterAllocator::Strategy strategy, + OptimizingCompilerStats* stats) { { PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName, pass_observer); - PrepareForRegisterAllocation(graph).Run(); + PrepareForRegisterAllocation(graph, stats).Run(); } SsaLivenessAnalysis liveness(graph, codegen); { @@ -1035,7 +1036,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, AllocateRegisters(graph, codegen.get(), &pass_observer, - regalloc_strategy); + regalloc_strategy, + compilation_stats_.get()); codegen->Compile(code_allocator); pass_observer.DumpDisassembly(); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 098d23dac1..d6da73cc1c 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -23,6 +23,7 @@ #include #include "atomic.h" +#include "globals.h" namespace art { @@ -86,6 +87,10 @@ enum MethodCompilationStat { kNotInlinedWont, kNotInlinedRecursiveBudget, kNotInlinedProxy, + kConstructorFenceGeneratedNew, + kConstructorFenceGeneratedFinal, + kConstructorFenceRemovedLSE, + kConstructorFenceRemovedPFRA, kLastStat }; @@ -202,6 +207,10 @@ class OptimizingCompilerStats { case kNotInlinedWont: name = "NotInlinedWont"; break; case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break; case kNotInlinedProxy: name = "NotInlinedProxy"; break; + case kConstructorFenceGeneratedNew: name = "ConstructorFenceGeneratedNew"; break; + case kConstructorFenceGeneratedFinal: name = "ConstructorFenceGeneratedFinal"; break; + case kConstructorFenceRemovedLSE: name = "ConstructorFenceRemovedLSE"; break; + case kConstructorFenceRemovedPFRA: name = "ConstructorFenceRemovedPFRA"; break; case kLastStat: LOG(FATAL) << "invalid stat " diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 7c6b69fb2f..5de707a50f 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -17,6 +17,7 @@ #include "prepare_for_register_allocation.h" #include "jni_internal.h" +#include "optimizing_compiler_stats.h" #include "well_known_classes.h" namespace art { @@ -190,8 +191,9 @@ void PrepareForRegisterAllocation::VisitConstructorFence(HConstructorFence* cons // TODO: GetAssociatedAllocation should not care about multiple inputs // if we are in prepare_for_register_allocation pass only. constructor_fence->GetBlock()->RemoveInstruction(constructor_fence); + MaybeRecordStat(stats_, + MethodCompilationStat::kConstructorFenceRemovedPFRA); return; - // TODO: actually remove the dmb from the .S entrypoints (initialized variants only). } // HNewArray does not need this check because the art_quick_alloc_array does not itself diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 395d4ba2ee..2c64f016c1 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -21,6 +21,8 @@ namespace art { +class OptimizingCompilerStats; + /** * A simplification pass over the graph before doing register allocation. * For example it changes uses of null checks and bounds checks to the original @@ -28,7 +30,9 @@ namespace art { */ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { public: - explicit PrepareForRegisterAllocation(HGraph* graph) : HGraphDelegateVisitor(graph) {} + explicit PrepareForRegisterAllocation(HGraph* graph, + OptimizingCompilerStats* stats = nullptr) + : HGraphDelegateVisitor(graph, stats) {} void Run(); -- cgit v1.2.3-59-g8ed1b