From ca6fff898afcb62491458ae8bcd428bfb3043da1 Mon Sep 17 00:00:00 2001
From: Vladimir Marko
Date: Tue, 3 Oct 2017 14:49:14 +0100
Subject: ART: Use ScopedArenaAllocator for pass-local data.

Passes using local ArenaAllocator were hiding their memory usage
from the allocation counting, making it difficult to track down
where memory was used. Using ScopedArenaAllocator reveals the
memory usage.

This changes the HGraph constructor, which requires a lot of
changes in tests. Refactor these tests to limit the amount of
work needed the next time we change that constructor.

Test: m test-art-host-gtest
Test: testrunner.py --host
Test: Build with kArenaAllocatorCountAllocations = true.
Bug: 64312607
Change-Id: I34939e4086b500d6e827ff3ef2211d1a421ac91a
---
 compiler/optimizing/optimizing_compiler.cc | 121 +++++++++++++++++------------
 1 file changed, 72 insertions(+), 49 deletions(-)

(limited to 'compiler/optimizing/optimizing_compiler.cc')

diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 1e06ea86a2..50b870ba8c 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -45,6 +45,7 @@
 #include "base/dumpable.h"
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "base/scoped_arena_allocator.h"
 #include "base/timing_logger.h"
 #include "bounds_check_elimination.h"
 #include "builder.h"
@@ -148,7 +149,7 @@ class PassObserver : public ValueObject {
         cached_method_name_(),
         timing_logger_enabled_(compiler_driver->GetDumpPasses()),
         timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
-        disasm_info_(graph->GetArena()),
+        disasm_info_(graph->GetAllocator()),
         visualizer_oss_(),
         visualizer_output_(visualizer_output),
         visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()),
@@ -364,7 +365,8 @@ class OptimizingCompiler FINAL : public Compiler {
   // 2) Transforms the graph to SSA. Returns null if it failed.
   // 3) Runs optimizations on the graph, including register allocator.
   // 4) Generates code with the `code_allocator` provided.
-  CodeGenerator* TryCompile(ArenaAllocator* arena,
+  CodeGenerator* TryCompile(ArenaAllocator* allocator,
+                            ArenaStack* arena_stack,
                             CodeVectorAllocator* code_allocator,
                             const DexFile::CodeItem* code_item,
                             uint32_t access_flags,
@@ -608,7 +610,7 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
     return;
   }
   size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
-  HInliner* inliner = new (graph->GetArena()) HInliner(
+  HInliner* inliner = new (graph->GetAllocator()) HInliner(
       graph,                   // outer_graph
       graph,                   // outermost_graph
       codegen,
@@ -631,17 +633,18 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
                                               PassObserver* pass_observer) const {
   UNUSED(codegen);  // To avoid compilation error when compiling for svelte
   OptimizingCompilerStats* stats = compilation_stats_.get();
-  ArenaAllocator* arena = graph->GetArena();
+  ArenaAllocator* allocator = graph->GetAllocator();
   switch (instruction_set) {
 #if defined(ART_ENABLE_CODEGEN_arm)
     case kThumb2:
    case kArm: {
       arm::InstructionSimplifierArm* simplifier =
-          new (arena) arm::InstructionSimplifierArm(graph, stats);
-      SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+          new (allocator) arm::InstructionSimplifierArm(graph, stats);
+      SideEffectsAnalysis* side_effects = new (allocator) SideEffectsAnalysis(graph);
+      GVNOptimization* gvn =
+          new (allocator) GVNOptimization(graph, *side_effects, "GVN$after_arch");
       HInstructionScheduling* scheduling =
-          new (arena) HInstructionScheduling(graph, instruction_set, codegen);
+          new (allocator) HInstructionScheduling(graph, instruction_set, codegen);
       HOptimization* arm_optimizations[] = {
         simplifier,
         side_effects,
@@ -655,11 +658,12 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
       arm64::InstructionSimplifierArm64* simplifier =
-          new (arena) arm64::InstructionSimplifierArm64(graph, stats);
-      SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+          new (allocator) arm64::InstructionSimplifierArm64(graph, stats);
+      SideEffectsAnalysis* side_effects = new (allocator) SideEffectsAnalysis(graph);
+      GVNOptimization* gvn =
+          new (allocator) GVNOptimization(graph, *side_effects, "GVN$after_arch");
       HInstructionScheduling* scheduling =
-          new (arena) HInstructionScheduling(graph, instruction_set);
+          new (allocator) HInstructionScheduling(graph, instruction_set);
       HOptimization* arm64_optimizations[] = {
         simplifier,
         side_effects,
@@ -673,11 +677,12 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips: {
       mips::InstructionSimplifierMips* simplifier =
-          new (arena) mips::InstructionSimplifierMips(graph, codegen, stats);
-      SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+          new (allocator) mips::InstructionSimplifierMips(graph, codegen, stats);
+      SideEffectsAnalysis* side_effects = new (allocator) SideEffectsAnalysis(graph);
+      GVNOptimization* gvn =
+          new (allocator) GVNOptimization(graph, *side_effects, "GVN$after_arch");
       mips::PcRelativeFixups* pc_relative_fixups =
-          new (arena) mips::PcRelativeFixups(graph, codegen, stats);
+          new (allocator) mips::PcRelativeFixups(graph, codegen, stats);
       HOptimization* mips_optimizations[] = {
         simplifier,
         side_effects,
@@ -690,8 +695,9 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64: {
-      SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+      SideEffectsAnalysis* side_effects = new (allocator) SideEffectsAnalysis(graph);
+      GVNOptimization* gvn =
+          new (allocator) GVNOptimization(graph, *side_effects, "GVN$after_arch");
       HOptimization* mips64_optimizations[] = {
         side_effects,
         gvn,
@@ -702,12 +708,13 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86: {
-      SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+      SideEffectsAnalysis* side_effects = new (allocator) SideEffectsAnalysis(graph);
+      GVNOptimization* gvn =
+          new (allocator) GVNOptimization(graph, *side_effects, "GVN$after_arch");
       x86::PcRelativeFixups* pc_relative_fixups =
-          new (arena) x86::PcRelativeFixups(graph, codegen, stats);
+          new (allocator) x86::PcRelativeFixups(graph, codegen, stats);
       x86::X86MemoryOperandGeneration* memory_gen =
-          new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
+          new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats);
       HOptimization* x86_optimizations[] = {
         side_effects,
         gvn,
@@ -720,10 +727,11 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64: {
-      SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-      GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+      SideEffectsAnalysis* side_effects = new (allocator) SideEffectsAnalysis(graph);
+      GVNOptimization* gvn =
+          new (allocator) GVNOptimization(graph, *side_effects, "GVN$after_arch");
       x86::X86MemoryOperandGeneration* memory_gen =
-          new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
+          new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats);
       HOptimization* x86_64_optimizations[] = {
         side_effects,
         gvn,
@@ -756,7 +764,9 @@ static void AllocateRegisters(HGraph* graph,
   }
   {
     PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
-    RegisterAllocator::Create(graph->GetArena(), codegen, liveness, strategy)->AllocateRegisters();
+    RegisterAllocator* register_allocator =
+        RegisterAllocator::Create(graph->GetAllocator(), codegen, liveness, strategy);
+    register_allocator->AllocateRegisters();
   }
 }
 
@@ -767,7 +777,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
                                           PassObserver* pass_observer,
                                           VariableSizedHandleScope* handles) const {
   OptimizingCompilerStats* stats = compilation_stats_.get();
-  ArenaAllocator* arena = graph->GetArena();
+  ArenaAllocator* arena = graph->GetAllocator();
   if (driver->GetCompilerOptions().GetPassesToRun() != nullptr) {
     ArenaVector<HOptimization*> optimizations = BuildOptimizations(
         *driver->GetCompilerOptions().GetPassesToRun(),
@@ -865,7 +875,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
 }
 
 static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
-  ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
+  ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetAllocator()->Adapter());
   codegen->EmitLinkerPatches(&linker_patches);
 
   // Sort patches by literal offset. Required for .oat_patches encoding.
@@ -912,7 +922,8 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
   return compiled_method;
 }
 
-CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
+CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
+                                              ArenaStack* arena_stack,
                                               CodeVectorAllocator* code_allocator,
                                               const DexFile::CodeItem* code_item,
                                               uint32_t access_flags,
@@ -970,8 +981,9 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
       /* verified_method */ nullptr,
       dex_cache);
 
-  HGraph* graph = new (arena) HGraph(
-      arena,
+  HGraph* graph = new (allocator) HGraph(
+      allocator,
+      arena_stack,
       dex_file,
       method_idx,
       compiler_driver->GetInstructionSet(),
@@ -1024,7 +1036,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
                           codegen.get(),
                           compilation_stats_.get(),
                           interpreter_metadata,
-                          dex_cache,
                           handles);
     GraphAnalysisResult result = builder.BuildGraph();
     if (result != kAnalysisSuccess) {
@@ -1091,11 +1102,12 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
     DCHECK(Runtime::Current()->IsAotCompiler());
     const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
     DCHECK(!verified_method->HasRuntimeThrow());
-    if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
-        || verifier::CanCompilerHandleVerificationFailure(
-            verified_method->GetEncounteredVerificationFailures())) {
-      ArenaAllocator arena(Runtime::Current()->GetArenaPool());
-      CodeVectorAllocator code_allocator(&arena);
+    if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) ||
+        verifier::CanCompilerHandleVerificationFailure(
+            verified_method->GetEncounteredVerificationFailures())) {
+      ArenaAllocator allocator(Runtime::Current()->GetArenaPool());
+      ArenaStack arena_stack(Runtime::Current()->GetArenaPool());
+      CodeVectorAllocator code_allocator(&allocator);
       std::unique_ptr<CodeGenerator> codegen;
       {
         ScopedObjectAccess soa(Thread::Current());
@@ -1103,7 +1115,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
       // Go to native so that we don't block GC during compilation.
       ScopedThreadSuspension sts(soa.Self(), kNative);
       codegen.reset(
-          TryCompile(&arena,
+          TryCompile(&allocator,
+                     &arena_stack,
                      &code_allocator,
                      code_item,
                      access_flags,
@@ -1120,12 +1133,16 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
 
     if (codegen.get() != nullptr) {
       MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiled);
-      method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item);
+      method = Emit(&allocator, &code_allocator, codegen.get(), compiler_driver, code_item);
 
       if (kArenaAllocatorCountAllocations) {
-        if (arena.BytesAllocated() > kArenaAllocatorMemoryReportThreshold) {
-          MemStats mem_stats(arena.GetMemStats());
-          LOG(INFO) << dex_file.PrettyMethod(method_idx) << " " << Dumpable<MemStats>(mem_stats);
+        size_t total_allocated = allocator.BytesAllocated() + arena_stack.PeakBytesAllocated();
+        if (total_allocated > kArenaAllocatorMemoryReportThreshold) {
+          MemStats mem_stats(allocator.GetMemStats());
+          MemStats peak_stats(arena_stack.GetPeakStats());
+          LOG(INFO) << dex_file.PrettyMethod(method_idx)
+                    << "\n" << Dumpable<MemStats>(mem_stats)
+                    << "\n" << Dumpable<MemStats>(peak_stats);
         }
       }
     }
@@ -1200,8 +1217,9 @@ bool OptimizingCompiler::JitCompile(Thread* self,
   const uint32_t access_flags = method->GetAccessFlags();
   const InvokeType invoke_type = method->GetInvokeType();
 
-  ArenaAllocator arena(Runtime::Current()->GetJitArenaPool());
-  CodeVectorAllocator code_allocator(&arena);
+  ArenaAllocator allocator(Runtime::Current()->GetJitArenaPool());
+  ArenaStack arena_stack(Runtime::Current()->GetJitArenaPool());
+  CodeVectorAllocator code_allocator(&allocator);
   VariableSizedHandleScope handles(self);
 
   std::unique_ptr<CodeGenerator> codegen;
@@ -1209,7 +1227,8 @@ bool OptimizingCompiler::JitCompile(Thread* self,
     // Go to native so that we don't block GC during compilation.
     ScopedThreadSuspension sts(self, kNative);
     codegen.reset(
-        TryCompile(&arena,
+        TryCompile(&allocator,
+                   &arena_stack,
                    &code_allocator,
                    code_item,
                    access_flags,
@@ -1227,9 +1246,13 @@ bool OptimizingCompiler::JitCompile(Thread* self,
     }
 
    if (kArenaAllocatorCountAllocations) {
-      if (arena.BytesAllocated() > kArenaAllocatorMemoryReportThreshold) {
-        MemStats mem_stats(arena.GetMemStats());
-        LOG(INFO) << dex_file->PrettyMethod(method_idx) << " " << Dumpable<MemStats>(mem_stats);
+      size_t total_allocated = allocator.BytesAllocated() + arena_stack.PeakBytesAllocated();
+      if (total_allocated > kArenaAllocatorMemoryReportThreshold) {
+        MemStats mem_stats(allocator.GetMemStats());
+        MemStats peak_stats(arena_stack.GetPeakStats());
+        LOG(INFO) << dex_file->PrettyMethod(method_idx)
+                  << "\n" << Dumpable<MemStats>(mem_stats)
+                  << "\n" << Dumpable<MemStats>(peak_stats);
       }
     }
   }
@@ -1321,7 +1344,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
     CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
   }
 
-  Runtime::Current()->GetJit()->AddMemoryUsage(method, arena.BytesUsed());
+  Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
   if (jit_logger != nullptr) {
     jit_logger->WriteLog(code, code_allocator.GetSize(), method);
   }
-- 
cgit v1.2.3-59-g8ed1b
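A note on the accounting pattern this change enables, for readers of the patch:
pass-local data now comes from scoped allocators carved out of the HGraph's
ArenaStack. A scoped allocator returns its memory when the pass finishes, but
the stack keeps peak statistics, which is why the reports above can sum
allocator.BytesAllocated() with arena_stack.PeakBytesAllocated(). The
stand-alone sketch below models only that accounting idea; ToyArenaStack and
ToyScopedAllocator are illustrative stand-ins, not ART's real classes (the
real ones live in base/scoped_arena_allocator.h and manage arena memory, not
just byte counts).

#include <cstddef>
#include <iostream>

// Simplified stand-in for ArenaStack: tracks current and peak bytes.
class ToyArenaStack {
 public:
  void Allocate(size_t bytes) {
    current_ += bytes;
    if (current_ > peak_) {
      peak_ = current_;
    }
  }
  void Release(size_t bytes) { current_ -= bytes; }
  size_t PeakBytesAllocated() const { return peak_; }

 private:
  size_t current_ = 0;
  size_t peak_ = 0;
};

// Simplified stand-in for ScopedArenaAllocator: hands memory back on scope
// exit, yet the bytes it used remain visible in the stack's peak statistics.
class ToyScopedAllocator {
 public:
  explicit ToyScopedAllocator(ToyArenaStack* stack) : stack_(stack) {}
  ~ToyScopedAllocator() { stack_->Release(allocated_); }

  void Alloc(size_t bytes) {
    stack_->Allocate(bytes);
    allocated_ += bytes;
  }

 private:
  ToyArenaStack* const stack_;
  size_t allocated_ = 0;
};

int main() {
  ToyArenaStack arena_stack;
  {
    // An optimization pass allocates its local data from a scoped allocator.
    ToyScopedAllocator pass_local(&arena_stack);
    pass_local.Alloc(4096);
  }  // Pass-local memory is released here...
  // ...but the peak survives for the memory report, unlike a private
  // ArenaAllocator whose usage was invisible to the allocation counting.
  std::cout << arena_stack.PeakBytesAllocated() << " bytes peak\n";  // 4096
  return 0;
}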