diff options
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/driver/compiler_driver.cc | 517 | ||||
-rw-r--r-- | compiler/driver/compiler_driver.h | 12 | ||||
-rw-r--r-- | compiler/optimizing/load_store_elimination.cc | 381 |
3 files changed, 576 insertions, 334 deletions
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index c617f54f55..b3adf10aea 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -289,7 +289,6 @@ CompilerDriver::CompilerDriver( compiled_method_storage_(swap_fd), profile_compilation_info_(profile_compilation_info), max_arena_alloc_(0), - compiling_dex_to_dex_(false), dex_to_dex_compiler_(this) { DCHECK(compiler_options_ != nullptr); @@ -450,99 +449,39 @@ static bool InstructionSetHasGenericJniStub(InstructionSet isa) { } } -static void CompileMethod(Thread* self, - CompilerDriver* driver, - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, - bool compilation_enabled, - Handle<mirror::DexCache> dex_cache) { +template <typename CompileFn> +static void CompileMethodHarness( + Thread* self, + CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, + bool compilation_enabled, + Handle<mirror::DexCache> dex_cache, + CompileFn compile_fn) { DCHECK(driver != nullptr); - CompiledMethod* compiled_method = nullptr; + CompiledMethod* compiled_method; uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0; MethodReference method_ref(&dex_file, method_idx); - if (driver->GetCompilingDexToDex()) { - optimizer::DexToDexCompiler* const compiler = &driver->GetDexToDexCompiler(); - // This is the second pass when we dex-to-dex compile previously marked methods. - // TODO: Refactor the compilation to avoid having to distinguish the two passes - // here. That should be done on a higher level. http://b/29089975 - if (compiler->ShouldCompileMethod(method_ref)) { - VerificationResults* results = driver->GetVerificationResults(); - DCHECK(results != nullptr); - const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref); - // Do not optimize if a VerifiedMethod is missing. SafeCast elision, - // for example, relies on it. - compiled_method = compiler->CompileMethod( - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - dex_file, - (verified_method != nullptr) - ? dex_to_dex_compilation_level - : optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile); - } - } else if ((access_flags & kAccNative) != 0) { - // Are we extracting only and have support for generic JNI down calls? - if (!driver->GetCompilerOptions().IsJniCompilationEnabled() && - InstructionSetHasGenericJniStub(driver->GetInstructionSet())) { - // Leaving this empty will trigger the generic JNI version - } else { - // Query any JNI optimization annotations such as @FastNative or @CriticalNative. - access_flags |= annotations::GetNativeMethodAnnotationAccessFlags( - dex_file, dex_file.GetClassDef(class_def_idx), method_idx); + compiled_method = compile_fn(self, + driver, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + dex_to_dex_compilation_level, + compilation_enabled, + dex_cache); - compiled_method = driver->GetCompiler()->JniCompile( - access_flags, method_idx, dex_file, dex_cache); - CHECK(compiled_method != nullptr); - } - } else if ((access_flags & kAccAbstract) != 0) { - // Abstract methods don't have code. - } else { - VerificationResults* results = driver->GetVerificationResults(); - DCHECK(results != nullptr); - const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref); - bool compile = compilation_enabled && - // Basic checks, e.g., not <clinit>. - results->IsCandidateForCompilation(method_ref, access_flags) && - // Did not fail to create VerifiedMethod metadcata. - verified_method != nullptr && - // Do not have failures that should punt to the interpreter. - !verified_method->HasRuntimeThrow() && - (verified_method->GetEncounteredVerificationFailures() & - (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 && - // Is eligable for compilation by methods-to-compile filter. - driver->IsMethodToCompile(method_ref) && - driver->ShouldCompileBasedOnProfile(method_ref); - - if (compile) { - // NOTE: if compiler declines to compile this method, it will return null. - compiled_method = driver->GetCompiler()->Compile(code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - dex_file, - dex_cache); - } - if (compiled_method == nullptr && - dex_to_dex_compilation_level != - optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile) { - DCHECK(!Runtime::Current()->UseJitCompilation()); - DCHECK(!driver->GetCompilingDexToDex()); - // TODO: add a command-line option to disable DEX-to-DEX compilation ? - driver->GetDexToDexCompiler().MarkForCompilation(self, method_ref, code_item); - } - } if (kTimeCompileMethod) { uint64_t duration_ns = NanoTime() - start_ns; if (duration_ns > MsToNs(driver->GetCompiler()->GetMaximumCompilationTimeBeforeWarning())) { @@ -573,6 +512,170 @@ static void CompileMethod(Thread* self, } } +static void CompileMethodDex2Dex( + Thread* self, + CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, + bool compilation_enabled, + Handle<mirror::DexCache> dex_cache) { + auto dex_2_dex_fn = [](Thread* self ATTRIBUTE_UNUSED, + CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, + bool compilation_enabled ATTRIBUTE_UNUSED, + Handle<mirror::DexCache> dex_cache ATTRIBUTE_UNUSED) -> CompiledMethod* { + DCHECK(driver != nullptr); + MethodReference method_ref(&dex_file, method_idx); + + optimizer::DexToDexCompiler* const compiler = &driver->GetDexToDexCompiler(); + + if (compiler->ShouldCompileMethod(method_ref)) { + VerificationResults* results = driver->GetVerificationResults(); + DCHECK(results != nullptr); + const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref); + // Do not optimize if a VerifiedMethod is missing. SafeCast elision, + // for example, relies on it. + return compiler->CompileMethod( + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + (verified_method != nullptr) + ? dex_to_dex_compilation_level + : optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile); + } + return nullptr; + }; + CompileMethodHarness(self, + driver, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + dex_to_dex_compilation_level, + compilation_enabled, + dex_cache, + dex_2_dex_fn); +} + +static void CompileMethodQuick( + Thread* self, + CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, + bool compilation_enabled, + Handle<mirror::DexCache> dex_cache) { + auto quick_fn = []( + Thread* self, + CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, + bool compilation_enabled, + Handle<mirror::DexCache> dex_cache) { + DCHECK(driver != nullptr); + CompiledMethod* compiled_method = nullptr; + MethodReference method_ref(&dex_file, method_idx); + + if ((access_flags & kAccNative) != 0) { + // Are we extracting only and have support for generic JNI down calls? + if (!driver->GetCompilerOptions().IsJniCompilationEnabled() && + InstructionSetHasGenericJniStub(driver->GetInstructionSet())) { + // Leaving this empty will trigger the generic JNI version + } else { + // Query any JNI optimization annotations such as @FastNative or @CriticalNative. + access_flags |= annotations::GetNativeMethodAnnotationAccessFlags( + dex_file, dex_file.GetClassDef(class_def_idx), method_idx); + + compiled_method = driver->GetCompiler()->JniCompile( + access_flags, method_idx, dex_file, dex_cache); + CHECK(compiled_method != nullptr); + } + } else if ((access_flags & kAccAbstract) != 0) { + // Abstract methods don't have code. + } else { + VerificationResults* results = driver->GetVerificationResults(); + DCHECK(results != nullptr); + const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref); + bool compile = compilation_enabled && + // Basic checks, e.g., not <clinit>. + results->IsCandidateForCompilation(method_ref, access_flags) && + // Did not fail to create VerifiedMethod metadata. + verified_method != nullptr && + // Do not have failures that should punt to the interpreter. + !verified_method->HasRuntimeThrow() && + (verified_method->GetEncounteredVerificationFailures() & + (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 && + // Is eligable for compilation by methods-to-compile filter. + driver->IsMethodToCompile(method_ref) && + driver->ShouldCompileBasedOnProfile(method_ref); + + if (compile) { + // NOTE: if compiler declines to compile this method, it will return null. + compiled_method = driver->GetCompiler()->Compile(code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + dex_cache); + } + if (compiled_method == nullptr && + dex_to_dex_compilation_level != + optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile) { + DCHECK(!Runtime::Current()->UseJitCompilation()); + // TODO: add a command-line option to disable DEX-to-DEX compilation ? + driver->GetDexToDexCompiler().MarkForCompilation(self, method_ref, code_item); + } + } + return compiled_method; + }; + CompileMethodHarness(self, + driver, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + dex_to_dex_compilation_level, + compilation_enabled, + dex_cache, + quick_fn); +} + void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings) { DCHECK(!Runtime::Current()->IsStarted()); jobject jclass_loader; @@ -614,37 +717,34 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t *dex_file, dex_file->GetClassDef(class_def_idx)); - DCHECK(!compiling_dex_to_dex_); - CompileMethod(self, - this, - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - *dex_file, - dex_to_dex_compilation_level, - true, - dex_cache); + CompileMethodQuick(self, + this, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + *dex_file, + dex_to_dex_compilation_level, + true, + dex_cache); const size_t num_methods = dex_to_dex_compiler_.NumUniqueCodeItems(self); if (num_methods != 0) { DCHECK_EQ(num_methods, 1u); - compiling_dex_to_dex_ = true; - CompileMethod(self, - this, - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - *dex_file, - dex_to_dex_compilation_level, - true, - dex_cache); - compiling_dex_to_dex_ = false; + CompileMethodDex2Dex(self, + this, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + *dex_file, + dex_to_dex_compilation_level, + true, + dex_cache); dex_to_dex_compiler_.ClearState(); } @@ -1444,13 +1544,19 @@ class ParallelCompilationManager { void ForAll(size_t begin, size_t end, CompilationVisitor* visitor, size_t work_units) REQUIRES(!*Locks::mutator_lock_) { + ForAllLambda(begin, end, [visitor](size_t index) { visitor->Visit(index); }, work_units); + } + + template <typename Fn> + void ForAllLambda(size_t begin, size_t end, Fn fn, size_t work_units) + REQUIRES(!*Locks::mutator_lock_) { Thread* self = Thread::Current(); self->AssertNoPendingException(); CHECK_GT(work_units, 0U); index_.StoreRelaxed(begin); for (size_t i = 0; i < work_units; ++i) { - thread_pool_->AddTask(self, new ForAllClosure(this, end, visitor)); + thread_pool_->AddTask(self, new ForAllClosureLambda<Fn>(this, end, fn)); } thread_pool_->StartWorkers(self); @@ -1470,32 +1576,33 @@ class ParallelCompilationManager { } private: - class ForAllClosure : public Task { + template <typename Fn> + class ForAllClosureLambda : public Task { public: - ForAllClosure(ParallelCompilationManager* manager, size_t end, CompilationVisitor* visitor) + ForAllClosureLambda(ParallelCompilationManager* manager, size_t end, Fn fn) : manager_(manager), end_(end), - visitor_(visitor) {} + fn_(fn) {} - virtual void Run(Thread* self) { + void Run(Thread* self) OVERRIDE { while (true) { const size_t index = manager_->NextIndex(); if (UNLIKELY(index >= end_)) { break; } - visitor_->Visit(index); + fn_(index); self->AssertNoPendingException(); } } - virtual void Finalize() { + void Finalize() OVERRIDE { delete this; } private: ParallelCompilationManager* const manager_; const size_t end_; - CompilationVisitor* const visitor_; + Fn fn_; }; AtomicInteger index_; @@ -2574,64 +2681,33 @@ void CompilerDriver::InitializeClasses(jobject class_loader, } } -void CompilerDriver::Compile(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { - if (kDebugProfileGuidedCompilation) { - LOG(INFO) << "[ProfileGuidedCompilation] " << - ((profile_compilation_info_ == nullptr) - ? "null" - : profile_compilation_info_->DumpInfo(&dex_files)); - } - - dex_to_dex_compiler_.ClearState(); - compiling_dex_to_dex_ = false; - for (const DexFile* dex_file : dex_files) { - CHECK(dex_file != nullptr); - CompileDexFile(class_loader, - *dex_file, - dex_files, - parallel_thread_pool_.get(), - parallel_thread_count_, - timings); - const ArenaPool* const arena_pool = Runtime::Current()->GetArenaPool(); - const size_t arena_alloc = arena_pool->GetBytesAllocated(); - max_arena_alloc_ = std::max(arena_alloc, max_arena_alloc_); - Runtime::Current()->ReclaimArenaPoolMemory(); - } - - if (dex_to_dex_compiler_.NumUniqueCodeItems(Thread::Current()) > 0u) { - compiling_dex_to_dex_ = true; - // TODO: Not visit all of the dex files, its probably rare that only one would have quickened - // methods though. - for (const DexFile* dex_file : dex_files) { - CompileDexFile(class_loader, - *dex_file, - dex_files, - parallel_thread_pool_.get(), - parallel_thread_count_, - timings); - } - dex_to_dex_compiler_.ClearState(); - compiling_dex_to_dex_ = false; - } - - VLOG(compiler) << "Compile: " << GetMemoryUsageString(false); -} - -class CompileClassVisitor : public CompilationVisitor { - public: - explicit CompileClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {} +template <typename CompileFn> +static void CompileDexFile(CompilerDriver* driver, + jobject class_loader, + const DexFile& dex_file, + const std::vector<const DexFile*>& dex_files, + ThreadPool* thread_pool, + size_t thread_count, + TimingLogger* timings, + const char* timing_name, + CompileFn compile_fn) { + TimingLogger::ScopedTiming t(timing_name, timings); + ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), + class_loader, + driver, + &dex_file, + dex_files, + thread_pool); - virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE { + auto compile = [&context, &compile_fn](size_t class_def_index) { ScopedTrace trace(__FUNCTION__); - const DexFile& dex_file = *manager_->GetDexFile(); + const DexFile& dex_file = *context.GetDexFile(); const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - ClassLinker* class_linker = manager_->GetClassLinker(); - jobject jclass_loader = manager_->GetClassLoader(); + ClassLinker* class_linker = context.GetClassLinker(); + jobject jclass_loader = context.GetClassLoader(); ClassReference ref(&dex_file, class_def_index); // Skip compiling classes with generic verifier failures since they will still fail at runtime - if (manager_->GetCompiler()->verification_results_->IsClassRejected(ref)) { + if (context.GetCompiler()->GetVerificationResults()->IsClassRejected(ref)) { return; } // Use a scoped object access to perform to the quick SkipClass check. @@ -2662,7 +2738,7 @@ class CompileClassVisitor : public CompilationVisitor { // Go to native so that we don't block GC during compilation. ScopedThreadSuspension sts(soa.Self(), kNative); - CompilerDriver* const driver = manager_->GetCompiler(); + CompilerDriver* const driver = context.GetCompiler(); // Can we run DEX-to-DEX compiler on this class ? optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level = @@ -2685,38 +2761,71 @@ class CompileClassVisitor : public CompilationVisitor { continue; } previous_method_idx = method_idx; - CompileMethod(soa.Self(), - driver, - it.GetMethodCodeItem(), - it.GetMethodAccessFlags(), - it.GetMethodInvokeType(class_def), - class_def_index, - method_idx, - class_loader, - dex_file, - dex_to_dex_compilation_level, - compilation_enabled, - dex_cache); + compile_fn(soa.Self(), + driver, + it.GetMethodCodeItem(), + it.GetMethodAccessFlags(), + it.GetMethodInvokeType(class_def), + class_def_index, + method_idx, + class_loader, + dex_file, + dex_to_dex_compilation_level, + compilation_enabled, + dex_cache); it.Next(); } DCHECK(!it.HasNext()); + }; + context.ForAllLambda(0, dex_file.NumClassDefs(), compile, thread_count); +} + +void CompilerDriver::Compile(jobject class_loader, + const std::vector<const DexFile*>& dex_files, + TimingLogger* timings) { + if (kDebugProfileGuidedCompilation) { + LOG(INFO) << "[ProfileGuidedCompilation] " << + ((profile_compilation_info_ == nullptr) + ? "null" + : profile_compilation_info_->DumpInfo(&dex_files)); } - private: - const ParallelCompilationManager* const manager_; -}; + dex_to_dex_compiler_.ClearState(); + for (const DexFile* dex_file : dex_files) { + CHECK(dex_file != nullptr); + CompileDexFile(this, + class_loader, + *dex_file, + dex_files, + parallel_thread_pool_.get(), + parallel_thread_count_, + timings, + "Compile Dex File Quick", + CompileMethodQuick); + const ArenaPool* const arena_pool = Runtime::Current()->GetArenaPool(); + const size_t arena_alloc = arena_pool->GetBytesAllocated(); + max_arena_alloc_ = std::max(arena_alloc, max_arena_alloc_); + Runtime::Current()->ReclaimArenaPoolMemory(); + } -void CompilerDriver::CompileDexFile(jobject class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - ThreadPool* thread_pool, - size_t thread_count, - TimingLogger* timings) { - TimingLogger::ScopedTiming t("Compile Dex File", timings); - ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this, - &dex_file, dex_files, thread_pool); - CompileClassVisitor visitor(&context); - context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count); + if (dex_to_dex_compiler_.NumUniqueCodeItems(Thread::Current()) > 0u) { + // TODO: Not visit all of the dex files, its probably rare that only one would have quickened + // methods though. + for (const DexFile* dex_file : dex_files) { + CompileDexFile(this, + class_loader, + *dex_file, + dex_files, + parallel_thread_pool_.get(), + parallel_thread_count_, + timings, + "Compile Dex File Dex2Dex", + CompileMethodDex2Dex); + } + dex_to_dex_compiler_.ClearState(); + } + + VLOG(compiler) << "Compile: " << GetMemoryUsageString(false); } void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref, diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index b51e0debbb..e32b5c4fd3 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -374,10 +374,6 @@ class CompilerDriver { || android::base::EndsWith(boot_image_filename, "core-optimizing.art"); } - bool GetCompilingDexToDex() const { - return compiling_dex_to_dex_; - } - optimizer::DexToDexCompiler& GetDexToDexCompiler() { return dex_to_dex_compiler_; } @@ -449,13 +445,6 @@ class CompilerDriver { void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files, TimingLogger* timings); - void CompileDexFile(jobject class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - ThreadPool* thread_pool, - size_t thread_count, - TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_); bool MayInlineInternal(const DexFile* inlined_from, const DexFile* inlined_into) const; @@ -541,7 +530,6 @@ class CompilerDriver { size_t max_arena_alloc_; // Compiler for dex to dex (quickening). - bool compiling_dex_to_dex_; optimizer::DexToDexCompiler dex_to_dex_compiler_; friend class CompileClassVisitor; diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 88326d321b..8b4eae1780 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -25,6 +25,51 @@ #include <iostream> +/** + * The general algorithm of load-store elimination (LSE). + * Load-store analysis in the previous pass collects a list of heap locations + * and does alias analysis of those heap locations. + * LSE keeps track of a list of heap values corresponding to the heap + * locations. It visits basic blocks in reverse post order and for + * each basic block, visits instructions sequentially, and processes + * instructions as follows: + * - If the instruction is a load, and the heap location for that load has a + * valid heap value, the load can be eliminated. In order to maintain the + * validity of all heap locations during the optimization phase, the real + * elimination is delayed till the end of LSE. + * - If the instruction is a store, it updates the heap value for the heap + * location of the store with the store instruction. The real heap value + * can be fetched from the store instruction. Heap values are invalidated + * for heap locations that may alias with the store instruction's heap + * location. The store instruction can be eliminated unless the value stored + * is later needed e.g. by a load from the same/aliased heap location or + * the heap location persists at method return/deoptimization. + * The store instruction is also needed if it's not used to track the heap + * value anymore, e.g. when it fails to merge with the heap values from other + * predecessors. + * - A store that stores the same value as the heap value is eliminated. + * - The list of heap values are merged at basic block entry from the basic + * block's predecessors. The algorithm is single-pass, so loop side-effects is + * used as best effort to decide if a heap location is stored inside the loop. + * - A special type of objects called singletons are instantiated in the method + * and have a single name, i.e. no aliases. Singletons have exclusive heap + * locations since they have no aliases. Singletons are helpful in narrowing + * down the life span of a heap location such that they do not always + * need to participate in merging heap values. Allocation of a singleton + * can be eliminated if that singleton is not used and does not persist + * at method return/deoptimization. + * - For newly instantiated instances, their heap values are initialized to + * language defined default values. + * - Some instructions such as invokes are treated as loading and invalidating + * all the heap values, depending on the instruction's side effects. + * - Finalizable objects are considered as persisting at method + * return/deoptimization. + * - Currently this LSE algorithm doesn't handle SIMD graph, e.g. with VecLoad + * and VecStore instructions. + * - Currently this LSE algorithm doesn't handle graph with try-catch, due to + * the special block merging structure. + */ + namespace art { // An unknown heap value. Loads with such a value in the heap location cannot be eliminated. @@ -59,8 +104,7 @@ class LSEVisitor : public HGraphDelegateVisitor { removed_loads_(allocator_.Adapter(kArenaAllocLSE)), substitute_instructions_for_loads_(allocator_.Adapter(kArenaAllocLSE)), possibly_removed_stores_(allocator_.Adapter(kArenaAllocLSE)), - singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)), - singleton_new_arrays_(allocator_.Adapter(kArenaAllocLSE)) { + singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) { } void VisitBasicBlock(HBasicBlock* block) OVERRIDE { @@ -88,19 +132,26 @@ class LSEVisitor : public HGraphDelegateVisitor { return type_conversion; } - // Find an instruction's substitute if it should be removed. + // Find an instruction's substitute if it's a removed load. // Return the same instruction if it should not be removed. HInstruction* FindSubstitute(HInstruction* instruction) { + if (!IsLoad(instruction)) { + return instruction; + } size_t size = removed_loads_.size(); for (size_t i = 0; i < size; i++) { if (removed_loads_[i] == instruction) { - return substitute_instructions_for_loads_[i]; + HInstruction* substitute = substitute_instructions_for_loads_[i]; + // The substitute list is a flat hierarchy. + DCHECK_EQ(FindSubstitute(substitute), substitute); + return substitute; } } return instruction; } void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) { + DCHECK(IsLoad(load)); DCHECK_EQ(FindSubstitute(heap_value), heap_value) << "Unexpected heap_value that has a substitute " << heap_value->DebugName(); removed_loads_.push_back(load); @@ -207,28 +258,59 @@ class LSEVisitor : public HGraphDelegateVisitor { new_instance->GetBlock()->RemoveInstruction(new_instance); } } - for (HInstruction* new_array : singleton_new_arrays_) { - size_t removed = HConstructorFence::RemoveConstructorFences(new_array); - MaybeRecordStat(stats_, - MethodCompilationStat::kConstructorFenceRemovedLSE, - removed); + } - if (!new_array->HasNonEnvironmentUses()) { - new_array->RemoveEnvironmentUsers(); - new_array->GetBlock()->RemoveInstruction(new_array); - } + private: + static bool IsLoad(HInstruction* instruction) { + if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) { + return false; } + // Unresolved load is not treated as a load. + return instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsArrayGet(); } - private: - // If heap_values[index] is an instance field store, need to keep the store. - // This is necessary if a heap value is killed due to merging, or loop side - // effects (which is essentially merging also), since a load later from the - // location won't be eliminated. + static bool IsStore(HInstruction* instruction) { + if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) { + return false; + } + // Unresolved store is not treated as a store. + return instruction->IsInstanceFieldSet() || + instruction->IsArraySet() || + instruction->IsStaticFieldSet(); + } + + // Returns the real heap value by finding its substitute or by "peeling" + // a store instruction. + HInstruction* GetRealHeapValue(HInstruction* heap_value) { + if (IsLoad(heap_value)) { + return FindSubstitute(heap_value); + } + if (!IsStore(heap_value)) { + return heap_value; + } + + // We keep track of store instructions as the heap values which might be + // eliminated if the stores are later found not necessary. The real stored + // value needs to be fetched from the store instruction. + if (heap_value->IsInstanceFieldSet()) { + heap_value = heap_value->AsInstanceFieldSet()->GetValue(); + } else if (heap_value->IsStaticFieldSet()) { + heap_value = heap_value->AsStaticFieldSet()->GetValue(); + } else { + DCHECK(heap_value->IsArraySet()); + heap_value = heap_value->AsArraySet()->GetValue(); + } + // heap_value may already be a removed load. + return FindSubstitute(heap_value); + } + + // If heap_value is a store, need to keep the store. + // This is necessary if a heap value is killed or replaced by another value, + // so that the store is no longer used to track heap value. void KeepIfIsStore(HInstruction* heap_value) { - if (heap_value == kDefaultHeapValue || - heap_value == kUnknownHeapValue || - !(heap_value->IsInstanceFieldSet() || heap_value->IsArraySet())) { + if (!IsStore(heap_value)) { return; } auto idx = std::find(possibly_removed_stores_.begin(), @@ -239,26 +321,41 @@ class LSEVisitor : public HGraphDelegateVisitor { } } + // If a heap location X may alias with heap location at `loc_index` + // and heap_values of that heap location X holds a store, keep that store. + // It's needed for a dependent load that's not eliminated since any store + // that may put value into the load's heap location needs to be kept. + void KeepStoresIfAliasedToLocation(ScopedArenaVector<HInstruction*>& heap_values, + size_t loc_index) { + for (size_t i = 0; i < heap_values.size(); i++) { + if ((i == loc_index) || heap_location_collector_.MayAlias(i, loc_index)) { + KeepIfIsStore(heap_values[i]); + } + } + } + void HandleLoopSideEffects(HBasicBlock* block) { DCHECK(block->IsLoopHeader()); int block_id = block->GetBlockId(); ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id]; + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + ScopedArenaVector<HInstruction*>& pre_header_heap_values = + heap_values_for_[pre_header->GetBlockId()]; - // Don't eliminate loads in irreducible loops. This is safe for singletons, because - // they are always used by the non-eliminated loop-phi. + // Don't eliminate loads in irreducible loops. + // Also keep the stores before the loop. if (block->GetLoopInformation()->IsIrreducible()) { if (kIsDebugBuild) { for (size_t i = 0; i < heap_values.size(); i++) { DCHECK_EQ(heap_values[i], kUnknownHeapValue); } } + for (size_t i = 0; i < heap_values.size(); i++) { + KeepIfIsStore(pre_header_heap_values[i]); + } return; } - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - ScopedArenaVector<HInstruction*>& pre_header_heap_values = - heap_values_for_[pre_header->GetBlockId()]; - // Inherit the values from pre-header. for (size_t i = 0; i < heap_values.size(); i++) { heap_values[i] = pre_header_heap_values[i]; @@ -270,18 +367,17 @@ class LSEVisitor : public HGraphDelegateVisitor { for (size_t i = 0; i < heap_values.size(); i++) { HeapLocation* location = heap_location_collector_.GetHeapLocation(i); ReferenceInfo* ref_info = location->GetReferenceInfo(); - if (ref_info->IsSingletonAndRemovable() && - !location->IsValueKilledByLoopSideEffects()) { - // A removable singleton's field that's not stored into inside a loop is + if (ref_info->IsSingleton() && !location->IsValueKilledByLoopSideEffects()) { + // A singleton's field that's not stored into inside a loop is // invariant throughout the loop. Nothing to do. } else { - // heap value is killed by loop side effects (stored into directly, or - // due to aliasing). Or the heap value may be needed after method return - // or deoptimization. + // heap value is killed by loop side effects. KeepIfIsStore(pre_header_heap_values[i]); heap_values[i] = kUnknownHeapValue; } } + } else { + // The loop doesn't kill any value. } } @@ -300,45 +396,73 @@ class LSEVisitor : public HGraphDelegateVisitor { ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { HInstruction* merged_value = nullptr; + // If we can merge the store itself from the predecessors, we keep + // the store as the heap value as long as possible. In case we cannot + // merge the store, we try to merge the values of the stores. + HInstruction* merged_store_value = nullptr; // Whether merged_value is a result that's merged from all predecessors. bool from_all_predecessors = true; ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); + HInstruction* ref = ref_info->GetReference(); HInstruction* singleton_ref = nullptr; if (ref_info->IsSingleton()) { - // We do more analysis of liveness when merging heap values for such - // cases since stores into such references may potentially be eliminated. - singleton_ref = ref_info->GetReference(); + // We do more analysis based on singleton's liveness when merging + // heap values for such cases. + singleton_ref = ref; } for (HBasicBlock* predecessor : predecessors) { HInstruction* pred_value = heap_values_for_[predecessor->GetBlockId()][i]; + if (!IsStore(pred_value)) { + pred_value = FindSubstitute(pred_value); + } + DCHECK(pred_value != nullptr); + HInstruction* pred_store_value = GetRealHeapValue(pred_value); if ((singleton_ref != nullptr) && !singleton_ref->GetBlock()->Dominates(predecessor)) { - // singleton_ref is not live in this predecessor. Skip this predecessor since - // it does not really have the location. + // singleton_ref is not live in this predecessor. No need to merge + // since singleton_ref is not live at the beginning of this block. DCHECK_EQ(pred_value, kUnknownHeapValue); from_all_predecessors = false; - continue; + break; } if (merged_value == nullptr) { // First seen heap value. + DCHECK(pred_value != nullptr); merged_value = pred_value; } else if (pred_value != merged_value) { // There are conflicting values. merged_value = kUnknownHeapValue; + // We may still be able to merge store values. + } + + // Conflicting stores may be storing the same value. We do another merge + // of real stored values. + if (merged_store_value == nullptr) { + // First seen store value. + DCHECK(pred_store_value != nullptr); + merged_store_value = pred_store_value; + } else if (pred_store_value != merged_store_value) { + // There are conflicting store values. + merged_store_value = kUnknownHeapValue; + // There must be conflicting stores also. + DCHECK_EQ(merged_value, kUnknownHeapValue); + // No need to merge anymore. break; } } - if (ref_info->IsSingleton()) { - if (ref_info->IsSingletonAndNonRemovable() || - (merged_value == kUnknownHeapValue && - !block->IsSingleReturnOrReturnVoidAllowingPhis())) { - // The heap value may be needed after method return or deoptimization, - // or there are conflicting heap values from different predecessors and - // this block is not a single return, - // keep the last store in each predecessor since future loads may not - // be eliminated. + if (merged_value == nullptr) { + DCHECK(!from_all_predecessors); + DCHECK(singleton_ref != nullptr); + } + if (from_all_predecessors) { + if (ref_info->IsSingletonAndRemovable() && + block->IsSingleReturnOrReturnVoidAllowingPhis()) { + // Values in the singleton are not needed anymore. + } else if (!IsStore(merged_value)) { + // We don't track merged value as a store anymore. We have to + // hold the stores in predecessors live here. for (HBasicBlock* predecessor : predecessors) { ScopedArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()]; @@ -346,18 +470,33 @@ class LSEVisitor : public HGraphDelegateVisitor { } } } else { - // Currenctly we don't eliminate stores to non-singletons. + DCHECK(singleton_ref != nullptr); + // singleton_ref is non-existing at the beginning of the block. There is + // no need to keep the stores. } - if ((merged_value == nullptr) || !from_all_predecessors) { + if (!from_all_predecessors) { DCHECK(singleton_ref != nullptr); DCHECK((singleton_ref->GetBlock() == block) || - !singleton_ref->GetBlock()->Dominates(block)); + !singleton_ref->GetBlock()->Dominates(block)) + << "method: " << GetGraph()->GetMethodName(); // singleton_ref is not defined before block or defined only in some of its // predecessors, so block doesn't really have the location at its entry. heap_values[i] = kUnknownHeapValue; - } else { + } else if (predecessors.size() == 1) { + // Inherit heap value from the single predecessor. + DCHECK_EQ(heap_values_for_[predecessors[0]->GetBlockId()][i], merged_value); heap_values[i] = merged_value; + } else { + DCHECK(merged_value == kUnknownHeapValue || + merged_value == kDefaultHeapValue || + merged_value->GetBlock()->Dominates(block)); + if (merged_value != kUnknownHeapValue) { + heap_values[i] = merged_value; + } else { + // Stores in different predecessors may be storing the same value. + heap_values[i] = merged_store_value; + } } } } @@ -423,23 +562,12 @@ class LSEVisitor : public HGraphDelegateVisitor { heap_values[idx] = constant; return; } - if (heap_value != kUnknownHeapValue) { - if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) { - HInstruction* store = heap_value; - // This load must be from a singleton since it's from the same - // field/element that a "removed" store puts the value. That store - // must be to a singleton's field/element. - DCHECK(ref_info->IsSingleton()); - // Get the real heap value of the store. - heap_value = heap_value->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2); - // heap_value may already have a substitute. - heap_value = FindSubstitute(heap_value); - } - } + heap_value = GetRealHeapValue(heap_value); if (heap_value == kUnknownHeapValue) { // Load isn't eliminated. Put the load as the value into the HeapLocation. // This acts like GVN but with better aliasing analysis. heap_values[idx] = instruction; + KeepStoresIfAliasedToLocation(heap_values, idx); } else { if (DataType::Kind(heap_value->GetType()) != DataType::Kind(instruction->GetType())) { // The only situation where the same heap location has different type is when @@ -452,6 +580,10 @@ class LSEVisitor : public HGraphDelegateVisitor { DCHECK(heap_value->IsArrayGet()) << heap_value->DebugName(); DCHECK(instruction->IsArrayGet()) << instruction->DebugName(); } + // Load isn't eliminated. Put the load as the value into the HeapLocation. + // This acts like GVN but with better aliasing analysis. + heap_values[idx] = instruction; + KeepStoresIfAliasedToLocation(heap_values, idx); return; } AddRemovedLoad(instruction, heap_value); @@ -460,12 +592,21 @@ class LSEVisitor : public HGraphDelegateVisitor { } bool Equal(HInstruction* heap_value, HInstruction* value) { + DCHECK(!IsStore(value)) << value->DebugName(); + if (heap_value == kUnknownHeapValue) { + // Don't compare kUnknownHeapValue with other values. + return false; + } if (heap_value == value) { return true; } if (heap_value == kDefaultHeapValue && GetDefaultValue(value->GetType()) == value) { return true; } + HInstruction* real_heap_value = GetRealHeapValue(heap_value); + if (real_heap_value != heap_value) { + return Equal(real_heap_value, value); + } return false; } @@ -476,6 +617,7 @@ class LSEVisitor : public HGraphDelegateVisitor { size_t vector_length, int16_t declaring_class_def_index, HInstruction* value) { + DCHECK(!IsStore(value)) << value->DebugName(); // value may already have a substitute. value = FindSubstitute(value); HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); @@ -486,59 +628,47 @@ class LSEVisitor : public HGraphDelegateVisitor { ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; HInstruction* heap_value = heap_values[idx]; - bool same_value = false; bool possibly_redundant = false; + if (Equal(heap_value, value)) { // Store into the heap location with the same value. - same_value = true; - } else if (index != nullptr && - heap_location_collector_.GetHeapLocation(idx)->HasAliasedLocations()) { - // For array element, don't eliminate stores if the location can be aliased - // (due to either ref or index aliasing). - } else if (ref_info->IsSingleton()) { - // Store into a field/element of a singleton. The value cannot be killed due to - // aliasing/invocation. It can be redundant since future loads can - // directly get the value set by this instruction. The value can still be killed due to - // merging or loop side effects. Stores whose values are killed due to merging/loop side - // effects later will be removed from possibly_removed_stores_ when that is detected. - // Stores whose values may be needed after method return or deoptimization - // are also removed from possibly_removed_stores_ when that is detected. - possibly_redundant = true; + // This store can be eliminated right away. + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } else { HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); - if (loop_info != nullptr) { - // instruction is a store in the loop so the loop must does write. + if (loop_info == nullptr) { + // Store is not in a loop. We try to precisely track the heap value by + // the store. + possibly_redundant = true; + } else if (!loop_info->IsIrreducible()) { + // instruction is a store in the loop so the loop must do write. DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); - - if (loop_info->IsDefinedOutOfTheLoop(original_ref)) { - DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); - // Keep the store since its value may be needed at the loop header. - possibly_redundant = false; - } else { - // The singleton is created inside the loop. Value stored to it isn't needed at + if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(original_ref)) { + // original_ref is created inside the loop. Value stored to it isn't needed at // the loop header. This is true for outer loops also. + possibly_redundant = true; + } else { + // Keep the store since its value may be needed at the loop header. } + } else { + // Keep the store inside irreducible loops. } } - if (same_value || possibly_redundant) { + if (possibly_redundant) { possibly_removed_stores_.push_back(instruction); } - if (!same_value) { - if (possibly_redundant) { - DCHECK(instruction->IsInstanceFieldSet() || instruction->IsArraySet()); - // Put the store as the heap value. If the value is loaded from heap - // by a load later, this store isn't really redundant. - heap_values[idx] = instruction; - } else { - heap_values[idx] = value; - } - } + // Put the store as the heap value. If the value is loaded or needed after + // return/deoptimization later, this store isn't really redundant. + heap_values[idx] = instruction; + // This store may kill values in other heap locations due to aliasing. for (size_t i = 0; i < heap_values.size(); i++) { if (i == idx) { continue; } - if (heap_values[i] == value) { + if (Equal(heap_values[i], value)) { // Same value should be kept even if aliasing happens. continue; } @@ -547,7 +677,9 @@ class LSEVisitor : public HGraphDelegateVisitor { continue; } if (heap_location_collector_.MayAlias(i, idx)) { - // Kill heap locations that may alias. + // Kill heap locations that may alias and as a result if the heap value + // is a store, the store needs to be kept. + KeepIfIsStore(heap_values[i]); heap_values[i] = kUnknownHeapValue; } } @@ -633,24 +765,35 @@ class LSEVisitor : public HGraphDelegateVisitor { const ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; for (HInstruction* heap_value : heap_values) { - // Filter out fake instructions before checking instruction kind below. - if (heap_value == kUnknownHeapValue || heap_value == kDefaultHeapValue) { - continue; - } // A store is kept as the heap value for possibly removed stores. - if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) { - // Check whether the reference for a store is used by an environment local of - // HDeoptimize. + // That value stored is generally observeable after deoptimization, except + // for singletons that don't escape after deoptimization. + if (IsStore(heap_value)) { + if (heap_value->IsStaticFieldSet()) { + KeepIfIsStore(heap_value); + continue; + } HInstruction* reference = heap_value->InputAt(0); - DCHECK(heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton()); - for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) { - HEnvironment* user = use.GetUser(); - if (user->GetHolder() == instruction) { - // The singleton for the store is visible at this deoptimization - // point. Need to keep the store so that the heap value is - // seen by the interpreter. + if (heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton()) { + if (reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable()) { + // Finalizable objects alway escape. KeepIfIsStore(heap_value); + continue; + } + // Check whether the reference for a store is used by an environment local of + // HDeoptimize. If not, the singleton is not observed after + // deoptimizion. + for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) { + HEnvironment* user = use.GetUser(); + if (user->GetHolder() == instruction) { + // The singleton for the store is visible at this deoptimization + // point. Need to keep the store so that the heap value is + // seen by the interpreter. + KeepIfIsStore(heap_value); + } } + } else { + KeepIfIsStore(heap_value); } } } @@ -691,9 +834,12 @@ class LSEVisitor : public HGraphDelegateVisitor { // Singleton references cannot be seen by the callee. } else { if (side_effects.DoesAnyRead()) { + // Invocation may read the heap value. KeepIfIsStore(heap_values[i]); } if (side_effects.DoesAnyWrite()) { + // Keep the store since it's not used to track the heap value anymore. + KeepIfIsStore(heap_values[i]); heap_values[i] = kUnknownHeapValue; } } @@ -758,7 +904,7 @@ class LSEVisitor : public HGraphDelegateVisitor { return; } if (ref_info->IsSingletonAndRemovable()) { - singleton_new_arrays_.push_back(new_array); + singleton_new_instances_.push_back(new_array); } ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[new_array->GetBlock()->GetBlockId()]; @@ -791,7 +937,6 @@ class LSEVisitor : public HGraphDelegateVisitor { ScopedArenaVector<HInstruction*> possibly_removed_stores_; ScopedArenaVector<HInstruction*> singleton_new_instances_; - ScopedArenaVector<HInstruction*> singleton_new_arrays_; DISALLOW_COPY_AND_ASSIGN(LSEVisitor); }; |