Diffstat (limited to 'compiler')
-rw-r--r--  compiler/compiled_method.cc                      |   6
-rw-r--r--  compiler/compiled_method.h                       |   6
-rw-r--r--  compiler/debug/elf_symtab_writer.h               |  29
-rw-r--r--  compiler/dex/dex_to_dex_compiler.cc              | 206
-rw-r--r--  compiler/dex/dex_to_dex_compiler.h               |  97
-rw-r--r--  compiler/driver/compiler_driver.cc               | 528
-rw-r--r--  compiler/driver/compiler_driver.h                |  13
-rw-r--r--  compiler/jit/jit_compiler.cc                     |   2
-rw-r--r--  compiler/linker/elf_builder.h                    |  20
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc   |  17
-rw-r--r--  compiler/optimizing/code_generator_mips.cc       |  66
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc     |  66
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc     |  64
-rw-r--r--  compiler/optimizing/load_store_elimination.cc    | 381
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc       |   2
-rw-r--r--  compiler/utils/atomic_dex_ref_map-inl.h          |  12
-rw-r--r--  compiler/utils/atomic_dex_ref_map.h              |   3
17 files changed, 924 insertions(+), 594 deletions(-)
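The largest change below, in compiler/driver/compiler_driver.cc, replaces the single CompileMethod() that branched on driver->GetCompilingDexToDex() with a CompileMethodHarness() template that keeps the shared timing and result bookkeeping and takes the pass-specific logic as a callable, plus two thin wrappers, CompileMethodQuick() and CompileMethodDex2Dex(). A minimal standalone sketch of that shape (names and the timing threshold are illustrative stand-ins, not ART's real API):

// Sketch of the harness-plus-callable split used by the compiler_driver.cc
// refactor below. All names and the warning threshold are stand-ins.
#include <chrono>
#include <cstdio>

struct CompiledMethod;                        // opaque result of a successful compile
struct MethodReference { const char* pretty_name; };

template <typename CompileFn>
CompiledMethod* CompileWithHarness(const MethodReference& method, CompileFn compile_fn) {
  const auto start = std::chrono::steady_clock::now();
  CompiledMethod* compiled = compile_fn(method);        // pass-specific work only
  const auto duration = std::chrono::steady_clock::now() - start;
  if (duration > std::chrono::milliseconds(100)) {      // shared slow-compile warning
    std::fprintf(stderr, "slow compilation of %s\n", method.pretty_name);
  }
  return compiled;                                      // may be null if the pass declined
}

// Each pass then becomes a thin wrapper supplying only its own lambda, e.g.:
//   CompiledMethod* quick = CompileWithHarness(ref, [&](const MethodReference& m) {
//     return RunOptimizingOrJniCompile(m);             // hypothetical quick path
//   });
//   CompiledMethod* d2d = CompileWithHarness(ref, [&](const MethodReference& m) {
//     return RunDexToDexQuickening(m);                 // hypothetical dex-to-dex path
//   });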
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index 0f69dbab94..e41371855d 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -159,10 +159,4 @@ CompiledMethod::~CompiledMethod() { storage->ReleaseMethodInfo(method_info_); } -void CompiledMethod::ReleaseVMapTable() { - CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage(); - storage->ReleaseVMapTable(vmap_table_); - vmap_table_ = nullptr; -} - } // namespace art diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index 4e8f3efe5a..acdce260e5 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -168,10 +168,6 @@ class CompiledMethod FINAL : public CompiledCode { ArrayRef<const linker::LinkerPatch> GetPatches() const; - // The compiler sometimes unquickens shared code items. In that case, we need to clear the vmap - // table to avoid writing the quicken info to the vdex file. - void ReleaseVMapTable(); - private: static constexpr size_t kIsIntrinsicLsb = kNumberOfCompiledCodePackedBits; static constexpr size_t kIsIntrinsicSize = 1u; @@ -190,7 +186,7 @@ class CompiledMethod FINAL : public CompiledCode { // For quick code, method specific information that is not very dedupe friendly (method indices). const LengthPrefixedArray<uint8_t>* const method_info_; // For quick code, holds code infos which contain stack maps, inline information, and etc. - const LengthPrefixedArray<uint8_t>* vmap_table_; + const LengthPrefixedArray<uint8_t>* const vmap_table_; // For quick code, a FDE entry for the debug_frame section. const LengthPrefixedArray<uint8_t>* const cfi_info_; // For quick code, linker patches needed by the method. diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h index 9c9e8b35b8..4b19547d28 100644 --- a/compiler/debug/elf_symtab_writer.h +++ b/compiler/debug/elf_symtab_writer.h @@ -101,7 +101,7 @@ static void WriteDebugSymbols(linker::ElfBuilder<ElfTypes>* builder, } } } - // Add symbols for interpreted methods (with address range of the method's bytecode). + // Add symbols for dex files. if (!debug_info.dex_files.empty() && builder->GetDex()->Exists()) { auto dex = builder->GetDex(); for (auto it : debug_info.dex_files) { @@ -109,33 +109,6 @@ static void WriteDebugSymbols(linker::ElfBuilder<ElfTypes>* builder, const DexFile* dex_file = it.second; typename ElfTypes::Word dex_name = strtab->Write(kDexFileSymbolName); symtab->Add(dex_name, dex, dex_address, dex_file->Size(), STB_GLOBAL, STT_FUNC); - if (mini_debug_info) { - continue; // Don't add interpreter method names to mini-debug-info for now. 
- } - for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) { - const DexFile::ClassDef& class_def = dex_file->GetClassDef(i); - const uint8_t* class_data = dex_file->GetClassData(class_def); - if (class_data == nullptr) { - continue; - } - for (ClassDataItemIterator item(*dex_file, class_data); item.HasNext(); item.Next()) { - if (!item.IsAtMethod()) { - continue; - } - const DexFile::CodeItem* code_item = item.GetMethodCodeItem(); - if (code_item == nullptr) { - continue; - } - CodeItemInstructionAccessor code(*dex_file, code_item); - DCHECK(code.HasCodeItem()); - std::string name = dex_file->PrettyMethod(item.GetMemberIndex(), !mini_debug_info); - size_t name_offset = strtab->Write(name); - uint64_t offset = reinterpret_cast<const uint8_t*>(code.Insns()) - dex_file->Begin(); - uint64_t address = dex_address + offset; - size_t size = code.InsnsSizeInCodeUnits() * sizeof(uint16_t); - symtab->Add(name_offset, dex, address, size, STB_GLOBAL, STT_FUNC); - } - } } } strtab->End(); diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index 28c7fe2c34..9f0aaa4e10 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -45,6 +45,85 @@ const bool kEnableQuickening = true; // Control check-cast elision. const bool kEnableCheckCastEllision = true; +// Holds the state for compiling a single method. +struct DexToDexCompiler::CompilationState { + struct QuickenedInfo { + QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {} + + uint32_t dex_pc; + uint16_t dex_member_index; + }; + + CompilationState(DexToDexCompiler* compiler, + const DexCompilationUnit& unit, + const CompilationLevel compilation_level, + const std::vector<uint8_t>* quicken_data); + + const std::vector<QuickenedInfo>& GetQuickenedInfo() const { + return quickened_info_; + } + + // Returns the quickening info, or an empty array if it was not quickened. + // If already_quickened is true, then don't change anything but still return what the quicken + // data would have been. + std::vector<uint8_t> Compile(); + + const DexFile& GetDexFile() const; + + // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where + // a barrier is required. + void CompileReturnVoid(Instruction* inst, uint32_t dex_pc); + + // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In + // this case, returns the second NOP instruction pointer. Otherwise, returns + // the given "inst". + Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc); + + // Compiles a field access into a quick field access. + // The field index is replaced by an offset within an Object where we can read + // from / write to this field. Therefore, this does not involve any resolution + // at runtime. + // Since the field index is encoded with 16 bits, we can replace it only if the + // field offset can be encoded with 16 bits too. + void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc, + Instruction::Code new_opcode, bool is_put); + + // Compiles a virtual method invocation into a quick virtual method invocation. + // The method index is replaced by the vtable index where the corresponding + // executable can be found. Therefore, this does not involve any resolution + // at runtime. + // Since the method index is encoded with 16 bits, we can replace it only if the + // vtable index can be encoded with 16 bits too. 
+ void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, + Instruction::Code new_opcode, bool is_range); + + // Return the next index. + uint16_t NextIndex(); + + // Returns the dequickened index if an instruction is quickened, otherwise return index. + uint16_t GetIndexForInstruction(const Instruction* inst, uint32_t index); + + DexToDexCompiler* const compiler_; + CompilerDriver& driver_; + const DexCompilationUnit& unit_; + const CompilationLevel compilation_level_; + + // Filled by the compiler when quickening, in order to encode that information + // in the .oat file. The runtime will use that information to get to the original + // opcodes. + std::vector<QuickenedInfo> quickened_info_; + + // True if we optimized a return void to a return void no barrier. + bool optimized_return_void_ = false; + + // If the code item was already quickened previously. + const bool already_quickened_; + const QuickenInfoTable existing_quicken_info_; + uint32_t quicken_index_ = 0u; + + DISALLOW_COPY_AND_ASSIGN(CompilationState); +}; + DexToDexCompiler::DexToDexCompiler(CompilerDriver* driver) : driver_(driver), lock_("Quicken lock", kDexToDexCompilerLock) { @@ -55,16 +134,13 @@ void DexToDexCompiler::ClearState() { MutexLock lock(Thread::Current(), lock_); active_dex_file_ = nullptr; active_bit_vector_ = nullptr; - seen_code_items_.clear(); should_quicken_.clear(); - shared_code_items_.clear(); - blacklisted_code_items_.clear(); shared_code_item_quicken_info_.clear(); } -size_t DexToDexCompiler::NumUniqueCodeItems(Thread* self) const { +size_t DexToDexCompiler::NumCodeItemsToQuicken(Thread* self) const { MutexLock lock(self, lock_); - return seen_code_items_.size(); + return num_code_items_; } BitVector* DexToDexCompiler::GetOrAddBitVectorForDex(const DexFile* dex_file) { @@ -80,17 +156,13 @@ BitVector* DexToDexCompiler::GetOrAddBitVectorForDex(const DexFile* dex_file) { } void DexToDexCompiler::MarkForCompilation(Thread* self, - const MethodReference& method_ref, - const DexFile::CodeItem* code_item) { + const MethodReference& method_ref) { MutexLock lock(self, lock_); BitVector* const bitmap = GetOrAddBitVectorForDex(method_ref.dex_file); DCHECK(bitmap != nullptr); DCHECK(!bitmap->IsBitSet(method_ref.index)); bitmap->SetBit(method_ref.index); - // Detect the shared code items. - if (!seen_code_items_.insert(code_item).second) { - shared_code_items_.insert(code_item); - } + ++num_code_items_; } DexToDexCompiler::CompilationState::CompilationState(DexToDexCompiler* compiler, @@ -316,6 +388,7 @@ void DexToDexCompiler::CompilationState::CompileReturnVoid(Instruction* inst, ui << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); inst->SetOpcode(Instruction::RETURN_VOID_NO_BARRIER); + optimized_return_void_ = true; } Instruction* DexToDexCompiler::CompilationState::CompileCheckCast(Instruction* inst, @@ -464,51 +537,48 @@ CompiledMethod* DexToDexCompiler::CompileMethod( // If the code item is shared with multiple different method ids, make sure that we quicken only // once and verify that all the dequicken maps match. if (UNLIKELY(shared_code_items_.find(code_item) != shared_code_items_.end())) { - // For shared code items, use a lock to prevent races. - MutexLock mu(soa.Self(), lock_); - // Blacklisted means there was a quickening conflict previously, bail early. 
- if (blacklisted_code_items_.find(code_item) != blacklisted_code_items_.end()) { + // Avoid quickening the shared code items for now because the existing conflict detection logic + // does not currently handle cases where the code item is quickened in one place but + // compiled in another. + static constexpr bool kAvoidQuickeningSharedCodeItems = true; + if (kAvoidQuickeningSharedCodeItems) { return nullptr; } + // For shared code items, use a lock to prevent races. + MutexLock mu(soa.Self(), lock_); auto existing = shared_code_item_quicken_info_.find(code_item); - const bool already_quickened = existing != shared_code_item_quicken_info_.end(); + QuickenState* existing_data = nullptr; + std::vector<uint8_t>* existing_quicken_data = nullptr; + if (existing != shared_code_item_quicken_info_.end()) { + existing_data = &existing->second; + if (existing_data->conflict_) { + return nullptr; + } + existing_quicken_data = &existing_data->quicken_data_; + } + bool optimized_return_void; { - CompilationState state(this, - unit, - compilation_level, - already_quickened ? &existing->second.quicken_data_ : nullptr); + CompilationState state(this, unit, compilation_level, existing_quicken_data); quicken_data = state.Compile(); + optimized_return_void = state.optimized_return_void_; } // Already quickened, check that the data matches what was previously seen. MethodReference method_ref(&dex_file, method_idx); - if (already_quickened) { - QuickenState* const existing_data = &existing->second; - if (existing_data->quicken_data_ != quicken_data) { - VLOG(compiler) << "Quicken data mismatch, dequickening method " + if (existing_data != nullptr) { + if (*existing_quicken_data != quicken_data || + existing_data->optimized_return_void_ != optimized_return_void) { + VLOG(compiler) << "Quicken data mismatch, for method " << dex_file.PrettyMethod(method_idx); - // Unquicken using the existing quicken data. - optimizer::ArtDecompileDEX(dex_file, - *code_item, - ArrayRef<const uint8_t>(existing_data->quicken_data_), - /* decompile_return_instruction*/ false); - // Go clear the vmaps for all the methods that were already quickened to avoid writing them - // out during oat writing. - for (const MethodReference& ref : existing_data->methods_) { - CompiledMethod* method = driver_->GetCompiledMethod(ref); - DCHECK(method != nullptr); - method->ReleaseVMapTable(); - } - // Blacklist the method to never attempt to quicken it in the future. - blacklisted_code_items_.insert(code_item); - shared_code_item_quicken_info_.erase(existing); - return nullptr; + // Mark the method as a conflict to never attempt to quicken it in the future. + existing_data->conflict_ = true; } existing_data->methods_.push_back(method_ref); } else { QuickenState new_state; new_state.methods_.push_back(method_ref); new_state.quicken_data_ = quicken_data; + new_state.optimized_return_void_ = optimized_return_void; bool inserted = shared_code_item_quicken_info_.emplace(code_item, new_state).second; CHECK(inserted) << "Failed to insert " << dex_file.PrettyMethod(method_idx); } @@ -556,9 +626,65 @@ CompiledMethod* DexToDexCompiler::CompileMethod( ArrayRef<const uint8_t>(quicken_data), // vmap_table ArrayRef<const uint8_t>(), // cfi data ArrayRef<const linker::LinkerPatch>()); + DCHECK(ret != nullptr); return ret; } +void DexToDexCompiler::SetDexFiles(const std::vector<const DexFile*>& dex_files) { + // Record what code items are already seen to detect when multiple methods have the same code + // item. 
+ std::unordered_set<const DexFile::CodeItem*> seen_code_items; + for (const DexFile* dex_file : dex_files) { + for (size_t i = 0; i < dex_file->NumClassDefs(); ++i) { + const DexFile::ClassDef& class_def = dex_file->GetClassDef(i); + const uint8_t* class_data = dex_file->GetClassData(class_def); + if (class_data == nullptr) { + continue; + } + ClassDataItemIterator it(*dex_file, class_data); + it.SkipAllFields(); + for (; it.HasNextMethod(); it.Next()) { + const DexFile::CodeItem* code_item = it.GetMethodCodeItem(); + // Detect the shared code items. + if (!seen_code_items.insert(code_item).second) { + shared_code_items_.insert(code_item); + } + } + } + } + VLOG(compiler) << "Shared code items " << shared_code_items_.size(); +} + +void DexToDexCompiler::UnquickenConflictingMethods() { + MutexLock mu(Thread::Current(), lock_); + size_t unquicken_count = 0; + for (const auto& pair : shared_code_item_quicken_info_) { + const DexFile::CodeItem* code_item = pair.first; + const QuickenState& state = pair.second; + CHECK_GE(state.methods_.size(), 1u); + if (state.conflict_) { + // Unquicken using the existing quicken data. + // TODO: Do we really need to pass a dex file in? + optimizer::ArtDecompileDEX(*state.methods_[0].dex_file, + *code_item, + ArrayRef<const uint8_t>(state.quicken_data_), + /* decompile_return_instruction*/ true); + ++unquicken_count; + // Go clear the vmaps for all the methods that were already quickened to avoid writing them + // out during oat writing. + for (const MethodReference& ref : state.methods_) { + CompiledMethod* method = driver_->RemoveCompiledMethod(ref); + if (method != nullptr) { + // There is up to one compiled method for each method ref. Releasing it leaves the + // deduped data intact, this means its safe to do even when other threads might be + // compiling. + CompiledMethod::ReleaseSwapAllocatedCompiledMethod(driver_, method); + } + } + } + } +} + } // namespace optimizer } // namespace art diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h index 2105a9ded4..7df09f140c 100644 --- a/compiler/dex/dex_to_dex_compiler.h +++ b/compiler/dex/dex_to_dex_compiler.h @@ -59,99 +59,35 @@ class DexToDexCompiler { const CompilationLevel compilation_level) WARN_UNUSED; void MarkForCompilation(Thread* self, - const MethodReference& method_ref, - const DexFile::CodeItem* code_item); + const MethodReference& method_ref); void ClearState(); + // Unquicken all methods that have conflicting quicken info. This is not done during the + // quickening process to avoid race conditions. + void UnquickenConflictingMethods(); + CompilerDriver* GetDriver() { return driver_; } bool ShouldCompileMethod(const MethodReference& ref); - size_t NumUniqueCodeItems(Thread* self) const; + // Return the number of code items to quicken. + size_t NumCodeItemsToQuicken(Thread* self) const; + + void SetDexFiles(const std::vector<const DexFile*>& dex_files); private: // Holds the state for compiling a single method. - struct CompilationState { - struct QuickenedInfo { - QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {} - - uint32_t dex_pc; - uint16_t dex_member_index; - }; - - CompilationState(DexToDexCompiler* compiler, - const DexCompilationUnit& unit, - const CompilationLevel compilation_level, - const std::vector<uint8_t>* quicken_data); - - const std::vector<QuickenedInfo>& GetQuickenedInfo() const { - return quickened_info_; - } - - // Returns the quickening info, or an empty array if it was not quickened. 
- // If already_quickened is true, then don't change anything but still return what the quicken - // data would have been. - std::vector<uint8_t> Compile(); - - const DexFile& GetDexFile() const; - - // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where - // a barrier is required. - void CompileReturnVoid(Instruction* inst, uint32_t dex_pc); - - // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In - // this case, returns the second NOP instruction pointer. Otherwise, returns - // the given "inst". - Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc); - - // Compiles a field access into a quick field access. - // The field index is replaced by an offset within an Object where we can read - // from / write to this field. Therefore, this does not involve any resolution - // at runtime. - // Since the field index is encoded with 16 bits, we can replace it only if the - // field offset can be encoded with 16 bits too. - void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_put); - - // Compiles a virtual method invocation into a quick virtual method invocation. - // The method index is replaced by the vtable index where the corresponding - // executable can be found. Therefore, this does not involve any resolution - // at runtime. - // Since the method index is encoded with 16 bits, we can replace it only if the - // vtable index can be encoded with 16 bits too. - void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_range); - - // Return the next index. - uint16_t NextIndex(); - - // Returns the dequickened index if an instruction is quickened, otherwise return index. - uint16_t GetIndexForInstruction(const Instruction* inst, uint32_t index); - - DexToDexCompiler* const compiler_; - CompilerDriver& driver_; - const DexCompilationUnit& unit_; - const CompilationLevel compilation_level_; - - // Filled by the compiler when quickening, in order to encode that information - // in the .oat file. The runtime will use that information to get to the original - // opcodes. - std::vector<QuickenedInfo> quickened_info_; - - // If the code item was already quickened previously. - const bool already_quickened_; - const QuickenInfoTable existing_quicken_info_; - uint32_t quicken_index_ = 0u; - - DISALLOW_COPY_AND_ASSIGN(CompilationState); - }; + struct CompilationState; + // Quicken state for a code item, may be referenced by multiple methods. struct QuickenState { std::vector<MethodReference> methods_; std::vector<uint8_t> quicken_data_; + bool optimized_return_void_ = false; + bool conflict_ = false; }; BitVector* GetOrAddBitVectorForDex(const DexFile* dex_file) REQUIRES(lock_); @@ -166,15 +102,14 @@ class DexToDexCompiler { mutable Mutex lock_; // Record what method references are going to get quickened. std::unordered_map<const DexFile*, BitVector> should_quicken_; - // Record what code items are already seen to detect when multiple methods have the same code - // item. - std::unordered_set<const DexFile::CodeItem*> seen_code_items_ GUARDED_BY(lock_); // Guarded by lock_ during writing, accessed without a lock during quickening. // This is safe because no thread is adding to the shared code items during the quickening phase. 
std::unordered_set<const DexFile::CodeItem*> shared_code_items_; - std::unordered_set<const DexFile::CodeItem*> blacklisted_code_items_ GUARDED_BY(lock_); + // Blacklisted code items are unquickened in UnquickenConflictingMethods. std::unordered_map<const DexFile::CodeItem*, QuickenState> shared_code_item_quicken_info_ GUARDED_BY(lock_); + // Number of added code items. + size_t num_code_items_ GUARDED_BY(lock_) = 0u; }; std::ostream& operator<<(std::ostream& os, const DexToDexCompiler::CompilationLevel& rhs); diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index c617f54f55..fb428b8d9a 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -289,7 +289,6 @@ CompilerDriver::CompilerDriver( compiled_method_storage_(swap_fd), profile_compilation_info_(profile_compilation_info), max_arena_alloc_(0), - compiling_dex_to_dex_(false), dex_to_dex_compiler_(this) { DCHECK(compiler_options_ != nullptr); @@ -450,99 +449,39 @@ static bool InstructionSetHasGenericJniStub(InstructionSet isa) { } } -static void CompileMethod(Thread* self, - CompilerDriver* driver, - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, - bool compilation_enabled, - Handle<mirror::DexCache> dex_cache) { +template <typename CompileFn> +static void CompileMethodHarness( + Thread* self, + CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, + bool compilation_enabled, + Handle<mirror::DexCache> dex_cache, + CompileFn compile_fn) { DCHECK(driver != nullptr); - CompiledMethod* compiled_method = nullptr; + CompiledMethod* compiled_method; uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0; MethodReference method_ref(&dex_file, method_idx); - if (driver->GetCompilingDexToDex()) { - optimizer::DexToDexCompiler* const compiler = &driver->GetDexToDexCompiler(); - // This is the second pass when we dex-to-dex compile previously marked methods. - // TODO: Refactor the compilation to avoid having to distinguish the two passes - // here. That should be done on a higher level. http://b/29089975 - if (compiler->ShouldCompileMethod(method_ref)) { - VerificationResults* results = driver->GetVerificationResults(); - DCHECK(results != nullptr); - const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref); - // Do not optimize if a VerifiedMethod is missing. SafeCast elision, - // for example, relies on it. - compiled_method = compiler->CompileMethod( - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - dex_file, - (verified_method != nullptr) - ? dex_to_dex_compilation_level - : optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile); - } - } else if ((access_flags & kAccNative) != 0) { - // Are we extracting only and have support for generic JNI down calls? 
- if (!driver->GetCompilerOptions().IsJniCompilationEnabled() && - InstructionSetHasGenericJniStub(driver->GetInstructionSet())) { - // Leaving this empty will trigger the generic JNI version - } else { - // Query any JNI optimization annotations such as @FastNative or @CriticalNative. - access_flags |= annotations::GetNativeMethodAnnotationAccessFlags( - dex_file, dex_file.GetClassDef(class_def_idx), method_idx); + compiled_method = compile_fn(self, + driver, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + dex_to_dex_compilation_level, + compilation_enabled, + dex_cache); - compiled_method = driver->GetCompiler()->JniCompile( - access_flags, method_idx, dex_file, dex_cache); - CHECK(compiled_method != nullptr); - } - } else if ((access_flags & kAccAbstract) != 0) { - // Abstract methods don't have code. - } else { - VerificationResults* results = driver->GetVerificationResults(); - DCHECK(results != nullptr); - const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref); - bool compile = compilation_enabled && - // Basic checks, e.g., not <clinit>. - results->IsCandidateForCompilation(method_ref, access_flags) && - // Did not fail to create VerifiedMethod metadcata. - verified_method != nullptr && - // Do not have failures that should punt to the interpreter. - !verified_method->HasRuntimeThrow() && - (verified_method->GetEncounteredVerificationFailures() & - (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 && - // Is eligable for compilation by methods-to-compile filter. - driver->IsMethodToCompile(method_ref) && - driver->ShouldCompileBasedOnProfile(method_ref); - - if (compile) { - // NOTE: if compiler declines to compile this method, it will return null. - compiled_method = driver->GetCompiler()->Compile(code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - dex_file, - dex_cache); - } - if (compiled_method == nullptr && - dex_to_dex_compilation_level != - optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile) { - DCHECK(!Runtime::Current()->UseJitCompilation()); - DCHECK(!driver->GetCompilingDexToDex()); - // TODO: add a command-line option to disable DEX-to-DEX compilation ? 
- driver->GetDexToDexCompiler().MarkForCompilation(self, method_ref, code_item); - } - } if (kTimeCompileMethod) { uint64_t duration_ns = NanoTime() - start_ns; if (duration_ns > MsToNs(driver->GetCompiler()->GetMaximumCompilationTimeBeforeWarning())) { @@ -573,6 +512,170 @@ static void CompileMethod(Thread* self, } } +static void CompileMethodDex2Dex( + Thread* self, + CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, + bool compilation_enabled, + Handle<mirror::DexCache> dex_cache) { + auto dex_2_dex_fn = [](Thread* self ATTRIBUTE_UNUSED, + CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, + bool compilation_enabled ATTRIBUTE_UNUSED, + Handle<mirror::DexCache> dex_cache ATTRIBUTE_UNUSED) -> CompiledMethod* { + DCHECK(driver != nullptr); + MethodReference method_ref(&dex_file, method_idx); + + optimizer::DexToDexCompiler* const compiler = &driver->GetDexToDexCompiler(); + + if (compiler->ShouldCompileMethod(method_ref)) { + VerificationResults* results = driver->GetVerificationResults(); + DCHECK(results != nullptr); + const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref); + // Do not optimize if a VerifiedMethod is missing. SafeCast elision, + // for example, relies on it. + return compiler->CompileMethod( + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + (verified_method != nullptr) + ? dex_to_dex_compilation_level + : optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile); + } + return nullptr; + }; + CompileMethodHarness(self, + driver, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + dex_to_dex_compilation_level, + compilation_enabled, + dex_cache, + dex_2_dex_fn); +} + +static void CompileMethodQuick( + Thread* self, + CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, + bool compilation_enabled, + Handle<mirror::DexCache> dex_cache) { + auto quick_fn = []( + Thread* self, + CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, + bool compilation_enabled, + Handle<mirror::DexCache> dex_cache) { + DCHECK(driver != nullptr); + CompiledMethod* compiled_method = nullptr; + MethodReference method_ref(&dex_file, method_idx); + + if ((access_flags & kAccNative) != 0) { + // Are we extracting only and have support for generic JNI down calls? 
+ if (!driver->GetCompilerOptions().IsJniCompilationEnabled() && + InstructionSetHasGenericJniStub(driver->GetInstructionSet())) { + // Leaving this empty will trigger the generic JNI version + } else { + // Query any JNI optimization annotations such as @FastNative or @CriticalNative. + access_flags |= annotations::GetNativeMethodAnnotationAccessFlags( + dex_file, dex_file.GetClassDef(class_def_idx), method_idx); + + compiled_method = driver->GetCompiler()->JniCompile( + access_flags, method_idx, dex_file, dex_cache); + CHECK(compiled_method != nullptr); + } + } else if ((access_flags & kAccAbstract) != 0) { + // Abstract methods don't have code. + } else { + VerificationResults* results = driver->GetVerificationResults(); + DCHECK(results != nullptr); + const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref); + bool compile = compilation_enabled && + // Basic checks, e.g., not <clinit>. + results->IsCandidateForCompilation(method_ref, access_flags) && + // Did not fail to create VerifiedMethod metadata. + verified_method != nullptr && + // Do not have failures that should punt to the interpreter. + !verified_method->HasRuntimeThrow() && + (verified_method->GetEncounteredVerificationFailures() & + (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 && + // Is eligable for compilation by methods-to-compile filter. + driver->IsMethodToCompile(method_ref) && + driver->ShouldCompileBasedOnProfile(method_ref); + + if (compile) { + // NOTE: if compiler declines to compile this method, it will return null. + compiled_method = driver->GetCompiler()->Compile(code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + dex_cache); + } + if (compiled_method == nullptr && + dex_to_dex_compilation_level != + optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile) { + DCHECK(!Runtime::Current()->UseJitCompilation()); + // TODO: add a command-line option to disable DEX-to-DEX compilation ? 
+ driver->GetDexToDexCompiler().MarkForCompilation(self, method_ref); + } + } + return compiled_method; + }; + CompileMethodHarness(self, + driver, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + dex_to_dex_compilation_level, + compilation_enabled, + dex_cache, + quick_fn); +} + void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings) { DCHECK(!Runtime::Current()->IsStarted()); jobject jclass_loader; @@ -614,37 +717,34 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t *dex_file, dex_file->GetClassDef(class_def_idx)); - DCHECK(!compiling_dex_to_dex_); - CompileMethod(self, - this, - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - *dex_file, - dex_to_dex_compilation_level, - true, - dex_cache); - - const size_t num_methods = dex_to_dex_compiler_.NumUniqueCodeItems(self); + CompileMethodQuick(self, + this, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + *dex_file, + dex_to_dex_compilation_level, + true, + dex_cache); + + const size_t num_methods = dex_to_dex_compiler_.NumCodeItemsToQuicken(self); if (num_methods != 0) { DCHECK_EQ(num_methods, 1u); - compiling_dex_to_dex_ = true; - CompileMethod(self, - this, - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - *dex_file, - dex_to_dex_compilation_level, - true, - dex_cache); - compiling_dex_to_dex_ = false; + CompileMethodDex2Dex(self, + this, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + *dex_file, + dex_to_dex_compilation_level, + true, + dex_cache); dex_to_dex_compiler_.ClearState(); } @@ -1444,13 +1544,19 @@ class ParallelCompilationManager { void ForAll(size_t begin, size_t end, CompilationVisitor* visitor, size_t work_units) REQUIRES(!*Locks::mutator_lock_) { + ForAllLambda(begin, end, [visitor](size_t index) { visitor->Visit(index); }, work_units); + } + + template <typename Fn> + void ForAllLambda(size_t begin, size_t end, Fn fn, size_t work_units) + REQUIRES(!*Locks::mutator_lock_) { Thread* self = Thread::Current(); self->AssertNoPendingException(); CHECK_GT(work_units, 0U); index_.StoreRelaxed(begin); for (size_t i = 0; i < work_units; ++i) { - thread_pool_->AddTask(self, new ForAllClosure(this, end, visitor)); + thread_pool_->AddTask(self, new ForAllClosureLambda<Fn>(this, end, fn)); } thread_pool_->StartWorkers(self); @@ -1470,32 +1576,33 @@ class ParallelCompilationManager { } private: - class ForAllClosure : public Task { + template <typename Fn> + class ForAllClosureLambda : public Task { public: - ForAllClosure(ParallelCompilationManager* manager, size_t end, CompilationVisitor* visitor) + ForAllClosureLambda(ParallelCompilationManager* manager, size_t end, Fn fn) : manager_(manager), end_(end), - visitor_(visitor) {} + fn_(fn) {} - virtual void Run(Thread* self) { + void Run(Thread* self) OVERRIDE { while (true) { const size_t index = manager_->NextIndex(); if (UNLIKELY(index >= end_)) { break; } - visitor_->Visit(index); + fn_(index); self->AssertNoPendingException(); } } - virtual void Finalize() { + void Finalize() OVERRIDE { delete this; } private: ParallelCompilationManager* const manager_; const size_t end_; - CompilationVisitor* const visitor_; + Fn fn_; }; AtomicInteger index_; @@ -2574,64 +2681,33 @@ void CompilerDriver::InitializeClasses(jobject class_loader, } } -void CompilerDriver::Compile(jobject class_loader, - const 
std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { - if (kDebugProfileGuidedCompilation) { - LOG(INFO) << "[ProfileGuidedCompilation] " << - ((profile_compilation_info_ == nullptr) - ? "null" - : profile_compilation_info_->DumpInfo(&dex_files)); - } - - dex_to_dex_compiler_.ClearState(); - compiling_dex_to_dex_ = false; - for (const DexFile* dex_file : dex_files) { - CHECK(dex_file != nullptr); - CompileDexFile(class_loader, - *dex_file, - dex_files, - parallel_thread_pool_.get(), - parallel_thread_count_, - timings); - const ArenaPool* const arena_pool = Runtime::Current()->GetArenaPool(); - const size_t arena_alloc = arena_pool->GetBytesAllocated(); - max_arena_alloc_ = std::max(arena_alloc, max_arena_alloc_); - Runtime::Current()->ReclaimArenaPoolMemory(); - } - - if (dex_to_dex_compiler_.NumUniqueCodeItems(Thread::Current()) > 0u) { - compiling_dex_to_dex_ = true; - // TODO: Not visit all of the dex files, its probably rare that only one would have quickened - // methods though. - for (const DexFile* dex_file : dex_files) { - CompileDexFile(class_loader, - *dex_file, - dex_files, - parallel_thread_pool_.get(), - parallel_thread_count_, - timings); - } - dex_to_dex_compiler_.ClearState(); - compiling_dex_to_dex_ = false; - } - - VLOG(compiler) << "Compile: " << GetMemoryUsageString(false); -} - -class CompileClassVisitor : public CompilationVisitor { - public: - explicit CompileClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {} +template <typename CompileFn> +static void CompileDexFile(CompilerDriver* driver, + jobject class_loader, + const DexFile& dex_file, + const std::vector<const DexFile*>& dex_files, + ThreadPool* thread_pool, + size_t thread_count, + TimingLogger* timings, + const char* timing_name, + CompileFn compile_fn) { + TimingLogger::ScopedTiming t(timing_name, timings); + ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), + class_loader, + driver, + &dex_file, + dex_files, + thread_pool); - virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE { + auto compile = [&context, &compile_fn](size_t class_def_index) { ScopedTrace trace(__FUNCTION__); - const DexFile& dex_file = *manager_->GetDexFile(); + const DexFile& dex_file = *context.GetDexFile(); const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - ClassLinker* class_linker = manager_->GetClassLinker(); - jobject jclass_loader = manager_->GetClassLoader(); + ClassLinker* class_linker = context.GetClassLinker(); + jobject jclass_loader = context.GetClassLoader(); ClassReference ref(&dex_file, class_def_index); // Skip compiling classes with generic verifier failures since they will still fail at runtime - if (manager_->GetCompiler()->verification_results_->IsClassRejected(ref)) { + if (context.GetCompiler()->GetVerificationResults()->IsClassRejected(ref)) { return; } // Use a scoped object access to perform to the quick SkipClass check. @@ -2662,7 +2738,7 @@ class CompileClassVisitor : public CompilationVisitor { // Go to native so that we don't block GC during compilation. ScopedThreadSuspension sts(soa.Self(), kNative); - CompilerDriver* const driver = manager_->GetCompiler(); + CompilerDriver* const driver = context.GetCompiler(); // Can we run DEX-to-DEX compiler on this class ? 
optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level = @@ -2685,38 +2761,71 @@ class CompileClassVisitor : public CompilationVisitor { continue; } previous_method_idx = method_idx; - CompileMethod(soa.Self(), - driver, - it.GetMethodCodeItem(), - it.GetMethodAccessFlags(), - it.GetMethodInvokeType(class_def), - class_def_index, - method_idx, - class_loader, - dex_file, - dex_to_dex_compilation_level, - compilation_enabled, - dex_cache); + compile_fn(soa.Self(), + driver, + it.GetMethodCodeItem(), + it.GetMethodAccessFlags(), + it.GetMethodInvokeType(class_def), + class_def_index, + method_idx, + class_loader, + dex_file, + dex_to_dex_compilation_level, + compilation_enabled, + dex_cache); it.Next(); } DCHECK(!it.HasNext()); + }; + context.ForAllLambda(0, dex_file.NumClassDefs(), compile, thread_count); +} + +void CompilerDriver::Compile(jobject class_loader, + const std::vector<const DexFile*>& dex_files, + TimingLogger* timings) { + if (kDebugProfileGuidedCompilation) { + LOG(INFO) << "[ProfileGuidedCompilation] " << + ((profile_compilation_info_ == nullptr) + ? "null" + : profile_compilation_info_->DumpInfo(&dex_files)); } - private: - const ParallelCompilationManager* const manager_; -}; + dex_to_dex_compiler_.ClearState(); + for (const DexFile* dex_file : dex_files) { + CHECK(dex_file != nullptr); + CompileDexFile(this, + class_loader, + *dex_file, + dex_files, + parallel_thread_pool_.get(), + parallel_thread_count_, + timings, + "Compile Dex File Quick", + CompileMethodQuick); + const ArenaPool* const arena_pool = Runtime::Current()->GetArenaPool(); + const size_t arena_alloc = arena_pool->GetBytesAllocated(); + max_arena_alloc_ = std::max(arena_alloc, max_arena_alloc_); + Runtime::Current()->ReclaimArenaPoolMemory(); + } -void CompilerDriver::CompileDexFile(jobject class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - ThreadPool* thread_pool, - size_t thread_count, - TimingLogger* timings) { - TimingLogger::ScopedTiming t("Compile Dex File", timings); - ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this, - &dex_file, dex_files, thread_pool); - CompileClassVisitor visitor(&context); - context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count); + if (dex_to_dex_compiler_.NumCodeItemsToQuicken(Thread::Current()) > 0u) { + // TODO: Not visit all of the dex files, its probably rare that only one would have quickened + // methods though. + for (const DexFile* dex_file : dex_files) { + CompileDexFile(this, + class_loader, + *dex_file, + dex_files, + parallel_thread_pool_.get(), + parallel_thread_count_, + timings, + "Compile Dex File Dex2Dex", + CompileMethodDex2Dex); + } + dex_to_dex_compiler_.ClearState(); + } + + VLOG(compiler) << "Compile: " << GetMemoryUsageString(false); } void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref, @@ -2731,6 +2840,12 @@ void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref, DCHECK(GetCompiledMethod(method_ref) != nullptr) << method_ref.PrettyMethod(); } +CompiledMethod* CompilerDriver::RemoveCompiledMethod(const MethodReference& method_ref) { + CompiledMethod* ret = nullptr; + CHECK(compiled_methods_.Remove(method_ref, &ret)); + return ret; +} + bool CompilerDriver::GetCompiledClass(const ClassReference& ref, ClassStatus* status) const { DCHECK(status != nullptr); // The table doesn't know if something wasn't inserted. 
For this case it will return @@ -2904,6 +3019,7 @@ void CompilerDriver::FreeThreadPools() { void CompilerDriver::SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files) { dex_files_for_oat_file_ = dex_files; compiled_classes_.AddDexFiles(dex_files); + dex_to_dex_compiler_.SetDexFiles(dex_files); } void CompilerDriver::SetClasspathDexFiles(const std::vector<const DexFile*>& dex_files) { diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index b51e0debbb..2b524a347d 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -165,6 +165,7 @@ class CompilerDriver { void AddCompiledMethod(const MethodReference& method_ref, CompiledMethod* const compiled_method, size_t non_relative_linker_patch_count); + CompiledMethod* RemoveCompiledMethod(const MethodReference& method_ref); void SetRequiresConstructorBarrier(Thread* self, const DexFile* dex_file, @@ -374,10 +375,6 @@ class CompilerDriver { || android::base::EndsWith(boot_image_filename, "core-optimizing.art"); } - bool GetCompilingDexToDex() const { - return compiling_dex_to_dex_; - } - optimizer::DexToDexCompiler& GetDexToDexCompiler() { return dex_to_dex_compiler_; } @@ -449,13 +446,6 @@ class CompilerDriver { void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files, TimingLogger* timings); - void CompileDexFile(jobject class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - ThreadPool* thread_pool, - size_t thread_count, - TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_); bool MayInlineInternal(const DexFile* inlined_from, const DexFile* inlined_into) const; @@ -541,7 +531,6 @@ class CompilerDriver { size_t max_arena_alloc_; // Compiler for dex to dex (quickening). - bool compiling_dex_to_dex_; optimizer::DexToDexCompiler dex_to_dex_compiler_; friend class CompileClassVisitor; diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 2c62095458..17b94d3bdf 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -76,7 +76,7 @@ extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t cou const ArrayRef<mirror::Class*> types_array(types, count); std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForClasses( kRuntimeISA, jit_compiler->GetCompilerDriver()->GetInstructionSetFeatures(), types_array); - MutexLock mu(Thread::Current(), g_jit_debug_mutex); + MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_); CreateJITCodeEntry(std::move(elf_file)); } } diff --git a/compiler/linker/elf_builder.h b/compiler/linker/elf_builder.h index 1c875189c5..3145497091 100644 --- a/compiler/linker/elf_builder.h +++ b/compiler/linker/elf_builder.h @@ -18,7 +18,6 @@ #define ART_COMPILER_LINKER_ELF_BUILDER_H_ #include <vector> -#include <unordered_map> #include "arch/instruction_set.h" #include "arch/mips/instruction_set_features_mips.h" @@ -310,24 +309,27 @@ class ElfBuilder FINAL { /* info */ 0, align, /* entsize */ 0), - current_offset_(0) { + current_offset_(0), + last_offset_(0) { } Elf_Word Write(const std::string& name) { if (current_offset_ == 0) { DCHECK(name.empty()); + } else if (name == last_name_) { + return last_offset_; // Very simple string de-duplication. } - auto res = written_names_.emplace(name, current_offset_); - if (res.second) { // Inserted. - this->WriteFully(name.c_str(), name.length() + 1); - current_offset_ += name.length() + 1; - } - return res.first->second; // Offset. 
+ last_name_ = name; + last_offset_ = current_offset_; + this->WriteFully(name.c_str(), name.length() + 1); + current_offset_ += name.length() + 1; + return last_offset_; } private: Elf_Word current_offset_; - std::unordered_map<std::string, Elf_Word> written_names_; // Dedup strings. + std::string last_name_; + Elf_Word last_offset_; }; // Writer of .dynsym and .symtab sections. diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 704a0d3b87..6d49b32dbc 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -2498,8 +2498,23 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { } if (!skip_overflow_check) { + // Using r4 instead of IP saves 2 bytes. UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); + vixl32::Register temp; + // TODO: Remove this check when R4 is made a callee-save register + // in ART compiled code (b/72801708). Currently we need to make + // sure r4 is not blocked, e.g. in special purpose + // TestCodeGeneratorARMVIXL; also asserting that r4 is available + // here. + if (!blocked_core_registers_[R4]) { + for (vixl32::Register reg : kParameterCoreRegistersVIXL) { + DCHECK(!reg.Is(r4)); + } + DCHECK(!kCoreCalleeSaves.Includes(r4)); + temp = r4; + } else { + temp = temps.Acquire(); + } __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm))); // The load must immediately precede RecordPcInfo. ExactAssemblyScope aas(GetVIXLAssembler(), diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 36c921986b..855da2b18f 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -395,7 +395,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); - if (!is_fatal_) { + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } @@ -3283,26 +3283,8 @@ static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { } void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = (throws_into_catch || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. 
- break; - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; - } - + LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); @@ -3331,18 +3313,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); MipsLabel done; - // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases - // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding - // read barriers is done for performance and code size reasons. - bool is_type_check_slow_path_fatal = false; - if (!kEmitCompilerReadBarrier) { - is_type_check_slow_path_fatal = - (type_check_kind == TypeCheckKind::kExactCheck || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck) && - !instruction->CanThrowIntoCatchBlock(); - } + bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( instruction, is_type_check_slow_path_fatal); @@ -7213,11 +7184,12 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = - kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier; + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; + } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: @@ -7265,13 +7237,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Classes must be equal for the instanceof to succeed. __ Xor(out, out, cls); __ Sltiu(out, out, 1); @@ -7279,13 +7253,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. 
MipsLabel loop; @@ -7295,7 +7271,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); __ Bne(out, cls, &loop); @@ -7304,13 +7280,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Walk over the class hierarchy to find a match. MipsLabel loop, success; __ Bind(&loop); @@ -7320,7 +7298,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ Bnez(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ B(&done); @@ -7330,13 +7308,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Do an exact check. MipsLabel success; __ Beq(out, cls, &success); @@ -7346,7 +7326,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { out_loc, component_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 6657582e2a..8a06061c6a 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -352,7 +352,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); - if (!is_fatal_) { + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } @@ -2836,26 +2836,8 @@ static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { } void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = (throws_into_catch || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. 
- break; - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; - } - + LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); @@ -2884,18 +2866,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); Mips64Label done; - // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases - // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding - // read barriers is done for performance and code size reasons. - bool is_type_check_slow_path_fatal = false; - if (!kEmitCompilerReadBarrier) { - is_type_check_slow_path_fatal = - (type_check_kind == TypeCheckKind::kExactCheck || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck) && - !instruction->CanThrowIntoCatchBlock(); - } + bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( instruction, is_type_check_slow_path_fatal); @@ -5528,11 +5499,12 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = - kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier; + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; + } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: @@ -5580,13 +5552,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Classes must be equal for the instanceof to succeed. __ Xor(out, out, cls); __ Sltiu(out, out, 1); @@ -5594,13 +5568,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. 
Mips64Label loop; @@ -5610,7 +5586,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqzc(out, &done); __ Bnec(out, cls, &loop); @@ -5619,13 +5595,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Walk over the class hierarchy to find a match. Mips64Label loop, success; __ Bind(&loop); @@ -5635,7 +5613,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ Bnezc(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ Bc(&done); @@ -5645,13 +5623,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Do an exact check. Mips64Label success; __ Beqc(out, cls, &success); @@ -5661,7 +5641,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, component_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqzc(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index cca1055ac8..9fa0f72e80 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -146,6 +146,65 @@ static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstructi } } +static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* throws) { + // Test for an if as last statement. + if (!block->EndsWithIf()) { + return false; + } + HIf* ifs = block->GetLastInstruction()->AsIf(); + // Find either: + // if obj == null + // throws + // else + // not_throws + // or: + // if obj != null + // not_throws + // else + // throws + HInstruction* cond = ifs->InputAt(0); + HBasicBlock* not_throws = nullptr; + if (throws == ifs->IfTrueSuccessor() && cond->IsEqual()) { + not_throws = ifs->IfFalseSuccessor(); + } else if (throws == ifs->IfFalseSuccessor() && cond->IsNotEqual()) { + not_throws = ifs->IfTrueSuccessor(); + } else { + return false; + } + DCHECK(cond->IsEqual() || cond->IsNotEqual()); + HInstruction* obj = cond->InputAt(1); + if (obj->IsNullConstant()) { + obj = cond->InputAt(0); + } else if (!cond->InputAt(0)->IsNullConstant()) { + return false; + } + // Scan all uses of obj and find null check under control dependence. 
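Editor's note: RemoveNonNullControlDependences() above pattern-matches an explicit null test whose throwing successor has just been rewired to the exit block, then replaces dominated null checks of the same object with a single non-null HBoundType. A rough C++ analogue of the shape it targets (illustrative only; the pass works on ART's HIR, not on C++ source, and Widget/ThrowNpe are invented for the example):

#include <stdexcept>

struct Widget {
  void DoSomething() {}
  void DoSomethingElse() {}
};

// Stand-in for a call that is known to always throw (the "throws" block).
[[noreturn]] void ThrowNpe() { throw std::runtime_error("NullPointerException"); }

void Example(Widget* obj) {
  if (obj == nullptr) {
    ThrowNpe();              // throwing successor; SimplifyAlwaysThrows() rewires it to Exit
  }
  // Every block below is dominated by the non-null branch, so the null checks that
  // would normally guard these accesses can be redirected to one HBoundType(obj)
  // marked as non-null, and then removed.
  obj->DoSomething();
  obj->DoSomethingElse();
}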
+ HBoundType* bound = nullptr; + const HUseList<HInstruction*>& uses = obj->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end;) { + HInstruction* user = it->GetUser(); + ++it; // increment before possibly replacing + if (user->IsNullCheck()) { + HBasicBlock* user_block = user->GetBlock(); + if (user_block != block && + user_block != throws && + block->Dominates(user_block)) { + if (bound == nullptr) { + ReferenceTypeInfo ti = obj->GetReferenceTypeInfo(); + bound = new (obj->GetBlock()->GetGraph()->GetAllocator()) HBoundType(obj); + bound->SetUpperBound(ti, /*can_be_null*/ false); + bound->SetReferenceTypeInfo(ti); + bound->SetCanBeNull(false); + not_throws->InsertInstructionBefore(bound, not_throws->GetFirstInstruction()); + } + user->ReplaceWith(bound); + user_block->RemoveInstruction(user); + } + } + } + return bound != nullptr; +} + // Simplify the pattern: // // B1 @@ -203,6 +262,11 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() { block->ReplaceSuccessor(succ, exit); rerun_dominance_and_loop_analysis = true; MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke); + // Perform a quick follow-up optimization on object != null control dependences + // that is much cheaper to perform now than in a later phase. + if (RemoveNonNullControlDependences(pred, block)) { + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck); + } } } } diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 88326d321b..8b4eae1780 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -25,6 +25,51 @@ #include <iostream> +/** + * The general algorithm of load-store elimination (LSE). + * Load-store analysis in the previous pass collects a list of heap locations + * and does alias analysis of those heap locations. + * LSE keeps track of a list of heap values corresponding to the heap + * locations. It visits basic blocks in reverse post order and for + * each basic block, visits instructions sequentially, and processes + * instructions as follows: + * - If the instruction is a load, and the heap location for that load has a + * valid heap value, the load can be eliminated. In order to maintain the + * validity of all heap locations during the optimization phase, the real + * elimination is delayed till the end of LSE. + * - If the instruction is a store, it updates the heap value for the heap + * location of the store with the store instruction. The real heap value + * can be fetched from the store instruction. Heap values are invalidated + * for heap locations that may alias with the store instruction's heap + * location. The store instruction can be eliminated unless the value stored + * is later needed e.g. by a load from the same/aliased heap location or + * the heap location persists at method return/deoptimization. + * The store instruction is also needed if it's not used to track the heap + * value anymore, e.g. when it fails to merge with the heap values from other + * predecessors. + * - A store that stores the same value as the heap value is eliminated. + * - The list of heap values is merged at basic block entry from the basic + * block's predecessors. The algorithm is single-pass, so loop side-effects are + * used as a best effort to decide if a heap location is stored inside the loop. + * - A special type of objects called singletons are instantiated in the method + * and have a single name, i.e. no aliases.
Singletons have exclusive heap + * locations since they have no aliases. Singletons are helpful in narrowing + * down the life span of a heap location such that they do not always + * need to participate in merging heap values. Allocation of a singleton + * can be eliminated if that singleton is not used and does not persist + * at method return/deoptimization. + * - For newly instantiated instances, their heap values are initialized to + * language-defined default values. + * - Some instructions such as invokes are treated as loading and invalidating + * all the heap values, depending on the instruction's side effects. + * - Finalizable objects are considered to persist at method + * return/deoptimization. + * - Currently this LSE algorithm doesn't handle SIMD graphs, e.g. with VecLoad + * and VecStore instructions. + * - Currently this LSE algorithm doesn't handle graphs with try-catch, due to + * the special block merging structure. + */ + namespace art { // An unknown heap value. Loads with such a value in the heap location cannot be eliminated. @@ -59,8 +104,7 @@ class LSEVisitor : public HGraphDelegateVisitor { removed_loads_(allocator_.Adapter(kArenaAllocLSE)), substitute_instructions_for_loads_(allocator_.Adapter(kArenaAllocLSE)), possibly_removed_stores_(allocator_.Adapter(kArenaAllocLSE)), - singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)), - singleton_new_arrays_(allocator_.Adapter(kArenaAllocLSE)) { + singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) { } void VisitBasicBlock(HBasicBlock* block) OVERRIDE { @@ -88,19 +132,26 @@ class LSEVisitor : public HGraphDelegateVisitor { return type_conversion; } - // Find an instruction's substitute if it should be removed. + // Find an instruction's substitute if it's a removed load. // Return the same instruction if it should not be removed. HInstruction* FindSubstitute(HInstruction* instruction) { + if (!IsLoad(instruction)) { + return instruction; + } size_t size = removed_loads_.size(); for (size_t i = 0; i < size; i++) { if (removed_loads_[i] == instruction) { - return substitute_instructions_for_loads_[i]; + HInstruction* substitute = substitute_instructions_for_loads_[i]; + // The substitute list is a flat hierarchy. + DCHECK_EQ(FindSubstitute(substitute), substitute); + return substitute; } } return instruction; } void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) { + DCHECK(IsLoad(load)); DCHECK_EQ(FindSubstitute(heap_value), heap_value) << "Unexpected heap_value that has a substitute " << heap_value->DebugName(); removed_loads_.push_back(load); @@ -207,28 +258,59 @@ class LSEVisitor : public HGraphDelegateVisitor { new_instance->GetBlock()->RemoveInstruction(new_instance); } } - for (HInstruction* new_array : singleton_new_arrays_) { - size_t removed = HConstructorFence::RemoveConstructorFences(new_array); - MaybeRecordStat(stats_, - MethodCompilationStat::kConstructorFenceRemovedLSE, - removed); + } - if (!new_array->HasNonEnvironmentUses()) { - new_array->RemoveEnvironmentUsers(); - new_array->GetBlock()->RemoveInstruction(new_array); - } + private: + static bool IsLoad(HInstruction* instruction) { + if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) { + return false; } + // Unresolved load is not treated as a load. + return instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsArrayGet(); } - private: - // If heap_values[index] is an instance field store, need to keep the store.
- // This is necessary if a heap value is killed due to merging, or loop side - // effects (which is essentially merging also), since a load later from the - // location won't be eliminated. + static bool IsStore(HInstruction* instruction) { + if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) { + return false; + } + // Unresolved store is not treated as a store. + return instruction->IsInstanceFieldSet() || + instruction->IsArraySet() || + instruction->IsStaticFieldSet(); + } + + // Returns the real heap value by finding its substitute or by "peeling" + // a store instruction. + HInstruction* GetRealHeapValue(HInstruction* heap_value) { + if (IsLoad(heap_value)) { + return FindSubstitute(heap_value); + } + if (!IsStore(heap_value)) { + return heap_value; + } + + // We keep track of store instructions as the heap values which might be + // eliminated if the stores are later found not necessary. The real stored + // value needs to be fetched from the store instruction. + if (heap_value->IsInstanceFieldSet()) { + heap_value = heap_value->AsInstanceFieldSet()->GetValue(); + } else if (heap_value->IsStaticFieldSet()) { + heap_value = heap_value->AsStaticFieldSet()->GetValue(); + } else { + DCHECK(heap_value->IsArraySet()); + heap_value = heap_value->AsArraySet()->GetValue(); + } + // heap_value may already be a removed load. + return FindSubstitute(heap_value); + } + + // If heap_value is a store, need to keep the store. + // This is necessary if a heap value is killed or replaced by another value, + // so that the store is no longer used to track heap value. void KeepIfIsStore(HInstruction* heap_value) { - if (heap_value == kDefaultHeapValue || - heap_value == kUnknownHeapValue || - !(heap_value->IsInstanceFieldSet() || heap_value->IsArraySet())) { + if (!IsStore(heap_value)) { return; } auto idx = std::find(possibly_removed_stores_.begin(), @@ -239,26 +321,41 @@ class LSEVisitor : public HGraphDelegateVisitor { } } + // If a heap location X may alias with heap location at `loc_index` + // and heap_values of that heap location X holds a store, keep that store. + // It's needed for a dependent load that's not eliminated since any store + // that may put value into the load's heap location needs to be kept. + void KeepStoresIfAliasedToLocation(ScopedArenaVector<HInstruction*>& heap_values, + size_t loc_index) { + for (size_t i = 0; i < heap_values.size(); i++) { + if ((i == loc_index) || heap_location_collector_.MayAlias(i, loc_index)) { + KeepIfIsStore(heap_values[i]); + } + } + } + void HandleLoopSideEffects(HBasicBlock* block) { DCHECK(block->IsLoopHeader()); int block_id = block->GetBlockId(); ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id]; + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + ScopedArenaVector<HInstruction*>& pre_header_heap_values = + heap_values_for_[pre_header->GetBlockId()]; - // Don't eliminate loads in irreducible loops. This is safe for singletons, because - // they are always used by the non-eliminated loop-phi. + // Don't eliminate loads in irreducible loops. + // Also keep the stores before the loop. 
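Editor's note: with stores now tracked as heap values and "peeled" on demand by GetRealHeapValue(), a load can be replaced by the value of the store that dominates it, and the store itself survives only if something still needs it. A small C++ analogue of the effect (illustrative only; LSE runs on HIR, and the struct and function names here are invented for the example):

struct Point { int x; };

int Example() {
  Point storage{0};
  Point* p = &storage;   // stand-in for a non-escaping "singleton" allocation
  p->x = 1;              // the store instruction itself becomes the tracked heap value
  int a = p->x;          // load eliminated: GetRealHeapValue() peels the store and yields 1
  p->x = a;              // stores what the location already holds, removed right away
  return a;              // if nothing else observes p->x, the first store is dropped too
}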
if (block->GetLoopInformation()->IsIrreducible()) { if (kIsDebugBuild) { for (size_t i = 0; i < heap_values.size(); i++) { DCHECK_EQ(heap_values[i], kUnknownHeapValue); } } + for (size_t i = 0; i < heap_values.size(); i++) { + KeepIfIsStore(pre_header_heap_values[i]); + } return; } - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - ScopedArenaVector<HInstruction*>& pre_header_heap_values = - heap_values_for_[pre_header->GetBlockId()]; - // Inherit the values from pre-header. for (size_t i = 0; i < heap_values.size(); i++) { heap_values[i] = pre_header_heap_values[i]; @@ -270,18 +367,17 @@ class LSEVisitor : public HGraphDelegateVisitor { for (size_t i = 0; i < heap_values.size(); i++) { HeapLocation* location = heap_location_collector_.GetHeapLocation(i); ReferenceInfo* ref_info = location->GetReferenceInfo(); - if (ref_info->IsSingletonAndRemovable() && - !location->IsValueKilledByLoopSideEffects()) { - // A removable singleton's field that's not stored into inside a loop is + if (ref_info->IsSingleton() && !location->IsValueKilledByLoopSideEffects()) { + // A singleton's field that's not stored into inside a loop is // invariant throughout the loop. Nothing to do. } else { - // heap value is killed by loop side effects (stored into directly, or - // due to aliasing). Or the heap value may be needed after method return - // or deoptimization. + // heap value is killed by loop side effects. KeepIfIsStore(pre_header_heap_values[i]); heap_values[i] = kUnknownHeapValue; } } + } else { + // The loop doesn't kill any value. } } @@ -300,45 +396,73 @@ class LSEVisitor : public HGraphDelegateVisitor { ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { HInstruction* merged_value = nullptr; + // If we can merge the store itself from the predecessors, we keep + // the store as the heap value as long as possible. In case we cannot + // merge the store, we try to merge the values of the stores. + HInstruction* merged_store_value = nullptr; // Whether merged_value is a result that's merged from all predecessors. bool from_all_predecessors = true; ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); + HInstruction* ref = ref_info->GetReference(); HInstruction* singleton_ref = nullptr; if (ref_info->IsSingleton()) { - // We do more analysis of liveness when merging heap values for such - // cases since stores into such references may potentially be eliminated. - singleton_ref = ref_info->GetReference(); + // We do more analysis based on singleton's liveness when merging + // heap values for such cases. + singleton_ref = ref; } for (HBasicBlock* predecessor : predecessors) { HInstruction* pred_value = heap_values_for_[predecessor->GetBlockId()][i]; + if (!IsStore(pred_value)) { + pred_value = FindSubstitute(pred_value); + } + DCHECK(pred_value != nullptr); + HInstruction* pred_store_value = GetRealHeapValue(pred_value); if ((singleton_ref != nullptr) && !singleton_ref->GetBlock()->Dominates(predecessor)) { - // singleton_ref is not live in this predecessor. Skip this predecessor since - // it does not really have the location. + // singleton_ref is not live in this predecessor. No need to merge + // since singleton_ref is not live at the beginning of this block. DCHECK_EQ(pred_value, kUnknownHeapValue); from_all_predecessors = false; - continue; + break; } if (merged_value == nullptr) { // First seen heap value. 
+ DCHECK(pred_value != nullptr); merged_value = pred_value; } else if (pred_value != merged_value) { // There are conflicting values. merged_value = kUnknownHeapValue; + // We may still be able to merge store values. + } + + // Conflicting stores may be storing the same value. We do another merge + // of real stored values. + if (merged_store_value == nullptr) { + // First seen store value. + DCHECK(pred_store_value != nullptr); + merged_store_value = pred_store_value; + } else if (pred_store_value != merged_store_value) { + // There are conflicting store values. + merged_store_value = kUnknownHeapValue; + // There must be conflicting stores also. + DCHECK_EQ(merged_value, kUnknownHeapValue); + // No need to merge anymore. break; } } - if (ref_info->IsSingleton()) { - if (ref_info->IsSingletonAndNonRemovable() || - (merged_value == kUnknownHeapValue && - !block->IsSingleReturnOrReturnVoidAllowingPhis())) { - // The heap value may be needed after method return or deoptimization, - // or there are conflicting heap values from different predecessors and - // this block is not a single return, - // keep the last store in each predecessor since future loads may not - // be eliminated. + if (merged_value == nullptr) { + DCHECK(!from_all_predecessors); + DCHECK(singleton_ref != nullptr); + } + if (from_all_predecessors) { + if (ref_info->IsSingletonAndRemovable() && + block->IsSingleReturnOrReturnVoidAllowingPhis()) { + // Values in the singleton are not needed anymore. + } else if (!IsStore(merged_value)) { + // We don't track merged value as a store anymore. We have to + // hold the stores in predecessors live here. for (HBasicBlock* predecessor : predecessors) { ScopedArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()]; @@ -346,18 +470,33 @@ class LSEVisitor : public HGraphDelegateVisitor { } } } else { - // Currenctly we don't eliminate stores to non-singletons. + DCHECK(singleton_ref != nullptr); + // singleton_ref is non-existing at the beginning of the block. There is + // no need to keep the stores. } - if ((merged_value == nullptr) || !from_all_predecessors) { + if (!from_all_predecessors) { DCHECK(singleton_ref != nullptr); DCHECK((singleton_ref->GetBlock() == block) || - !singleton_ref->GetBlock()->Dominates(block)); + !singleton_ref->GetBlock()->Dominates(block)) + << "method: " << GetGraph()->GetMethodName(); // singleton_ref is not defined before block or defined only in some of its // predecessors, so block doesn't really have the location at its entry. heap_values[i] = kUnknownHeapValue; - } else { + } else if (predecessors.size() == 1) { + // Inherit heap value from the single predecessor. + DCHECK_EQ(heap_values_for_[predecessors[0]->GetBlockId()][i], merged_value); heap_values[i] = merged_value; + } else { + DCHECK(merged_value == kUnknownHeapValue || + merged_value == kDefaultHeapValue || + merged_value->GetBlock()->Dominates(block)); + if (merged_value != kUnknownHeapValue) { + heap_values[i] = merged_value; + } else { + // Stores in different predecessors may be storing the same value. + heap_values[i] = merged_store_value; + } } } } @@ -423,23 +562,12 @@ class LSEVisitor : public HGraphDelegateVisitor { heap_values[idx] = constant; return; } - if (heap_value != kUnknownHeapValue) { - if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) { - HInstruction* store = heap_value; - // This load must be from a singleton since it's from the same - // field/element that a "removed" store puts the value. 
That store - // must be to a singleton's field/element. - DCHECK(ref_info->IsSingleton()); - // Get the real heap value of the store. - heap_value = heap_value->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2); - // heap_value may already have a substitute. - heap_value = FindSubstitute(heap_value); - } - } + heap_value = GetRealHeapValue(heap_value); if (heap_value == kUnknownHeapValue) { // Load isn't eliminated. Put the load as the value into the HeapLocation. // This acts like GVN but with better aliasing analysis. heap_values[idx] = instruction; + KeepStoresIfAliasedToLocation(heap_values, idx); } else { if (DataType::Kind(heap_value->GetType()) != DataType::Kind(instruction->GetType())) { // The only situation where the same heap location has different type is when @@ -452,6 +580,10 @@ class LSEVisitor : public HGraphDelegateVisitor { DCHECK(heap_value->IsArrayGet()) << heap_value->DebugName(); DCHECK(instruction->IsArrayGet()) << instruction->DebugName(); } + // Load isn't eliminated. Put the load as the value into the HeapLocation. + // This acts like GVN but with better aliasing analysis. + heap_values[idx] = instruction; + KeepStoresIfAliasedToLocation(heap_values, idx); return; } AddRemovedLoad(instruction, heap_value); @@ -460,12 +592,21 @@ class LSEVisitor : public HGraphDelegateVisitor { } bool Equal(HInstruction* heap_value, HInstruction* value) { + DCHECK(!IsStore(value)) << value->DebugName(); + if (heap_value == kUnknownHeapValue) { + // Don't compare kUnknownHeapValue with other values. + return false; + } if (heap_value == value) { return true; } if (heap_value == kDefaultHeapValue && GetDefaultValue(value->GetType()) == value) { return true; } + HInstruction* real_heap_value = GetRealHeapValue(heap_value); + if (real_heap_value != heap_value) { + return Equal(real_heap_value, value); + } return false; } @@ -476,6 +617,7 @@ class LSEVisitor : public HGraphDelegateVisitor { size_t vector_length, int16_t declaring_class_def_index, HInstruction* value) { + DCHECK(!IsStore(value)) << value->DebugName(); // value may already have a substitute. value = FindSubstitute(value); HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); @@ -486,59 +628,47 @@ class LSEVisitor : public HGraphDelegateVisitor { ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; HInstruction* heap_value = heap_values[idx]; - bool same_value = false; bool possibly_redundant = false; + if (Equal(heap_value, value)) { // Store into the heap location with the same value. - same_value = true; - } else if (index != nullptr && - heap_location_collector_.GetHeapLocation(idx)->HasAliasedLocations()) { - // For array element, don't eliminate stores if the location can be aliased - // (due to either ref or index aliasing). - } else if (ref_info->IsSingleton()) { - // Store into a field/element of a singleton. The value cannot be killed due to - // aliasing/invocation. It can be redundant since future loads can - // directly get the value set by this instruction. The value can still be killed due to - // merging or loop side effects. Stores whose values are killed due to merging/loop side - // effects later will be removed from possibly_removed_stores_ when that is detected. - // Stores whose values may be needed after method return or deoptimization - // are also removed from possibly_removed_stores_ when that is detected. - possibly_redundant = true; + // This store can be eliminated right away. 
+ instruction->GetBlock()->RemoveInstruction(instruction); + return; + } else { HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); - if (loop_info != nullptr) { - // instruction is a store in the loop so the loop must does write. + if (loop_info == nullptr) { + // Store is not in a loop. We try to precisely track the heap value by + // the store. + possibly_redundant = true; + } else if (!loop_info->IsIrreducible()) { + // instruction is a store in the loop so the loop must write. DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); - - if (loop_info->IsDefinedOutOfTheLoop(original_ref)) { - DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); - // Keep the store since its value may be needed at the loop header. - possibly_redundant = false; - } else { - // The singleton is created inside the loop. Value stored to it isn't needed at + if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(original_ref)) { + // original_ref is created inside the loop. Value stored to it isn't needed at // the loop header. This is true for outer loops also. + possibly_redundant = true; + } else { + // Keep the store since its value may be needed at the loop header. + } + } else { + // Keep the store inside irreducible loops. + } } } - if (same_value || possibly_redundant) { + if (possibly_redundant) { possibly_removed_stores_.push_back(instruction); } - if (!same_value) { - if (possibly_redundant) { - DCHECK(instruction->IsInstanceFieldSet() || instruction->IsArraySet()); - // Put the store as the heap value. If the value is loaded from heap - // by a load later, this store isn't really redundant. - heap_values[idx] = instruction; - } else { - heap_values[idx] = value; - } - } + // Put the store as the heap value. If the value is loaded or needed after + // return/deoptimization later, this store isn't really redundant. + heap_values[idx] = instruction; + // This store may kill values in other heap locations due to aliasing. for (size_t i = 0; i < heap_values.size(); i++) { if (i == idx) { continue; } - if (heap_values[i] == value) { + if (Equal(heap_values[i], value)) { // Same value should be kept even if aliasing happens. continue; } @@ -547,7 +677,9 @@ class LSEVisitor : public HGraphDelegateVisitor { continue; } if (heap_location_collector_.MayAlias(i, idx)) { - // Kill heap locations that may alias. + // Kill heap locations that may alias and as a result if the heap value + // is a store, the store needs to be kept. + KeepIfIsStore(heap_values[i]); heap_values[i] = kUnknownHeapValue; } } @@ -633,24 +765,35 @@ class LSEVisitor : public HGraphDelegateVisitor { const ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; for (HInstruction* heap_value : heap_values) { - // Filter out fake instructions before checking instruction kind below. - if (heap_value == kUnknownHeapValue || heap_value == kDefaultHeapValue) { - continue; - } // A store is kept as the heap value for possibly removed stores. - if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) { - // Check whether the reference for a store is used by an environment local of - // HDeoptimize. + // That value stored is generally observable after deoptimization, except + // for singletons that don't escape after deoptimization.
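Editor's note: the deoptimization handling above keeps a tracked store alive whenever the interpreter could observe the stored value after the deopt, and lets it die only for non-finalizable singletons that are invisible to the HDeoptimize environment. A C++ analogue of the two situations (illustrative only; Deoptimize() stands in for an HDeoptimize point and Box is invented for the example):

struct Box { int value; };

// Stand-in for an HDeoptimize whose environment may or may not keep `b` alive.
void Deoptimize() {}

int Example(bool speculation_holds) {
  Box* b = new Box{0};    // non-escaping, non-finalizable singleton in this method
  b->value = 42;          // candidate store
  if (!speculation_holds) {
    Deoptimize();         // if `b` is referenced by this environment, the interpreter can
                          // observe b->value afterwards, so the store must be kept
  }
  int result = b->value;  // otherwise the store only feeds this load and both can go
  delete b;
  return result;
}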
+ if (IsStore(heap_value)) { + if (heap_value->IsStaticFieldSet()) { + KeepIfIsStore(heap_value); + continue; + } HInstruction* reference = heap_value->InputAt(0); - DCHECK(heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton()); - for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) { - HEnvironment* user = use.GetUser(); - if (user->GetHolder() == instruction) { - // The singleton for the store is visible at this deoptimization - // point. Need to keep the store so that the heap value is - // seen by the interpreter. + if (heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton()) { + if (reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable()) { + // Finalizable objects always escape. KeepIfIsStore(heap_value); + continue; + } + // Check whether the reference for a store is used by an environment local of + // HDeoptimize. If not, the singleton is not observed after + // deoptimization. + for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) { + HEnvironment* user = use.GetUser(); + if (user->GetHolder() == instruction) { + // The singleton for the store is visible at this deoptimization + // point. Need to keep the store so that the heap value is + // seen by the interpreter. + KeepIfIsStore(heap_value); + } } + } else { + KeepIfIsStore(heap_value); } } } @@ -691,9 +834,12 @@ class LSEVisitor : public HGraphDelegateVisitor { // Singleton references cannot be seen by the callee. } else { if (side_effects.DoesAnyRead()) { + // Invocation may read the heap value. KeepIfIsStore(heap_values[i]); } if (side_effects.DoesAnyWrite()) { + // Keep the store since it's not used to track the heap value anymore. + KeepIfIsStore(heap_values[i]); heap_values[i] = kUnknownHeapValue; } } @@ -758,7 +904,7 @@ class LSEVisitor : public HGraphDelegateVisitor { return; } if (ref_info->IsSingletonAndRemovable()) { - singleton_new_arrays_.push_back(new_array); + singleton_new_instances_.push_back(new_array); } ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[new_array->GetBlock()->GetBlockId()]; @@ -791,7 +937,6 @@ class LSEVisitor : public HGraphDelegateVisitor { ScopedArenaVector<HInstruction*> possibly_removed_stores_; ScopedArenaVector<HInstruction*> singleton_new_instances_; - ScopedArenaVector<HInstruction*> singleton_new_arrays_; DISALLOW_COPY_AND_ASSIGN(LSEVisitor); }; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 47ef194574..b3f23a0dcd 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -1410,7 +1410,7 @@ void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method, debug::MethodDe GetCompilerDriver()->GetInstructionSetFeatures(), mini_debug_info, ArrayRef<const debug::MethodDebugInfo>(&info, 1)); - MutexLock mu(Thread::Current(), g_jit_debug_mutex); + MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_); JITCodeEntry* entry = CreateJITCodeEntry(elf_file); IncrementJITCodeEntryRefcount(entry, info.code_address); diff --git a/compiler/utils/atomic_dex_ref_map-inl.h b/compiler/utils/atomic_dex_ref_map-inl.h index 203e484fb7..7023b9a0e8 100644 --- a/compiler/utils/atomic_dex_ref_map-inl.h +++ b/compiler/utils/atomic_dex_ref_map-inl.h @@ -75,6 +75,18 @@ inline bool AtomicDexRefMap<DexFileReferenceType, Value>::Get(const DexFileRefer } template <typename DexFileReferenceType, typename Value> +inline bool AtomicDexRefMap<DexFileReferenceType, Value>::Remove(const
DexFileReferenceType& ref, + Value* out) { + ElementArray* const array = GetArray(ref.dex_file); + if (array == nullptr) { + return false; + } + *out = (*array)[ref.index].LoadRelaxed(); + (*array)[ref.index].StoreSequentiallyConsistent(nullptr); + return true; +} + +template <typename DexFileReferenceType, typename Value> inline void AtomicDexRefMap<DexFileReferenceType, Value>::AddDexFile(const DexFile* dex_file) { arrays_.Put(dex_file, std::move(ElementArray(NumberOfDexIndices(dex_file)))); } diff --git a/compiler/utils/atomic_dex_ref_map.h b/compiler/utils/atomic_dex_ref_map.h index 9ff506d6a4..3474e16b8d 100644 --- a/compiler/utils/atomic_dex_ref_map.h +++ b/compiler/utils/atomic_dex_ref_map.h @@ -45,6 +45,9 @@ class AtomicDexRefMap { // Retreive an item, returns false if the dex file is not added. bool Get(const DexFileReferenceType& ref, Value* out) const; + // Remove an item and return the existing value. Returns false if the dex file is not added. + bool Remove(const DexFileReferenceType& ref, Value* out); + // Dex files must be added before method references belonging to them can be used as keys. Not // thread safe. void AddDexFile(const DexFile* dex_file); |
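Editor's note: the new AtomicDexRefMap::Remove() reads the current value of a slot and then clears it. A hypothetical call site is sketched below; MethodReference and CompiledMethod* are assumptions about how the map is instantiated (only the Remove(ref, &out) contract and the need to AddDexFile() first come from this diff), and since the read-then-clear is two separate atomic operations, callers presumably provide their own synchronization against concurrent writers.

// Hypothetical usage sketch (assumes the ART headers that define these types).
void DiscardCompiledMethod(AtomicDexRefMap<MethodReference, CompiledMethod*>* map,
                           const MethodReference& ref) {
  // Precondition: AddDexFile(ref.dex_file) was called earlier, otherwise Remove()
  // returns false and leaves `method` untouched.
  CompiledMethod* method = nullptr;
  if (map->Remove(ref, &method) && method != nullptr) {
    // The slot now holds nullptr; the caller owns `method` and can release it exactly once.
  }
}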