Diffstat (limited to 'compiler')
77 files changed, 1637 insertions, 1383 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index a76539d71a..2e60e7d658 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -250,6 +250,12 @@ art_cc_library { shared_libs: [ "libart", ], + + pgo: { + instrumentation: true, + profile_file: "art/dex2oat.profdata", + benchmarks: ["dex2oat"], + } } art_cc_library { diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index e41371855d..0f69dbab94 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -159,4 +159,10 @@ CompiledMethod::~CompiledMethod() { storage->ReleaseMethodInfo(method_info_); } +void CompiledMethod::ReleaseVMapTable() { + CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage(); + storage->ReleaseVMapTable(vmap_table_); + vmap_table_ = nullptr; +} + } // namespace art diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index acdce260e5..4e8f3efe5a 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -168,6 +168,10 @@ class CompiledMethod FINAL : public CompiledCode { ArrayRef<const linker::LinkerPatch> GetPatches() const; + // The compiler sometimes unquickens shared code items. In that case, we need to clear the vmap + // table to avoid writing the quicken info to the vdex file. + void ReleaseVMapTable(); + private: static constexpr size_t kIsIntrinsicLsb = kNumberOfCompiledCodePackedBits; static constexpr size_t kIsIntrinsicSize = 1u; @@ -186,7 +190,7 @@ class CompiledMethod FINAL : public CompiledCode { // For quick code, method specific information that is not very dedupe friendly (method indices). const LengthPrefixedArray<uint8_t>* const method_info_; // For quick code, holds code infos which contain stack maps, inline information, and etc. - const LengthPrefixedArray<uint8_t>* const vmap_table_; + const LengthPrefixedArray<uint8_t>* vmap_table_; // For quick code, a FDE entry for the debug_frame section. const LengthPrefixedArray<uint8_t>* const cfi_info_; // For quick code, linker patches needed by the method. diff --git a/compiler/compiler.cc b/compiler/compiler.cc index 60977b6bd5..7c7ae71d77 100644 --- a/compiler/compiler.cc +++ b/compiler/compiler.cc @@ -47,7 +47,7 @@ bool Compiler::IsPathologicalCase(const DexFile::CodeItem& code_item, * Dalvik uses 16-bit uints for instruction and register counts. We'll limit to a quarter * of that, which also guarantees we cannot overflow our 16-bit internal Quick SSA name space. */ - CodeItemDataAccessor accessor(&dex_file, &code_item); + CodeItemDataAccessor accessor(dex_file, &code_item); if (accessor.InsnsSizeInCodeUnits() >= UINT16_MAX / 4) { LOG(INFO) << "Method exceeds compiler instruction limit: " << accessor.InsnsSizeInCodeUnits() diff --git a/compiler/debug/debug_info.h b/compiler/debug/debug_info.h new file mode 100644 index 0000000000..04c6991ea3 --- /dev/null +++ b/compiler/debug/debug_info.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_DEBUG_DEBUG_INFO_H_ +#define ART_COMPILER_DEBUG_DEBUG_INFO_H_ + +#include <map> + +#include "base/array_ref.h" +#include "method_debug_info.h" + +namespace art { +class DexFile; + +namespace debug { + +// References inputs for all debug information which can be written into the ELF file. +struct DebugInfo { + // Describes compiled code in the .text section. + ArrayRef<const MethodDebugInfo> compiled_methods; + + // Describes dex-files in the .dex section. + std::map<uint32_t, const DexFile*> dex_files; // Offset in section -> dex file content. + + bool Empty() const { + return compiled_methods.empty() && dex_files.empty(); + } +}; + +} // namespace debug +} // namespace art + +#endif // ART_COMPILER_DEBUG_DEBUG_INFO_H_ diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h index e2bea8e096..893cad288b 100644 --- a/compiler/debug/elf_debug_info_writer.h +++ b/compiler/debug/elf_debug_info_writer.h @@ -49,7 +49,7 @@ static void LocalInfoCallback(void* ctx, const DexFile::LocalInfo& entry) { static std::vector<const char*> GetParamNames(const MethodDebugInfo* mi) { std::vector<const char*> names; - CodeItemDebugInfoAccessor accessor(mi->dex_file, mi->code_item); + CodeItemDebugInfoAccessor accessor(*mi->dex_file, mi->code_item, mi->dex_method_index); if (accessor.HasCodeItem()) { DCHECK(mi->dex_file != nullptr); const uint8_t* stream = mi->dex_file->GetDebugInfoStream(accessor.DebugInfoOffset()); @@ -163,7 +163,7 @@ class ElfCompilationUnitWriter { for (auto mi : compilation_unit.methods) { DCHECK(mi->dex_file != nullptr); const DexFile* dex = mi->dex_file; - CodeItemDebugInfoAccessor accessor(dex, mi->code_item); + CodeItemDebugInfoAccessor accessor(*dex, mi->code_item, mi->dex_method_index); const DexFile::MethodId& dex_method = dex->GetMethodId(mi->dex_method_index); const DexFile::ProtoId& dex_proto = dex->GetMethodPrototype(dex_method); const DexFile::TypeList* dex_params = dex->GetProtoParameters(dex_proto); diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h index 9910e7a4ce..44504c1efb 100644 --- a/compiler/debug/elf_debug_line_writer.h +++ b/compiler/debug/elf_debug_line_writer.h @@ -159,7 +159,7 @@ class ElfDebugLineWriter { PositionInfos dex2line_map; DCHECK(mi->dex_file != nullptr); const DexFile* dex = mi->dex_file; - CodeItemDebugInfoAccessor accessor(dex, mi->code_item); + CodeItemDebugInfoAccessor accessor(*dex, mi->code_item, mi->dex_method_index); const uint32_t debug_info_offset = accessor.DebugInfoOffset(); if (!dex->DecodeDebugPositionInfo(debug_info_offset, PositionInfoCallback, &dex2line_map)) { continue; diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h index 34c2919a21..9ea9f01cd9 100644 --- a/compiler/debug/elf_debug_loc_writer.h +++ b/compiler/debug/elf_debug_loc_writer.h @@ -149,7 +149,7 @@ static std::vector<VariableLocation> GetVariableLocations( DCHECK_LT(stack_map_index, dex_register_maps.size()); DexRegisterMap dex_register_map = dex_register_maps[stack_map_index]; DCHECK(dex_register_map.IsValid()); - CodeItemDataAccessor accessor(method_info->dex_file, method_info->code_item); + CodeItemDataAccessor accessor(*method_info->dex_file, method_info->code_item); reg_lo = dex_register_map.GetDexRegisterLocation( vreg, accessor.RegistersSize(), code_info, encoding); if (is64bitValue) { diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc index a6267292bf..df5bb37358 100644 --- 
a/compiler/debug/elf_debug_writer.cc +++ b/compiler/debug/elf_debug_writer.cc @@ -38,18 +38,18 @@ namespace debug { template <typename ElfTypes> void WriteDebugInfo(linker::ElfBuilder<ElfTypes>* builder, - const ArrayRef<const MethodDebugInfo>& method_infos, + const DebugInfo& debug_info, dwarf::CFIFormat cfi_format, bool write_oat_patches) { // Write .strtab and .symtab. - WriteDebugSymbols(builder, method_infos, true /* with_signature */); + WriteDebugSymbols(builder, false /* mini-debug-info */, debug_info); // Write .debug_frame. - WriteCFISection(builder, method_infos, cfi_format, write_oat_patches); + WriteCFISection(builder, debug_info.compiled_methods, cfi_format, write_oat_patches); // Group the methods into compilation units based on class. std::unordered_map<const DexFile::ClassDef*, ElfCompilationUnit> class_to_compilation_unit; - for (const MethodDebugInfo& mi : method_infos) { + for (const MethodDebugInfo& mi : debug_info.compiled_methods) { if (mi.dex_file != nullptr) { auto& dex_class_def = mi.dex_file->GetClassDef(mi.class_def_index); ElfCompilationUnit& cu = class_to_compilation_unit[&dex_class_def]; @@ -108,21 +108,27 @@ void WriteDebugInfo(linker::ElfBuilder<ElfTypes>* builder, std::vector<uint8_t> MakeMiniDebugInfo( InstructionSet isa, const InstructionSetFeatures* features, - uint64_t text_address, - size_t text_size, - const ArrayRef<const MethodDebugInfo>& method_infos) { + uint64_t text_section_address, + size_t text_section_size, + uint64_t dex_section_address, + size_t dex_section_size, + const DebugInfo& debug_info) { if (Is64BitInstructionSet(isa)) { return MakeMiniDebugInfoInternal<ElfTypes64>(isa, features, - text_address, - text_size, - method_infos); + text_section_address, + text_section_size, + dex_section_address, + dex_section_size, + debug_info); } else { return MakeMiniDebugInfoInternal<ElfTypes32>(isa, features, - text_address, - text_size, - method_infos); + text_section_address, + text_section_size, + dex_section_address, + dex_section_size, + debug_info); } } @@ -131,9 +137,17 @@ static std::vector<uint8_t> MakeElfFileForJITInternal( InstructionSet isa, const InstructionSetFeatures* features, bool mini_debug_info, - const MethodDebugInfo& mi) { - CHECK_EQ(mi.is_code_address_text_relative, false); - ArrayRef<const MethodDebugInfo> method_infos(&mi, 1); + ArrayRef<const MethodDebugInfo> method_infos) { + CHECK_GT(method_infos.size(), 0u); + uint64_t min_address = std::numeric_limits<uint64_t>::max(); + uint64_t max_address = 0; + for (const MethodDebugInfo& mi : method_infos) { + CHECK_EQ(mi.is_code_address_text_relative, false); + min_address = std::min(min_address, mi.code_address); + max_address = std::max(max_address, mi.code_address + mi.code_size); + } + DebugInfo debug_info{}; + debug_info.compiled_methods = method_infos; std::vector<uint8_t> buffer; buffer.reserve(KB); linker::VectorOutputStream out("Debug ELF file", &buffer); @@ -144,14 +158,16 @@ static std::vector<uint8_t> MakeElfFileForJITInternal( if (mini_debug_info) { std::vector<uint8_t> mdi = MakeMiniDebugInfo(isa, features, - mi.code_address, - mi.code_size, - method_infos); + min_address, + max_address - min_address, + /* dex_section_address */ 0, + /* dex_section_size */ 0, + debug_info); builder->WriteSection(".gnu_debugdata", &mdi); } else { - builder->GetText()->AllocateVirtualMemory(mi.code_address, mi.code_size); + builder->GetText()->AllocateVirtualMemory(min_address, max_address - min_address); WriteDebugInfo(builder.get(), - method_infos, + debug_info, 
dwarf::DW_DEBUG_FRAME_FORMAT, false /* write_oat_patches */); } @@ -164,11 +180,11 @@ std::vector<uint8_t> MakeElfFileForJIT( InstructionSet isa, const InstructionSetFeatures* features, bool mini_debug_info, - const MethodDebugInfo& method_info) { + ArrayRef<const MethodDebugInfo> method_infos) { if (Is64BitInstructionSet(isa)) { - return MakeElfFileForJITInternal<ElfTypes64>(isa, features, mini_debug_info, method_info); + return MakeElfFileForJITInternal<ElfTypes64>(isa, features, mini_debug_info, method_infos); } else { - return MakeElfFileForJITInternal<ElfTypes32>(isa, features, mini_debug_info, method_info); + return MakeElfFileForJITInternal<ElfTypes32>(isa, features, mini_debug_info, method_infos); } } @@ -209,12 +225,12 @@ std::vector<uint8_t> WriteDebugElfFileForClasses(InstructionSet isa, // Explicit instantiations template void WriteDebugInfo<ElfTypes32>( linker::ElfBuilder<ElfTypes32>* builder, - const ArrayRef<const MethodDebugInfo>& method_infos, + const DebugInfo& debug_info, dwarf::CFIFormat cfi_format, bool write_oat_patches); template void WriteDebugInfo<ElfTypes64>( linker::ElfBuilder<ElfTypes64>* builder, - const ArrayRef<const MethodDebugInfo>& method_infos, + const DebugInfo& debug_info, dwarf::CFIFormat cfi_format, bool write_oat_patches); diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h index a47bf076b9..e442e0016c 100644 --- a/compiler/debug/elf_debug_writer.h +++ b/compiler/debug/elf_debug_writer.h @@ -23,6 +23,7 @@ #include "base/macros.h" #include "base/mutex.h" #include "debug/dwarf/dwarf_constants.h" +#include "debug/debug_info.h" #include "linker/elf_builder.h" namespace art { @@ -36,7 +37,7 @@ struct MethodDebugInfo; template <typename ElfTypes> void WriteDebugInfo( linker::ElfBuilder<ElfTypes>* builder, - const ArrayRef<const MethodDebugInfo>& method_infos, + const DebugInfo& debug_info, dwarf::CFIFormat cfi_format, bool write_oat_patches); @@ -45,13 +46,15 @@ std::vector<uint8_t> MakeMiniDebugInfo( const InstructionSetFeatures* features, uint64_t text_section_address, size_t text_section_size, - const ArrayRef<const MethodDebugInfo>& method_infos); + uint64_t dex_section_address, + size_t dex_section_size, + const DebugInfo& debug_info); std::vector<uint8_t> MakeElfFileForJIT( InstructionSet isa, const InstructionSetFeatures* features, bool mini_debug_info, - const MethodDebugInfo& method_info); + ArrayRef<const MethodDebugInfo> method_infos); std::vector<uint8_t> WriteDebugElfFileForClasses( InstructionSet isa, diff --git a/compiler/debug/elf_gnu_debugdata_writer.h b/compiler/debug/elf_gnu_debugdata_writer.h index 78b8e2780c..a88c5cb213 100644 --- a/compiler/debug/elf_gnu_debugdata_writer.h +++ b/compiler/debug/elf_gnu_debugdata_writer.h @@ -82,18 +82,23 @@ static std::vector<uint8_t> MakeMiniDebugInfoInternal( const InstructionSetFeatures* features, typename ElfTypes::Addr text_section_address, size_t text_section_size, - const ArrayRef<const MethodDebugInfo>& method_infos) { + typename ElfTypes::Addr dex_section_address, + size_t dex_section_size, + const DebugInfo& debug_info) { std::vector<uint8_t> buffer; buffer.reserve(KB); linker::VectorOutputStream out("Mini-debug-info ELF file", &buffer); std::unique_ptr<linker::ElfBuilder<ElfTypes>> builder( new linker::ElfBuilder<ElfTypes>(isa, features, &out)); builder->Start(false /* write_program_headers */); - // Mirror .text as NOBITS section since the added symbols will reference it. + // Mirror ELF sections as NOBITS since the added symbols will reference them. 
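The JIT path above now packs a whole batch of compiled methods into one debug ELF file instead of one file per method, so it first computes the address range the batch spans and then mirrors the .text section over that range. A minimal standalone sketch of the range computation, using a hypothetical MethodRange stand-in for debug::MethodDebugInfo:

    #include <algorithm>
    #include <cstdint>
    #include <limits>
    #include <utility>
    #include <vector>

    struct MethodRange {      // stand-in for debug::MethodDebugInfo
      uint64_t code_address;  // absolute address of the compiled code
      uint64_t code_size;     // size of the compiled code in bytes
    };

    // Returns (base, size) covering every method, mirroring the min/max loop in
    // MakeElfFileForJITInternal above. Assumes a non-empty batch, as the
    // CHECK_GT in the real code enforces.
    std::pair<uint64_t, uint64_t> ComputeTextRange(const std::vector<MethodRange>& methods) {
      uint64_t min_address = std::numeric_limits<uint64_t>::max();
      uint64_t max_address = 0;
      for (const MethodRange& m : methods) {
        min_address = std::min(min_address, m.code_address);
        max_address = std::max(max_address, m.code_address + m.code_size);
      }
      return {min_address, max_address - min_address};
    }

Mirroring the sections as NOBITS (SHT_NOBITS) means they occupy no space in the file, which keeps the embedded .gnu_debugdata payload small while still giving the added symbols a valid section to reference.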
builder->GetText()->AllocateVirtualMemory(text_section_address, text_section_size); - WriteDebugSymbols(builder.get(), method_infos, false /* with_signature */); + if (dex_section_size != 0) { + builder->GetDex()->AllocateVirtualMemory(dex_section_address, dex_section_size); + } + WriteDebugSymbols(builder.get(), true /* mini-debug-info */, debug_info); WriteCFISection(builder.get(), - method_infos, + debug_info.compiled_methods, dwarf::DW_DEBUG_FRAME_FORMAT, false /* write_oat_patches */); builder->End(); diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h index 57e010f232..9c9e8b35b8 100644 --- a/compiler/debug/elf_symtab_writer.h +++ b/compiler/debug/elf_symtab_writer.h @@ -17,9 +17,13 @@ #ifndef ART_COMPILER_DEBUG_ELF_SYMTAB_WRITER_H_ #define ART_COMPILER_DEBUG_ELF_SYMTAB_WRITER_H_ +#include <map> #include <unordered_set> +#include "debug/debug_info.h" #include "debug/method_debug_info.h" +#include "dex/dex_file-inl.h" +#include "dex/code_item_accessors.h" #include "linker/elf_builder.h" #include "utils.h" @@ -35,22 +39,26 @@ namespace debug { // one symbol which marks the whole .text section as code. constexpr bool kGenerateSingleArmMappingSymbol = true; +// Magic name for .symtab symbols which enumerate dex files used +// by this ELF file (currently mmapped inside the .dex section). +constexpr const char* kDexFileSymbolName = "$dexfile"; + template <typename ElfTypes> static void WriteDebugSymbols(linker::ElfBuilder<ElfTypes>* builder, - const ArrayRef<const MethodDebugInfo>& method_infos, - bool with_signature) { + bool mini_debug_info, + const DebugInfo& debug_info) { uint64_t mapping_symbol_address = std::numeric_limits<uint64_t>::max(); auto* strtab = builder->GetStrTab(); auto* symtab = builder->GetSymTab(); - if (method_infos.empty()) { + if (debug_info.Empty()) { return; } // Find all addresses which contain deduped methods. // The first instance of method is not marked deduped_, but the rest is. std::unordered_set<uint64_t> deduped_addresses; - for (const MethodDebugInfo& info : method_infos) { + for (const MethodDebugInfo& info : debug_info.compiled_methods) { if (info.deduped) { deduped_addresses.insert(info.code_address); } @@ -58,25 +66,21 @@ static void WriteDebugSymbols(linker::ElfBuilder<ElfTypes>* builder, strtab->Start(); strtab->Write(""); // strtab should start with empty string. - std::string last_name; - size_t last_name_offset = 0; - for (const MethodDebugInfo& info : method_infos) { + // Add symbols for compiled methods. + for (const MethodDebugInfo& info : debug_info.compiled_methods) { if (info.deduped) { continue; // Add symbol only for the first instance. } size_t name_offset; - if (!info.trampoline_name.empty()) { - name_offset = strtab->Write(info.trampoline_name); + if (!info.custom_name.empty()) { + name_offset = strtab->Write(info.custom_name); } else { DCHECK(info.dex_file != nullptr); - std::string name = info.dex_file->PrettyMethod(info.dex_method_index, with_signature); + std::string name = info.dex_file->PrettyMethod(info.dex_method_index, !mini_debug_info); if (deduped_addresses.find(info.code_address) != deduped_addresses.end()) { name += " [DEDUPED]"; } - // If we write method names without signature, we might see the same name multiple times. - name_offset = (name == last_name ?
last_name_offset : strtab->Write(name)); - last_name = std::move(name); - last_name_offset = name_offset; + name_offset = strtab->Write(name); } const auto* text = builder->GetText(); @@ -97,13 +101,47 @@ static void WriteDebugSymbols(linker::ElfBuilder<ElfTypes>* builder, } } } + // Add symbols for interpreted methods (with address range of the method's bytecode). + if (!debug_info.dex_files.empty() && builder->GetDex()->Exists()) { + auto dex = builder->GetDex(); + for (auto it : debug_info.dex_files) { + uint64_t dex_address = dex->GetAddress() + it.first /* offset within the section */; + const DexFile* dex_file = it.second; + typename ElfTypes::Word dex_name = strtab->Write(kDexFileSymbolName); + symtab->Add(dex_name, dex, dex_address, dex_file->Size(), STB_GLOBAL, STT_FUNC); + if (mini_debug_info) { + continue; // Don't add interpreter method names to mini-debug-info for now. + } + for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) { + const DexFile::ClassDef& class_def = dex_file->GetClassDef(i); + const uint8_t* class_data = dex_file->GetClassData(class_def); + if (class_data == nullptr) { + continue; + } + for (ClassDataItemIterator item(*dex_file, class_data); item.HasNext(); item.Next()) { + if (!item.IsAtMethod()) { + continue; + } + const DexFile::CodeItem* code_item = item.GetMethodCodeItem(); + if (code_item == nullptr) { + continue; + } + CodeItemInstructionAccessor code(*dex_file, code_item); + DCHECK(code.HasCodeItem()); + std::string name = dex_file->PrettyMethod(item.GetMemberIndex(), !mini_debug_info); + size_t name_offset = strtab->Write(name); + uint64_t offset = reinterpret_cast<const uint8_t*>(code.Insns()) - dex_file->Begin(); + uint64_t address = dex_address + offset; + size_t size = code.InsnsSizeInCodeUnits() * sizeof(uint16_t); + symtab->Add(name_offset, dex, address, size, STB_GLOBAL, STT_FUNC); + } + } + } + } strtab->End(); // Symbols are buffered and written after names (because they are smaller). - // We could also do two passes in this function to avoid the buffering. - symtab->Start(); - symtab->Write(); - symtab->End(); + symtab->WriteCachedSection(); } } // namespace debug diff --git a/compiler/debug/method_debug_info.h b/compiler/debug/method_debug_info.h index 43c8de26aa..d0b03ec441 100644 --- a/compiler/debug/method_debug_info.h +++ b/compiler/debug/method_debug_info.h @@ -27,7 +27,7 @@ namespace art { namespace debug { struct MethodDebugInfo { - std::string trampoline_name; + std::string custom_name; const DexFile* dex_file; // Native methods (trampolines) do not reference dex file. size_t class_def_index; uint32_t dex_method_index; diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index 52cb217980..28c7fe2c34 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -28,6 +28,7 @@ #include "compiled_method.h" #include "dex/dex_file-inl.h" #include "dex/dex_instruction-inl.h" +#include "dex_to_dex_decompiler.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "mirror/dex_cache.h" @@ -44,81 +45,106 @@ const bool kEnableQuickening = true; // Control check-cast elision. 
const bool kEnableCheckCastEllision = true; -struct QuickenedInfo { - QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {} +DexToDexCompiler::DexToDexCompiler(CompilerDriver* driver) + : driver_(driver), + lock_("Quicken lock", kDexToDexCompilerLock) { + DCHECK(driver != nullptr); +} - uint32_t dex_pc; - uint16_t dex_member_index; -}; +void DexToDexCompiler::ClearState() { + MutexLock lock(Thread::Current(), lock_); + active_dex_file_ = nullptr; + active_bit_vector_ = nullptr; + seen_code_items_.clear(); + should_quicken_.clear(); + shared_code_items_.clear(); + blacklisted_code_items_.clear(); + shared_code_item_quicken_info_.clear(); +} -class DexCompiler { - public: - DexCompiler(art::CompilerDriver& compiler, - const DexCompilationUnit& unit, - DexToDexCompilationLevel dex_to_dex_compilation_level) - : driver_(compiler), - unit_(unit), - dex_to_dex_compilation_level_(dex_to_dex_compilation_level) {} +size_t DexToDexCompiler::NumUniqueCodeItems(Thread* self) const { + MutexLock lock(self, lock_); + return seen_code_items_.size(); +} - ~DexCompiler() {} +BitVector* DexToDexCompiler::GetOrAddBitVectorForDex(const DexFile* dex_file) { + if (active_dex_file_ != dex_file) { + active_dex_file_ = dex_file; + auto inserted = should_quicken_.emplace(dex_file, + BitVector(dex_file->NumMethodIds(), + /*expandable*/ false, + Allocator::GetMallocAllocator())); + active_bit_vector_ = &inserted.first->second; + } + return active_bit_vector_; +} - void Compile(); +void DexToDexCompiler::MarkForCompilation(Thread* self, + const MethodReference& method_ref, + const DexFile::CodeItem* code_item) { + MutexLock lock(self, lock_); + BitVector* const bitmap = GetOrAddBitVectorForDex(method_ref.dex_file); + DCHECK(bitmap != nullptr); + DCHECK(!bitmap->IsBitSet(method_ref.index)); + bitmap->SetBit(method_ref.index); + // Detect the shared code items. + if (!seen_code_items_.insert(code_item).second) { + shared_code_items_.insert(code_item); + } +} - const std::vector<QuickenedInfo>& GetQuickenedInfo() const { - return quickened_info_; +DexToDexCompiler::CompilationState::CompilationState(DexToDexCompiler* compiler, + const DexCompilationUnit& unit, + const CompilationLevel compilation_level, + const std::vector<uint8_t>* quicken_data) + : compiler_(compiler), + driver_(*compiler->GetDriver()), + unit_(unit), + compilation_level_(compilation_level), + already_quickened_(quicken_data != nullptr), + existing_quicken_info_(already_quickened_ + ? ArrayRef<const uint8_t>(*quicken_data) : ArrayRef<const uint8_t>()) {} + +uint16_t DexToDexCompiler::CompilationState::NextIndex() { + DCHECK(already_quickened_); + if (kIsDebugBuild && quicken_index_ >= existing_quicken_info_.NumIndices()) { + for (const DexInstructionPcPair& pair : unit_.GetCodeItemAccessor()) { + LOG(ERROR) << pair->DumpString(nullptr); + } + LOG(FATAL) << "Mismatched number of quicken slots."; } + const uint16_t ret = existing_quicken_info_.GetData(quicken_index_); + quicken_index_++; + return ret; +} - private: - const DexFile& GetDexFile() const { - return *unit_.GetDexFile(); +uint16_t DexToDexCompiler::CompilationState::GetIndexForInstruction(const Instruction* inst, + uint32_t index) { + if (UNLIKELY(already_quickened_)) { + return inst->IsQuickened() ? 
NextIndex() : index; } + DCHECK(!inst->IsQuickened()); + return index; +} + +bool DexToDexCompiler::ShouldCompileMethod(const MethodReference& ref) { + // TODO: It's probably safe to avoid the lock here if the active_dex_file_ matches since we + // only call ShouldCompileMethod on one dex at a time. + MutexLock lock(Thread::Current(), lock_); + return GetOrAddBitVectorForDex(ref.dex_file)->IsBitSet(ref.index); +} - // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where - // a barrier is required. - void CompileReturnVoid(Instruction* inst, uint32_t dex_pc); - - // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In - // this case, returns the second NOP instruction pointer. Otherwise, returns - // the given "inst". - Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc); - - // Compiles a field access into a quick field access. - // The field index is replaced by an offset within an Object where we can read - // from / write to this field. Therefore, this does not involve any resolution - // at runtime. - // Since the field index is encoded with 16 bits, we can replace it only if the - // field offset can be encoded with 16 bits too. - void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_put); - - // Compiles a virtual method invocation into a quick virtual method invocation. - // The method index is replaced by the vtable index where the corresponding - // Executable can be found. Therefore, this does not involve any resolution - // at runtime. - // Since the method index is encoded with 16 bits, we can replace it only if the - // vtable index can be encoded with 16 bits too. - void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_range); - - CompilerDriver& driver_; - const DexCompilationUnit& unit_; - const DexToDexCompilationLevel dex_to_dex_compilation_level_; - - // Filled by the compiler when quickening, in order to encode that information - // in the .oat file. The runtime will use that information to get to the original - // opcodes.
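The NextIndex()/GetIndexForInstruction() pair above is what makes the compiler re-runnable over already-quickened code: on a second pass the original field or method index is no longer encoded in the instruction (it was overwritten by a field offset or vtable index), so it is read back from the quicken data recorded by the first pass. A minimal sketch of that cursor, assuming the recorded data is a flat list of 16-bit indices in instruction order:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Hypothetical reader over previously recorded quicken indices.
    class QuickenCursor {
     public:
      explicit QuickenCursor(const std::vector<uint16_t>& indices) : indices_(indices) {}

      // Returns the original dex index of an instruction. Quickened instructions
      // consume the next recorded slot; plain instructions still carry the index
      // in their own operands.
      uint16_t GetIndex(bool is_quickened, uint16_t operand_index) {
        if (!is_quickened) {
          return operand_index;
        }
        assert(pos_ < indices_.size() && "mismatched number of quicken slots");
        return indices_[pos_++];
      }

     private:
      const std::vector<uint16_t>& indices_;
      size_t pos_ = 0;
    };

The LOG(FATAL) in NextIndex() corresponds to the assert here: running out of recorded slots means the instruction stream and the quicken table have diverged.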
- std::vector<QuickenedInfo> quickened_info_; - - DISALLOW_COPY_AND_ASSIGN(DexCompiler); -}; - -void DexCompiler::Compile() { - DCHECK_EQ(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kOptimize); - IterationRange<DexInstructionIterator> instructions(unit_.GetCodeItemAccessor().begin(), - unit_.GetCodeItemAccessor().end()); +std::vector<uint8_t> DexToDexCompiler::CompilationState::Compile() { + DCHECK_EQ(compilation_level_, CompilationLevel::kOptimize); + const CodeItemDataAccessor& instructions = unit_.GetCodeItemAccessor(); for (DexInstructionIterator it = instructions.begin(); it != instructions.end(); ++it) { const uint32_t dex_pc = it.DexPc(); Instruction* inst = const_cast<Instruction*>(&it.Inst()); + + if (!already_quickened_) { + DCHECK(!inst->IsQuickened()); + } + switch (inst->Opcode()) { case Instruction::RETURN_VOID: CompileReturnVoid(inst, dex_pc); @@ -134,84 +160,147 @@ void DexCompiler::Compile() { break; case Instruction::IGET: + case Instruction::IGET_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_QUICK, false); break; case Instruction::IGET_WIDE: + case Instruction::IGET_WIDE_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_WIDE_QUICK, false); break; case Instruction::IGET_OBJECT: + case Instruction::IGET_OBJECT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_OBJECT_QUICK, false); break; case Instruction::IGET_BOOLEAN: + case Instruction::IGET_BOOLEAN_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BOOLEAN_QUICK, false); break; case Instruction::IGET_BYTE: + case Instruction::IGET_BYTE_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BYTE_QUICK, false); break; case Instruction::IGET_CHAR: + case Instruction::IGET_CHAR_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_CHAR_QUICK, false); break; case Instruction::IGET_SHORT: + case Instruction::IGET_SHORT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_SHORT_QUICK, false); break; case Instruction::IPUT: + case Instruction::IPUT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_QUICK, true); break; case Instruction::IPUT_BOOLEAN: + case Instruction::IPUT_BOOLEAN_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BOOLEAN_QUICK, true); break; case Instruction::IPUT_BYTE: + case Instruction::IPUT_BYTE_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BYTE_QUICK, true); break; case Instruction::IPUT_CHAR: + case Instruction::IPUT_CHAR_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_CHAR_QUICK, true); break; case Instruction::IPUT_SHORT: + case Instruction::IPUT_SHORT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_SHORT_QUICK, true); break; case Instruction::IPUT_WIDE: + case Instruction::IPUT_WIDE_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_WIDE_QUICK, true); break; case Instruction::IPUT_OBJECT: + case Instruction::IPUT_OBJECT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_OBJECT_QUICK, true); break; case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_VIRTUAL_QUICK: CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_QUICK, false); break; case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_RANGE_QUICK, true); break; case Instruction::NOP: - // We need to differentiate between check cast inserted NOP and normal NOP, put an invalid - // index in the map for 
normal nops. This should be rare in real code. - quickened_info_.push_back(QuickenedInfo(dex_pc, DexFile::kDexNoIndex16)); + if (already_quickened_) { + const uint16_t reference_index = NextIndex(); + quickened_info_.push_back(QuickenedInfo(dex_pc, reference_index)); + if (reference_index == DexFile::kDexNoIndex16) { + // This means it was a normal nop and not a check-cast. + break; + } + const uint16_t type_index = NextIndex(); + if (driver_.IsSafeCast(&unit_, dex_pc)) { + quickened_info_.push_back(QuickenedInfo(dex_pc, type_index)); + } + ++it; + } else { + // We need to differentiate between check cast inserted NOP and normal NOP, put an invalid + // index in the map for normal nops. This should be rare in real code. + quickened_info_.push_back(QuickenedInfo(dex_pc, DexFile::kDexNoIndex16)); + } break; default: - DCHECK(!inst->IsQuickened()); // Nothing to do. break; } } + + if (already_quickened_) { + DCHECK_EQ(quicken_index_, existing_quicken_info_.NumIndices()); + } + + if (GetQuickenedInfo().empty()) { + // No need to create a CompiledMethod if there are no quickened opcodes. + return std::vector<uint8_t>(); + } + + std::vector<uint8_t> quicken_data; + if (kIsDebugBuild) { + // Double check that the counts line up with the size of the quicken info. + size_t quicken_count = 0; + for (const DexInstructionPcPair& pair : instructions) { + if (QuickenInfoTable::NeedsIndexForInstruction(&pair.Inst())) { + ++quicken_count; + } + } + CHECK_EQ(quicken_count, GetQuickenedInfo().size()); + } + + QuickenInfoTable::Builder builder(&quicken_data, GetQuickenedInfo().size()); + // Length is encoded by the constructor. + for (const CompilationState::QuickenedInfo& info : GetQuickenedInfo()) { + // Dex pc is not serialized, only used for checking the instructions. Since we access the + // array based on the index of the quickened instruction, the indexes must line up perfectly. + // The reader side uses the NeedsIndexForInstruction function too. + const Instruction& inst = instructions.InstructionAt(info.dex_pc); + CHECK(QuickenInfoTable::NeedsIndexForInstruction(&inst)) << inst.Opcode(); + builder.AddIndex(info.dex_member_index); + } + DCHECK(!quicken_data.empty()); + return quicken_data; } -void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { +void DexToDexCompiler::CompilationState::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { DCHECK_EQ(inst->Opcode(), Instruction::RETURN_VOID); if (unit_.IsConstructor()) { // Are we compiling a non clinit constructor which needs a barrier ? @@ -229,7 +318,8 @@ void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { inst->SetOpcode(Instruction::RETURN_VOID_NO_BARRIER); } -Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) { +Instruction* DexToDexCompiler::CompilationState::CompileCheckCast(Instruction* inst, + uint32_t dex_pc) { if (!kEnableCheckCastEllision) { return inst; } @@ -246,27 +336,30 @@ Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) { << " by replacing it with 2 NOPs at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegA_21c())); - quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegB_21c())); - // We are modifying 4 consecutive bytes. - inst->SetOpcode(Instruction::NOP); - inst->SetVRegA_10x(0u); // keep compliant with verifier. 
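For context on the NOP handling above: check-cast is a 21c-format instruction occupying two 16-bit code units, so eliding it means writing two NOPs, and the elided register and type index are pushed into the quicken info so the original instruction can be reconstructed later. A standalone sketch of the rewrite on a raw code-unit buffer (the opcode value is the real dex value; the helper itself is illustrative):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <utility>

    constexpr uint16_t kOpCheckCast = 0x1f;  // dex opcode of check-cast (format 21c)

    // Overwrites the check-cast at code_units[pc] with two NOPs and returns the
    // (vAA, type index) pair that would be recorded as quicken info.
    std::pair<uint8_t, uint16_t> ElideCheckCast(uint16_t* code_units, size_t pc) {
      assert((code_units[pc] & 0xff) == kOpCheckCast);
      uint8_t vreg = static_cast<uint8_t>(code_units[pc] >> 8);  // AA of "AA|op BBBB"
      uint16_t type_index = code_units[pc + 1];                  // BBBB
      code_units[pc] = 0;      // NOP opcode with vA cleared, keeping the verifier happy
      code_units[pc + 1] = 0;  // the second code unit becomes a NOP too
      return {vreg, type_index};
    }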
- // Get to next instruction which is the second half of check-cast and replace - // it by a NOP. - inst = const_cast<Instruction*>(inst->Next()); - inst->SetOpcode(Instruction::NOP); - inst->SetVRegA_10x(0u); // keep compliant with verifier. + if (!already_quickened_) { + quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegA_21c())); + quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegB_21c())); + + // We are modifying 4 consecutive bytes. + inst->SetOpcode(Instruction::NOP); + inst->SetVRegA_10x(0u); // keep compliant with verifier. + // Get to next instruction which is the second half of check-cast and replace + // it by a NOP. + inst = const_cast<Instruction*>(inst->Next()); + inst->SetOpcode(Instruction::NOP); + inst->SetVRegA_10x(0u); // keep compliant with verifier. + } return inst; } -void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, - uint32_t dex_pc, - Instruction::Code new_opcode, - bool is_put) { +void DexToDexCompiler::CompilationState::CompileInstanceFieldAccess(Instruction* inst, + uint32_t dex_pc, + Instruction::Code new_opcode, + bool is_put) { if (!kEnableQuickening) { return; } - uint32_t field_idx = inst->VRegC_22c(); + uint32_t field_idx = GetIndexForInstruction(inst, inst->VRegC_22c()); MemberOffset field_offset(0u); bool is_volatile; bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, is_put, @@ -278,20 +371,29 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, << " by field offset " << field_offset.Int32Value() << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - // We are modifying 4 consecutive bytes. - inst->SetOpcode(new_opcode); - // Replace field index by field offset. - inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value())); + if (!already_quickened_) { + // We are modifying 4 consecutive bytes. + inst->SetOpcode(new_opcode); + // Replace field index by field offset. + inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value())); + } quickened_info_.push_back(QuickenedInfo(dex_pc, field_idx)); } } -void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_range) { +const DexFile& DexToDexCompiler::CompilationState::GetDexFile() const { + return *unit_.GetDexFile(); +} + +void DexToDexCompiler::CompilationState::CompileInvokeVirtual(Instruction* inst, + uint32_t dex_pc, + Instruction::Code new_opcode, + bool is_range) { if (!kEnableQuickening) { return; } - uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c(); + uint32_t method_idx = GetIndexForInstruction(inst, + is_range ? inst->VRegB_3rc() : inst->VRegB_35c()); ScopedObjectAccess soa(Thread::Current()); ClassLinker* class_linker = unit_.GetClassLinker(); @@ -318,19 +420,20 @@ void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, << " by vtable index " << vtable_idx << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - // We are modifying 4 consecutive bytes. - inst->SetOpcode(new_opcode); - // Replace method index by vtable index. - if (is_range) { - inst->SetVRegB_3rc(static_cast<uint16_t>(vtable_idx)); - } else { - inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx)); + if (!already_quickened_) { + // We are modifying 4 consecutive bytes. + inst->SetOpcode(new_opcode); + // Replace method index by vtable index. 
+ if (is_range) { + inst->SetVRegB_3rc(static_cast<uint16_t>(vtable_idx)); + } else { + inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx)); + } } quickened_info_.push_back(QuickenedInfo(dex_pc, method_idx)); } -CompiledMethod* ArtCompileDEX( - CompilerDriver* driver, +CompiledMethod* DexToDexCompiler::CompileMethod( const DexFile::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type ATTRIBUTE_UNUSED, @@ -338,69 +441,122 @@ CompiledMethod* ArtCompileDEX( uint32_t method_idx, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, - DexToDexCompilationLevel dex_to_dex_compilation_level) { - DCHECK(driver != nullptr); - if (dex_to_dex_compilation_level != DexToDexCompilationLevel::kDontDexToDexCompile) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<1> hs(soa.Self()); - ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - art::DexCompilationUnit unit( - class_loader, - class_linker, - dex_file, - code_item, - class_def_idx, - method_idx, - access_flags, - driver->GetVerifiedMethod(&dex_file, method_idx), - hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file))); - art::optimizer::DexCompiler dex_compiler(*driver, unit, dex_to_dex_compilation_level); - dex_compiler.Compile(); - if (dex_compiler.GetQuickenedInfo().empty()) { - // No need to create a CompiledMethod if there are no quickened opcodes. + CompilationLevel compilation_level) { + if (compilation_level == CompilationLevel::kDontDexToDexCompile) { + return nullptr; + } + + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + art::DexCompilationUnit unit( + class_loader, + class_linker, + dex_file, + code_item, + class_def_idx, + method_idx, + access_flags, + driver_->GetVerifiedMethod(&dex_file, method_idx), + hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file))); + + std::vector<uint8_t> quicken_data; + // If the code item is shared with multiple different method ids, make sure that we quicken only + // once and verify that all the dequicken maps match. + if (UNLIKELY(shared_code_items_.find(code_item) != shared_code_items_.end())) { + // For shared code items, use a lock to prevent races. + MutexLock mu(soa.Self(), lock_); + // Blacklisted means there was a quickening conflict previously, bail early. + if (blacklisted_code_items_.find(code_item) != blacklisted_code_items_.end()) { return nullptr; } + auto existing = shared_code_item_quicken_info_.find(code_item); + const bool already_quickened = existing != shared_code_item_quicken_info_.end(); + { + CompilationState state(this, + unit, + compilation_level, + already_quickened ? &existing->second.quicken_data_ : nullptr); + quicken_data = state.Compile(); + } - // Create a `CompiledMethod`, with the quickened information in the vmap table. - if (kIsDebugBuild) { - // Double check that the counts line up with the size of the quicken info. - size_t quicken_count = 0; - for (const DexInstructionPcPair& pair : unit.GetCodeItemAccessor()) { - if (QuickenInfoTable::NeedsIndexForInstruction(&pair.Inst())) { - ++quicken_count; + // Already quickened, check that the data matches what was previously seen. 
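The surrounding logic deals with the new problem introduced by code-item sharing: one code item can back several method ids, and each method could in principle quicken to different indices. The scheme is to remember the first quicken data seen for a shared code item and, on any later disagreement, unquicken the item, clear the vmap tables of the methods already compiled from it, and blacklist it. A simplified sketch of that registry, assuming quicken data is an opaque byte vector (the real code also tracks the affected MethodReferences):

    #include <cstdint>
    #include <map>
    #include <set>
    #include <vector>

    // Illustrative registry in the spirit of shared_code_item_quicken_info_ and
    // blacklisted_code_items_ above.
    class SharedQuickenRegistry {
     public:
      // Returns false if this code item already produced different quicken data,
      // in which case it is blacklisted and must not be quickened at all.
      bool Register(const void* code_item, const std::vector<uint8_t>& data) {
        if (blacklisted_.count(code_item) != 0) {
          return false;
        }
        auto it = seen_.find(code_item);
        if (it == seen_.end()) {
          seen_.emplace(code_item, data);
          return true;
        }
        if (it->second != data) {  // Conflict: two methods disagree.
          blacklisted_.insert(code_item);
          seen_.erase(it);         // The item must be unquickened instead.
          return false;
        }
        return true;
      }

     private:
      std::map<const void*, std::vector<uint8_t>> seen_;
      std::set<const void*> blacklisted_;
    };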
+ MethodReference method_ref(&dex_file, method_idx); + if (already_quickened) { + QuickenState* const existing_data = &existing->second; + if (existing_data->quicken_data_ != quicken_data) { + VLOG(compiler) << "Quicken data mismatch, dequickening method " + << dex_file.PrettyMethod(method_idx); + // Unquicken using the existing quicken data. + optimizer::ArtDecompileDEX(dex_file, + *code_item, + ArrayRef<const uint8_t>(existing_data->quicken_data_), + /* decompile_return_instruction*/ false); + // Go clear the vmaps for all the methods that were already quickened to avoid writing them + // out during oat writing. + for (const MethodReference& ref : existing_data->methods_) { + CompiledMethod* method = driver_->GetCompiledMethod(ref); + DCHECK(method != nullptr); + method->ReleaseVMapTable(); } + // Blacklist the method to never attempt to quicken it in the future. + blacklisted_code_items_.insert(code_item); + shared_code_item_quicken_info_.erase(existing); + return nullptr; } - CHECK_EQ(quicken_count, dex_compiler.GetQuickenedInfo().size()); + existing_data->methods_.push_back(method_ref); + } else { + QuickenState new_state; + new_state.methods_.push_back(method_ref); + new_state.quicken_data_ = quicken_data; + bool inserted = shared_code_item_quicken_info_.emplace(code_item, new_state).second; + CHECK(inserted) << "Failed to insert " << dex_file.PrettyMethod(method_idx); } - std::vector<uint8_t> quicken_data; - for (QuickenedInfo info : dex_compiler.GetQuickenedInfo()) { - // Dex pc is not serialized, only used for checking the instructions. Since we access the - // array based on the index of the quickened instruction, the indexes must line up perfectly. - // The reader side uses the NeedsIndexForInstruction function too. - const Instruction& inst = unit.GetCodeItemAccessor().InstructionAt(info.dex_pc); - CHECK(QuickenInfoTable::NeedsIndexForInstruction(&inst)) << inst.Opcode(); - // Add the index. - quicken_data.push_back(static_cast<uint8_t>(info.dex_member_index >> 0)); - quicken_data.push_back(static_cast<uint8_t>(info.dex_member_index >> 8)); + + // Easy sanity check is to check that the existing stuff matches by re-quickening using the + // newly produced quicken data. + // Note that this needs to be behind the lock for this case since we may unquicken in another + // thread. + if (kIsDebugBuild) { + CompilationState state2(this, unit, compilation_level, &quicken_data); + std::vector<uint8_t> new_data = state2.Compile(); + CHECK(new_data == quicken_data) << "Mismatch producing new quicken data"; } - InstructionSet instruction_set = driver->GetInstructionSet(); - if (instruction_set == InstructionSet::kThumb2) { - // Don't use the thumb2 instruction set to avoid the one off code delta. - instruction_set = InstructionSet::kArm; + } else { + CompilationState state(this, unit, compilation_level, /*quicken_data*/ nullptr); + quicken_data = state.Compile(); + + // Easy sanity check is to check that the existing stuff matches by re-quickening using the + // newly produced quicken data. 
+ if (kIsDebugBuild) { + CompilationState state2(this, unit, compilation_level, &quicken_data); + std::vector<uint8_t> new_data = state2.Compile(); + CHECK(new_data == quicken_data) << "Mismatch producing new quicken data"; } - return CompiledMethod::SwapAllocCompiledMethod( - driver, - instruction_set, - ArrayRef<const uint8_t>(), // no code - 0, - 0, - 0, - ArrayRef<const uint8_t>(), // method_info - ArrayRef<const uint8_t>(quicken_data), // vmap_table - ArrayRef<const uint8_t>(), // cfi data - ArrayRef<const linker::LinkerPatch>()); } - return nullptr; + + if (quicken_data.empty()) { + return nullptr; + } + + // Create a `CompiledMethod`, with the quickened information in the vmap table. + InstructionSet instruction_set = driver_->GetInstructionSet(); + if (instruction_set == InstructionSet::kThumb2) { + // Don't use the thumb2 instruction set to avoid the one off code delta. + instruction_set = InstructionSet::kArm; + } + CompiledMethod* ret = CompiledMethod::SwapAllocCompiledMethod( + driver_, + instruction_set, + ArrayRef<const uint8_t>(), // no code + 0, + 0, + 0, + ArrayRef<const uint8_t>(), // method_info + ArrayRef<const uint8_t>(quicken_data), // vmap_table + ArrayRef<const uint8_t>(), // cfi data + ArrayRef<const linker::LinkerPatch>()); + return ret; } } // namespace optimizer diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h index 80b94d2dc3..abd048167c 100644 --- a/compiler/dex/dex_to_dex_compiler.h +++ b/compiler/dex/dex_to_dex_compiler.h @@ -17,14 +17,22 @@ #ifndef ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ #define ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ +#include <set> +#include <unordered_map> +#include <unordered_set> + +#include "base/bit_vector.h" #include "dex/dex_file.h" #include "handle.h" #include "invoke_type.h" +#include "method_reference.h" +#include "quicken_info.h" namespace art { class CompiledMethod; class CompilerDriver; +class DexCompilationUnit; namespace mirror { class ClassLoader; @@ -32,21 +40,144 @@ class ClassLoader; namespace optimizer { -enum class DexToDexCompilationLevel { - kDontDexToDexCompile, // Only meaning wrt image time interpretation. - kOptimize // Perform peep-hole optimizations. +class DexToDexCompiler { + public: + enum class CompilationLevel { + kDontDexToDexCompile, // Only meaning wrt image time interpretation. + kOptimize // Perform peep-hole optimizations. + }; + + explicit DexToDexCompiler(CompilerDriver* driver); + + CompiledMethod* CompileMethod(const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + const CompilationLevel compilation_level) WARN_UNUSED; + + void MarkForCompilation(Thread* self, + const MethodReference& method_ref, + const DexFile::CodeItem* code_item); + + void ClearState(); + + CompilerDriver* GetDriver() { + return driver_; + } + + bool ShouldCompileMethod(const MethodReference& ref); + + size_t NumUniqueCodeItems(Thread* self) const; + + private: + // Holds the state for compiling a single method. 
+ struct CompilationState { + struct QuickenedInfo { + QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {} + + uint32_t dex_pc; + uint16_t dex_member_index; + }; + + CompilationState(DexToDexCompiler* compiler, + const DexCompilationUnit& unit, + const CompilationLevel compilation_level, + const std::vector<uint8_t>* quicken_data); + + const std::vector<QuickenedInfo>& GetQuickenedInfo() const { + return quickened_info_; + } + + // Returns the quickening info, or an empty array if it was not quickened. + // If already_quickened is true, then don't change anything but still return what the quicken + // data would have been. + std::vector<uint8_t> Compile(); + + const DexFile& GetDexFile() const; + + // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where + // a barrier is required. + void CompileReturnVoid(Instruction* inst, uint32_t dex_pc); + + // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In + // this case, returns the second NOP instruction pointer. Otherwise, returns + // the given "inst". + Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc); + + // Compiles a field access into a quick field access. + // The field index is replaced by an offset within an Object where we can read + // from / write to this field. Therefore, this does not involve any resolution + // at runtime. + // Since the field index is encoded with 16 bits, we can replace it only if the + // field offset can be encoded with 16 bits too. + void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc, + Instruction::Code new_opcode, bool is_put); + + // Compiles a virtual method invocation into a quick virtual method invocation. + // The method index is replaced by the vtable index where the corresponding + // executable can be found. Therefore, this does not involve any resolution + // at runtime. + // Since the method index is encoded with 16 bits, we can replace it only if the + // vtable index can be encoded with 16 bits too. + void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, + Instruction::Code new_opcode, bool is_range); + + // Return the next index. + uint16_t NextIndex(); + + // Returns the dequickened index if an instruction is quickened, otherwise return index. + uint16_t GetIndexForInstruction(const Instruction* inst, uint32_t index); + + DexToDexCompiler* const compiler_; + CompilerDriver& driver_; + const DexCompilationUnit& unit_; + const CompilationLevel compilation_level_; + + // Filled by the compiler when quickening, in order to encode that information + // in the .oat file. The runtime will use that information to get to the original + // opcodes. + std::vector<QuickenedInfo> quickened_info_; + + // If the code item was already quickened previously. + const bool already_quickened_; + const QuickenInfoTable existing_quicken_info_; + uint32_t quicken_index_ = 0u; + + DISALLOW_COPY_AND_ASSIGN(CompilationState); + }; + + struct QuickenState { + std::vector<MethodReference> methods_; + std::vector<uint8_t> quicken_data_; + }; + + BitVector* GetOrAddBitVectorForDex(const DexFile* dex_file) REQUIRES(lock_); + + CompilerDriver* const driver_; + + // State for adding methods (should this be in its own class?). + const DexFile* active_dex_file_ = nullptr; + BitVector* active_bit_vector_ = nullptr; + + // Lock that guards duplicate code items and the bitmap. + mutable Mutex lock_; + // Record what method references are going to get quickened. 
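The bookkeeping members declared just below implement the marking step of the first pass: one bit per method id, per dex file, set under lock_ by MarkForCompilation() and queried by ShouldCompileMethod() in the second pass. A rough functional equivalent using only standard containers (ART's BitVector with a malloc allocator is replaced by std::vector<bool> here):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    // Illustrative per-dex-file marking table in the spirit of should_quicken_.
    class MarkedMethods {
     public:
      void Mark(const void* dex_file, uint32_t num_method_ids, uint32_t method_index) {
        auto it = bits_.find(dex_file);
        if (it == bits_.end()) {
          it = bits_.emplace(dex_file, std::vector<bool>(num_method_ids, false)).first;
        }
        it->second[method_index] = true;
      }

      bool IsMarked(const void* dex_file, uint32_t method_index) const {
        auto it = bits_.find(dex_file);
        return it != bits_.end() && it->second[method_index];
      }

     private:
      std::unordered_map<const void*, std::vector<bool>> bits_;
    };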
+ std::unordered_map<const DexFile*, BitVector> should_quicken_; + // Record what code items are already seen to detect when multiple methods have the same code + // item. + std::unordered_set<const DexFile::CodeItem*> seen_code_items_ GUARDED_BY(lock_); + // Guarded by lock_ during writing, accessed without a lock during quickening. + // This is safe because no thread is adding to the shared code items during the quickening phase. + std::unordered_set<const DexFile::CodeItem*> shared_code_items_; + std::unordered_set<const DexFile::CodeItem*> blacklisted_code_items_ GUARDED_BY(lock_); + std::unordered_map<const DexFile::CodeItem*, QuickenState> shared_code_item_quicken_info_ + GUARDED_BY(lock_); }; -std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs); - -CompiledMethod* ArtCompileDEX(CompilerDriver* driver, - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - DexToDexCompilationLevel dex_to_dex_compilation_level); + +std::ostream& operator<<(std::ostream& os, const DexToDexCompiler::CompilationLevel& rhs); } // namespace optimizer diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc index c8c2b6998f..48477abe5b 100644 --- a/compiler/driver/compiled_method_storage.cc +++ b/compiler/driver/compiled_method_storage.cc @@ -137,16 +137,7 @@ class CompiledMethodStorage::DedupeHashFunc { return hash; } else { - size_t hash = 0x811c9dc5; - for (uint32_t i = 0; i < len; ++i) { - hash = (hash * 16777619) ^ data[i]; - } - hash += hash << 13; - hash ^= hash >> 7; - hash += hash << 3; - hash ^= hash >> 17; - hash += hash << 5; - return hash; + return HashBytes(data, len); } } }; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index fe83a66d0f..6c5cc50269 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -255,24 +255,6 @@ class CompilerDriver::AOTCompilationStats { DISALLOW_COPY_AND_ASSIGN(AOTCompilationStats); }; -class CompilerDriver::DexFileMethodSet { - public: - explicit DexFileMethodSet(const DexFile& dex_file) - : dex_file_(dex_file), - method_indexes_(dex_file.NumMethodIds(), false, Allocator::GetMallocAllocator()) { - } - DexFileMethodSet(DexFileMethodSet&& other) = default; - - const DexFile& GetDexFile() const { return dex_file_; } - - BitVector& GetMethodIndexes() { return method_indexes_; } - const BitVector& GetMethodIndexes() const { return method_indexes_; } - - private: - const DexFile& dex_file_; - BitVector method_indexes_; -}; - CompilerDriver::CompilerDriver( const CompilerOptions* compiler_options, VerificationResults* verification_results, @@ -306,9 +288,8 @@ CompilerDriver::CompilerDriver( compiled_method_storage_(swap_fd), profile_compilation_info_(profile_compilation_info), max_arena_alloc_(0), - dex_to_dex_references_lock_("dex-to-dex references lock"), - dex_to_dex_references_(), - current_dex_to_dex_methods_(nullptr) { + compiling_dex_to_dex_(false), + dex_to_dex_compiler_(this) { DCHECK(compiler_options_ != nullptr); compiler_->Init(); @@ -398,7 +379,7 @@ void CompilerDriver::CompileAll(jobject class_loader, FreeThreadPools(); } -static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( +static optimizer::DexToDexCompiler::CompilationLevel GetDexToDexCompilationLevel( Thread* self, const CompilerDriver& driver, Handle<mirror::ClassLoader> class_loader, 
const DexFile& dex_file, const DexFile::ClassDef& class_def) REQUIRES_SHARED(Locks::mutator_lock_) { @@ -410,7 +391,7 @@ static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( if (klass == nullptr) { CHECK(self->IsExceptionPending()); self->ClearException(); - return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; + return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; } // DexToDex at the kOptimize level may introduce quickened opcodes, which replace symbolic // references with actual offsets. We cannot re-verify such instructions. @@ -418,26 +399,23 @@ static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( // We store the verification information in the class status in the oat file, which the linker // can validate (checksums) and use to skip load-time verification. It is thus safe to // optimize when a class has been fully verified before. - optimizer::DexToDexCompilationLevel max_level = optimizer::DexToDexCompilationLevel::kOptimize; + optimizer::DexToDexCompiler::CompilationLevel max_level = + optimizer::DexToDexCompiler::CompilationLevel::kOptimize; if (driver.GetCompilerOptions().GetDebuggable()) { // We are debuggable so definitions of classes might be changed. We don't want to do any // optimizations that could break that. - max_level = optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; - } - if (!VdexFile::CanEncodeQuickenedData(dex_file)) { - // Don't do any dex level optimizations if we cannot encode the quickening. - return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; + max_level = optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; } if (klass->IsVerified()) { // Class is verified so we can enable DEX-to-DEX compilation for performance. return max_level; } else { // Class verification has failed: do not run DEX-to-DEX optimizations. - return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; + return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; } } -static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( +static optimizer::DexToDexCompiler::CompilationLevel GetDexToDexCompilationLevel( Thread* self, const CompilerDriver& driver, jobject jclass_loader, @@ -474,7 +452,7 @@ static void CompileMethod(Thread* self, uint32_t method_idx, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, - optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, bool compilation_enabled, Handle<mirror::DexCache> dex_cache) { DCHECK(driver != nullptr); @@ -482,18 +460,18 @@ static void CompileMethod(Thread* self, uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0; MethodReference method_ref(&dex_file, method_idx); - if (driver->GetCurrentDexToDexMethods() != nullptr) { + if (driver->GetCompilingDexToDex()) { + optimizer::DexToDexCompiler* const compiler = &driver->GetDexToDexCompiler(); // This is the second pass when we dex-to-dex compile previously marked methods. // TODO: Refactor the compilation to avoid having to distinguish the two passes // here. That should be done on a higher level. 
http://b/29089975 - if (driver->GetCurrentDexToDexMethods()->IsBitSet(method_idx)) { + if (compiler->ShouldCompileMethod(method_ref)) { VerificationResults* results = driver->GetVerificationResults(); DCHECK(results != nullptr); const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref); // Do not optimize if a VerifiedMethod is missing. SafeCast elision, // for example, relies on it. - compiled_method = optimizer::ArtCompileDEX( - driver, + compiled_method = compiler->CompileMethod( code_item, access_flags, invoke_type, @@ -503,7 +481,7 @@ static void CompileMethod(Thread* self, dex_file, (verified_method != nullptr) ? dex_to_dex_compilation_level - : optimizer::DexToDexCompilationLevel::kDontDexToDexCompile); + : optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile); } } else if ((access_flags & kAccNative) != 0) { // Are we extracting only and have support for generic JNI down calls? @@ -528,7 +506,7 @@ static void CompileMethod(Thread* self, bool compile = compilation_enabled && // Basic checks, e.g., not <clinit>. results->IsCandidateForCompilation(method_ref, access_flags) && // Did not fail to create VerifiedMethod metadata. verified_method != nullptr && // Do not have failures that should punt to the interpreter. !verified_method->HasRuntimeThrow() && @@ -550,10 +528,12 @@ static void CompileMethod(Thread* self, dex_cache); } if (compiled_method == nullptr && - dex_to_dex_compilation_level != optimizer::DexToDexCompilationLevel::kDontDexToDexCompile) { + dex_to_dex_compilation_level != + optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile) { DCHECK(!Runtime::Current()->UseJitCompilation()); + DCHECK(!driver->GetCompilingDexToDex()); // TODO: add a command-line option to disable DEX-to-DEX compilation ? - driver->MarkForDexToDexCompilation(self, method_ref); + driver->GetDexToDexCompiler().MarkForCompilation(self, method_ref, code_item); } } if (kTimeCompileMethod) { @@ -620,14 +600,14 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t PreCompile(jclass_loader, dex_files, timings); // Can we run DEX-to-DEX compiler on this class ? - optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level = + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level = GetDexToDexCompilationLevel(self, *this, jclass_loader, *dex_file, dex_file->GetClassDef(class_def_idx)); - DCHECK(current_dex_to_dex_methods_ == nullptr); + DCHECK(!compiling_dex_to_dex_); CompileMethod(self, this, code_item, @@ -641,19 +621,10 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t true, dex_cache); - ArrayRef<DexFileMethodSet> dex_to_dex_references; - { - // From this point on, we shall not modify dex_to_dex_references_, so - // just grab a reference to it that we use without holding the mutex.
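Both CompileOne() above and Compile() below now follow the same shape: a first pass that compiles normally and marks dex-to-dex candidates, then a second pass restricted to the marked methods, followed by ClearState(). A hedged skeleton of that control flow, with callbacks standing in for the real compile entry points:

    #include <cstdint>
    #include <functional>
    #include <unordered_set>
    #include <vector>

    struct Method { uint32_t index; };

    // Illustrative two-pass driver; the real code routes the second pass through
    // DexToDexCompiler::ShouldCompileMethod instead of a plain set.
    void CompileTwoPass(const std::vector<Method>& methods,
                        const std::function<bool(const Method&)>& try_full_compile,
                        const std::function<void(const Method&)>& dex_to_dex_compile) {
      std::unordered_set<uint32_t> marked;
      for (const Method& m : methods) {   // Pass 1: full compilation.
        if (!try_full_compile(m)) {
          marked.insert(m.index);         // Fall back: mark for dex-to-dex.
        }
      }
      for (const Method& m : methods) {   // Pass 2: only the marked methods.
        if (marked.count(m.index) != 0) {
          dex_to_dex_compile(m);
        }
      }
    }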
- MutexLock lock(Thread::Current(), dex_to_dex_references_lock_); - dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_); - } - if (!dex_to_dex_references.empty()) { - DCHECK_EQ(dex_to_dex_references.size(), 1u); - DCHECK(&dex_to_dex_references[0].GetDexFile() == dex_file); - current_dex_to_dex_methods_ = &dex_to_dex_references.front().GetMethodIndexes(); - DCHECK(current_dex_to_dex_methods_->IsBitSet(method_idx)); - DCHECK_EQ(current_dex_to_dex_methods_->NumSetBits(), 1u); + const size_t num_methods = dex_to_dex_compiler_.NumUniqueCodeItems(self); + if (num_methods != 0) { + DCHECK_EQ(num_methods, 1u); + compiling_dex_to_dex_ = true; CompileMethod(self, this, code_item, @@ -666,7 +637,8 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t dex_to_dex_compilation_level, true, dex_cache); - current_dex_to_dex_methods_ = nullptr; + compiling_dex_to_dex_ = false; + dex_to_dex_compiler_.ClearState(); } FreeThreadPools(); @@ -711,7 +683,7 @@ static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache, } ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(&dex_file, code_item)) { + for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(dex_file, code_item)) { switch (inst->Opcode()) { case Instruction::CONST_STRING: case Instruction::CONST_STRING_JUMBO: { @@ -1284,17 +1256,6 @@ bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) { return IsImageClass(descriptor); } -void CompilerDriver::MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref) { - MutexLock lock(self, dex_to_dex_references_lock_); - // Since we're compiling one dex file at a time, we need to look for the - // current dex file entry only at the end of dex_to_dex_references_. - if (dex_to_dex_references_.empty() || - &dex_to_dex_references_.back().GetDexFile() != method_ref.dex_file) { - dex_to_dex_references_.emplace_back(*method_ref.dex_file); - } - dex_to_dex_references_.back().GetMethodIndexes().SetBit(method_ref.index); -} - bool CompilerDriver::CanAccessTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class, ObjPtr<mirror::Class> resolved_class) { if (resolved_class == nullptr) { @@ -2616,14 +2577,8 @@ void CompilerDriver::Compile(jobject class_loader, : profile_compilation_info_->DumpInfo(&dex_files)); } - current_dex_to_dex_methods_ = nullptr; - Thread* const self = Thread::Current(); - { - // Clear in case we aren't the first call to Compile. - MutexLock mu(self, dex_to_dex_references_lock_); - dex_to_dex_references_.clear(); - } - + dex_to_dex_compiler_.ClearState(); + compiling_dex_to_dex_ = false; for (const DexFile* dex_file : dex_files) { CHECK(dex_file != nullptr); CompileDexFile(class_loader, @@ -2638,23 +2593,21 @@ void CompilerDriver::Compile(jobject class_loader, Runtime::Current()->ReclaimArenaPoolMemory(); } - ArrayRef<DexFileMethodSet> dex_to_dex_references; - { - // From this point on, we shall not modify dex_to_dex_references_, so - // just grab a reference to it that we use without holding the mutex. 
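Taken together, the compiler_driver.cc hunks above replace the mutex-guarded dex_to_dex_references_ bit vectors with state owned by the new DexToDexCompiler: the first pass marks methods that should be quickened, the second pass compiles only the marked ones. A minimal sketch of that two-pass shape, using hypothetical simplified types rather than the real ART signatures (only MarkForCompilation, ShouldCompileMethod, NumUniqueCodeItems and ClearState mirror real entry points; the std::set bookkeeping is an illustrative stand-in):

  #include <cstddef>
  #include <cstdint>
  #include <set>

  class DexToDexCompilerSketch {
   public:
    // First pass: record a method that fell back from optimizing compilation.
    void MarkForCompilation(uint32_t method_idx) { marked_.insert(method_idx); }

    // Second pass: quicken only the previously marked methods.
    bool ShouldCompileMethod(uint32_t method_idx) const {
      return marked_.count(method_idx) != 0u;
    }

    size_t NumUniqueCodeItems() const { return marked_.size(); }
    void ClearState() { marked_.clear(); }

   private:
    std::set<uint32_t> marked_;
  };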
- MutexLock lock(self, dex_to_dex_references_lock_); - dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_); - } - for (const auto& method_set : dex_to_dex_references) { - current_dex_to_dex_methods_ = &method_set.GetMethodIndexes(); - CompileDexFile(class_loader, - method_set.GetDexFile(), - dex_files, - parallel_thread_pool_.get(), - parallel_thread_count_, - timings); + if (dex_to_dex_compiler_.NumUniqueCodeItems(Thread::Current()) > 0u) { + compiling_dex_to_dex_ = true; + // TODO: Avoid visiting all of the dex files; it's probably rare that only one would have quickened + // methods though. + for (const DexFile* dex_file : dex_files) { + CompileDexFile(class_loader, + *dex_file, + dex_files, + parallel_thread_pool_.get(), + parallel_thread_count_, + timings); + } + dex_to_dex_compiler_.ClearState(); + compiling_dex_to_dex_ = false; } - current_dex_to_dex_methods_ = nullptr; VLOG(compiler) << "Compile: " << GetMemoryUsageString(false); } @@ -2705,7 +2658,7 @@ class CompileClassVisitor : public CompilationVisitor { CompilerDriver* const driver = manager_->GetCompiler(); // Can we run DEX-to-DEX compiler on this class ? - optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level = + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level = GetDexToDexCompilationLevel(soa.Self(), *driver, jclass_loader, dex_file, class_def); ClassDataItemIterator it(dex_file, class_data); diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index ef16212fb7..87a8a186c1 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -35,6 +35,7 @@ #include "compiler.h" #include "dex/dex_file.h" #include "dex/dex_file_types.h" +#include "dex/dex_to_dex_compiler.h" #include "driver/compiled_method_storage.h" #include "jit/profile_compilation_info.h" #include "method_reference.h" @@ -120,12 +121,11 @@ class CompilerDriver { void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files, TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_, !dex_to_dex_references_lock_); + REQUIRES(!Locks::mutator_lock_); // Compile a single Method.
void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings) - REQUIRES_SHARED(Locks::mutator_lock_) - REQUIRES(!dex_to_dex_references_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); VerificationResults* GetVerificationResults() const; @@ -362,13 +362,6 @@ class CompilerDriver { return true; } - void MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref) - REQUIRES(!dex_to_dex_references_lock_); - - const BitVector* GetCurrentDexToDexMethods() const { - return current_dex_to_dex_methods_; - } - const ProfileCompilationInfo* GetProfileCompilationInfo() const { return profile_compilation_info_; } @@ -381,6 +374,14 @@ class CompilerDriver { || android::base::EndsWith(boot_image_filename, "core-optimizing.art"); } + bool GetCompilingDexToDex() const { + return compiling_dex_to_dex_; + } + + optimizer::DexToDexCompiler& GetDexToDexCompiler() { + return dex_to_dex_compiler_; + } + private: void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files, @@ -447,7 +448,7 @@ class CompilerDriver { void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) REQUIRES(!dex_to_dex_references_lock_); + TimingLogger* timings); void CompileDexFile(jobject class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, @@ -539,14 +540,9 @@ class CompilerDriver { size_t max_arena_alloc_; - // Data for delaying dex-to-dex compilation. - Mutex dex_to_dex_references_lock_; - // In the first phase, dex_to_dex_references_ collects methods for dex-to-dex compilation. - class DexFileMethodSet; - std::vector<DexFileMethodSet> dex_to_dex_references_ GUARDED_BY(dex_to_dex_references_lock_); - // In the second phase, current_dex_to_dex_methods_ points to the BitVector with method - // indexes for dex-to-dex compilation in the current dex file. - const BitVector* current_dex_to_dex_methods_; + // Compiler for dex to dex (quickening). 
+ bool compiling_dex_to_dex_; + optimizer::DexToDexCompiler dex_to_dex_compiler_; friend class CompileClassVisitor; friend class DexToDexDecompilerTest; diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc index 1fe30de355..28e68c94df 100644 --- a/compiler/driver/dex_compilation_unit.cc +++ b/compiler/driver/dex_compilation_unit.cc @@ -40,8 +40,7 @@ DexCompilationUnit::DexCompilationUnit(Handle<mirror::ClassLoader> class_loader, access_flags_(access_flags), verified_method_(verified_method), dex_cache_(dex_cache), - code_item_accessor_(&dex_file, code_item) { -} + code_item_accessor_(dex_file, code_item) {} const std::string& DexCompilationUnit::GetSymbol() { if (symbol_.empty()) { diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc index 8f7ab05791..7bacacf91d 100644 --- a/compiler/exception_test.cc +++ b/compiler/exception_test.cc @@ -130,7 +130,7 @@ class ExceptionTest : public CommonRuntimeTest { TEST_F(ExceptionTest, FindCatchHandler) { ScopedObjectAccess soa(Thread::Current()); - CodeItemDataAccessor accessor(dex_, dex_->GetCodeItem(method_f_->GetCodeItemOffset())); + CodeItemDataAccessor accessor(*dex_, dex_->GetCodeItem(method_f_->GetCodeItemOffset())); ASSERT_TRUE(accessor.HasCodeItem()); diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 88e3e5b230..2c62095458 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -76,6 +76,7 @@ extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t cou const ArrayRef<mirror::Class*> types_array(types, count); std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForClasses( kRuntimeISA, jit_compiler->GetCompilerDriver()->GetInstructionSetFeatures(), types_array); + MutexLock mu(Thread::Current(), g_jit_debug_mutex); CreateJITCodeEntry(std::move(elf_file)); } } diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc index cedbe5d97f..6e0286afac 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.cc +++ b/compiler/linker/arm/relative_patcher_arm_base.cc @@ -250,12 +250,12 @@ std::vector<debug::MethodDebugInfo> ArmBaseRelativePatcher::GenerateThunkDebugIn for (size_t i = start, num = data.NumberOfThunks(); i != num; ++i) { debug::MethodDebugInfo info = {}; if (i == 0u) { - info.trampoline_name = base_name; + info.custom_name = base_name; } else { // Add a disambiguating tag for subsequent identical thunks. Since the `thunks_` // keeps records also for thunks in previous oat files, names based on the thunk // index shall be unique across the whole multi-oat output. - info.trampoline_name = base_name + "_" + std::to_string(i); + info.custom_name = base_name + "_" + std::to_string(i); } info.isa = instruction_set_; info.is_code_address_text_relative = true; diff --git a/compiler/linker/elf_builder.h b/compiler/linker/elf_builder.h index aa3cd98595..1c875189c5 100644 --- a/compiler/linker/elf_builder.h +++ b/compiler/linker/elf_builder.h @@ -18,6 +18,7 @@ #define ART_COMPILER_LINKER_ELF_BUILDER_H_ #include <vector> +#include <unordered_map> #include "arch/instruction_set.h" #include "arch/mips/instruction_set_features_mips.h" @@ -38,9 +39,10 @@ namespace linker { // Elf_Ehdr - The ELF header. // Elf_Phdr[] - Program headers for the linker. // .note.gnu.build-id - Optional build ID section (SHA-1 digest). -// .rodata - DEX files and oat metadata. +// .rodata - Oat metadata. // .text - Compiled code. // .bss - Zero-initialized writeable section. 
+// .dex - Reserved NOBITS space for dex-related data. // .MIPS.abiflags - MIPS specific section. // .dynstr - Names for .dynsym. // .dynsym - A few oat-specific dynamic symbols. @@ -195,6 +197,11 @@ class ElfBuilder FINAL { return section_index_; } + // Returns true if this section has been added. + bool Exists() const { + return section_index_ != 0; + } + private: // Add this section to the list of generated ELF sections (if not there already). // It also ensures the alignment is sufficient to generate valid program headers, @@ -310,35 +317,36 @@ class ElfBuilder FINAL { if (current_offset_ == 0) { DCHECK(name.empty()); } - Elf_Word offset = current_offset_; - this->WriteFully(name.c_str(), name.length() + 1); - current_offset_ += name.length() + 1; - return offset; + auto res = written_names_.emplace(name, current_offset_); + if (res.second) { // Inserted. + this->WriteFully(name.c_str(), name.length() + 1); + current_offset_ += name.length() + 1; + } + return res.first->second; // Offset. } private: Elf_Word current_offset_; + std::unordered_map<std::string, Elf_Word> written_names_; // Dedup strings. }; // Writer of .dynsym and .symtab sections. - class SymbolSection FINAL : public CachedSection { + class SymbolSection FINAL : public Section { public: SymbolSection(ElfBuilder<ElfTypes>* owner, const std::string& name, Elf_Word type, Elf_Word flags, Section* strtab) - : CachedSection(owner, - name, - type, - flags, - strtab, - /* info */ 0, - sizeof(Elf_Off), - sizeof(Elf_Sym)) { - // The symbol table always has to start with NULL symbol. - Elf_Sym null_symbol = Elf_Sym(); - CachedSection::Add(&null_symbol, sizeof(null_symbol)); + : Section(owner, + name, + type, + flags, + strtab, + /* info */ 0, + sizeof(Elf_Off), + sizeof(Elf_Sym)) { + syms_.push_back(Elf_Sym()); // The symbol table always has to start with NULL symbol. } // Buffer symbol for this section. It will be written later. @@ -361,6 +369,7 @@ class ElfBuilder FINAL { Add(name, section_index, addr, size, binding, type); } + // Buffer symbol for this section. It will be written later. void Add(Elf_Word name, Elf_Word section_index, Elf_Addr addr, @@ -374,8 +383,19 @@ class ElfBuilder FINAL { sym.st_other = 0; sym.st_shndx = section_index; sym.st_info = (binding << 4) + (type & 0xf); - CachedSection::Add(&sym, sizeof(sym)); + syms_.push_back(sym); + } + + Elf_Word GetCacheSize() { return syms_.size() * sizeof(Elf_Sym); } + + void WriteCachedSection() { + this->Start(); + this->WriteFully(syms_.data(), syms_.size() * sizeof(Elf_Sym)); + this->End(); } + + private: + std::vector<Elf_Sym> syms_; // Buffered/cached content of the whole section. 
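The new StringSection::Write above deduplicates names: a string that was already emitted reuses its original offset instead of growing .dynstr/.strtab, which matters now that identical symbol names can recur across a multi-oat output. The idea in isolation, as a self-contained sketch (the byte vector stands in for the real section writer, which streams via WriteFully):

  #include <cstdint>
  #include <string>
  #include <unordered_map>
  #include <vector>

  class StringTableSketch {
   public:
    uint32_t Write(const std::string& name) {
      auto res = written_names_.emplace(name, static_cast<uint32_t>(data_.size()));
      if (res.second) {  // Newly inserted: append the NUL-terminated string.
        data_.insert(data_.end(), name.begin(), name.end());
        data_.push_back('\0');
      }
      return res.first->second;  // Offset of the first (possibly earlier) write.
    }

   private:
    std::vector<char> data_;  // Stand-in for the section contents.
    std::unordered_map<std::string, uint32_t> written_names_;  // Dedup map.
  };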
}; class AbiflagsSection FINAL : public Section { @@ -503,6 +523,7 @@ class ElfBuilder FINAL { rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0), bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), + dex_(this, ".dex", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize), dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_), hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)), @@ -525,6 +546,7 @@ class ElfBuilder FINAL { virtual_address_(0) { text_.phdr_flags_ = PF_R | PF_X; bss_.phdr_flags_ = PF_R | PF_W; + dex_.phdr_flags_ = PF_R; dynamic_.phdr_flags_ = PF_R | PF_W; dynamic_.phdr_type_ = PT_DYNAMIC; eh_frame_hdr_.phdr_type_ = PT_GNU_EH_FRAME; @@ -538,6 +560,7 @@ class ElfBuilder FINAL { Section* GetRoData() { return &rodata_; } Section* GetText() { return &text_; } Section* GetBss() { return &bss_; } + Section* GetDex() { return &dex_; } StringSection* GetStrTab() { return &strtab_; } SymbolSection* GetSymTab() { return &symtab_; } Section* GetEhFrame() { return &eh_frame_; } @@ -666,7 +689,8 @@ class ElfBuilder FINAL { Elf_Word text_size, Elf_Word bss_size, Elf_Word bss_methods_offset, - Elf_Word bss_roots_offset) { + Elf_Word bss_roots_offset, + Elf_Word dex_size) { std::string soname(elf_file_path); size_t directory_separator_pos = soname.rfind('/'); if (directory_separator_pos != std::string::npos) { @@ -679,6 +703,9 @@ class ElfBuilder FINAL { if (bss_size != 0) { bss_.AllocateVirtualMemory(bss_size); } + if (dex_size != 0) { + dex_.AllocateVirtualMemory(dex_size); + } if (isa_ == InstructionSet::kMips || isa_ == InstructionSet::kMips64) { abiflags_.AllocateVirtualMemory(abiflags_.GetSize()); } @@ -725,6 +752,14 @@ class ElfBuilder FINAL { Elf_Word bsslastword_address = bss_.GetAddress() + bss_size - 4; dynsym_.Add(oatbsslastword, &bss_, bsslastword_address, 4, STB_GLOBAL, STT_OBJECT); } + if (dex_size != 0u) { + Elf_Word oatdex = dynstr_.Add("oatdex"); + dynsym_.Add(oatdex, &dex_, dex_.GetAddress(), dex_size, STB_GLOBAL, STT_OBJECT); + Elf_Word oatdexlastword = dynstr_.Add("oatdexlastword"); + Elf_Word oatdexlastword_address = dex_.GetAddress() + dex_size - 4; + dynsym_.Add(oatdexlastword, &dex_, oatdexlastword_address, 4, STB_GLOBAL, STT_OBJECT); + } + Elf_Word soname_offset = dynstr_.Add(soname); // We do not really need a hash-table since there is so few entries. @@ -967,6 +1002,7 @@ class ElfBuilder FINAL { Section rodata_; Section text_; Section bss_; + Section dex_; CachedStringSection dynstr_; SymbolSection dynsym_; CachedSection hash_; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 9c2068ec5e..147df1e3e8 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -302,7 +302,7 @@ class ValueRange : public ArenaObject<kArenaAllocBoundsCheckElimination> { ValueBound GetLower() const { return lower_; } ValueBound GetUpper() const { return upper_; } - bool IsConstantValueRange() { return lower_.IsConstant() && upper_.IsConstant(); } + bool IsConstantValueRange() const { return lower_.IsConstant() && upper_.IsConstant(); } // If it's certain that this value range fits in other_range. 
virtual bool FitsIn(ValueRange* other_range) const { @@ -789,24 +789,33 @@ class BCEVisitor : public HGraphVisitor { ApplyRangeFromComparison(left, block, false_successor, new_range); } } else if (cond == kCondNE || cond == kCondEQ) { - if (left->IsArrayLength() && lower.IsConstant() && upper.IsConstant()) { - // Special case: - // length == [c,d] yields [c, d] along true - // length != [c,d] yields [c, d] along false - if (!lower.Equals(ValueBound::Min()) || !upper.Equals(ValueBound::Max())) { - ValueRange* new_range = new (&allocator_) ValueRange(&allocator_, lower, upper); - ApplyRangeFromComparison( - left, block, cond == kCondEQ ? true_successor : false_successor, new_range); - } - // In addition: - // length == 0 yields [1, max] along false - // length != 0 yields [1, max] along true - if (lower.GetConstant() == 0 && upper.GetConstant() == 0) { - ValueRange* new_range = new (&allocator_) ValueRange( - &allocator_, ValueBound(nullptr, 1), ValueBound::Max()); - ApplyRangeFromComparison( - left, block, cond == kCondEQ ? false_successor : true_successor, new_range); + if (left->IsArrayLength()) { + if (lower.IsConstant() && upper.IsConstant()) { + // Special case: + // length == [c,d] yields [c, d] along true + // length != [c,d] yields [c, d] along false + if (!lower.Equals(ValueBound::Min()) || !upper.Equals(ValueBound::Max())) { + ValueRange* new_range = new (&allocator_) ValueRange(&allocator_, lower, upper); + ApplyRangeFromComparison( + left, block, cond == kCondEQ ? true_successor : false_successor, new_range); + } + // In addition: + // length == 0 yields [1, max] along false + // length != 0 yields [1, max] along true + if (lower.GetConstant() == 0 && upper.GetConstant() == 0) { + ValueRange* new_range = new (&allocator_) ValueRange( + &allocator_, ValueBound(nullptr, 1), ValueBound::Max()); + ApplyRangeFromComparison( + left, block, cond == kCondEQ ? false_successor : true_successor, new_range); + } } + } else if (lower.IsRelatedToArrayLength() && lower.Equals(upper)) { + // Special aliasing case, with x not array length itself: + // x == [length,length] yields x == length along true + // x != [length,length] yields x == length along false + ValueRange* new_range = new (&allocator_) ValueRange(&allocator_, lower, upper); + ApplyRangeFromComparison( + left, block, cond == kCondEQ ? 
true_successor : false_successor, new_range); } } } diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index af537dd653..a1a5692ef6 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -43,7 +43,7 @@ HGraphBuilder::HGraphBuilder(HGraph* graph, CompilerDriver* driver, CodeGenerator* code_generator, OptimizingCompilerStats* compiler_stats, - const uint8_t* interpreter_metadata, + ArrayRef<const uint8_t> interpreter_metadata, VariableSizedHandleScope* handles) : graph_(graph), dex_file_(&graph->GetDexFile()), @@ -70,7 +70,6 @@ HGraphBuilder::HGraphBuilder(HGraph* graph, compiler_driver_(nullptr), code_generator_(nullptr), compilation_stats_(nullptr), - interpreter_metadata_(nullptr), handles_(handles), return_type_(return_type) {} diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index c16a3a928d..5a1914ce08 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_BUILDER_H_ #include "base/arena_object.h" +#include "base/array_ref.h" #include "dex/code_item_accessors.h" #include "dex/dex_file-inl.h" #include "dex/dex_file.h" @@ -40,7 +41,7 @@ class HGraphBuilder : public ValueObject { CompilerDriver* driver, CodeGenerator* code_generator, OptimizingCompilerStats* compiler_stats, - const uint8_t* interpreter_metadata, + ArrayRef<const uint8_t> interpreter_metadata, VariableSizedHandleScope* handles); // Only for unit testing. @@ -73,7 +74,7 @@ class HGraphBuilder : public ValueObject { CodeGenerator* const code_generator_; OptimizingCompilerStats* const compilation_stats_; - const uint8_t* const interpreter_metadata_; + const ArrayRef<const uint8_t> interpreter_metadata_; VariableSizedHandleScope* const handles_; const DataType::Type return_type_; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 07894fd1b1..01155dcd37 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -911,7 +911,7 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, } ArenaVector<size_t> covered( loop_headers.size(), 0, graph.GetAllocator()->Adapter(kArenaAllocMisc)); - for (const DexInstructionPcPair& pair : CodeItemInstructionAccessor(&graph.GetDexFile(), + for (const DexInstructionPcPair& pair : CodeItemInstructionAccessor(graph.GetDexFile(), &code_item)) { const uint32_t dex_pc = pair.DexPc(); const Instruction& instruction = pair.Inst(); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 13886b32b3..13bbffa1e3 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -2097,13 +2097,17 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireW(); - size_t status_offset = mirror::Class::StatusOffset().SizeValue(); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); // Even if the initialized flag is set, we need to ensure consistent memory ordering. // TODO(vixl): Let the MacroAssembler handle MemOperand. 
- __ Add(temp, class_reg, status_offset); + __ Add(temp, class_reg, status_byte_offset); __ Ldarb(temp, HeapOperand(temp)); - __ Cmp(temp, enum_cast<>(ClassStatus::kInitialized)); + __ Cmp(temp, shifted_initialized_value); __ B(lo, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -3487,7 +3491,11 @@ void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant } void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 7f8353312f..577fe00dcd 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -2776,7 +2776,11 @@ void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_poi } void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); @@ -5597,42 +5601,13 @@ Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* c return Location::RequiresRegister(); } -bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, - Opcode opcode) { - uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst)); - if (DataType::Is64BitType(input_cst->GetType())) { - Opcode high_opcode = opcode; - SetCc low_set_cc = kCcDontCare; - switch (opcode) { - case SUB: - // Flip the operation to an ADD. - value = -value; - opcode = ADD; - FALLTHROUGH_INTENDED; - case ADD: - if (Low32Bits(value) == 0u) { - return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare); - } - high_opcode = ADC; - low_set_cc = kCcSet; - break; - default: - break; - } - return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) && - CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare); - } else { - return CanEncodeConstantAsImmediate(Low32Bits(value), opcode); - } -} - -// TODO(VIXL): Replace art::arm::SetCc` with `vixl32::FlagsUpdate after flags set optimization -// enabled. 
-bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(uint32_t value, - Opcode opcode, - SetCc set_cc) { - ArmVIXLAssembler* assembler = codegen_->GetAssembler(); - if (assembler->ShifterOperandCanHold(opcode, value, set_cc)) { +static bool CanEncode32BitConstantAsImmediate( + CodeGeneratorARMVIXL* codegen, + uint32_t value, + Opcode opcode, + vixl32::FlagsUpdate flags_update = vixl32::FlagsUpdate::DontCare) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + if (assembler->ShifterOperandCanHold(opcode, value, flags_update)) { return true; } Opcode neg_opcode = kNoOperand; @@ -5649,13 +5624,41 @@ bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(uint32_t value, return false; } - if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, set_cc)) { + if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, flags_update)) { return true; } return opcode == AND && IsPowerOfTwo(value + 1); } +bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst)); + if (DataType::Is64BitType(input_cst->GetType())) { + Opcode high_opcode = opcode; + vixl32::FlagsUpdate low_flags_update = vixl32::FlagsUpdate::DontCare; + switch (opcode) { + case SUB: + // Flip the operation to an ADD. + value = -value; + opcode = ADD; + FALLTHROUGH_INTENDED; + case ADD: + if (Low32Bits(value) == 0u) { + return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), opcode); + } + high_opcode = ADC; + low_flags_update = vixl32::FlagsUpdate::SetFlags; + break; + default: + break; + } + return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), high_opcode) && + CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode, low_flags_update); + } else { + return CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode); + } +} + void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); @@ -7173,11 +7176,14 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); - GetAssembler()->LoadFromOffset(kLoadUnsignedByte, - temp, - class_reg, - mirror::Class::StatusOffset().Int32Value()); - __ Cmp(temp, enum_cast<>(ClassStatus::kInitialized)); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + GetAssembler()->LoadFromOffset(kLoadUnsignedByte, temp, class_reg, status_byte_offset); + __ Cmp(temp, shifted_initialized_value); __ B(lo, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we may be in a situation where caches are not synced // properly. Therefore, we do a memory fence. 
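Every backend in this change computes the same pair of constants for the class initialization check: the class status now occupies the high bits of a 32-bit field it shares with SubtypeCheckBits, so the generated code loads the single byte containing the status (on ART's little-endian targets) and compares it, as an unsigned byte, against the initialized value shifted into position. The subtype-check bits that share the loaded byte sit below the status bits and therefore can never flip the unsigned comparison. A worked example with illustrative numbers (assumed values for BitStructSizeOf() and kInitialized, not the real ART constants):

  #include <cstddef>
  #include <cstdint>

  constexpr size_t kBitsPerByte = 8;
  constexpr size_t status_lsb_position = 28;  // assumed SubtypeCheckBits::BitStructSizeOf()
  constexpr uint32_t kInitialized = 14;       // assumed ClassStatus::kInitialized value

  // Byte within the 32-bit field that holds the status bits.
  constexpr size_t status_byte_offset_delta = status_lsb_position / kBitsPerByte;   // == 3
  // Status value shifted to its position within that byte.
  constexpr uint32_t shifted_initialized_value =
      kInitialized << (status_lsb_position % kBitsPerByte);                         // == 14 << 4 == 0xE0

  static_assert(status_byte_offset_delta == 3, "status lives in byte 3 of the field");
  static_assert(shifted_initialized_value == 0xE0, "unsigned-byte compare threshold");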
@@ -8131,13 +8137,11 @@ void InstructionCodeGeneratorARMVIXL::GenerateAddLongConst(Location out, return; } __ Adds(out_low, first_low, value_low); - if (GetAssembler()->ShifterOperandCanHold(ADC, value_high, kCcDontCare)) { + if (GetAssembler()->ShifterOperandCanHold(ADC, value_high)) { __ Adc(out_high, first_high, value_high); - } else if (GetAssembler()->ShifterOperandCanHold(SBC, ~value_high, kCcDontCare)) { - __ Sbc(out_high, first_high, ~value_high); } else { - LOG(FATAL) << "Unexpected constant " << value_high; - UNREACHABLE(); + DCHECK(GetAssembler()->ShifterOperandCanHold(SBC, ~value_high)); + __ Sbc(out_high, first_high, ~value_high); } } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index c46d17ccec..38570bb0fe 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -287,7 +287,6 @@ class LocationsBuilderARMVIXL : public HGraphVisitor { Location ArithmeticZeroOrFpuRegister(HInstruction* input); Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode); bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode); - bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare); CodeGeneratorARMVIXL* const codegen_; InvokeDexCallingConventionVisitorARMVIXL parameter_visitor_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index ebe252a9c8..5c8e46ed19 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1915,8 +1915,14 @@ void CodeGeneratorMIPS::GenerateInvokeRuntime(int32_t entry_point_offset, bool d void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg) { - __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, mirror::Class::StatusOffset().Int32Value()); - __ LoadConst32(AT, enum_cast<>(ClassStatus::kInitialized)); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset); + __ LoadConst32(AT, shifted_initialized_value); __ Bltu(TMP, AT, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we need to ensure consistent memory ordering. 
__ Sync(0); @@ -4028,7 +4034,11 @@ void LocationsBuilderMIPS::VisitGoto(HGoto* got) { } void InstructionCodeGeneratorMIPS::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 3ea7b827bb..bcfe051c90 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1761,8 +1761,14 @@ void CodeGeneratorMIPS64::GenerateInvokeRuntime(int32_t entry_point_offset) { void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg) { - __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, mirror::Class::StatusOffset().Int32Value()); - __ LoadConst32(AT, enum_cast<>(ClassStatus::kInitialized)); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset); + __ LoadConst32(AT, shifted_initialized_value); __ Bltuc(TMP, AT, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we need to ensure consistent memory ordering. __ Sync(0); @@ -3556,7 +3562,11 @@ void InstructionCodeGeneratorMIPS64::VisitFloatConstant(HFloatConstant* constant } void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index ad8128a5b1..7b4b85d2fe 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -92,8 +92,8 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i __ pshufd(dst, dst, Immediate(0)); break; case DataType::Type::kInt64: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>()); __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); __ punpckldq(dst, tmp); @@ -101,13 +101,13 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i break; } case DataType::Type::kFloat32: - DCHECK(locations->InAt(0).Equals(locations->Out())); DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(locations->InAt(0).Equals(locations->Out())); __ shufps(dst, dst, Immediate(0)); break; case DataType::Type::kFloat64: - DCHECK(locations->InAt(0).Equals(locations->Out())); DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(locations->InAt(0).Equals(locations->Out())); __ shufpd(dst, dst, Immediate(0)); break; default: @@ 
-160,8 +160,8 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr __ movd(locations->Out().AsRegister<Register>(), src); break; case DataType::Type::kInt64: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ movd(locations->Out().AsRegisterPairLow<Register>(), src); __ pshufd(tmp, src, Immediate(1)); __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp); @@ -1022,8 +1022,8 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction __ movd(dst, locations->InAt(0).AsRegister<Register>()); break; case DataType::Type::kInt64: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ xorps(tmp, tmp); __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>()); __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 68532386e1..cbe9e0a35c 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1347,7 +1347,10 @@ void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* loc } void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); @@ -6219,8 +6222,13 @@ void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( SlowPathCode* slow_path, Register class_reg) { - __ cmpb(Address(class_reg, mirror::Class::StatusOffset().Int32Value()), - Immediate(enum_cast<>(ClassStatus::kInitialized))); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value)); __ j(kBelow, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); // No need for memory fence, thanks to the X86 memory model. 
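The HandleGoto hunks repeated across the backends (x86 above, x86-64 below, and the ARM/MIPS ones earlier) all encode the same contract: a goto whose successor is the exit block is only ever emitted after an instruction that always throws, so control can never fall through and no branch needs to be generated. Such edges are produced by the new HDeadCodeElimination::SimplifyAlwaysThrows pass that appears later in this diff. In sketch form, with hypothetical stand-in types (the real code works on HInstruction/HBasicBlock):

  #include <cassert>

  struct SketchBlock { bool is_exit; };       // stand-in for HBasicBlock
  struct SketchInstr { bool always_throws; }; // stand-in for HInstruction

  // Emit nothing for a goto into the exit block: the preceding instruction
  // always throws, so the edge is unreachable as a fall-through at run time.
  void HandleGotoSketch(const SketchBlock& successor,
                        const SketchInstr& previous,
                        void (*emit_unconditional_jump)()) {
    if (successor.is_exit) {
      assert(previous.always_throws);  // guaranteed by SimplifyAlwaysThrows
      return;  // no code needed
    }
    emit_unconditional_jump();  // normal case: emit the branch
  }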
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 1f8d822507..510eec4f30 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1449,7 +1449,10 @@ void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* } void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); @@ -5425,8 +5428,13 @@ void ParallelMoveResolverX86_64::RestoreScratch(int reg) { void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( SlowPathCode* slow_path, CpuRegister class_reg) { - __ cmpb(Address(class_reg, mirror::Class::StatusOffset().Int32Value()), - Immediate(enum_cast<>(ClassStatus::kInitialized))); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value)); __ j(kBelow, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); // No need for memory fence, thanks to the x86-64 memory model. diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index d8ebac95a8..f4760d661f 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -34,7 +34,9 @@ void CodeSinking::Run() { // TODO(ngeoffray): we do not profile branches yet, so use throw instructions // as an indicator of an uncommon branch. for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) { - if (exit_predecessor->GetLastInstruction()->IsThrow()) { + HInstruction* last = exit_predecessor->GetLastInstruction(); + // Any predecessor of the exit that does not return throws an exception.
+ if (!last->IsReturn() && !last->IsReturnVoid()) { SinkCodeToUncommonBranch(exit_predecessor); } } diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 6eda289861..ba4040acad 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -74,8 +74,8 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { class CodegenTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0); - void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected); + void TestCode(const std::vector<uint16_t>& data, bool has_result = false, int32_t expected = 0); + void TestCodeLong(const std::vector<uint16_t>& data, bool has_result, int64_t expected); void TestComparison(IfCondition condition, int64_t i, int64_t j, @@ -83,7 +83,7 @@ class CodegenTest : public OptimizingUnitTest { const CodegenTargetConfig target_config); }; -void CodegenTest::TestCode(const uint16_t* data, bool has_result, int32_t expected) { +void CodegenTest::TestCode(const std::vector<uint16_t>& data, bool has_result, int32_t expected) { for (const CodegenTargetConfig& target_config : GetTargetConfigs()) { ResetPoolAndAllocator(); HGraph* graph = CreateCFG(data); @@ -93,7 +93,8 @@ void CodegenTest::TestCode(const uint16_t* data, bool has_result, int32_t expect } } -void CodegenTest::TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { +void CodegenTest::TestCodeLong(const std::vector<uint16_t>& data, + bool has_result, int64_t expected) { for (const CodegenTargetConfig& target_config : GetTargetConfigs()) { ResetPoolAndAllocator(); HGraph* graph = CreateCFG(data, DataType::Type::kInt64); @@ -104,12 +105,12 @@ void CodegenTest::TestCodeLong(const uint16_t* data, bool has_result, int64_t ex } TEST_F(CodegenTest, ReturnVoid) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID); + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID); TestCode(data); } TEST_F(CodegenTest, CFG1) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -117,7 +118,7 @@ TEST_F(CodegenTest, CFG1) { } TEST_F(CodegenTest, CFG2) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -126,21 +127,21 @@ TEST_F(CodegenTest, CFG2) { } TEST_F(CodegenTest, CFG3) { - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, Instruction::RETURN_VOID, Instruction::GOTO | 0xFF00); TestCode(data1); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); TestCode(data2); - const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data3 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); @@ -149,7 +150,7 @@ TEST_F(CodegenTest, CFG3) { } TEST_F(CodegenTest, CFG4) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, Instruction::GOTO | 0x100, Instruction::GOTO | 0xFE00); @@ -158,7 +159,7 @@ 
TEST_F(CodegenTest, CFG4) { } TEST_F(CodegenTest, CFG5) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -168,7 +169,7 @@ TEST_F(CodegenTest, CFG5) { } TEST_F(CodegenTest, IntConstant) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); @@ -176,7 +177,7 @@ TEST_F(CodegenTest, IntConstant) { } TEST_F(CodegenTest, Return1) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN | 0); @@ -184,7 +185,7 @@ TEST_F(CodegenTest, Return1) { } TEST_F(CodegenTest, Return2) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 0 | 1 << 8, Instruction::RETURN | 1 << 8); @@ -193,7 +194,7 @@ TEST_F(CodegenTest, Return2) { } TEST_F(CodegenTest, Return3) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::RETURN | 1 << 8); @@ -202,7 +203,7 @@ TEST_F(CodegenTest, Return3) { } TEST_F(CodegenTest, ReturnIf1) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::IF_EQ, 3, @@ -213,7 +214,7 @@ TEST_F(CodegenTest, ReturnIf1) { } TEST_F(CodegenTest, ReturnIf2) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::IF_EQ | 0 << 4 | 1 << 8, 3, @@ -224,17 +225,17 @@ TEST_F(CodegenTest, ReturnIf2) { } // Exercise bit-wise (one's complement) not-int instruction. -#define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ -TEST_F(CodegenTest, TEST_NAME) { \ - const int32_t input = INPUT; \ - const uint16_t input_lo = Low16Bits(input); \ - const uint16_t input_hi = High16Bits(input); \ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( \ - Instruction::CONST | 0 << 8, input_lo, input_hi, \ - Instruction::NOT_INT | 1 << 8 | 0 << 12 , \ - Instruction::RETURN | 1 << 8); \ - \ - TestCode(data, true, EXPECTED_OUTPUT); \ +#define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ +TEST_F(CodegenTest, TEST_NAME) { \ + const int32_t input = INPUT; \ + const uint16_t input_lo = Low16Bits(input); \ + const uint16_t input_hi = High16Bits(input); \ + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( \ + Instruction::CONST | 0 << 8, input_lo, input_hi, \ + Instruction::NOT_INT | 1 << 8 | 0 << 12 , \ + Instruction::RETURN | 1 << 8); \ + \ + TestCode(data, true, EXPECTED_OUTPUT); \ } NOT_INT_TEST(ReturnNotIntMinus2, -2, 1) @@ -256,7 +257,7 @@ TEST_F(CodegenTest, TEST_NAME) { \ const uint16_t word1 = High16Bits(Low32Bits(input)); \ const uint16_t word2 = Low16Bits(High32Bits(input)); \ const uint16_t word3 = High16Bits(High32Bits(input)); /* MSW. 
*/ \ - const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( \ + const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( \ Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3, \ Instruction::NOT_LONG | 2 << 8 | 0 << 12, \ Instruction::RETURN_WIDE | 2 << 8); \ @@ -306,7 +307,7 @@ TEST_F(CodegenTest, IntToLongOfLongToInt) { const uint16_t word1 = High16Bits(Low32Bits(input)); const uint16_t word2 = Low16Bits(High32Bits(input)); const uint16_t word3 = High16Bits(High32Bits(input)); // MSW. - const uint16_t data[] = FIVE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = FIVE_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3, Instruction::CONST_WIDE | 2 << 8, 1, 0, 0, 0, Instruction::ADD_LONG | 0, 0 << 8 | 2, // v0 <- 2^32 + 1 @@ -318,7 +319,7 @@ TEST_F(CodegenTest, IntToLongOfLongToInt) { } TEST_F(CodegenTest, ReturnAdd1) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT, 1 << 8 | 0, @@ -328,7 +329,7 @@ TEST_F(CodegenTest, ReturnAdd1) { } TEST_F(CodegenTest, ReturnAdd2) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, @@ -338,7 +339,7 @@ TEST_F(CodegenTest, ReturnAdd2) { } TEST_F(CodegenTest, ReturnAdd3) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); @@ -347,7 +348,7 @@ TEST_F(CodegenTest, ReturnAdd3) { } TEST_F(CodegenTest, ReturnAdd4) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT16, 3, Instruction::RETURN); @@ -356,7 +357,7 @@ TEST_F(CodegenTest, ReturnAdd4) { } TEST_F(CodegenTest, ReturnMulInt) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT, 1 << 8 | 0, @@ -366,7 +367,7 @@ TEST_F(CodegenTest, ReturnMulInt) { } TEST_F(CodegenTest, ReturnMulInt2addr) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT_2ADDR | 1 << 12, @@ -376,7 +377,7 @@ TEST_F(CodegenTest, ReturnMulInt2addr) { } TEST_F(CodegenTest, ReturnMulLong) { - const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0, Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0, Instruction::MUL_LONG, 2 << 8 | 0, @@ -386,7 +387,7 @@ TEST_F(CodegenTest, ReturnMulLong) { } TEST_F(CodegenTest, ReturnMulLong2addr) { - const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0, Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0, Instruction::MUL_LONG_2ADDR | 2 << 12, @@ -396,7 +397,7 @@ TEST_F(CodegenTest, ReturnMulLong2addr) { } TEST_F(CodegenTest, ReturnMulIntLit8) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = 
ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); @@ -405,7 +406,7 @@ TEST_F(CodegenTest, ReturnMulIntLit8) { } TEST_F(CodegenTest, ReturnMulIntLit16) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT16, 3, Instruction::RETURN); @@ -578,7 +579,7 @@ TEST_F(CodegenTest, MaterializedCondition2) { } TEST_F(CodegenTest, ReturnDivIntLit8) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::DIV_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); @@ -587,7 +588,7 @@ TEST_F(CodegenTest, ReturnDivIntLit8) { } TEST_F(CodegenTest, ReturnDivInt2Addr) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0, Instruction::CONST_4 | 2 << 12 | 1 << 8, Instruction::DIV_INT_2ADDR | 1 << 12, diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index e1980e080e..d27104752b 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -36,7 +36,7 @@ class ConstantFoldingTest : public OptimizingUnitTest { public: ConstantFoldingTest() : graph_(nullptr) { } - void TestCode(const uint16_t* data, + void TestCode(const std::vector<uint16_t>& data, const std::string& expected_before, const std::string& expected_after_cf, const std::string& expected_after_dce, @@ -100,7 +100,7 @@ class ConstantFoldingTest : public OptimizingUnitTest { * return v1 2. return v1 */ TEST_F(ConstantFoldingTest, IntConstantFoldingNegation) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::NEG_INT | 1 << 8 | 0 << 12, Instruction::RETURN | 1 << 8); @@ -161,7 +161,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingNegation) { const uint16_t word1 = High16Bits(Low32Bits(input)); const uint16_t word2 = Low16Bits(High32Bits(input)); const uint16_t word3 = High16Bits(High32Bits(input)); // MSW. - const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3, Instruction::NEG_LONG | 2 << 8 | 0 << 12, Instruction::RETURN_WIDE | 2 << 8); @@ -219,7 +219,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingNegation) { * return v2 4. return v2 */ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition1) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, @@ -284,7 +284,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition1) { * return v2 8. return v2 */ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition2) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::ADD_INT_2ADDR | 0 << 8 | 1 << 12, @@ -369,7 +369,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition2) { * return v2 4. 
return v2 */ TEST_F(ConstantFoldingTest, IntConstantFoldingOnSubtraction) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 3 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::SUB_INT | 2 << 8, 0 | 1 << 8, @@ -432,7 +432,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingOnSubtraction) { * return (v4, v5) 6. return-wide v4 */ TEST_F(ConstantFoldingTest, LongConstantFoldingOnAddition) { - const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = SIX_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE_16 | 0 << 8, 1, Instruction::CONST_WIDE_16 | 2 << 8, 2, Instruction::ADD_LONG | 4 << 8, 0 | 2 << 8, @@ -496,7 +496,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingOnAddition) { * return (v4, v5) 6. return-wide v4 */ TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) { - const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = SIX_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE_16 | 0 << 8, 3, Instruction::CONST_WIDE_16 | 2 << 8, 2, Instruction::SUB_LONG | 4 << 8, 0 | 2 << 8, @@ -569,7 +569,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) { * return v2 13. return v2 */ TEST_F(ConstantFoldingTest, IntConstantFoldingAndJumps) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, @@ -672,7 +672,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingAndJumps) { * return-void 7. return */ TEST_F(ConstantFoldingTest, ConstantCondition) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::CONST_4 | 0 << 8 | 0 << 12, Instruction::IF_GEZ | 1 << 8, 3, diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 3cc7b0e78d..cca1055ac8 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -148,6 +148,77 @@ static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstructi // Simplify the pattern: // +// B1 +// / \ +// | foo() // always throws +// \ goto B2 +// \ / +// B2 +// +// Into: +// +// B1 +// / \ +// | foo() +// | goto Exit +// | | +// B2 Exit +// +// Rationale: +// Removal of the never taken edge to B2 may expose +// other optimization opportunities, such as code sinking. +bool HDeadCodeElimination::SimplifyAlwaysThrows() { + // Make sure exceptions go to exit. + if (graph_->HasTryCatch()) { + return false; + } + HBasicBlock* exit = graph_->GetExitBlock(); + if (exit == nullptr) { + return false; + } + + bool rerun_dominance_and_loop_analysis = false; + + // Order does not matter, just pick one. + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + HInstruction* first = block->GetFirstInstruction(); + HInstruction* last = block->GetLastInstruction(); + // Ensure only one throwing instruction appears before goto. + if (first->AlwaysThrows() && + first->GetNext() == last && + last->IsGoto() && + block->GetPhis().IsEmpty() && + block->GetPredecessors().size() == 1u) { + DCHECK_EQ(block->GetSuccessors().size(), 1u); + HBasicBlock* pred = block->GetSinglePredecessor(); + HBasicBlock* succ = block->GetSingleSuccessor(); + // Ensure no computations are merged through throwing block. 
+ // This does not prevent the optimization per se, but would + // require an elaborate cleanup of the SSA graph. + if (succ != exit && + !block->Dominates(pred) && + pred->Dominates(succ) && + succ->GetPredecessors().size() > 1u && + succ->GetPhis().IsEmpty()) { + block->ReplaceSuccessor(succ, exit); + rerun_dominance_and_loop_analysis = true; + MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke); + } + } + } + + // We need to re-analyze the graph in order to run DCE afterwards. + if (rerun_dominance_and_loop_analysis) { + graph_->ClearLoopInformation(); + graph_->ClearDominanceInformation(); + graph_->BuildDominatorTree(); + return true; + } + return false; +} + +// Simplify the pattern: +// // B1 B2 ... // goto goto goto // \ | / @@ -381,6 +452,7 @@ void HDeadCodeElimination::Run() { // Simplify graph to generate more dead block patterns. ConnectSuccessiveBlocks(); bool did_any_simplification = false; + did_any_simplification |= SimplifyAlwaysThrows(); did_any_simplification |= SimplifyIfs(); did_any_simplification |= RemoveDeadBlocks(); if (did_any_simplification) { diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 84fd890eee..92a7f562e1 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -40,6 +40,7 @@ class HDeadCodeElimination : public HOptimization { void MaybeRecordSimplifyIf(); bool RemoveDeadBlocks(); void RemoveDeadInstructions(); + bool SimplifyAlwaysThrows(); bool SimplifyIfs(); void ConnectSuccessiveBlocks(); diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index 929572ee3b..adb6ce1187 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -29,12 +29,12 @@ namespace art { class DeadCodeEliminationTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data, + void TestCode(const std::vector<uint16_t>& data, const std::string& expected_before, const std::string& expected_after); }; -void DeadCodeEliminationTest::TestCode(const uint16_t* data, +void DeadCodeEliminationTest::TestCode(const std::vector<uint16_t>& data, const std::string& expected_before, const std::string& expected_after) { HGraph* graph = CreateCFG(data); @@ -73,7 +73,7 @@ void DeadCodeEliminationTest::TestCode(const uint16_t* data, * return-void 7. return */ TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::CONST_4 | 0 << 8 | 0 << 12, Instruction::IF_GEZ | 1 << 8, 3, @@ -135,7 +135,7 @@ TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) { * return 13.
return-void */ TEST_F(DeadCodeEliminationTest, AdditionsAndInconditionalJumps) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 0 << 12, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 572466eec8..1d72ba116e 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -26,10 +26,12 @@ namespace art { class OptimizerTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data, const uint32_t* blocks, size_t blocks_length); + void TestCode(const std::vector<uint16_t>& data, const uint32_t* blocks, size_t blocks_length); }; -void OptimizerTest::TestCode(const uint16_t* data, const uint32_t* blocks, size_t blocks_length) { +void OptimizerTest::TestCode(const std::vector<uint16_t>& data, + const uint32_t* blocks, + size_t blocks_length) { HGraph* graph = CreateCFG(data); ASSERT_EQ(graph->GetBlocks().size(), blocks_length); for (size_t i = 0, e = blocks_length; i < e; ++i) { @@ -49,7 +51,7 @@ void OptimizerTest::TestCode(const uint16_t* data, const uint32_t* blocks, size_ } TEST_F(OptimizerTest, ReturnVoid) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID); // Block number 1 const uint32_t dominators[] = { @@ -62,7 +64,7 @@ TEST_F(OptimizerTest, ReturnVoid) { } TEST_F(OptimizerTest, CFG1) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, // Block number 1 Instruction::RETURN_VOID); // Block number 2 @@ -77,7 +79,7 @@ TEST_F(OptimizerTest, CFG1) { } TEST_F(OptimizerTest, CFG2) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, // Block number 1 Instruction::GOTO | 0x100, // Block number 2 Instruction::RETURN_VOID); // Block number 3 @@ -94,7 +96,7 @@ TEST_F(OptimizerTest, CFG2) { } TEST_F(OptimizerTest, CFG3) { - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, // Block number 1 Instruction::RETURN_VOID, // Block number 2 Instruction::GOTO | 0xFF00); // Block number 3 @@ -109,14 +111,14 @@ TEST_F(OptimizerTest, CFG3) { TestCode(data1, dominators, sizeof(dominators) / sizeof(int)); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); TestCode(data2, dominators, sizeof(dominators) / sizeof(int)); - const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data3 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); @@ -125,7 +127,7 @@ TEST_F(OptimizerTest, CFG3) { } TEST_F(OptimizerTest, CFG4) { - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::NOP, Instruction::GOTO | 0xFF00); @@ -138,14 +140,14 @@ TEST_F(OptimizerTest, CFG4) { TestCode(data1, dominators, sizeof(dominators) / sizeof(int)); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 0, 0); TestCode(data2, 
dominators, sizeof(dominators) / sizeof(int)); } TEST_F(OptimizerTest, CFG5) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, // Block number 1 Instruction::GOTO | 0x100, // Dead block Instruction::GOTO | 0xFE00); // Block number 2 @@ -162,7 +164,7 @@ TEST_F(OptimizerTest, CFG5) { } TEST_F(OptimizerTest, CFG6) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -181,7 +183,7 @@ TEST_F(OptimizerTest, CFG6) { } TEST_F(OptimizerTest, CFG7) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, // Block number 1 Instruction::GOTO | 0x100, // Block number 2 @@ -201,7 +203,7 @@ TEST_F(OptimizerTest, CFG7) { } TEST_F(OptimizerTest, CFG8) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, // Block number 1 Instruction::GOTO | 0x200, // Block number 2 @@ -222,7 +224,7 @@ TEST_F(OptimizerTest, CFG8) { } TEST_F(OptimizerTest, CFG9) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, // Block number 1 Instruction::GOTO | 0x200, // Block number 2 @@ -243,7 +245,7 @@ TEST_F(OptimizerTest, CFG9) { } TEST_F(OptimizerTest, CFG10) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, // Block number 1 Instruction::IF_EQ, 3, // Block number 2 diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index b799fb4688..75b8e9609e 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -31,7 +31,7 @@ class FindLoopsTest : public OptimizingUnitTest {}; TEST_F(FindLoopsTest, CFG1) { // Constant is not used. 
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); @@ -42,7 +42,7 @@ TEST_F(FindLoopsTest, CFG1) { } TEST_F(FindLoopsTest, CFG2) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); @@ -53,7 +53,7 @@ TEST_F(FindLoopsTest, CFG2) { } TEST_F(FindLoopsTest, CFG3) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, @@ -67,7 +67,7 @@ TEST_F(FindLoopsTest, CFG3) { } TEST_F(FindLoopsTest, CFG4) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -82,7 +82,7 @@ TEST_F(FindLoopsTest, CFG4) { } TEST_F(FindLoopsTest, CFG5) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, @@ -126,7 +126,7 @@ TEST_F(FindLoopsTest, Loop1) { // while (a == a) { // } // return; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0xFE00, @@ -150,7 +150,7 @@ TEST_F(FindLoopsTest, Loop2) { // while (a == a) { // } // return a; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x400, Instruction::IF_EQ, 4, @@ -173,7 +173,7 @@ TEST_F(FindLoopsTest, Loop2) { TEST_F(FindLoopsTest, Loop3) { // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -197,7 +197,7 @@ TEST_F(FindLoopsTest, Loop3) { TEST_F(FindLoopsTest, Loop4) { // Test loop with originally two back edges. - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::IF_EQ, 3, @@ -221,7 +221,7 @@ TEST_F(FindLoopsTest, Loop4) { TEST_F(FindLoopsTest, Loop5) { // Test loop with two exit edges. 
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::IF_EQ, 3, @@ -244,7 +244,7 @@ TEST_F(FindLoopsTest, Loop5) { } TEST_F(FindLoopsTest, InnerLoop) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::IF_EQ, 3, @@ -273,7 +273,7 @@ TEST_F(FindLoopsTest, InnerLoop) { } TEST_F(FindLoopsTest, TwoLoops) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0xFE00, // first loop @@ -301,7 +301,7 @@ TEST_F(FindLoopsTest, TwoLoops) { } TEST_F(FindLoopsTest, NonNaturalLoop) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x0100, @@ -317,7 +317,7 @@ TEST_F(FindLoopsTest, NonNaturalLoop) { } TEST_F(FindLoopsTest, DoWhileLoop) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x0100, Instruction::IF_EQ, 0xFFFF, diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index b1ac027a68..c88baa8610 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -31,7 +31,15 @@ namespace art { using android::base::StringPrintf; static bool IsAllowedToJumpToExitBlock(HInstruction* instruction) { - return instruction->IsThrow() || instruction->IsReturn() || instruction->IsReturnVoid(); + // Anything that returns is allowed to jump into the exit block. + if (instruction->IsReturn() || instruction->IsReturnVoid()) { + return true; + } + // Anything that always throws is allowed to jump into the exit block. 
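+ // The always-throwing instruction may be followed by a trailing Goto into the exit block; + // in that case, inspect the instruction before the Goto.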
+ if (instruction->IsGoto() && instruction->GetPrevious() != nullptr) { + instruction = instruction->GetPrevious(); + } + return instruction->AlwaysThrows(); } static bool IsExitTryBoundaryIntoExitBlock(HBasicBlock* block) { diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index 9ca3e4953a..08bfa5d80f 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -22,7 +22,7 @@ namespace art { class GraphCheckerTest : public OptimizingUnitTest { protected: HGraph* CreateSimpleCFG(); - void TestCode(const uint16_t* data); + void TestCode(const std::vector<uint16_t>& data); }; /** @@ -48,7 +48,7 @@ HGraph* GraphCheckerTest::CreateSimpleCFG() { return graph; } -void GraphCheckerTest::TestCode(const uint16_t* data) { +void GraphCheckerTest::TestCode(const std::vector<uint16_t>& data) { HGraph* graph = CreateCFG(data); ASSERT_NE(graph, nullptr); @@ -58,14 +58,14 @@ void GraphCheckerTest::TestCode(const uint16_t* data) { } TEST_F(GraphCheckerTest, ReturnVoid) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID); TestCode(data); } TEST_F(GraphCheckerTest, CFG1) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -73,7 +73,7 @@ TEST_F(GraphCheckerTest, CFG1) { } TEST_F(GraphCheckerTest, CFG2) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -83,7 +83,7 @@ TEST_F(GraphCheckerTest, CFG2) { } TEST_F(GraphCheckerTest, CFG3) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -128,7 +128,7 @@ TEST_F(GraphCheckerTest, BlockEndingWithNonBranchInstruction) { TEST_F(GraphCheckerTest, SSAPhi) { // This code creates one Phi function during the conversion to SSA form. - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 7a66d807cf..452be6feae 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -392,6 +392,34 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) { return single_impl; } +static bool AlwaysThrows(ArtMethod* method) { + CodeItemDataAccessor accessor(method); + // Skip native methods, methods with try blocks, and methods that are too large. + if (!accessor.HasCodeItem() || + accessor.TriesSize() != 0 || + accessor.InsnsSizeInCodeUnits() > kMaximumNumberOfTotalInstructions) { + return false; + } + // Scan for exits. 
+ bool throw_seen = false; + for (const DexInstructionPcPair& pair : accessor) { + switch (pair.Inst().Opcode()) { + case Instruction::RETURN: + case Instruction::RETURN_VOID: + case Instruction::RETURN_WIDE: + case Instruction::RETURN_OBJECT: + case Instruction::RETURN_VOID_NO_BARRIER: + return false; // found regular control flow back + case Instruction::THROW: + throw_seen = true; + break; + default: + break; + } + } + return throw_seen; +} + bool HInliner::TryInline(HInvoke* invoke_instruction) { if (invoke_instruction->IsInvokeUnresolved() || invoke_instruction->IsInvokePolymorphic()) { @@ -431,20 +459,29 @@ } if (actual_method != nullptr) { + // Single target. bool result = TryInlineAndReplace(invoke_instruction, actual_method, ReferenceTypeInfo::CreateInvalid(), /* do_rtp */ true, cha_devirtualize); - if (result && !invoke_instruction->IsInvokeStaticOrDirect()) { - if (cha_devirtualize) { - // Add dependency due to devirtulization. We've assumed resolved_method - // has single implementation. - outermost_graph_->AddCHASingleImplementationDependency(resolved_method); - MaybeRecordStat(stats_, MethodCompilationStat::kCHAInline); - } else { - MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface); + if (result) { + // Successfully inlined. + if (!invoke_instruction->IsInvokeStaticOrDirect()) { + if (cha_devirtualize) { + // Add dependency due to devirtualization. We've assumed resolved_method + // has a single implementation. + outermost_graph_->AddCHASingleImplementationDependency(resolved_method); + MaybeRecordStat(stats_, MethodCompilationStat::kCHAInline); + } else { + MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface); + } } + } else if (!cha_devirtualize && AlwaysThrows(actual_method)) { + // Set the always-throws property for a non-inlined method call with a single target + // (unless it was obtained through CHA, because that would imply we have + // to add the CHA dependency, which seems not worth it).
+ invoke_instruction->SetAlwaysThrows(true); } return result; } @@ -1660,7 +1697,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); uint32_t method_index = resolved_method->GetDexMethodIndex(); - CodeItemDebugInfoAccessor code_item_accessor(&callee_dex_file, code_item); + CodeItemDebugInfoAccessor code_item_accessor(resolved_method); ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); Handle<mirror::DexCache> dex_cache = NewHandleIfDifferent(resolved_method->GetDexCache(), caller_compilation_unit_.GetDexCache(), @@ -1968,7 +2005,7 @@ void HInliner::RunOptimizations(HGraph* callee_graph, return; } - CodeItemDataAccessor accessor(&callee_graph->GetDexFile(), code_item); + CodeItemDataAccessor accessor(callee_graph->GetDexFile(), code_item); HInliner inliner(callee_graph, outermost_graph_, codegen_, diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 72a93c1f77..64a1eccf60 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -49,7 +49,7 @@ HInstructionBuilder::HInstructionBuilder(HGraph* graph, const DexCompilationUnit* outer_compilation_unit, CompilerDriver* compiler_driver, CodeGenerator* code_generator, - const uint8_t* interpreter_metadata, + ArrayRef<const uint8_t> interpreter_metadata, OptimizingCompilerStats* compiler_stats, VariableSizedHandleScope* handles, ScopedArenaAllocator* local_allocator) diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 708a09711a..4428c53277 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_ +#include "base/array_ref.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "data_type.h" @@ -57,7 +58,7 @@ class HInstructionBuilder : public ValueObject { const DexCompilationUnit* outer_compilation_unit, CompilerDriver* compiler_driver, CodeGenerator* code_generator, - const uint8_t* interpreter_metadata, + ArrayRef<const uint8_t> interpreter_metadata, OptimizingCompilerStats* compiler_stats, VariableSizedHandleScope* handles, ScopedArenaAllocator* local_allocator); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index ca1b451e6b..2f8e33f941 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2011,6 +2011,14 @@ void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 99b8b5df74..830d0403e4 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -2811,6 +2811,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) { 
GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 113c9de5a2..cafa5228d9 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2835,6 +2835,15 @@ void IntrinsicCodeGeneratorMIPS::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, codegen_, kQuickAtan2); } +// static double java.lang.Math.pow(double y, double x) +void IntrinsicLocationsBuilderMIPS::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, codegen_, kQuickPow); +} + // static double java.lang.Math.cbrt(double a) void IntrinsicLocationsBuilderMIPS::VisitMathCbrt(HInvoke* invoke) { CreateFPToFPCallLocations(allocator_, invoke); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 521bad27e2..89f1818be2 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -2416,6 +2416,15 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, codegen_, kQuickAtan2); } +// static double java.lang.Math.pow(double y, double x) +void IntrinsicLocationsBuilderMIPS64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, codegen_, kQuickPow); +} + // static double java.lang.Math.cbrt(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathCbrt(HInvoke* invoke) { CreateFPToFPCallLocations(allocator_, invoke); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index baa410b884..46b7f3f1ce 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -1105,6 +1105,14 @@ void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 6dd8b8e1f5..6483b7cb2a 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -897,6 +897,14 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, 
invoke); } diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 43b63a73ef..9fa5b74c62 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -35,11 +35,12 @@ namespace art { class LinearizeTest : public OptimizingUnitTest { protected: template <size_t number_of_blocks> - void TestCode(const uint16_t* data, const uint32_t (&expected_order)[number_of_blocks]); + void TestCode(const std::vector<uint16_t>& data, + const uint32_t (&expected_order)[number_of_blocks]); }; template <size_t number_of_blocks> -void LinearizeTest::TestCode(const uint16_t* data, +void LinearizeTest::TestCode(const std::vector<uint16_t>& data, const uint32_t (&expected_order)[number_of_blocks]) { HGraph* graph = CreateCFG(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( @@ -68,7 +69,7 @@ TEST_F(LinearizeTest, CFG1) { // + / \ + // Block4 Block8 - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 5, Instruction::IF_EQ, 0xFFFE, @@ -93,7 +94,7 @@ TEST_F(LinearizeTest, CFG2) { // + / \ + // Block5 Block8 - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::RETURN_VOID, @@ -119,7 +120,7 @@ TEST_F(LinearizeTest, CFG3) { // Block6 + Block9 // | + // Block4 ++ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::RETURN_VOID, @@ -149,7 +150,7 @@ TEST_F(LinearizeTest, CFG4) { // + / \ + // Block5 Block11 */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 7, Instruction::IF_EQ, 0xFFFE, @@ -179,7 +180,7 @@ TEST_F(LinearizeTest, CFG5) { // +/ \ + // Block6 Block11 */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::RETURN_VOID, @@ -205,7 +206,7 @@ TEST_F(LinearizeTest, CFG6) { // Block5 <- Block9 Block6 + // | // Block7 - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x0100, Instruction::IF_EQ, 0x0004, @@ -233,7 +234,7 @@ TEST_F(LinearizeTest, CFG7) { // | // Block7 // - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x0100, Instruction::IF_EQ, 0x0005, diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index e45d7c820c..66660662e4 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -31,10 +31,10 @@ namespace art { class LiveRangesTest : public OptimizingUnitTest { public: - HGraph* BuildGraph(const uint16_t* data); + HGraph* BuildGraph(const std::vector<uint16_t>& data); }; -HGraph* LiveRangesTest::BuildGraph(const uint16_t* data) { +HGraph* LiveRangesTest::BuildGraph(const std::vector<uint16_t>& data) { HGraph* graph = CreateCFG(data); // Suspend checks implementation may change in the future, and this test relies // on how instructions are ordered. 
@@ -57,7 +57,7 @@ TEST_F(LiveRangesTest, CFG1) { * | * 12: exit */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); @@ -102,7 +102,7 @@ TEST_F(LiveRangesTest, CFG2) { * | * 26: exit */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -151,7 +151,7 @@ TEST_F(LiveRangesTest, CFG3) { * | * 28: exit */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, @@ -225,7 +225,7 @@ TEST_F(LiveRangesTest, Loop1) { * 30: exit */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -304,7 +304,7 @@ TEST_F(LiveRangesTest, Loop2) { * We want to make sure the phi at 10 has a lifetime hole after the add at 20. */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::ADD_INT, 0, 0, @@ -378,7 +378,7 @@ TEST_F(LiveRangesTest, CFG4) { * * We want to make sure the constant0 has a lifetime hole after the 16: add. */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::IF_EQ, 5, diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 35bc4ff8b3..6621a03568 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -31,7 +31,7 @@ namespace art { class LivenessTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data, const char* expected); + void TestCode(const std::vector<uint16_t>& data, const char* expected); }; static void DumpBitVector(BitVector* vector, @@ -46,7 +46,7 @@ static void DumpBitVector(BitVector* vector, buffer << ")\n"; } -void LivenessTest::TestCode(const uint16_t* data, const char* expected) { +void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expected) { HGraph* graph = CreateCFG(data); // `Inline` conditions into ifs. PrepareForRegisterAllocation(graph).Run(); @@ -86,7 +86,7 @@ TEST_F(LivenessTest, CFG1) { " kill: (0)\n"; // Constant is not used. 
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); @@ -108,7 +108,7 @@ TEST_F(LivenessTest, CFG2) { " live out: (0)\n" " kill: (0)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); @@ -134,7 +134,7 @@ TEST_F(LivenessTest, CFG3) { " live out: (000)\n" " kill: (000)\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, @@ -181,7 +181,7 @@ TEST_F(LivenessTest, CFG4) { " live out: (0000)\n" " kill: (0000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -228,7 +228,7 @@ TEST_F(LivenessTest, CFG5) { " live out: (000)\n" " kill: (000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, @@ -273,7 +273,7 @@ TEST_F(LivenessTest, Loop1) { " kill: (000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -318,7 +318,7 @@ TEST_F(LivenessTest, Loop3) { " live out: (0000)\n" " kill: (0000)\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -370,7 +370,7 @@ TEST_F(LivenessTest, Loop4) { " live out: (000)\n" " kill: (000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x500, Instruction::IF_EQ, 5, @@ -425,7 +425,7 @@ TEST_F(LivenessTest, Loop5) { " live out: (0001)\n" " kill: (0001)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -475,7 +475,7 @@ TEST_F(LivenessTest, Loop6) { " live out: (0000)\n" " kill: (0000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 8, Instruction::CONST_4 | 4 << 12 | 0, @@ -530,7 +530,7 @@ TEST_F(LivenessTest, Loop7) { " live out: (00000)\n" " kill: (00000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 8, Instruction::CONST_4 | 4 << 12 | 0, @@ -580,7 +580,7 @@ TEST_F(LivenessTest, Loop8) { " live out: (000)\n" " kill: (000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::ADD_INT, 0, 0, diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 88326d321b..aae94b227c 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc 
@@ -25,6 +25,51 @@ #include <iostream> +/** + * The general algorithm of load-store elimination (LSE). + * Load-store analysis in the previous pass collects a list of heap locations + * and does alias analysis of those heap locations. + * LSE keeps track of a list of heap values corresponding to the heap + * locations. It visits basic blocks in reverse post order and, for + * each basic block, visits instructions sequentially, and processes + * instructions as follows: + * - If the instruction is a load, and the heap location for that load has a + * valid heap value, the load can be eliminated. In order to maintain the + * validity of all heap locations during the optimization phase, the real + * elimination is delayed until the end of LSE. + * - If the instruction is a store, it updates the heap value for the heap + * location of the store with the store instruction. The real heap value + * can be fetched from the store instruction. Heap values are invalidated + * for heap locations that may alias with the store instruction's heap + * location. The store instruction can be eliminated unless the value stored + * is later needed, e.g. by a load from the same/aliased heap location, or + * the heap location persists at method return/deoptimization. + * The store instruction is also needed if it's not used to track the heap + * value anymore, e.g. when it fails to merge with the heap values from other + * predecessors. + * - A store that stores the same value as the heap value is eliminated. + * - The list of heap values is merged at basic block entry from the basic + * block's predecessors. The algorithm is single-pass, so loop side effects are + * used as a best effort to decide if a heap location is stored inside the loop. + * - A special type of object called a singleton is instantiated in the method + * and has a single name, i.e. no aliases. Singletons have exclusive heap + * locations since they have no aliases. Singletons are helpful in narrowing + * down the life span of a heap location such that they do not always + * need to participate in merging heap values. Allocation of a singleton + * can be eliminated if that singleton is not used and does not persist + * at method return/deoptimization. + * - For newly instantiated instances, their heap values are initialized to + * language-defined default values. + * - Some instructions such as invokes are treated as loading and invalidating + * all the heap values, depending on the instruction's side effects. + * - Finalizable objects are considered to persist at method + * return/deoptimization. + * - Currently this LSE algorithm doesn't handle SIMD graphs, e.g. with VecLoad + * and VecStore instructions. + * - Currently this LSE algorithm doesn't handle graphs with try-catch, due to + * the special block merging structure. + */ + namespace art { // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
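To make the algorithm above concrete, here is a minimal Java sketch (the Point class and method are hypothetical, not part of this change) of the accesses the pass can remove:

    final class Point { int x; }  // hypothetical type for this sketch

    final class Demo {
      static int run() {
        Point p = new Point();  // singleton: p never escapes, so p.x has no aliases
        p.x = 1;                // store becomes the tracked heap value for p.x
        int a = p.x;            // load eliminated; replaced by the tracked value 1
        p.x = 2;                // dead store: never read again, p does not persist at return
        return a;               // with all accesses gone, the allocation can be removed too
      }
    }

Singletons matter here because their heap locations cannot be touched through any other name, which is what makes both the load forwarding and the dead-store removal safe.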
@@ -59,8 +104,7 @@ class LSEVisitor : public HGraphDelegateVisitor { removed_loads_(allocator_.Adapter(kArenaAllocLSE)), substitute_instructions_for_loads_(allocator_.Adapter(kArenaAllocLSE)), possibly_removed_stores_(allocator_.Adapter(kArenaAllocLSE)), - singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)), - singleton_new_arrays_(allocator_.Adapter(kArenaAllocLSE)) { + singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) { } void VisitBasicBlock(HBasicBlock* block) OVERRIDE { @@ -88,19 +132,26 @@ return type_conversion; } - // Find an instruction's substitute if it should be removed. + // Find an instruction's substitute if it's a removed load. // Return the same instruction if it should not be removed. HInstruction* FindSubstitute(HInstruction* instruction) { + if (!IsLoad(instruction)) { + return instruction; + } size_t size = removed_loads_.size(); for (size_t i = 0; i < size; i++) { if (removed_loads_[i] == instruction) { - return substitute_instructions_for_loads_[i]; + HInstruction* substitute = substitute_instructions_for_loads_[i]; + // The substitute list is a flat hierarchy. + DCHECK_EQ(FindSubstitute(substitute), substitute); + return substitute; } } return instruction; } void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) { + DCHECK(IsLoad(load)); DCHECK_EQ(FindSubstitute(heap_value), heap_value) << "Unexpected heap_value that has a substitute " << heap_value->DebugName(); removed_loads_.push_back(load); @@ -207,28 +258,59 @@ new_instance->GetBlock()->RemoveInstruction(new_instance); } } - for (HInstruction* new_array : singleton_new_arrays_) { - size_t removed = HConstructorFence::RemoveConstructorFences(new_array); - MaybeRecordStat(stats_, - MethodCompilationStat::kConstructorFenceRemovedLSE, - removed); + } - if (!new_array->HasNonEnvironmentUses()) { - new_array->RemoveEnvironmentUsers(); - new_array->GetBlock()->RemoveInstruction(new_array); - } + private: + static bool IsLoad(HInstruction* instruction) { + if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) { + return false; } + // Unresolved load is not treated as a load. + return instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsArrayGet(); } - private: - // If heap_values[index] is an instance field store, need to keep the store. - // This is necessary if a heap value is killed due to merging, or loop side - // effects (which is essentially merging also), since a load later from the - // location won't be eliminated. + static bool IsStore(HInstruction* instruction) { + if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) { + return false; + } + // Unresolved store is not treated as a store. + return instruction->IsInstanceFieldSet() || + instruction->IsArraySet() || + instruction->IsStaticFieldSet(); + } + + // Returns the real heap value by finding its substitute or by "peeling" + // a store instruction. + HInstruction* GetRealHeapValue(HInstruction* heap_value) { + if (IsLoad(heap_value)) { + return FindSubstitute(heap_value); + } + if (!IsStore(heap_value)) { + return heap_value; + } + + // We keep track of store instructions as the heap values that might be + // eliminated if the stores are later found to be unnecessary. The real stored + // value needs to be fetched from the store instruction.
+ if (heap_value->IsInstanceFieldSet()) { + heap_value = heap_value->AsInstanceFieldSet()->GetValue(); + } else if (heap_value->IsStaticFieldSet()) { + heap_value = heap_value->AsStaticFieldSet()->GetValue(); + } else { + DCHECK(heap_value->IsArraySet()); + heap_value = heap_value->AsArraySet()->GetValue(); + } + // heap_value may already be a removed load. + return FindSubstitute(heap_value); + } + + // If heap_value is a store, need to keep the store. + // This is necessary if a heap value is killed or replaced by another value, + // so that the store is no longer used to track the heap value. void KeepIfIsStore(HInstruction* heap_value) { - if (heap_value == kDefaultHeapValue || - heap_value == kUnknownHeapValue || - !(heap_value->IsInstanceFieldSet() || heap_value->IsArraySet())) { + if (!IsStore(heap_value)) { return; } auto idx = std::find(possibly_removed_stores_.begin(), @@ -239,26 +321,41 @@ } } + // If a heap location X may alias with the heap location at `loc_index`, + // and the heap value recorded for X holds a store, keep that store. + // It's needed for a dependent load that's not eliminated since any store + // that may put a value into the load's heap location needs to be kept. + void KeepStoresIfAliasedToLocation(ScopedArenaVector<HInstruction*>& heap_values, + size_t loc_index) { + for (size_t i = 0; i < heap_values.size(); i++) { + if ((i == loc_index) || heap_location_collector_.MayAlias(i, loc_index)) { + KeepIfIsStore(heap_values[i]); + } + } + } + void HandleLoopSideEffects(HBasicBlock* block) { DCHECK(block->IsLoopHeader()); int block_id = block->GetBlockId(); ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id]; + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + ScopedArenaVector<HInstruction*>& pre_header_heap_values = + heap_values_for_[pre_header->GetBlockId()]; - // Don't eliminate loads in irreducible loops. This is safe for singletons, because - // they are always used by the non-eliminated loop-phi. + // Don't eliminate loads in irreducible loops. + // Also keep the stores before the loop. if (block->GetLoopInformation()->IsIrreducible()) { if (kIsDebugBuild) { for (size_t i = 0; i < heap_values.size(); i++) { DCHECK_EQ(heap_values[i], kUnknownHeapValue); } } + for (size_t i = 0; i < heap_values.size(); i++) { + KeepIfIsStore(pre_header_heap_values[i]); + } return; } - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - ScopedArenaVector<HInstruction*>& pre_header_heap_values = - heap_values_for_[pre_header->GetBlockId()]; - // Inherit the values from pre-header. for (size_t i = 0; i < heap_values.size(); i++) { heap_values[i] = pre_header_heap_values[i]; @@ -270,18 +367,17 @@ for (size_t i = 0; i < heap_values.size(); i++) { HeapLocation* location = heap_location_collector_.GetHeapLocation(i); ReferenceInfo* ref_info = location->GetReferenceInfo(); - if (ref_info->IsSingletonAndRemovable() && - !location->IsValueKilledByLoopSideEffects()) { - // A removable singleton's field that's not stored into inside a loop is
+ // heap value is killed by loop side effects. KeepIfIsStore(pre_header_heap_values[i]); heap_values[i] = kUnknownHeapValue; } } + } else { + // The loop doesn't kill any value. } } @@ -300,45 +396,73 @@ ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { HInstruction* merged_value = nullptr; + // If we can merge the store itself from the predecessors, we keep + // the store as the heap value as long as possible. If we cannot + // merge the store, we try to merge the values of the stores. + HInstruction* merged_store_value = nullptr; // Whether merged_value is a result that's merged from all predecessors. bool from_all_predecessors = true; ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); + HInstruction* ref = ref_info->GetReference(); HInstruction* singleton_ref = nullptr; if (ref_info->IsSingleton()) { - // We do more analysis of liveness when merging heap values for such - // cases since stores into such references may potentially be eliminated. - singleton_ref = ref_info->GetReference(); + // We do more analysis based on the singleton's liveness when merging + // heap values for such cases. + singleton_ref = ref; } for (HBasicBlock* predecessor : predecessors) { HInstruction* pred_value = heap_values_for_[predecessor->GetBlockId()][i]; + if (!IsStore(pred_value)) { + pred_value = FindSubstitute(pred_value); + } + DCHECK(pred_value != nullptr); + HInstruction* pred_store_value = GetRealHeapValue(pred_value); if ((singleton_ref != nullptr) && !singleton_ref->GetBlock()->Dominates(predecessor)) { - // singleton_ref is not live in this predecessor. Skip this predecessor since - // it does not really have the location. + // singleton_ref is not live in this predecessor. No need to merge + // since singleton_ref is not live at the beginning of this block. DCHECK_EQ(pred_value, kUnknownHeapValue); from_all_predecessors = false; - continue; + break; } if (merged_value == nullptr) { // First seen heap value. + DCHECK(pred_value != nullptr); merged_value = pred_value; } else if (pred_value != merged_value) { // There are conflicting values. merged_value = kUnknownHeapValue; + // We may still be able to merge store values. + } + + // Conflicting stores may be storing the same value. We do another merge + // of real stored values. + if (merged_store_value == nullptr) { + // First seen store value. + DCHECK(pred_store_value != nullptr); + merged_store_value = pred_store_value; + } else if (pred_store_value != merged_store_value) { + // There are conflicting store values. + merged_store_value = kUnknownHeapValue; + // There must be conflicting stores also. + DCHECK_EQ(merged_value, kUnknownHeapValue); + // No need to merge anymore. break; } } - if (ref_info->IsSingleton()) { - if (ref_info->IsSingletonAndNonRemovable() || - (merged_value == kUnknownHeapValue && - !block->IsSingleReturnOrReturnVoidAllowingPhis())) { - // The heap value may be needed after method return or deoptimization, - // or there are conflicting heap values from different predecessors and - // this block is not a single return, - // keep the last store in each predecessor since future loads may not - // be eliminated.
+ if (merged_value == nullptr) { + DCHECK(!from_all_predecessors); + DCHECK(singleton_ref != nullptr); + } + if (from_all_predecessors) { + if (ref_info->IsSingletonAndRemovable() && + block->IsSingleReturnOrReturnVoidAllowingPhis()) { + // Values in the singleton are not needed anymore. + } else if (!IsStore(merged_value)) { + // We don't track the merged value as a store anymore. We have to + // keep the stores in the predecessors live here. for (HBasicBlock* predecessor : predecessors) { ScopedArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()]; @@ -346,18 +470,33 @@ } } } else { - // Currenctly we don't eliminate stores to non-singletons. + DCHECK(singleton_ref != nullptr); + // singleton_ref does not exist at the beginning of the block. There is + // no need to keep the stores. } - if ((merged_value == nullptr) || !from_all_predecessors) { + if (!from_all_predecessors) { DCHECK(singleton_ref != nullptr); DCHECK((singleton_ref->GetBlock() == block) || - !singleton_ref->GetBlock()->Dominates(block)); + !singleton_ref->GetBlock()->Dominates(block)) + << "method: " << GetGraph()->GetMethodName(); // singleton_ref is not defined before block or defined only in some of its // predecessors, so block doesn't really have the location at its entry. heap_values[i] = kUnknownHeapValue; - } else { + } else if (predecessors.size() == 1) { + // Inherit heap value from the single predecessor. + DCHECK_EQ(heap_values_for_[predecessors[0]->GetBlockId()][i], merged_value); heap_values[i] = merged_value; + } else { + DCHECK(merged_value == kUnknownHeapValue || + merged_value == kDefaultHeapValue || + merged_value->GetBlock()->Dominates(block)); + if (merged_value != kUnknownHeapValue) { + heap_values[i] = merged_value; + } else { + // Stores in different predecessors may be storing the same value. + heap_values[i] = merged_store_value; + } } } } @@ -423,23 +562,12 @@ heap_values[idx] = constant; return; } - if (heap_value != kUnknownHeapValue) { - if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) { - HInstruction* store = heap_value; - // This load must be from a singleton since it's from the same - // field/element that a "removed" store puts the value. That store - // must be to a singleton's field/element. - DCHECK(ref_info->IsSingleton()); - // Get the real heap value of the store. - heap_value = heap_value->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2); - // heap_value may already have a substitute. - heap_value = FindSubstitute(heap_value); - } - } + heap_value = GetRealHeapValue(heap_value); if (heap_value == kUnknownHeapValue) { // Load isn't eliminated. Put the load as the value into the HeapLocation. // This acts like GVN but with better aliasing analysis. heap_values[idx] = instruction; + KeepStoresIfAliasedToLocation(heap_values, idx); } else { if (DataType::Kind(heap_value->GetType()) != DataType::Kind(instruction->GetType())) { // The only situation where the same heap location has different type is when @@ -452,6 +580,10 @@ DCHECK(heap_value->IsArrayGet()) << heap_value->DebugName(); DCHECK(instruction->IsArrayGet()) << instruction->DebugName(); } + // Load isn't eliminated. Put the load as the value into the HeapLocation. + // This acts like GVN but with better aliasing analysis.
+ heap_values[idx] = instruction; + KeepStoresIfAliasedToLocation(heap_values, idx); return; } AddRemovedLoad(instruction, heap_value); @@ -460,12 +592,21 @@ } bool Equal(HInstruction* heap_value, HInstruction* value) { + DCHECK(!IsStore(value)) << value->DebugName(); + if (heap_value == kUnknownHeapValue) { + // Don't compare kUnknownHeapValue with other values. + return false; + } if (heap_value == value) { return true; } if (heap_value == kDefaultHeapValue && GetDefaultValue(value->GetType()) == value) { return true; } + HInstruction* real_heap_value = GetRealHeapValue(heap_value); + if (real_heap_value != heap_value) { + return Equal(real_heap_value, value); + } return false; } @@ -476,6 +617,7 @@ size_t vector_length, int16_t declaring_class_def_index, HInstruction* value) { + DCHECK(!IsStore(value)) << value->DebugName(); // value may already have a substitute. value = FindSubstitute(value); HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); @@ -486,59 +628,47 @@ ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; HInstruction* heap_value = heap_values[idx]; - bool same_value = false; bool possibly_redundant = false; + if (Equal(heap_value, value)) { // Store into the heap location with the same value. - same_value = true; - } else if (index != nullptr && - heap_location_collector_.GetHeapLocation(idx)->HasAliasedLocations()) { - // For array element, don't eliminate stores if the location can be aliased - // (due to either ref or index aliasing). - } else if (ref_info->IsSingleton()) { - // Store into a field/element of a singleton. The value cannot be killed due to - // aliasing/invocation. It can be redundant since future loads can - // directly get the value set by this instruction. The value can still be killed due to - // merging or loop side effects. Stores whose values are killed due to merging/loop side - // effects later will be removed from possibly_removed_stores_ when that is detected. - // Stores whose values may be needed after method return or deoptimization - // are also removed from possibly_removed_stores_ when that is detected. - possibly_redundant = true; + // This store can be eliminated right away. + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } else { HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); - if (loop_info != nullptr) { - // instruction is a store in the loop so the loop must does write. + if (loop_info == nullptr) { + // Store is not in a loop. We try to precisely track the heap value via + // the store. + possibly_redundant = true; + } else if (!loop_info->IsIrreducible()) { + // instruction is a store in the loop, so the loop must write. DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); - - if (loop_info->IsDefinedOutOfTheLoop(original_ref)) { - DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); - // Keep the store since its value may be needed at the loop header. - possibly_redundant = false; - } else { - // The singleton is created inside the loop. Value stored to it isn't needed at + if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(original_ref)) { + // original_ref is created inside the loop. Value stored to it isn't needed at // the loop header. This is true for outer loops also.
+ possibly_redundant = true; + } else { + // Keep the store since its value may be needed at the loop header. } + } else { + // Keep the store inside irreducible loops. } } - if (same_value || possibly_redundant) { + if (possibly_redundant) { possibly_removed_stores_.push_back(instruction); } - if (!same_value) { - if (possibly_redundant) { - DCHECK(instruction->IsInstanceFieldSet() || instruction->IsArraySet()); - // Put the store as the heap value. If the value is loaded from heap - // by a load later, this store isn't really redundant. - heap_values[idx] = instruction; - } else { - heap_values[idx] = value; - } - } + // Put the store as the heap value. If the value is loaded or needed after + // return/deoptimization later, this store isn't really redundant. + heap_values[idx] = instruction; + // This store may kill values in other heap locations due to aliasing. for (size_t i = 0; i < heap_values.size(); i++) { if (i == idx) { continue; } - if (heap_values[i] == value) { + if (Equal(heap_values[i], value)) { // Same value should be kept even if aliasing happens. continue; } @@ -547,7 +677,9 @@ continue; } if (heap_location_collector_.MayAlias(i, idx)) { - // Kill heap locations that may alias. + // Kill heap locations that may alias; as a result, if the heap value + // is a store, the store needs to be kept. + KeepIfIsStore(heap_values[i]); heap_values[i] = kUnknownHeapValue; } } @@ -633,24 +765,35 @@ const ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; for (HInstruction* heap_value : heap_values) { - // Filter out fake instructions before checking instruction kind below. - if (heap_value == kUnknownHeapValue || heap_value == kDefaultHeapValue) { - continue; - } // A store is kept as the heap value for possibly removed stores. + // The value stored is generally observable after deoptimization, except + // for singletons that don't escape after deoptimization. + if (IsStore(heap_value)) { + if (heap_value->IsStaticFieldSet()) { + KeepIfIsStore(heap_value); + continue; + } HInstruction* reference = heap_value->InputAt(0); - DCHECK(heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton()); - for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) { - HEnvironment* user = use.GetUser(); - if (user->GetHolder() == instruction) { - // The singleton for the store is visible at this deoptimization - // point. Need to keep the store so that the heap value is - // seen by the interpreter. + if (heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton()) { + if (reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable()) { + // Finalizable objects always escape. KeepIfIsStore(heap_value); + continue; } + // Check whether the reference for a store is used by an environment local of + // HDeoptimize. If not, the singleton is not observed after + // deoptimization. + for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) { + HEnvironment* user = use.GetUser(); + if (user->GetHolder() == instruction) { + // The singleton for the store is visible at this deoptimization + // point. Need to keep the store so that the heap value is + // seen by the interpreter.
+ KeepIfIsStore(heap_value); + } + } + } else { + KeepIfIsStore(heap_value); } } } @@ -758,7 +901,7 @@ class LSEVisitor : public HGraphDelegateVisitor { return; } if (ref_info->IsSingletonAndRemovable()) { - singleton_new_arrays_.push_back(new_array); + singleton_new_instances_.push_back(new_array); } ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[new_array->GetBlock()->GetBlockId()]; @@ -791,7 +934,6 @@ class LSEVisitor : public HGraphDelegateVisitor { ScopedArenaVector<HInstruction*> possibly_removed_stores_; ScopedArenaVector<HInstruction*> singleton_new_instances_; - ScopedArenaVector<HInstruction*> singleton_new_arrays_; DISALLOW_COPY_AND_ASSIGN(LSEVisitor); }; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 180893dc86..66fca36f91 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -2040,6 +2040,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // TODO: We should rename to CanVisiblyThrow, as some instructions (like HNewInstance), // could throw OOME, but it is still OK to remove them if they are unused. virtual bool CanThrow() const { return false; } + + // Does the instruction always throw an exception unconditionally? + virtual bool AlwaysThrows() const { return false; } + bool CanThrowIntoCatchBlock() const { return CanThrow() && block_->IsTryBlock(); } bool HasSideEffects() const { return side_effects_.HasSideEffects(); } @@ -4191,6 +4195,10 @@ class HInvoke : public HVariableInputSizeInstruction { bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); } + void SetAlwaysThrows(bool always_throws) { SetPackedFlag<kFlagAlwaysThrows>(always_throws); } + + bool AlwaysThrows() const OVERRIDE { return GetPackedFlag<kFlagAlwaysThrows>(); } + bool CanBeMoved() const OVERRIDE { return IsIntrinsic() && !DoesAnyWrite(); } bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { @@ -4221,7 +4229,8 @@ class HInvoke : public HVariableInputSizeInstruction { static constexpr size_t kFieldReturnTypeSize = MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); static constexpr size_t kFlagCanThrow = kFieldReturnType + kFieldReturnTypeSize; - static constexpr size_t kNumberOfInvokePackedBits = kFlagCanThrow + 1; + static constexpr size_t kFlagAlwaysThrows = kFlagCanThrow + 1; + static constexpr size_t kNumberOfInvokePackedBits = kFlagAlwaysThrows + 1; static_assert(kNumberOfInvokePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using InvokeTypeField = BitField<InvokeType, kFieldInvokeType, kFieldInvokeTypeSize>; using ReturnTypeField = BitField<DataType::Type, kFieldReturnType, kFieldReturnTypeSize>; @@ -6597,6 +6606,8 @@ class HThrow FINAL : public HTemplateInstruction<1> { bool CanThrow() const OVERRIDE { return true; } + bool AlwaysThrows() const OVERRIDE { return true; } + DECLARE_INSTRUCTION(Throw); protected: diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index 92b427cafa..57db7a634c 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -242,7 +242,7 @@ ArenaVector<HOptimization*> ConstructOptimizations( opt = new (allocator) HDeadCodeElimination(graph, stats, name); break; case OptimizationPass::kInliner: { - CodeItemDataAccessor accessor(dex_compilation_unit.GetDexFile(), + CodeItemDataAccessor accessor(*dex_compilation_unit.GetDexFile(), dex_compilation_unit.GetCodeItem()); opt = new (allocator) HInliner(graph, // outer_graph graph, // outermost_graph diff --git 
a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index e2b2106f65..d20b681b49 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -41,7 +41,7 @@ namespace art { // Run the tests only on host. #ifndef ART_TARGET_ANDROID -class OptimizingCFITest : public CFITest { +class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { public: // Enable this flag to generate the expected outputs. static constexpr bool kGenerateExpected = false; @@ -63,7 +63,7 @@ class OptimizingCFITest : public CFITest { // Setup simple context. std::string error; isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error); - graph_ = CreateGraph(&pool_and_allocator_); + graph_ = CreateGraph(); // Generate simple frame with some spills. code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_); code_gen_->GetAssembler()->cfi().SetEnabled(true); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index b64f82caee..c35c490118 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -382,6 +382,8 @@ class OptimizingCompiler FINAL : public Compiler { PassObserver* pass_observer, VariableSizedHandleScope* handles) const; + void GenerateJitDebugInfo(debug::MethodDebugInfo method_debug_info); + std::unique_ptr<OptimizingCompilerStats> compilation_stats_; std::unique_ptr<std::ostream> visualizer_output_; @@ -766,13 +768,13 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, static constexpr size_t kSpaceFilterOptimizingThreshold = 128; const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions(); if ((compiler_options.GetCompilerFilter() == CompilerFilter::kSpace) - && (CodeItemInstructionAccessor(&dex_file, code_item).InsnsSizeInCodeUnits() > + && (CodeItemInstructionAccessor(dex_file, code_item).InsnsSizeInCodeUnits() > kSpaceFilterOptimizingThreshold)) { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledSpaceFilter); return nullptr; } - CodeItemDebugInfoAccessor code_item_accessor(&dex_file, code_item); + CodeItemDebugInfoAccessor code_item_accessor(dex_file, code_item, method_idx); HGraph* graph = new (allocator) HGraph( allocator, arena_stack, @@ -783,7 +785,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, compiler_driver->GetCompilerOptions().GetDebuggable(), osr); - const uint8_t* interpreter_metadata = nullptr; + ArrayRef<const uint8_t> interpreter_metadata; // For AOT compilation, we may not get a method, for example if its class is erroneous. // JIT should always have a method. 
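
A small but recurring change above: interpreter_metadata becomes an ArrayRef<const uint8_t> instead of a bare const uint8_t*, so "no metadata" is an empty view that carries its own length rather than a nullptr with an implicit size. A rough stand-in for the idea (ArrayRefSketch is illustrative only; ART's real ArrayRef has a richer API):

#include <cstddef>

// Non-owning sized view; default-constructed means "no data".
template <typename T>
class ArrayRefSketch {
 public:
  ArrayRefSketch() = default;  // data() == nullptr, size() == 0
  ArrayRefSketch(T* data, size_t size) : data_(data), size_(size) {}
  T* data() const { return data_; }
  size_t size() const { return size_; }
  bool empty() const { return size_ == 0u; }
 private:
  T* data_ = nullptr;
  size_t size_ = 0u;
};

This is why the intrinsic path further down can pass ArrayRef<const uint8_t>() where it used to pass nullptr.
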
DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr); @@ -940,7 +942,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( compiler_driver, codegen.get(), compilation_stats_.get(), - /* interpreter_metadata */ nullptr, + /* interpreter_metadata */ ArrayRef<const uint8_t>(), handles); builder.BuildIntrinsicGraph(method); } @@ -1230,7 +1232,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); debug::MethodDebugInfo info = {}; - DCHECK(info.trampoline_name.empty()); + DCHECK(info.custom_name.empty()); info.dex_file = dex_file; info.class_def_index = class_def_idx; info.dex_method_index = method_idx; @@ -1246,14 +1248,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = nullptr; info.cfi = jni_compiled_method.GetCfi(); - // If both flags are passed, generate full debug info. - const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); - std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( - GetCompilerDriver()->GetInstructionSet(), - GetCompilerDriver()->GetInstructionSetFeatures(), - mini_debug_info, - info); - CreateJITCodeEntryForAddress(code_address, std::move(elf_file)); + GenerateJitDebugInfo(info); } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); @@ -1361,7 +1356,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); debug::MethodDebugInfo info = {}; - DCHECK(info.trampoline_name.empty()); + DCHECK(info.custom_name.empty()); info.dex_file = dex_file; info.class_def_index = class_def_idx; info.dex_method_index = method_idx; @@ -1377,14 +1372,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = stack_map_size == 0 ? nullptr : stack_map_data; info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); - // If both flags are passed, generate full debug info. - const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); - std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( - GetCompilerDriver()->GetInstructionSet(), - GetCompilerDriver()->GetInstructionSetFeatures(), - mini_debug_info, - info); - CreateJITCodeEntryForAddress(code_address, std::move(elf_file)); + GenerateJitDebugInfo(info); } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); @@ -1408,4 +1396,22 @@ bool OptimizingCompiler::JitCompile(Thread* self, return true; } +void OptimizingCompiler::GenerateJitDebugInfo(debug::MethodDebugInfo info) { + const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); + DCHECK(compiler_options.GenerateAnyDebugInfo()); + + // If both flags are passed, generate full debug info. + const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); + + // Create entry for the single method that we just compiled. 
+ std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( + GetCompilerDriver()->GetInstructionSet(), + GetCompilerDriver()->GetInstructionSetFeatures(), + mini_debug_info, + ArrayRef<const debug::MethodDebugInfo>(&info, 1)); + MutexLock mu(Thread::Current(), g_jit_debug_mutex); + JITCodeEntry* entry = CreateJITCodeEntry(elf_file); + IncrementJITCodeEntryRefcount(entry, info.code_address); +} + } // namespace art diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 32a94ab5e4..0023265e50 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -75,6 +75,7 @@ enum class MethodCompilationStat { kImplicitNullCheckGenerated, kExplicitNullCheckGenerated, kSimplifyIf, + kSimplifyThrowingInvoke, kInstructionSunk, kNotInlinedUnresolvedEntrypoint, kNotInlinedDexCache, diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 661abb125c..6dcbadba6e 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -17,12 +17,16 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_ +#include <memory> +#include <vector> + #include "base/scoped_arena_allocator.h" #include "builder.h" #include "common_compiler_test.h" #include "dex/code_item_accessors-inl.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" +#include "dex/standard_dex_file.h" #include "driver/dex_compilation_unit.h" #include "handle_scope-inl.h" #include "mirror/class_loader.h" @@ -99,18 +103,11 @@ class ArenaPoolAndAllocator { ScopedArenaAllocator scoped_allocator_; }; -inline HGraph* CreateGraph(ArenaPoolAndAllocator* pool_and_allocator) { - return new (pool_and_allocator->GetAllocator()) HGraph( - pool_and_allocator->GetAllocator(), - pool_and_allocator->GetArenaStack(), - *reinterpret_cast<DexFile*>(pool_and_allocator->GetAllocator()->Alloc(sizeof(DexFile))), - /*method_idx*/-1, - kRuntimeISA); -} - -class OptimizingUnitTest : public CommonCompilerTest { - protected: - OptimizingUnitTest() : pool_and_allocator_(new ArenaPoolAndAllocator()) { } +// Have a separate helper so the OptimizingCFITest can inherit it without causing +// multiple inheritance errors from having gtest as a parent twice. +class OptimizingUnitTestHelper { + public: + OptimizingUnitTestHelper() : pool_and_allocator_(new ArenaPoolAndAllocator()) { } ArenaAllocator* GetAllocator() { return pool_and_allocator_->GetAllocator(); } ArenaStack* GetArenaStack() { return pool_and_allocator_->GetArenaStack(); } @@ -122,14 +119,42 @@ class OptimizingUnitTest : public CommonCompilerTest { } HGraph* CreateGraph() { - return art::CreateGraph(pool_and_allocator_.get()); + ArenaAllocator* const allocator = pool_and_allocator_->GetAllocator(); + + // Reserve a big array of 0s so the dex file constructor can read offsets from the header. + static constexpr size_t kDexDataSize = 4 * KB; + const uint8_t* dex_data = reinterpret_cast<uint8_t*>(allocator->Alloc(kDexDataSize)); + + // Create the dex file based on the fake data. Call the constructor so that we can use virtual + // functions. Don't use the arena for the StandardDexFile; otherwise the dex location leaks.
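
The comment above is worth unpacking: arena allocations are released in bulk without running destructors, so an arena-placed StandardDexFile would never destroy its std::string location member. A minimal illustration of the failure mode being avoided (FakeDexFile is a hypothetical stand-in; the fix is simply heap allocation owned by a unique_ptr):

#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-in: any type whose members own heap memory.
struct FakeDexFile {
  std::string location;  // needs its destructor to run
  explicit FakeDexFile(std::string loc) : location(std::move(loc)) {}
};

// Arena style would placement-new into a bump allocator, free the memory in
// bulk, and never call ~FakeDexFile(), leaking `location`'s buffer.
// Heap style (what the change does): the unique_ptr runs the destructor at
// fixture teardown, so nothing leaks.
std::vector<std::unique_ptr<FakeDexFile>> dex_files;

void AddFakeDexFile() {
  dex_files.emplace_back(new FakeDexFile("no_location"));
}

Hence the heap-allocated StandardDexFile owned through dex_files_ that the hunk continues with below.
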
+ dex_files_.emplace_back(new StandardDexFile( + dex_data, + sizeof(StandardDexFile::Header), + "no_location", + /*location_checksum*/ 0, + /*oat_dex_file*/ nullptr, + /*container*/ nullptr)); + + return new (allocator) HGraph( + allocator, + pool_and_allocator_->GetArenaStack(), + *dex_files_.back(), + /*method_idx*/-1, + kRuntimeISA); } // Create a control-flow graph from Dex instructions. - HGraph* CreateCFG(const uint16_t* data, DataType::Type return_type = DataType::Type::kInt32) { - const DexFile::CodeItem* code_item = reinterpret_cast<const DexFile::CodeItem*>(data); + HGraph* CreateCFG(const std::vector<uint16_t>& data, + DataType::Type return_type = DataType::Type::kInt32) { HGraph* graph = CreateGraph(); + // The code item data might not be aligned to 4 bytes; copy it to ensure that. + const size_t code_item_size = data.size() * sizeof(data.front()); + void* aligned_data = GetAllocator()->Alloc(code_item_size); + memcpy(aligned_data, &data[0], code_item_size); + CHECK_ALIGNED(aligned_data, StandardDexFile::CodeItem::kAlignment); + const DexFile::CodeItem* code_item = reinterpret_cast<const DexFile::CodeItem*>(aligned_data); + { ScopedObjectAccess soa(Thread::Current()); if (handles_ == nullptr) { @@ -146,7 +171,7 @@ class OptimizingUnitTest : public CommonCompilerTest { /* access_flags */ 0u, /* verified_method */ nullptr, handles_->NewHandle<mirror::DexCache>(nullptr)); - CodeItemDebugInfoAccessor accessor(&graph->GetDexFile(), code_item); + CodeItemDebugInfoAccessor accessor(graph->GetDexFile(), code_item, /*dex_method_idx*/ 0u); HGraphBuilder builder(graph, dex_compilation_unit, accessor, handles_.get(), return_type); bool graph_built = (builder.BuildGraph() == kAnalysisSuccess); return graph_built ? graph : nullptr; @@ -154,10 +179,13 @@ } private: + std::vector<std::unique_ptr<const StandardDexFile>> dex_files_; std::unique_ptr<ArenaPoolAndAllocator> pool_and_allocator_; std::unique_ptr<VariableSizedHandleScope> handles_; }; +class OptimizingUnitTest : public CommonCompilerTest, public OptimizingUnitTestHelper {}; + // Naive string diff data type.
typedef std::list<std::pair<std::string, std::string>> diff_t; diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 4fc7fe9427..6ef386b4a5 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -29,10 +29,10 @@ namespace art { class PrettyPrinterTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data, const char* expected); + void TestCode(const std::vector<uint16_t>& data, const char* expected); }; -void PrettyPrinterTest::TestCode(const uint16_t* data, const char* expected) { +void PrettyPrinterTest::TestCode(const std::vector<uint16_t>& data, const char* expected) { HGraph* graph = CreateCFG(data); StringPrettyPrinter printer(graph); printer.VisitInsertionOrder(); @@ -40,7 +40,7 @@ void PrettyPrinterTest::TestCode(const uint16_t* data, const char* expected) { } TEST_F(PrettyPrinterTest, ReturnVoid) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID); const char* expected = @@ -67,7 +67,7 @@ TEST_F(PrettyPrinterTest, CFG1) { "BasicBlock 3, pred: 2\n" " 4: Exit\n"; - const uint16_t data[] = + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -89,7 +89,7 @@ TEST_F(PrettyPrinterTest, CFG2) { "BasicBlock 4, pred: 3\n" " 5: Exit\n"; - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -111,21 +111,21 @@ TEST_F(PrettyPrinterTest, CFG3) { "BasicBlock 4, pred: 2\n" " 5: Exit\n"; - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, Instruction::RETURN_VOID, Instruction::GOTO | 0xFF00); TestCode(data1, expected); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); TestCode(data2, expected); - const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data3 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); @@ -144,13 +144,13 @@ TEST_F(PrettyPrinterTest, CFG4) { "BasicBlock 3, pred: 0, succ: 1\n" " 0: Goto 1\n"; - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::NOP, Instruction::GOTO | 0xFF00); TestCode(data1, expected); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 0, 0); TestCode(data2, expected); @@ -166,7 +166,7 @@ TEST_F(PrettyPrinterTest, CFG5) { "BasicBlock 3, pred: 1\n" " 3: Exit\n"; - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, Instruction::GOTO | 0x100, Instruction::GOTO | 0xFE00); @@ -192,7 +192,7 @@ TEST_F(PrettyPrinterTest, CFG6) { "BasicBlock 5, pred: 1, succ: 3\n" " 0: Goto 3\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -220,7 +220,7 @@ TEST_F(PrettyPrinterTest, CFG7) { "BasicBlock 6, pred: 1, succ: 2\n" " 1: Goto 2\n"; - const 
uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -240,7 +240,7 @@ TEST_F(PrettyPrinterTest, IntConstant) { "BasicBlock 2, pred: 1\n" " 4: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 3748d599a3..a70b0664dc 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -46,7 +46,7 @@ class RegisterAllocatorTest : public OptimizingUnitTest { void ExpectedInRegisterHint(Strategy strategy); // Helper functions that make use of the OptimizingUnitTest's members. - bool Check(const uint16_t* data, Strategy strategy); + bool Check(const std::vector<uint16_t>& data, Strategy strategy); void CFG1(Strategy strategy); void Loop1(Strategy strategy); void Loop2(Strategy strategy); @@ -79,7 +79,7 @@ TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\ test_name(Strategy::kRegisterAllocatorGraphColor);\ } -bool RegisterAllocatorTest::Check(const uint16_t* data, Strategy strategy) { +bool RegisterAllocatorTest::Check(const std::vector<uint16_t>& data, Strategy strategy) { HGraph* graph = CreateCFG(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); @@ -185,7 +185,7 @@ void RegisterAllocatorTest::CFG1(Strategy strategy) { * | * exit */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); @@ -222,7 +222,7 @@ void RegisterAllocatorTest::Loop1(Strategy strategy) { * exit */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -268,7 +268,7 @@ void RegisterAllocatorTest::Loop2(Strategy strategy) { * exit */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 8 << 12 | 1 << 8, Instruction::IF_EQ | 1 << 8, 7, @@ -314,7 +314,7 @@ void RegisterAllocatorTest::Loop3(Strategy strategy) { * exit */ - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8, Instruction::CONST_4 | 5 << 12 | 2 << 8, @@ -351,7 +351,7 @@ void RegisterAllocatorTest::Loop3(Strategy strategy) { TEST_ALL_STRATEGIES(Loop3); TEST_F(RegisterAllocatorTest, FirstRegisterUse) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8, Instruction::XOR_INT_LIT8 | 0 << 8, 1 << 8, @@ -402,7 +402,7 @@ void RegisterAllocatorTest::DeadPhi(Strategy strategy) { * } while (true); */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 0, Instruction::IF_NE | 1 << 8 | 1 << 12, 3, @@ -432,7 +432,7 @@ TEST_ALL_STRATEGIES(DeadPhi); * This test only applies to the linear scan allocator. 
*/ TEST_F(RegisterAllocatorTest, FreeUntil) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index b3c8f105d1..8dcadaad2e 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -330,10 +330,12 @@ bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) { } } else if (c == kCondLE || c == kCondGT) { if (value < std::numeric_limits<int64_t>::max() && - !codegen_->GetAssembler()->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) { + !codegen_->GetAssembler()->ShifterOperandCanHold( + SBC, High32Bits(value + 1), vixl32::FlagsUpdate::SetFlags)) { return false; } - } else if (!codegen_->GetAssembler()->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) { + } else if (!codegen_->GetAssembler()->ShifterOperandCanHold( + SBC, High32Bits(value), vixl32::FlagsUpdate::SetFlags)) { return false; } } diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index 104ebc79c2..fb15fc8975 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -182,7 +182,9 @@ class SchedulerTest : public OptimizingUnitTest { scheduler->Schedule(graph_); } - void CompileWithRandomSchedulerAndRun(const uint16_t* data, bool has_result, int expected) { + void CompileWithRandomSchedulerAndRun(const std::vector<uint16_t>& data, + bool has_result, + int expected) { for (CodegenTargetConfig target_config : GetTargetConfigs()) { HGraph* graph = CreateCFG(data); @@ -393,7 +395,7 @@ TEST_F(SchedulerTest, RandomScheduling) { // } // return result; // - const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = SIX_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 12 | 2 << 8, // const/4 v2, #int 0 Instruction::CONST_HIGH16 | 0 << 8, 0x4120, // const/high16 v0, #float 10.0 // #41200000 Instruction::CONST_4 | 1 << 12 | 1 << 8, // const/4 v1, #int 1 diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 77e70d733e..85ed06eb9b 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -31,7 +31,7 @@ namespace art { class SsaTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data, const char* expected); + void TestCode(const std::vector<uint16_t>& data, const char* expected); }; class SsaPrettyPrinter : public HPrettyPrinter { @@ -80,7 +80,7 @@ static void ReNumberInstructions(HGraph* graph) { } } -void SsaTest::TestCode(const uint16_t* data, const char* expected) { +void SsaTest::TestCode(const std::vector<uint16_t>& data, const char* expected) { HGraph* graph = CreateCFG(data); // Suspend checks implementation may change in the future, and this test relies // on how instructions are ordered. 
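
All of these test updates follow one pattern: the code-item words move from C arrays, which decay to const uint16_t* and lose their length, into std::vector<uint16_t>, which carries size() so that CreateCFG() can copy the words into properly aligned arena storage. In miniature (the raw opcode word and helper names below are illustrative; the real tests build data with the existing *_CODE_ITEM macros):

#include <cstddef>
#include <cstdint>
#include <vector>

// Before: TestCode(const uint16_t* data) could not tell how many words it
// was given. After: the vector knows its own size.
void TestCode(const std::vector<uint16_t>& data) {
  const size_t byte_size = data.size() * sizeof(uint16_t);
  // ... copy `byte_size` bytes into 4-byte-aligned storage, as CreateCFG() does.
  (void)byte_size;
}

void Example() {
  const std::vector<uint16_t> data = {0x000e};  // return-void, for illustration
  TestCode(data);
}
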
@@ -119,7 +119,7 @@ TEST_F(SsaTest, CFG1) { "BasicBlock 5, pred: 1, succ: 3\n" " 7: Goto\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -150,7 +150,7 @@ TEST_F(SsaTest, CFG2) { "BasicBlock 5, pred: 1, succ: 3\n" " 9: Goto\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, @@ -181,7 +181,7 @@ TEST_F(SsaTest, CFG3) { "BasicBlock 5, pred: 4\n" " 10: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -214,7 +214,7 @@ TEST_F(SsaTest, Loop1) { "BasicBlock 6, pred: 5\n" " 10: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -245,7 +245,7 @@ TEST_F(SsaTest, Loop2) { "BasicBlock 5, pred: 4\n" " 9: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -276,7 +276,7 @@ TEST_F(SsaTest, Loop3) { "BasicBlock 5, pred: 4\n" " 10: Exit\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -310,7 +310,7 @@ TEST_F(SsaTest, Loop4) { "BasicBlock 6, pred: 5\n" " 10: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x500, Instruction::IF_EQ, 5, @@ -351,7 +351,7 @@ TEST_F(SsaTest, Loop5) { " 13: Phi(2, 1) [11, 8, 8]\n" " 14: Goto\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -390,7 +390,7 @@ TEST_F(SsaTest, Loop6) { "BasicBlock 7, pred: 6\n" " 13: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 8, Instruction::CONST_4 | 4 << 12 | 0, @@ -432,7 +432,7 @@ TEST_F(SsaTest, Loop7) { "BasicBlock 8, pred: 2, succ: 6\n" " 15: Goto\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 8, Instruction::CONST_4 | 4 << 12 | 0, @@ -456,7 +456,7 @@ TEST_F(SsaTest, DeadLocal) { "BasicBlock 2, pred: 1\n" " 3: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); @@ -484,7 +484,7 @@ TEST_F(SsaTest, LocalInIf) { "BasicBlock 5, pred: 1, succ: 3\n" " 8: Goto\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 1 << 8, @@ -520,7 +520,7 @@ TEST_F(SsaTest, MultiplePredecessors) { "BasicBlock 7, pred: 3, succ: 5\n" " 12: Goto\n"; - const 
uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 5, Instruction::ADD_INT_LIT8 | 1 << 8, 0 << 8, diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc index 7e83f8ce5f..33823e2a11 100644 --- a/compiler/optimizing/suspend_check_test.cc +++ b/compiler/optimizing/suspend_check_test.cc @@ -30,10 +30,10 @@ namespace art { class SuspendCheckTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data); + void TestCode(const std::vector<uint16_t>& data); }; -void SuspendCheckTest::TestCode(const uint16_t* data) { +void SuspendCheckTest::TestCode(const std::vector<uint16_t>& data) { HGraph* graph = CreateCFG(data); HBasicBlock* first_block = graph->GetEntryBlock()->GetSingleSuccessor(); HBasicBlock* loop_header = first_block->GetSingleSuccessor(); @@ -43,7 +43,7 @@ void SuspendCheckTest::TestCode(const uint16_t* data) { } TEST_F(SuspendCheckTest, CFG1) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::NOP, Instruction::GOTO | 0xFF00); @@ -51,14 +51,14 @@ TEST_F(SuspendCheckTest, CFG1) { } TEST_F(SuspendCheckTest, CFG2) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 0, 0); TestCode(data); } TEST_F(SuspendCheckTest, CFG3) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 0xFFFF, Instruction::RETURN_VOID); @@ -67,7 +67,7 @@ TEST_F(SuspendCheckTest, CFG3) { } TEST_F(SuspendCheckTest, CFG4) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_NE, 0xFFFF, Instruction::RETURN_VOID); @@ -76,7 +76,7 @@ TEST_F(SuspendCheckTest, CFG4) { } TEST_F(SuspendCheckTest, CFG5) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQZ, 0xFFFF, Instruction::RETURN_VOID); @@ -85,7 +85,7 @@ TEST_F(SuspendCheckTest, CFG5) { } TEST_F(SuspendCheckTest, CFG6) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_NEZ, 0xFFFF, Instruction::RETURN_VOID); diff --git a/compiler/utils/arm/assembler_arm_shared.h b/compiler/utils/arm/assembler_arm_shared.h index 21f13eeab7..7464052d93 100644 --- a/compiler/utils/arm/assembler_arm_shared.h +++ b/compiler/utils/arm/assembler_arm_shared.h @@ -40,13 +40,6 @@ enum StoreOperandType { kStoreDWord }; -// Set condition codes request. -enum SetCc { - kCcDontCare, // Allows prioritizing 16-bit instructions on Thumb2 whether they set CCs or not. - kCcSet, - kCcKeep, -}; - } // namespace arm } // namespace art diff --git a/compiler/utils/arm/assembler_arm_test.h b/compiler/utils/arm/assembler_arm_test.h deleted file mode 100644 index 8c3a11f2cf..0000000000 --- a/compiler/utils/arm/assembler_arm_test.h +++ /dev/null @@ -1,555 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_TEST_H_ -#define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_TEST_H_ - -#include "utils/assembler_test.h" - -namespace art { - -template<typename Ass, - typename Reg, - typename FPReg, - typename Imm, - typename SOp, - typename Cond, - typename SetCc> -class AssemblerArmTest : public AssemblerTest<Ass, Reg, FPReg, Imm> { - public: - typedef AssemblerTest<Ass, Reg, FPReg, Imm> Base; - - using Base::GetRegisters; - using Base::GetRegName; - using Base::CreateImmediate; - using Base::WarnOnCombinations; - - static constexpr int64_t kFullImmRangeThreshold = 32; - - virtual void FillImmediates(std::vector<Imm>& immediates, int64_t imm_min, int64_t imm_max) { - // Small range: do completely. - if (imm_max - imm_min <= kFullImmRangeThreshold) { - for (int64_t i = imm_min; i <= imm_max; ++i) { - immediates.push_back(CreateImmediate(i)); - } - } else { - immediates.push_back(CreateImmediate(imm_min)); - immediates.push_back(CreateImmediate(imm_max)); - if (imm_min < imm_max - 1) { - immediates.push_back(CreateImmediate(imm_min + 1)); - } - if (imm_min < imm_max - 2) { - immediates.push_back(CreateImmediate(imm_min + 2)); - } - if (imm_min < imm_max - 3) { - immediates.push_back(CreateImmediate(imm_max - 1)); - } - if (imm_min < imm_max - 4) { - immediates.push_back(CreateImmediate((imm_min + imm_max) / 2)); - } - } - } - - std::string RepeatRRIIC(void (Ass::*f)(Reg, Reg, Imm, Imm, Cond), - int64_t imm1_min, int64_t imm1_max, - int64_t imm2_min, int64_t imm2_max, - std::string fmt) { - return RepeatTemplatedRRIIC(f, GetRegisters(), GetRegisters(), - &AssemblerArmTest::template GetRegName<RegisterView::kUsePrimaryName>, - &AssemblerArmTest::template GetRegName<RegisterView::kUsePrimaryName>, - imm1_min, imm1_max, imm2_min, imm2_max, - fmt); - } - - template <typename Reg1, typename Reg2> - std::string RepeatTemplatedRRIIC(void (Ass::*f)(Reg1, Reg2, Imm, Imm, Cond), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, - std::string (AssemblerArmTest::*GetName1)(const Reg1&), - std::string (AssemblerArmTest::*GetName2)(const Reg2&), - int64_t imm1_min, int64_t imm1_max, - int64_t imm2_min, int64_t imm2_max, - std::string fmt) { - std::vector<Imm> immediates1; - FillImmediates(immediates1, imm1_min, imm1_max); - std::vector<Imm> immediates2; - FillImmediates(immediates2, imm2_min, imm2_max); - - std::vector<Cond>& cond = GetConditions(); - - WarnOnCombinations(cond.size() * immediates1.size() * immediates2.size() * - reg1_registers.size() * reg2_registers.size()); - - std::ostringstream oss; - bool first = true; - for (Cond& c : cond) { - std::string after_cond = fmt; - - size_t cond_index = after_cond.find(COND_TOKEN); - if (cond_index != std::string::npos) { - after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c)); - } - - for (Imm i : immediates1) { - std::string base = after_cond; - - size_t imm1_index = base.find(IMM1_TOKEN); - if (imm1_index != std::string::npos) { - std::ostringstream sreg; - sreg << i; - std::string imm_string = sreg.str(); - base.replace(imm1_index, 
ConstexprStrLen(IMM1_TOKEN), imm_string); - } - - for (Imm j : immediates2) { - std::string base2 = base; - - size_t imm2_index = base2.find(IMM2_TOKEN); - if (imm2_index != std::string::npos) { - std::ostringstream sreg; - sreg << j; - std::string imm_string = sreg.str(); - base2.replace(imm2_index, ConstexprStrLen(IMM2_TOKEN), imm_string); - } - - for (auto reg1 : reg1_registers) { - std::string base3 = base2; - - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base3.find(Base::REG1_TOKEN)) != std::string::npos) { - base3.replace(reg1_index, ConstexprStrLen(Base::REG1_TOKEN), reg1_string); - } - - for (auto reg2 : reg2_registers) { - std::string base4 = base3; - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base4.find(Base::REG2_TOKEN)) != std::string::npos) { - base4.replace(reg2_index, ConstexprStrLen(Base::REG2_TOKEN), reg2_string); - } - - if (first) { - first = false; - } else { - oss << "\n"; - } - oss << base4; - - (Base::GetAssembler()->*f)(*reg1, *reg2, i, j, c); - } - } - } - } - } - // Add a newline at the end. - oss << "\n"; - - return oss.str(); - } - - std::string RepeatRRiiC(void (Ass::*f)(Reg, Reg, Imm, Imm, Cond), - std::vector<std::pair<Imm, Imm>>& immediates, - std::string fmt) { - return RepeatTemplatedRRiiC<Reg, Reg>(f, GetRegisters(), GetRegisters(), - &AssemblerArmTest::template GetRegName<RegisterView::kUsePrimaryName>, - &AssemblerArmTest::template GetRegName<RegisterView::kUsePrimaryName>, - immediates, fmt); - } - - template <typename Reg1, typename Reg2> - std::string RepeatTemplatedRRiiC(void (Ass::*f)(Reg1, Reg2, Imm, Imm, Cond), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, - std::string (AssemblerArmTest::*GetName1)(const Reg1&), - std::string (AssemblerArmTest::*GetName2)(const Reg2&), - std::vector<std::pair<Imm, Imm>>& immediates, - std::string fmt) { - std::vector<Cond>& cond = GetConditions(); - - WarnOnCombinations(cond.size() * immediates.size() * reg1_registers.size() * - reg2_registers.size()); - - std::ostringstream oss; - bool first = true; - for (Cond& c : cond) { - std::string after_cond = fmt; - - size_t cond_index = after_cond.find(COND_TOKEN); - if (cond_index != std::string::npos) { - after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c)); - } - - for (std::pair<Imm, Imm>& pair : immediates) { - Imm i = pair.first; - Imm j = pair.second; - std::string after_imm1 = after_cond; - - size_t imm1_index = after_imm1.find(IMM1_TOKEN); - if (imm1_index != std::string::npos) { - std::ostringstream sreg; - sreg << i; - std::string imm_string = sreg.str(); - after_imm1.replace(imm1_index, ConstexprStrLen(IMM1_TOKEN), imm_string); - } - - std::string after_imm2 = after_imm1; - - size_t imm2_index = after_imm2.find(IMM2_TOKEN); - if (imm2_index != std::string::npos) { - std::ostringstream sreg; - sreg << j; - std::string imm_string = sreg.str(); - after_imm2.replace(imm2_index, ConstexprStrLen(IMM2_TOKEN), imm_string); - } - - for (auto reg1 : reg1_registers) { - std::string after_reg1 = after_imm2; - - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = after_reg1.find(Base::REG1_TOKEN)) != std::string::npos) { - after_reg1.replace(reg1_index, ConstexprStrLen(Base::REG1_TOKEN), reg1_string); - } - - for (auto reg2 : reg2_registers) { - std::string after_reg2 = after_reg1; - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t 
reg2_index; - while ((reg2_index = after_reg2.find(Base::REG2_TOKEN)) != std::string::npos) { - after_reg2.replace(reg2_index, ConstexprStrLen(Base::REG2_TOKEN), reg2_string); - } - - if (first) { - first = false; - } else { - oss << "\n"; - } - oss << after_reg2; - - (Base::GetAssembler()->*f)(*reg1, *reg2, i, j, c); - } - } - } - } - // Add a newline at the end. - oss << "\n"; - - return oss.str(); - } - - std::string RepeatRRC(void (Ass::*f)(Reg, Reg, Cond), std::string fmt) { - return RepeatTemplatedRRC(f, GetRegisters(), GetRegisters(), GetConditions(), - &AssemblerArmTest::template GetRegName<RegisterView::kUsePrimaryName>, - &AssemblerArmTest::template GetRegName<RegisterView::kUsePrimaryName>, - fmt); - } - - template <typename Reg1, typename Reg2> - std::string RepeatTemplatedRRC(void (Ass::*f)(Reg1, Reg2, Cond), - const std::vector<Reg1*>& reg1_registers, - const std::vector<Reg2*>& reg2_registers, - const std::vector<Cond>& cond, - std::string (AssemblerArmTest::*GetName1)(const Reg1&), - std::string (AssemblerArmTest::*GetName2)(const Reg2&), - std::string fmt) { - WarnOnCombinations(cond.size() * reg1_registers.size() * reg2_registers.size()); - - std::ostringstream oss; - bool first = true; - for (const Cond& c : cond) { - std::string after_cond = fmt; - - size_t cond_index = after_cond.find(COND_TOKEN); - if (cond_index != std::string::npos) { - after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c)); - } - - for (auto reg1 : reg1_registers) { - std::string after_reg1 = after_cond; - - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = after_reg1.find(Base::REG1_TOKEN)) != std::string::npos) { - after_reg1.replace(reg1_index, ConstexprStrLen(Base::REG1_TOKEN), reg1_string); - } - - for (auto reg2 : reg2_registers) { - std::string after_reg2 = after_reg1; - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = after_reg2.find(Base::REG2_TOKEN)) != std::string::npos) { - after_reg2.replace(reg2_index, ConstexprStrLen(Base::REG2_TOKEN), reg2_string); - } - - if (first) { - first = false; - } else { - oss << "\n"; - } - oss << after_reg2; - - (Base::GetAssembler()->*f)(*reg1, *reg2, c); - } - } - } - // Add a newline at the end. 
- oss << "\n"; - - return oss.str(); - } - - std::string RepeatRRRC(void (Ass::*f)(Reg, Reg, Reg, Cond), std::string fmt) { - return RepeatTemplatedRRRC(f, GetRegisters(), GetRegisters(), GetRegisters(), GetConditions(), - &AssemblerArmTest::template GetRegName<RegisterView::kUsePrimaryName>, - &AssemblerArmTest::template GetRegName<RegisterView::kUsePrimaryName>, - &AssemblerArmTest::template GetRegName<RegisterView::kUsePrimaryName>, - fmt); - } - - template <typename Reg1, typename Reg2, typename Reg3> - std::string RepeatTemplatedRRRC(void (Ass::*f)(Reg1, Reg2, Reg3, Cond), - const std::vector<Reg1*>& reg1_registers, - const std::vector<Reg2*>& reg2_registers, - const std::vector<Reg3*>& reg3_registers, - const std::vector<Cond>& cond, - std::string (AssemblerArmTest::*GetName1)(const Reg1&), - std::string (AssemblerArmTest::*GetName2)(const Reg2&), - std::string (AssemblerArmTest::*GetName3)(const Reg3&), - std::string fmt) { - WarnOnCombinations(cond.size() * reg1_registers.size() * reg2_registers.size() * - reg3_registers.size()); - - std::ostringstream oss; - bool first = true; - for (const Cond& c : cond) { - std::string after_cond = fmt; - - size_t cond_index = after_cond.find(COND_TOKEN); - if (cond_index != std::string::npos) { - after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c)); - } - - for (auto reg1 : reg1_registers) { - std::string after_reg1 = after_cond; - - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = after_reg1.find(Base::REG1_TOKEN)) != std::string::npos) { - after_reg1.replace(reg1_index, ConstexprStrLen(Base::REG1_TOKEN), reg1_string); - } - - for (auto reg2 : reg2_registers) { - std::string after_reg2 = after_reg1; - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = after_reg2.find(Base::REG2_TOKEN)) != std::string::npos) { - after_reg2.replace(reg2_index, ConstexprStrLen(Base::REG2_TOKEN), reg2_string); - } - - for (auto reg3 : reg3_registers) { - std::string after_reg3 = after_reg2; - - std::string reg3_string = (this->*GetName3)(*reg3); - size_t reg3_index; - while ((reg3_index = after_reg3.find(REG3_TOKEN)) != std::string::npos) { - after_reg3.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string); - } - - if (first) { - first = false; - } else { - oss << "\n"; - } - oss << after_reg3; - - (Base::GetAssembler()->*f)(*reg1, *reg2, *reg3, c); - } - } - } - } - // Add a newline at the end. 
- oss << "\n"; - - return oss.str(); - } - - template <typename RegT> - std::string RepeatTemplatedRSC(void (Ass::*f)(RegT, SOp, Cond), - const std::vector<RegT*>& registers, - const std::vector<SOp>& shifts, - const std::vector<Cond>& cond, - std::string (AssemblerArmTest::*GetName)(const RegT&), - std::string fmt) { - WarnOnCombinations(cond.size() * registers.size() * shifts.size()); - - std::ostringstream oss; - bool first = true; - for (const Cond& c : cond) { - std::string after_cond = fmt; - - size_t cond_index = after_cond.find(COND_TOKEN); - if (cond_index != std::string::npos) { - after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c)); - } - - for (const SOp& shift : shifts) { - std::string after_shift = after_cond; - - std::string shift_string = GetShiftString(shift); - size_t shift_index; - while ((shift_index = after_shift.find(Base::SHIFT_TOKEN)) != std::string::npos) { - after_shift.replace(shift_index, ConstexprStrLen(Base::SHIFT_TOKEN), shift_string); - } - - for (auto reg : registers) { - std::string after_reg = after_shift; - - std::string reg_string = (this->*GetName)(*reg); - size_t reg_index; - while ((reg_index = after_reg.find(Base::REG_TOKEN)) != std::string::npos) { - after_reg.replace(reg_index, ConstexprStrLen(Base::REG_TOKEN), reg_string); - } - - if (first) { - first = false; - } else { - oss << "\n"; - } - oss << after_reg; - - (Base::GetAssembler()->*f)(*reg, shift, c); - } - } - } - // Add a newline at the end. - oss << "\n"; - - return oss.str(); - } - - template <typename Reg1, typename Reg2> - std::string RepeatTemplatedRRSC(void (Ass::*f)(Reg1, Reg2, const SOp&, Cond), - const std::vector<Reg1*>& reg1_registers, - const std::vector<Reg2*>& reg2_registers, - const std::vector<SOp>& shifts, - const std::vector<Cond>& cond, - std::string (AssemblerArmTest::*GetName1)(const Reg1&), - std::string (AssemblerArmTest::*GetName2)(const Reg2&), - std::string fmt) { - WarnOnCombinations(cond.size() * reg1_registers.size() * reg2_registers.size() * shifts.size()); - - std::ostringstream oss; - bool first = true; - for (const Cond& c : cond) { - std::string after_cond = fmt; - - size_t cond_index = after_cond.find(COND_TOKEN); - if (cond_index != std::string::npos) { - after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c)); - } - - for (const SOp& shift : shifts) { - std::string after_shift = after_cond; - - std::string shift_string = GetShiftString(shift); - size_t shift_index; - while ((shift_index = after_shift.find(SHIFT_TOKEN)) != std::string::npos) { - after_shift.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string); - } - - for (auto reg1 : reg1_registers) { - std::string after_reg1 = after_shift; - - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = after_reg1.find(Base::REG1_TOKEN)) != std::string::npos) { - after_reg1.replace(reg1_index, ConstexprStrLen(Base::REG1_TOKEN), reg1_string); - } - - for (auto reg2 : reg2_registers) { - std::string after_reg2 = after_reg1; - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = after_reg2.find(Base::REG2_TOKEN)) != std::string::npos) { - after_reg2.replace(reg2_index, ConstexprStrLen(Base::REG2_TOKEN), reg2_string); - } - - if (first) { - first = false; - } else { - oss << "\n"; - } - oss << after_reg2; - - (Base::GetAssembler()->*f)(*reg1, *reg2, shift, c); - } - } - } - } - // Add a newline at the end. 
- oss << "\n"; - - return oss.str(); - } - - protected: - AssemblerArmTest() {} - - virtual std::vector<Cond>& GetConditions() = 0; - virtual std::string GetConditionString(Cond c) = 0; - - virtual std::vector<SetCc>& GetSetCcs() = 0; - virtual std::string GetSetCcString(SetCc s) = 0; - - virtual std::vector<SOp>& GetShiftOperands() = 0; - virtual std::string GetShiftString(SOp sop) = 0; - - virtual Reg GetPCRegister() = 0; - virtual std::vector<Reg*> GetRegistersWithoutPC() { - std::vector<Reg*> without_pc = GetRegisters(); - Reg pc_reg = GetPCRegister(); - - for (auto it = without_pc.begin(); it != without_pc.end(); ++it) { - if (**it == pc_reg) { - without_pc.erase(it); - break; - } - } - - return without_pc; - } - - static constexpr const char* IMM1_TOKEN = "{imm1}"; - static constexpr const char* IMM2_TOKEN = "{imm2}"; - static constexpr const char* REG3_TOKEN = "{reg3}"; - static constexpr const char* REG4_TOKEN = "{reg4}"; - static constexpr const char* COND_TOKEN = "{cond}"; - static constexpr const char* SET_CC_TOKEN = "{s}"; - static constexpr const char* SHIFT_TOKEN = "{shift}"; - - private: - DISALLOW_COPY_AND_ASSIGN(AssemblerArmTest); -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_TEST_H_ diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index d6b24da407..05250a4157 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -112,12 +112,14 @@ bool ArmVIXLAssembler::ShifterOperandCanAlwaysHold(uint32_t immediate) { return vixl_masm_.IsModifiedImmediate(immediate); } -bool ArmVIXLAssembler::ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc) { +bool ArmVIXLAssembler::ShifterOperandCanHold(Opcode opcode, + uint32_t immediate, + vixl::aarch32::FlagsUpdate update_flags) { switch (opcode) { case ADD: case SUB: // Less than (or equal to) 12 bits can be done if we don't need to set condition codes. 
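
With the SetCc enum deleted from assembler_arm_shared.h above, the remaining call sites use VIXL's own FlagsUpdate. The correspondence, as far as these hunks show it, is sketched below; only kCcSet and kCcDontCare appear in this diff, so the kCcKeep/LeaveFlags pairing is an assumption, and the enum values are assumed to match VIXL's aarch32 declaration:

// VIXL's enum, reproduced for the sketch.
enum FlagsUpdate { LeaveFlags = 0, SetFlags = 1, DontCare = 2 };

// The removed ART enum.
enum SetCc { kCcDontCare, kCcSet, kCcKeep };

FlagsUpdate Translate(SetCc set_cc) {
  switch (set_cc) {
    case kCcSet:      return SetFlags;    // must set condition codes
    case kCcKeep:     return LeaveFlags;  // must preserve them (assumed mapping)
    case kCcDontCare:
    default:          return DontCare;    // now the default argument
  }
}

The hunk resumes below with the 12-bit immediate test rewritten in terms of FlagsUpdate.
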
- if (IsUint<12>(immediate) && set_cc != kCcSet) { + if (IsUint<12>(immediate) && update_flags != vixl::aarch32::SetFlags) { return true; } return ShifterOperandCanAlwaysHold(immediate); diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index 1377e64073..b0310f2fb6 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -218,7 +218,9 @@ class ArmVIXLAssembler FINAL : public Assembler { void StoreRegisterList(RegList regs, size_t stack_offset); bool ShifterOperandCanAlwaysHold(uint32_t immediate); - bool ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc = kCcDontCare); + bool ShifterOperandCanHold(Opcode opcode, + uint32_t immediate, + vixl::aarch32::FlagsUpdate update_flags = vixl::aarch32::DontCare); bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits, int32_t offset, /*out*/ int32_t* add_to_base, diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc index 2b3e979606..065c3de23c 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc @@ -492,7 +492,7 @@ void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg, temps.Exclude(in_reg.AsVIXLRegister()); ___ Cmp(in_reg.AsVIXLRegister(), 0); - if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value(), kCcDontCare)) { + if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value())) { if (!out_reg.Equals(in_reg)) { ExactAssemblyScope guard(asm_.GetVIXLAssembler(), 3 * vixl32::kMaxInstructionSizeInBytes, @@ -531,7 +531,7 @@ void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off, // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) ___ Cmp(scratch.AsVIXLRegister(), 0); - if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value(), kCcDontCare)) { + if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value())) { ExactAssemblyScope guard(asm_.GetVIXLAssembler(), 2 * vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 0a094352e4..674dc9a78b 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -153,7 +153,7 @@ const char* const VixlJniHelpersResults[] = { " 21c: f8d9 8034 ldr.w r8, [r9, #52] ; 0x34\n", " 220: 4770 bx lr\n", " 222: 4660 mov r0, ip\n", - " 224: f8d9 c2c0 ldr.w ip, [r9, #704] ; 0x2c0\n", + " 224: f8d9 c2c4 ldr.w ip, [r9, #708] ; 0x2c4\n", " 228: 47e0 blx ip\n", nullptr }; diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h index 04fba51dc1..58f1ec7b08 100644 --- a/compiler/utils/test_dex_file_builder.h +++ b/compiler/utils/test_dex_file_builder.h @@ -27,6 +27,7 @@ #include <android-base/logging.h> #include "base/bit_utils.h" +#include "dex/art_dex_file_loader.h" #include "dex/dex_file_loader.h" #include "dex/standard_dex_file.h" @@ -233,7 +234,8 @@ class TestDexFileBuilder { static constexpr bool kVerify = false; static constexpr bool kVerifyChecksum = false; std::string error_msg; - std::unique_ptr<const DexFile> dex_file(DexFileLoader::Open( + const ArtDexFileLoader dex_file_loader; + std::unique_ptr<const DexFile> dex_file(dex_file_loader.Open( &dex_file_data_[0], dex_file_data_.size(), dex_location, |