Diffstat (limited to 'compiler')
55 files changed, 2956 insertions, 559 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index 01f761b877..2e60e7d658 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -89,6 +89,7 @@ art_cc_defaults { "optimizing/ssa_liveness_analysis.cc", "optimizing/ssa_phi_elimination.cc", "optimizing/stack_map_stream.cc", + "optimizing/superblock_cloner.cc", "trampolines/trampoline_compiler.cc", "utils/assembler.cc", "utils/jni_macro_assembler.cc", diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index 05fdc97e07..8af29d44f0 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -23,7 +23,6 @@ #include "common_runtime_test.h" #include "compiler.h" -#include "jit/profile_compilation_info.h" #include "oat_file.h" namespace art { @@ -34,6 +33,7 @@ class ClassLoader; class CompilerDriver; class CompilerOptions; class CumulativeLogger; +class ProfileCompilationInfo; class VerificationResults; template<class T> class Handle; diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index e41371855d..0f69dbab94 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -159,4 +159,10 @@ CompiledMethod::~CompiledMethod() { storage->ReleaseMethodInfo(method_info_); } +void CompiledMethod::ReleaseVMapTable() { + CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage(); + storage->ReleaseVMapTable(vmap_table_); + vmap_table_ = nullptr; +} + } // namespace art diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index acdce260e5..4e8f3efe5a 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -168,6 +168,10 @@ class CompiledMethod FINAL : public CompiledCode { ArrayRef<const linker::LinkerPatch> GetPatches() const; + // The compiler sometimes unquickens shared code items. In that case, we need to clear the vmap + // table to avoid writing the quicken info to the vdex file. + void ReleaseVMapTable(); + private: static constexpr size_t kIsIntrinsicLsb = kNumberOfCompiledCodePackedBits; static constexpr size_t kIsIntrinsicSize = 1u; @@ -186,7 +190,7 @@ class CompiledMethod FINAL : public CompiledCode { // For quick code, method specific information that is not very dedupe friendly (method indices). const LengthPrefixedArray<uint8_t>* const method_info_; // For quick code, holds code infos which contain stack maps, inline information, and etc. - const LengthPrefixedArray<uint8_t>* const vmap_table_; + const LengthPrefixedArray<uint8_t>* vmap_table_; // For quick code, a FDE entry for the debug_frame section. const LengthPrefixedArray<uint8_t>* const cfi_info_; // For quick code, linker patches needed by the method. 
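The ReleaseVMapTable() hook above is consumed later in this change by the dex-to-dex compiler: when two methods share a code item and their quicken data turns out to conflict, every method already compiled against that code item gives its vmap table back to the dedupe storage so no stale quicken info reaches the vdex. A minimal sketch of that call pattern, using only types touched by this change (the helper name and locals are illustrative, not part of the patch):

    #include <vector>

    #include "compiled_method.h"
    #include "driver/compiler_driver.h"
    #include "method_reference.h"

    namespace art {

    // Illustrative helper mirroring the conflict path added to DexToDexCompiler::CompileMethod():
    // release the vmap table of every method previously compiled against the shared code item so
    // the oat writer emits no quicken info for them.
    static void DropStaleQuickenInfo(CompilerDriver* driver,
                                     const std::vector<MethodReference>& methods) {
      for (const MethodReference& ref : methods) {
        CompiledMethod* method = driver->GetCompiledMethod(ref);
        if (method != nullptr) {
          method->ReleaseVMapTable();  // Clears vmap_table_, which is why it is no longer const.
        }
      }
    }

    }  // namespace art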
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc index bb2a214ecd..df5bb37358 100644 --- a/compiler/debug/elf_debug_writer.cc +++ b/compiler/debug/elf_debug_writer.cc @@ -137,10 +137,17 @@ static std::vector<uint8_t> MakeElfFileForJITInternal( InstructionSet isa, const InstructionSetFeatures* features, bool mini_debug_info, - const MethodDebugInfo& mi) { - CHECK_EQ(mi.is_code_address_text_relative, false); + ArrayRef<const MethodDebugInfo> method_infos) { + CHECK_GT(method_infos.size(), 0u); + uint64_t min_address = std::numeric_limits<uint64_t>::max(); + uint64_t max_address = 0; + for (const MethodDebugInfo& mi : method_infos) { + CHECK_EQ(mi.is_code_address_text_relative, false); + min_address = std::min(min_address, mi.code_address); + max_address = std::max(max_address, mi.code_address + mi.code_size); + } DebugInfo debug_info{}; - debug_info.compiled_methods = ArrayRef<const debug::MethodDebugInfo>(&mi, 1); + debug_info.compiled_methods = method_infos; std::vector<uint8_t> buffer; buffer.reserve(KB); linker::VectorOutputStream out("Debug ELF file", &buffer); @@ -151,14 +158,14 @@ static std::vector<uint8_t> MakeElfFileForJITInternal( if (mini_debug_info) { std::vector<uint8_t> mdi = MakeMiniDebugInfo(isa, features, - mi.code_address, - mi.code_size, + min_address, + max_address - min_address, /* dex_section_address */ 0, /* dex_section_size */ 0, debug_info); builder->WriteSection(".gnu_debugdata", &mdi); } else { - builder->GetText()->AllocateVirtualMemory(mi.code_address, mi.code_size); + builder->GetText()->AllocateVirtualMemory(min_address, max_address - min_address); WriteDebugInfo(builder.get(), debug_info, dwarf::DW_DEBUG_FRAME_FORMAT, @@ -173,11 +180,11 @@ std::vector<uint8_t> MakeElfFileForJIT( InstructionSet isa, const InstructionSetFeatures* features, bool mini_debug_info, - const MethodDebugInfo& method_info) { + ArrayRef<const MethodDebugInfo> method_infos) { if (Is64BitInstructionSet(isa)) { - return MakeElfFileForJITInternal<ElfTypes64>(isa, features, mini_debug_info, method_info); + return MakeElfFileForJITInternal<ElfTypes64>(isa, features, mini_debug_info, method_infos); } else { - return MakeElfFileForJITInternal<ElfTypes32>(isa, features, mini_debug_info, method_info); + return MakeElfFileForJITInternal<ElfTypes32>(isa, features, mini_debug_info, method_infos); } } diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h index 8ad0c4219a..e442e0016c 100644 --- a/compiler/debug/elf_debug_writer.h +++ b/compiler/debug/elf_debug_writer.h @@ -54,7 +54,7 @@ std::vector<uint8_t> MakeElfFileForJIT( InstructionSet isa, const InstructionSetFeatures* features, bool mini_debug_info, - const MethodDebugInfo& method_info); + ArrayRef<const MethodDebugInfo> method_infos); std::vector<uint8_t> WriteDebugElfFileForClasses( InstructionSet isa, diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h index a853714d2b..9c9e8b35b8 100644 --- a/compiler/debug/elf_symtab_writer.h +++ b/compiler/debug/elf_symtab_writer.h @@ -72,8 +72,8 @@ static void WriteDebugSymbols(linker::ElfBuilder<ElfTypes>* builder, continue; // Add symbol only for the first instance. 
} size_t name_offset; - if (!info.trampoline_name.empty()) { - name_offset = strtab->Write(info.trampoline_name); + if (!info.custom_name.empty()) { + name_offset = strtab->Write(info.custom_name); } else { DCHECK(info.dex_file != nullptr); std::string name = info.dex_file->PrettyMethod(info.dex_method_index, !mini_debug_info); diff --git a/compiler/debug/method_debug_info.h b/compiler/debug/method_debug_info.h index 43c8de26aa..d0b03ec441 100644 --- a/compiler/debug/method_debug_info.h +++ b/compiler/debug/method_debug_info.h @@ -27,7 +27,7 @@ namespace art { namespace debug { struct MethodDebugInfo { - std::string trampoline_name; + std::string custom_name; const DexFile* dex_file; // Native methods (trampolines) do not reference dex file. size_t class_def_index; uint32_t dex_method_index; diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index 308e75d9c1..28c7fe2c34 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -28,6 +28,7 @@ #include "compiled_method.h" #include "dex/dex_file-inl.h" #include "dex/dex_instruction-inl.h" +#include "dex_to_dex_decompiler.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "mirror/dex_cache.h" @@ -44,81 +45,106 @@ const bool kEnableQuickening = true; // Control check-cast elision. const bool kEnableCheckCastEllision = true; -struct QuickenedInfo { - QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {} +DexToDexCompiler::DexToDexCompiler(CompilerDriver* driver) + : driver_(driver), + lock_("Quicken lock", kDexToDexCompilerLock) { + DCHECK(driver != nullptr); +} - uint32_t dex_pc; - uint16_t dex_member_index; -}; +void DexToDexCompiler::ClearState() { + MutexLock lock(Thread::Current(), lock_); + active_dex_file_ = nullptr; + active_bit_vector_ = nullptr; + seen_code_items_.clear(); + should_quicken_.clear(); + shared_code_items_.clear(); + blacklisted_code_items_.clear(); + shared_code_item_quicken_info_.clear(); +} -class DexCompiler { - public: - DexCompiler(art::CompilerDriver& compiler, - const DexCompilationUnit& unit, - DexToDexCompilationLevel dex_to_dex_compilation_level) - : driver_(compiler), - unit_(unit), - dex_to_dex_compilation_level_(dex_to_dex_compilation_level) {} +size_t DexToDexCompiler::NumUniqueCodeItems(Thread* self) const { + MutexLock lock(self, lock_); + return seen_code_items_.size(); +} - ~DexCompiler() {} +BitVector* DexToDexCompiler::GetOrAddBitVectorForDex(const DexFile* dex_file) { + if (active_dex_file_ != dex_file) { + active_dex_file_ = dex_file; + auto inserted = should_quicken_.emplace(dex_file, + BitVector(dex_file->NumMethodIds(), + /*expandable*/ false, + Allocator::GetMallocAllocator())); + active_bit_vector_ = &inserted.first->second; + } + return active_bit_vector_; +} - void Compile(); +void DexToDexCompiler::MarkForCompilation(Thread* self, + const MethodReference& method_ref, + const DexFile::CodeItem* code_item) { + MutexLock lock(self, lock_); + BitVector* const bitmap = GetOrAddBitVectorForDex(method_ref.dex_file); + DCHECK(bitmap != nullptr); + DCHECK(!bitmap->IsBitSet(method_ref.index)); + bitmap->SetBit(method_ref.index); + // Detect the shared code items. 
+ if (!seen_code_items_.insert(code_item).second) { + shared_code_items_.insert(code_item); + } +} - const std::vector<QuickenedInfo>& GetQuickenedInfo() const { - return quickened_info_; +DexToDexCompiler::CompilationState::CompilationState(DexToDexCompiler* compiler, + const DexCompilationUnit& unit, + const CompilationLevel compilation_level, + const std::vector<uint8_t>* quicken_data) + : compiler_(compiler), + driver_(*compiler->GetDriver()), + unit_(unit), + compilation_level_(compilation_level), + already_quickened_(quicken_data != nullptr), + existing_quicken_info_(already_quickened_ + ? ArrayRef<const uint8_t>(*quicken_data) : ArrayRef<const uint8_t>()) {} + +uint16_t DexToDexCompiler::CompilationState::NextIndex() { + DCHECK(already_quickened_); + if (kIsDebugBuild && quicken_index_ >= existing_quicken_info_.NumIndices()) { + for (const DexInstructionPcPair& pair : unit_.GetCodeItemAccessor()) { + LOG(ERROR) << pair->DumpString(nullptr); + } + LOG(FATAL) << "Mismatched number of quicken slots."; } + const uint16_t ret = existing_quicken_info_.GetData(quicken_index_); + quicken_index_++; + return ret; +} - private: - const DexFile& GetDexFile() const { - return *unit_.GetDexFile(); +uint16_t DexToDexCompiler::CompilationState::GetIndexForInstruction(const Instruction* inst, + uint32_t index) { + if (UNLIKELY(already_quickened_)) { + return inst->IsQuickened() ? NextIndex() : index; } + DCHECK(!inst->IsQuickened()); + return index; +} + +bool DexToDexCompiler::ShouldCompileMethod(const MethodReference& ref) { + // TODO: It's probably safe to avoid the lock here if the active_dex_file_ matches since we only + // only call ShouldCompileMethod on one dex at a time. + MutexLock lock(Thread::Current(), lock_); + return GetOrAddBitVectorForDex(ref.dex_file)->IsBitSet(ref.index); +} - // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where - // a barrier is required. - void CompileReturnVoid(Instruction* inst, uint32_t dex_pc); - - // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In - // this case, returns the second NOP instruction pointer. Otherwise, returns - // the given "inst". - Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc); - - // Compiles a field access into a quick field access. - // The field index is replaced by an offset within an Object where we can read - // from / write to this field. Therefore, this does not involve any resolution - // at runtime. - // Since the field index is encoded with 16 bits, we can replace it only if the - // field offset can be encoded with 16 bits too. - void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_put); - - // Compiles a virtual method invocation into a quick virtual method invocation. - // The method index is replaced by the vtable index where the corresponding - // Executable can be found. Therefore, this does not involve any resolution - // at runtime. - // Since the method index is encoded with 16 bits, we can replace it only if the - // vtable index can be encoded with 16 bits too. - void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_range); - - CompilerDriver& driver_; - const DexCompilationUnit& unit_; - const DexToDexCompilationLevel dex_to_dex_compilation_level_; - - // Filled by the compiler when quickening, in order to encode that information - // in the .oat file. The runtime will use that information to get to the original - // opcodes. 
- std::vector<QuickenedInfo> quickened_info_; - - DISALLOW_COPY_AND_ASSIGN(DexCompiler); -}; - -void DexCompiler::Compile() { - DCHECK_EQ(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kOptimize); - IterationRange<DexInstructionIterator> instructions(unit_.GetCodeItemAccessor().begin(), - unit_.GetCodeItemAccessor().end()); +std::vector<uint8_t> DexToDexCompiler::CompilationState::Compile() { + DCHECK_EQ(compilation_level_, CompilationLevel::kOptimize); + const CodeItemDataAccessor& instructions = unit_.GetCodeItemAccessor(); for (DexInstructionIterator it = instructions.begin(); it != instructions.end(); ++it) { const uint32_t dex_pc = it.DexPc(); Instruction* inst = const_cast<Instruction*>(&it.Inst()); + + if (!already_quickened_) { + DCHECK(!inst->IsQuickened()); + } + switch (inst->Opcode()) { case Instruction::RETURN_VOID: CompileReturnVoid(inst, dex_pc); @@ -134,84 +160,147 @@ void DexCompiler::Compile() { break; case Instruction::IGET: + case Instruction::IGET_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_QUICK, false); break; case Instruction::IGET_WIDE: + case Instruction::IGET_WIDE_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_WIDE_QUICK, false); break; case Instruction::IGET_OBJECT: + case Instruction::IGET_OBJECT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_OBJECT_QUICK, false); break; case Instruction::IGET_BOOLEAN: + case Instruction::IGET_BOOLEAN_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BOOLEAN_QUICK, false); break; case Instruction::IGET_BYTE: + case Instruction::IGET_BYTE_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BYTE_QUICK, false); break; case Instruction::IGET_CHAR: + case Instruction::IGET_CHAR_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_CHAR_QUICK, false); break; case Instruction::IGET_SHORT: + case Instruction::IGET_SHORT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_SHORT_QUICK, false); break; case Instruction::IPUT: + case Instruction::IPUT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_QUICK, true); break; case Instruction::IPUT_BOOLEAN: + case Instruction::IPUT_BOOLEAN_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BOOLEAN_QUICK, true); break; case Instruction::IPUT_BYTE: + case Instruction::IPUT_BYTE_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BYTE_QUICK, true); break; case Instruction::IPUT_CHAR: + case Instruction::IPUT_CHAR_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_CHAR_QUICK, true); break; case Instruction::IPUT_SHORT: + case Instruction::IPUT_SHORT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_SHORT_QUICK, true); break; case Instruction::IPUT_WIDE: + case Instruction::IPUT_WIDE_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_WIDE_QUICK, true); break; case Instruction::IPUT_OBJECT: + case Instruction::IPUT_OBJECT_QUICK: CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_OBJECT_QUICK, true); break; case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_VIRTUAL_QUICK: CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_QUICK, false); break; case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_RANGE_QUICK, true); break; case Instruction::NOP: - // We need to differentiate between check cast inserted NOP and normal NOP, put an invalid - // index in the map for 
normal nops. This should be rare in real code. - quickened_info_.push_back(QuickenedInfo(dex_pc, DexFile::kDexNoIndex16)); + if (already_quickened_) { + const uint16_t reference_index = NextIndex(); + quickened_info_.push_back(QuickenedInfo(dex_pc, reference_index)); + if (reference_index == DexFile::kDexNoIndex16) { + // This means it was a normal nop and not a check-cast. + break; + } + const uint16_t type_index = NextIndex(); + if (driver_.IsSafeCast(&unit_, dex_pc)) { + quickened_info_.push_back(QuickenedInfo(dex_pc, type_index)); + } + ++it; + } else { + // We need to differentiate between check cast inserted NOP and normal NOP, put an invalid + // index in the map for normal nops. This should be rare in real code. + quickened_info_.push_back(QuickenedInfo(dex_pc, DexFile::kDexNoIndex16)); + } break; default: - DCHECK(!inst->IsQuickened()); // Nothing to do. break; } } + + if (already_quickened_) { + DCHECK_EQ(quicken_index_, existing_quicken_info_.NumIndices()); + } + + if (GetQuickenedInfo().empty()) { + // No need to create a CompiledMethod if there are no quickened opcodes. + return std::vector<uint8_t>(); + } + + std::vector<uint8_t> quicken_data; + if (kIsDebugBuild) { + // Double check that the counts line up with the size of the quicken info. + size_t quicken_count = 0; + for (const DexInstructionPcPair& pair : instructions) { + if (QuickenInfoTable::NeedsIndexForInstruction(&pair.Inst())) { + ++quicken_count; + } + } + CHECK_EQ(quicken_count, GetQuickenedInfo().size()); + } + + QuickenInfoTable::Builder builder(&quicken_data, GetQuickenedInfo().size()); + // Length is encoded by the constructor. + for (const CompilationState::QuickenedInfo& info : GetQuickenedInfo()) { + // Dex pc is not serialized, only used for checking the instructions. Since we access the + // array based on the index of the quickened instruction, the indexes must line up perfectly. + // The reader side uses the NeedsIndexForInstruction function too. + const Instruction& inst = instructions.InstructionAt(info.dex_pc); + CHECK(QuickenInfoTable::NeedsIndexForInstruction(&inst)) << inst.Opcode(); + builder.AddIndex(info.dex_member_index); + } + DCHECK(!quicken_data.empty()); + return quicken_data; } -void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { +void DexToDexCompiler::CompilationState::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { DCHECK_EQ(inst->Opcode(), Instruction::RETURN_VOID); if (unit_.IsConstructor()) { // Are we compiling a non clinit constructor which needs a barrier ? @@ -229,7 +318,8 @@ void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { inst->SetOpcode(Instruction::RETURN_VOID_NO_BARRIER); } -Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) { +Instruction* DexToDexCompiler::CompilationState::CompileCheckCast(Instruction* inst, + uint32_t dex_pc) { if (!kEnableCheckCastEllision) { return inst; } @@ -246,27 +336,30 @@ Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) { << " by replacing it with 2 NOPs at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegA_21c())); - quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegB_21c())); - // We are modifying 4 consecutive bytes. - inst->SetOpcode(Instruction::NOP); - inst->SetVRegA_10x(0u); // keep compliant with verifier. 
- // Get to next instruction which is the second half of check-cast and replace - // it by a NOP. - inst = const_cast<Instruction*>(inst->Next()); - inst->SetOpcode(Instruction::NOP); - inst->SetVRegA_10x(0u); // keep compliant with verifier. + if (!already_quickened_) { + quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegA_21c())); + quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegB_21c())); + + // We are modifying 4 consecutive bytes. + inst->SetOpcode(Instruction::NOP); + inst->SetVRegA_10x(0u); // keep compliant with verifier. + // Get to next instruction which is the second half of check-cast and replace + // it by a NOP. + inst = const_cast<Instruction*>(inst->Next()); + inst->SetOpcode(Instruction::NOP); + inst->SetVRegA_10x(0u); // keep compliant with verifier. + } return inst; } -void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, - uint32_t dex_pc, - Instruction::Code new_opcode, - bool is_put) { +void DexToDexCompiler::CompilationState::CompileInstanceFieldAccess(Instruction* inst, + uint32_t dex_pc, + Instruction::Code new_opcode, + bool is_put) { if (!kEnableQuickening) { return; } - uint32_t field_idx = inst->VRegC_22c(); + uint32_t field_idx = GetIndexForInstruction(inst, inst->VRegC_22c()); MemberOffset field_offset(0u); bool is_volatile; bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, is_put, @@ -278,20 +371,29 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, << " by field offset " << field_offset.Int32Value() << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - // We are modifying 4 consecutive bytes. - inst->SetOpcode(new_opcode); - // Replace field index by field offset. - inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value())); + if (!already_quickened_) { + // We are modifying 4 consecutive bytes. + inst->SetOpcode(new_opcode); + // Replace field index by field offset. + inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value())); + } quickened_info_.push_back(QuickenedInfo(dex_pc, field_idx)); } } -void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_range) { +const DexFile& DexToDexCompiler::CompilationState::GetDexFile() const { + return *unit_.GetDexFile(); +} + +void DexToDexCompiler::CompilationState::CompileInvokeVirtual(Instruction* inst, + uint32_t dex_pc, + Instruction::Code new_opcode, + bool is_range) { if (!kEnableQuickening) { return; } - uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c(); + uint32_t method_idx = GetIndexForInstruction(inst, + is_range ? inst->VRegB_3rc() : inst->VRegB_35c()); ScopedObjectAccess soa(Thread::Current()); ClassLinker* class_linker = unit_.GetClassLinker(); @@ -318,19 +420,20 @@ void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, << " by vtable index " << vtable_idx << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - // We are modifying 4 consecutive bytes. - inst->SetOpcode(new_opcode); - // Replace method index by vtable index. - if (is_range) { - inst->SetVRegB_3rc(static_cast<uint16_t>(vtable_idx)); - } else { - inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx)); + if (!already_quickened_) { + // We are modifying 4 consecutive bytes. + inst->SetOpcode(new_opcode); + // Replace method index by vtable index. 
+ if (is_range) { + inst->SetVRegB_3rc(static_cast<uint16_t>(vtable_idx)); + } else { + inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx)); + } } quickened_info_.push_back(QuickenedInfo(dex_pc, method_idx)); } -CompiledMethod* ArtCompileDEX( - CompilerDriver* driver, +CompiledMethod* DexToDexCompiler::CompileMethod( const DexFile::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type ATTRIBUTE_UNUSED, @@ -338,69 +441,122 @@ CompiledMethod* ArtCompileDEX( uint32_t method_idx, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, - DexToDexCompilationLevel dex_to_dex_compilation_level) { - DCHECK(driver != nullptr); - if (dex_to_dex_compilation_level != DexToDexCompilationLevel::kDontDexToDexCompile) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<1> hs(soa.Self()); - ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - art::DexCompilationUnit unit( - class_loader, - class_linker, - dex_file, - code_item, - class_def_idx, - method_idx, - access_flags, - driver->GetVerifiedMethod(&dex_file, method_idx), - hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file))); - art::optimizer::DexCompiler dex_compiler(*driver, unit, dex_to_dex_compilation_level); - dex_compiler.Compile(); - if (dex_compiler.GetQuickenedInfo().empty()) { - // No need to create a CompiledMethod if there are no quickened opcodes. + CompilationLevel compilation_level) { + if (compilation_level == CompilationLevel::kDontDexToDexCompile) { + return nullptr; + } + + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + art::DexCompilationUnit unit( + class_loader, + class_linker, + dex_file, + code_item, + class_def_idx, + method_idx, + access_flags, + driver_->GetVerifiedMethod(&dex_file, method_idx), + hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file))); + + std::vector<uint8_t> quicken_data; + // If the code item is shared with multiple different method ids, make sure that we quicken only + // once and verify that all the dequicken maps match. + if (UNLIKELY(shared_code_items_.find(code_item) != shared_code_items_.end())) { + // For shared code items, use a lock to prevent races. + MutexLock mu(soa.Self(), lock_); + // Blacklisted means there was a quickening conflict previously, bail early. + if (blacklisted_code_items_.find(code_item) != blacklisted_code_items_.end()) { return nullptr; } + auto existing = shared_code_item_quicken_info_.find(code_item); + const bool already_quickened = existing != shared_code_item_quicken_info_.end(); + { + CompilationState state(this, + unit, + compilation_level, + already_quickened ? &existing->second.quicken_data_ : nullptr); + quicken_data = state.Compile(); + } - // Create a `CompiledMethod`, with the quickened information in the vmap table. - if (kIsDebugBuild) { - // Double check that the counts line up with the size of the quicken info. - size_t quicken_count = 0; - for (const DexInstructionPcPair& pair : unit.GetCodeItemAccessor()) { - if (QuickenInfoTable::NeedsIndexForInstruction(&pair.Inst())) { - ++quicken_count; + // Already quickened, check that the data matches what was previously seen. 
+ MethodReference method_ref(&dex_file, method_idx); + if (already_quickened) { + QuickenState* const existing_data = &existing->second; + if (existing_data->quicken_data_ != quicken_data) { + VLOG(compiler) << "Quicken data mismatch, dequickening method " + << dex_file.PrettyMethod(method_idx); + // Unquicken using the existing quicken data. + optimizer::ArtDecompileDEX(dex_file, + *code_item, + ArrayRef<const uint8_t>(existing_data->quicken_data_), + /* decompile_return_instruction*/ false); + // Go clear the vmaps for all the methods that were already quickened to avoid writing them + // out during oat writing. + for (const MethodReference& ref : existing_data->methods_) { + CompiledMethod* method = driver_->GetCompiledMethod(ref); + DCHECK(method != nullptr); + method->ReleaseVMapTable(); } + // Blacklist the method to never attempt to quicken it in the future. + blacklisted_code_items_.insert(code_item); + shared_code_item_quicken_info_.erase(existing); + return nullptr; } - CHECK_EQ(quicken_count, dex_compiler.GetQuickenedInfo().size()); + existing_data->methods_.push_back(method_ref); + } else { + QuickenState new_state; + new_state.methods_.push_back(method_ref); + new_state.quicken_data_ = quicken_data; + bool inserted = shared_code_item_quicken_info_.emplace(code_item, new_state).second; + CHECK(inserted) << "Failed to insert " << dex_file.PrettyMethod(method_idx); } - std::vector<uint8_t> quicken_data; - QuickenInfoTable::Builder builder(&quicken_data, dex_compiler.GetQuickenedInfo().size()); - // Length is encoded by the constructor. - for (QuickenedInfo info : dex_compiler.GetQuickenedInfo()) { - // Dex pc is not serialized, only used for checking the instructions. Since we access the - // array based on the index of the quickened instruction, the indexes must line up perfectly. - // The reader side uses the NeedsIndexForInstruction function too. - const Instruction& inst = unit.GetCodeItemAccessor().InstructionAt(info.dex_pc); - CHECK(QuickenInfoTable::NeedsIndexForInstruction(&inst)) << inst.Opcode(); - builder.AddIndex(info.dex_member_index); + + // Easy sanity check is to check that the existing stuff matches by re-quickening using the + // newly produced quicken data. + // Note that this needs to be behind the lock for this case since we may unquicken in another + // thread. + if (kIsDebugBuild) { + CompilationState state2(this, unit, compilation_level, &quicken_data); + std::vector<uint8_t> new_data = state2.Compile(); + CHECK(new_data == quicken_data) << "Mismatch producing new quicken data"; } - InstructionSet instruction_set = driver->GetInstructionSet(); - if (instruction_set == InstructionSet::kThumb2) { - // Don't use the thumb2 instruction set to avoid the one off code delta. - instruction_set = InstructionSet::kArm; + } else { + CompilationState state(this, unit, compilation_level, /*quicken_data*/ nullptr); + quicken_data = state.Compile(); + + // Easy sanity check is to check that the existing stuff matches by re-quickening using the + // newly produced quicken data. 
+ if (kIsDebugBuild) { + CompilationState state2(this, unit, compilation_level, &quicken_data); + std::vector<uint8_t> new_data = state2.Compile(); + CHECK(new_data == quicken_data) << "Mismatch producing new quicken data"; } - return CompiledMethod::SwapAllocCompiledMethod( - driver, - instruction_set, - ArrayRef<const uint8_t>(), // no code - 0, - 0, - 0, - ArrayRef<const uint8_t>(), // method_info - ArrayRef<const uint8_t>(quicken_data), // vmap_table - ArrayRef<const uint8_t>(), // cfi data - ArrayRef<const linker::LinkerPatch>()); } - return nullptr; + + if (quicken_data.empty()) { + return nullptr; + } + + // Create a `CompiledMethod`, with the quickened information in the vmap table. + InstructionSet instruction_set = driver_->GetInstructionSet(); + if (instruction_set == InstructionSet::kThumb2) { + // Don't use the thumb2 instruction set to avoid the one off code delta. + instruction_set = InstructionSet::kArm; + } + CompiledMethod* ret = CompiledMethod::SwapAllocCompiledMethod( + driver_, + instruction_set, + ArrayRef<const uint8_t>(), // no code + 0, + 0, + 0, + ArrayRef<const uint8_t>(), // method_info + ArrayRef<const uint8_t>(quicken_data), // vmap_table + ArrayRef<const uint8_t>(), // cfi data + ArrayRef<const linker::LinkerPatch>()); + return ret; } } // namespace optimizer diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h index 80b94d2dc3..abd048167c 100644 --- a/compiler/dex/dex_to_dex_compiler.h +++ b/compiler/dex/dex_to_dex_compiler.h @@ -17,14 +17,22 @@ #ifndef ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ #define ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ +#include <set> +#include <unordered_map> +#include <unordered_set> + +#include "base/bit_vector.h" #include "dex/dex_file.h" #include "handle.h" #include "invoke_type.h" +#include "method_reference.h" +#include "quicken_info.h" namespace art { class CompiledMethod; class CompilerDriver; +class DexCompilationUnit; namespace mirror { class ClassLoader; @@ -32,21 +40,144 @@ class ClassLoader; namespace optimizer { -enum class DexToDexCompilationLevel { - kDontDexToDexCompile, // Only meaning wrt image time interpretation. - kOptimize // Perform peep-hole optimizations. +class DexToDexCompiler { + public: + enum class CompilationLevel { + kDontDexToDexCompile, // Only meaning wrt image time interpretation. + kOptimize // Perform peep-hole optimizations. + }; + + explicit DexToDexCompiler(CompilerDriver* driver); + + CompiledMethod* CompileMethod(const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, + const CompilationLevel compilation_level) WARN_UNUSED; + + void MarkForCompilation(Thread* self, + const MethodReference& method_ref, + const DexFile::CodeItem* code_item); + + void ClearState(); + + CompilerDriver* GetDriver() { + return driver_; + } + + bool ShouldCompileMethod(const MethodReference& ref); + + size_t NumUniqueCodeItems(Thread* self) const; + + private: + // Holds the state for compiling a single method. 
+ struct CompilationState { + struct QuickenedInfo { + QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {} + + uint32_t dex_pc; + uint16_t dex_member_index; + }; + + CompilationState(DexToDexCompiler* compiler, + const DexCompilationUnit& unit, + const CompilationLevel compilation_level, + const std::vector<uint8_t>* quicken_data); + + const std::vector<QuickenedInfo>& GetQuickenedInfo() const { + return quickened_info_; + } + + // Returns the quickening info, or an empty array if it was not quickened. + // If already_quickened is true, then don't change anything but still return what the quicken + // data would have been. + std::vector<uint8_t> Compile(); + + const DexFile& GetDexFile() const; + + // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where + // a barrier is required. + void CompileReturnVoid(Instruction* inst, uint32_t dex_pc); + + // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In + // this case, returns the second NOP instruction pointer. Otherwise, returns + // the given "inst". + Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc); + + // Compiles a field access into a quick field access. + // The field index is replaced by an offset within an Object where we can read + // from / write to this field. Therefore, this does not involve any resolution + // at runtime. + // Since the field index is encoded with 16 bits, we can replace it only if the + // field offset can be encoded with 16 bits too. + void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc, + Instruction::Code new_opcode, bool is_put); + + // Compiles a virtual method invocation into a quick virtual method invocation. + // The method index is replaced by the vtable index where the corresponding + // executable can be found. Therefore, this does not involve any resolution + // at runtime. + // Since the method index is encoded with 16 bits, we can replace it only if the + // vtable index can be encoded with 16 bits too. + void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, + Instruction::Code new_opcode, bool is_range); + + // Return the next index. + uint16_t NextIndex(); + + // Returns the dequickened index if an instruction is quickened, otherwise return index. + uint16_t GetIndexForInstruction(const Instruction* inst, uint32_t index); + + DexToDexCompiler* const compiler_; + CompilerDriver& driver_; + const DexCompilationUnit& unit_; + const CompilationLevel compilation_level_; + + // Filled by the compiler when quickening, in order to encode that information + // in the .oat file. The runtime will use that information to get to the original + // opcodes. + std::vector<QuickenedInfo> quickened_info_; + + // If the code item was already quickened previously. + const bool already_quickened_; + const QuickenInfoTable existing_quicken_info_; + uint32_t quicken_index_ = 0u; + + DISALLOW_COPY_AND_ASSIGN(CompilationState); + }; + + struct QuickenState { + std::vector<MethodReference> methods_; + std::vector<uint8_t> quicken_data_; + }; + + BitVector* GetOrAddBitVectorForDex(const DexFile* dex_file) REQUIRES(lock_); + + CompilerDriver* const driver_; + + // State for adding methods (should this be in its own class?). + const DexFile* active_dex_file_ = nullptr; + BitVector* active_bit_vector_ = nullptr; + + // Lock that guards duplicate code items and the bitmap. + mutable Mutex lock_; + // Record what method references are going to get quickened. 
+ std::unordered_map<const DexFile*, BitVector> should_quicken_; + // Record what code items are already seen to detect when multiple methods have the same code + // item. + std::unordered_set<const DexFile::CodeItem*> seen_code_items_ GUARDED_BY(lock_); + // Guarded by lock_ during writing, accessed without a lock during quickening. + // This is safe because no thread is adding to the shared code items during the quickening phase. + std::unordered_set<const DexFile::CodeItem*> shared_code_items_; + std::unordered_set<const DexFile::CodeItem*> blacklisted_code_items_ GUARDED_BY(lock_); + std::unordered_map<const DexFile::CodeItem*, QuickenState> shared_code_item_quicken_info_ + GUARDED_BY(lock_); }; -std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs); - -CompiledMethod* ArtCompileDEX(CompilerDriver* driver, - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - DexToDexCompilationLevel dex_to_dex_compilation_level); + +std::ostream& operator<<(std::ostream& os, const DexToDexCompiler::CompilationLevel& rhs); } // namespace optimizer diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc index 540bd0ce45..baf97a852e 100644 --- a/compiler/dex/quick_compiler_callbacks.cc +++ b/compiler/dex/quick_compiler_callbacks.cc @@ -17,6 +17,10 @@ #include "quick_compiler_callbacks.h" #include "driver/compiler_driver.h" +#include "mirror/class-inl.h" +#include "mirror/object.h" +#include "obj_ptr-inl.h" +#include "thread-current-inl.h" #include "verification_results.h" #include "verifier/method_verifier-inl.h" @@ -54,4 +58,15 @@ void QuickCompilerCallbacks::UpdateClassState(ClassReference ref, ClassStatus st } } +bool QuickCompilerCallbacks::CanUseOatStatusForVerification(mirror::Class* klass) { + // No dex files: conservatively false. + if (dex_files_ == nullptr) { + return false; + } + + // If the class isn't from one of the dex files, accept oat file data. 
+ const DexFile* dex_file = &klass->GetDexFile(); + return std::find(dex_files_->begin(), dex_files_->end(), dex_file) == dex_files_->end(); +} + } // namespace art diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h index 6d22f955a3..8a07e9c12c 100644 --- a/compiler/dex/quick_compiler_callbacks.h +++ b/compiler/dex/quick_compiler_callbacks.h @@ -23,12 +23,13 @@ namespace art { class CompilerDriver; +class DexFile; class VerificationResults; class QuickCompilerCallbacks FINAL : public CompilerCallbacks { public: explicit QuickCompilerCallbacks(CompilerCallbacks::CallbackMode mode) - : CompilerCallbacks(mode) {} + : CompilerCallbacks(mode), dex_files_(nullptr) {} ~QuickCompilerCallbacks() { } @@ -65,11 +66,19 @@ class QuickCompilerCallbacks FINAL : public CompilerCallbacks { void UpdateClassState(ClassReference ref, ClassStatus state) OVERRIDE; + bool CanUseOatStatusForVerification(mirror::Class* klass) OVERRIDE + REQUIRES_SHARED(Locks::mutator_lock_); + + void SetDexFiles(const std::vector<const DexFile*>* dex_files) { + dex_files_ = dex_files; + } + private: VerificationResults* verification_results_ = nullptr; bool does_class_unloading_ = false; CompilerDriver* compiler_driver_ = nullptr; std::unique_ptr<verifier::VerifierDeps> verifier_deps_; + const std::vector<const DexFile*>* dex_files_; }; } // namespace art diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 869865956c..3720dda0f8 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -56,6 +56,7 @@ #include "gc/space/space.h" #include "handle_scope-inl.h" #include "intrinsics_enum.h" +#include "jit/profile_compilation_info.h" #include "jni_internal.h" #include "linker/linker_patch.h" #include "mirror/class-inl.h" @@ -255,24 +256,6 @@ class CompilerDriver::AOTCompilationStats { DISALLOW_COPY_AND_ASSIGN(AOTCompilationStats); }; -class CompilerDriver::DexFileMethodSet { - public: - explicit DexFileMethodSet(const DexFile& dex_file) - : dex_file_(dex_file), - method_indexes_(dex_file.NumMethodIds(), false, Allocator::GetMallocAllocator()) { - } - DexFileMethodSet(DexFileMethodSet&& other) = default; - - const DexFile& GetDexFile() const { return dex_file_; } - - BitVector& GetMethodIndexes() { return method_indexes_; } - const BitVector& GetMethodIndexes() const { return method_indexes_; } - - private: - const DexFile& dex_file_; - BitVector method_indexes_; -}; - CompilerDriver::CompilerDriver( const CompilerOptions* compiler_options, VerificationResults* verification_results, @@ -306,9 +289,8 @@ CompilerDriver::CompilerDriver( compiled_method_storage_(swap_fd), profile_compilation_info_(profile_compilation_info), max_arena_alloc_(0), - dex_to_dex_references_lock_("dex-to-dex references lock"), - dex_to_dex_references_(), - current_dex_to_dex_methods_(nullptr) { + compiling_dex_to_dex_(false), + dex_to_dex_compiler_(this) { DCHECK(compiler_options_ != nullptr); compiler_->Init(); @@ -398,10 +380,16 @@ void CompilerDriver::CompileAll(jobject class_loader, FreeThreadPools(); } -static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( +static optimizer::DexToDexCompiler::CompilationLevel GetDexToDexCompilationLevel( Thread* self, const CompilerDriver& driver, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, const DexFile::ClassDef& class_def) REQUIRES_SHARED(Locks::mutator_lock_) { + // When the dex file is uncompressed in the APK, we do not generate a copy in the .vdex + // file. 
As a result, dex2oat will map the dex file read-only, and we only need to check + // that to know if we can do quickening. + if (dex_file.GetContainer() != nullptr && dex_file.GetContainer()->IsReadOnly()) { + return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; + } auto* const runtime = Runtime::Current(); DCHECK(driver.GetCompilerOptions().IsQuickeningCompilationEnabled()); const char* descriptor = dex_file.GetClassDescriptor(class_def); @@ -410,7 +398,7 @@ static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( if (klass == nullptr) { CHECK(self->IsExceptionPending()); self->ClearException(); - return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; + return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; } // DexToDex at the kOptimize level may introduce quickened opcodes, which replace symbolic // references with actual offsets. We cannot re-verify such instructions. @@ -418,22 +406,23 @@ static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( // We store the verification information in the class status in the oat file, which the linker // can validate (checksums) and use to skip load-time verification. It is thus safe to // optimize when a class has been fully verified before. - optimizer::DexToDexCompilationLevel max_level = optimizer::DexToDexCompilationLevel::kOptimize; + optimizer::DexToDexCompiler::CompilationLevel max_level = + optimizer::DexToDexCompiler::CompilationLevel::kOptimize; if (driver.GetCompilerOptions().GetDebuggable()) { // We are debuggable so definitions of classes might be changed. We don't want to do any // optimizations that could break that. - max_level = optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; + max_level = optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; } if (klass->IsVerified()) { // Class is verified so we can enable DEX-to-DEX compilation for performance. return max_level; } else { // Class verification has failed: do not run DEX-to-DEX optimizations. - return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; + return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; } } -static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( +static optimizer::DexToDexCompiler::CompilationLevel GetDexToDexCompilationLevel( Thread* self, const CompilerDriver& driver, jobject jclass_loader, @@ -470,7 +459,7 @@ static void CompileMethod(Thread* self, uint32_t method_idx, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, - optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level, + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, bool compilation_enabled, Handle<mirror::DexCache> dex_cache) { DCHECK(driver != nullptr); @@ -478,18 +467,18 @@ static void CompileMethod(Thread* self, uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0; MethodReference method_ref(&dex_file, method_idx); - if (driver->GetCurrentDexToDexMethods() != nullptr) { + if (driver->GetCompilingDexToDex()) { + optimizer::DexToDexCompiler* const compiler = &driver->GetDexToDexCompiler(); // This is the second pass when we dex-to-dex compile previously marked methods. // TODO: Refactor the compilation to avoid having to distinguish the two passes // here. That should be done on a higher level. 
http://b/29089975 - if (driver->GetCurrentDexToDexMethods()->IsBitSet(method_idx)) { + if (compiler->ShouldCompileMethod(method_ref)) { VerificationResults* results = driver->GetVerificationResults(); DCHECK(results != nullptr); const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref); // Do not optimize if a VerifiedMethod is missing. SafeCast elision, // for example, relies on it. - compiled_method = optimizer::ArtCompileDEX( - driver, + compiled_method = compiler->CompileMethod( code_item, access_flags, invoke_type, @@ -499,7 +488,7 @@ static void CompileMethod(Thread* self, dex_file, (verified_method != nullptr) ? dex_to_dex_compilation_level - : optimizer::DexToDexCompilationLevel::kDontDexToDexCompile); + : optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile); } } else if ((access_flags & kAccNative) != 0) { // Are we extracting only and have support for generic JNI down calls? @@ -524,7 +513,7 @@ static void CompileMethod(Thread* self, bool compile = compilation_enabled && // Basic checks, e.g., not <clinit>. results->IsCandidateForCompilation(method_ref, access_flags) && - // Did not fail to create VerifiedMethod metadcata. + // Did not fail to create VerifiedMethod metadata. verified_method != nullptr && // Do not have failures that should punt to the interpreter. !verified_method->HasRuntimeThrow() && @@ -546,10 +535,12 @@ static void CompileMethod(Thread* self, dex_cache); } if (compiled_method == nullptr && - dex_to_dex_compilation_level != optimizer::DexToDexCompilationLevel::kDontDexToDexCompile) { + dex_to_dex_compilation_level != + optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile) { DCHECK(!Runtime::Current()->UseJitCompilation()); + DCHECK(!driver->GetCompilingDexToDex()); // TODO: add a command-line option to disable DEX-to-DEX compilation ? - driver->MarkForDexToDexCompilation(self, method_ref); + driver->GetDexToDexCompiler().MarkForCompilation(self, method_ref, code_item); } } if (kTimeCompileMethod) { @@ -616,14 +607,14 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t PreCompile(jclass_loader, dex_files, timings); // Can we run DEX-to-DEX compiler on this class ? - optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level = + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level = GetDexToDexCompilationLevel(self, *this, jclass_loader, *dex_file, dex_file->GetClassDef(class_def_idx)); - DCHECK(current_dex_to_dex_methods_ == nullptr); + DCHECK(!compiling_dex_to_dex_); CompileMethod(self, this, code_item, @@ -637,19 +628,10 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t true, dex_cache); - ArrayRef<DexFileMethodSet> dex_to_dex_references; - { - // From this point on, we shall not modify dex_to_dex_references_, so - // just grab a reference to it that we use without holding the mutex.
- MutexLock lock(Thread::Current(), dex_to_dex_references_lock_); - dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_); - } - if (!dex_to_dex_references.empty()) { - DCHECK_EQ(dex_to_dex_references.size(), 1u); - DCHECK(&dex_to_dex_references[0].GetDexFile() == dex_file); - current_dex_to_dex_methods_ = &dex_to_dex_references.front().GetMethodIndexes(); - DCHECK(current_dex_to_dex_methods_->IsBitSet(method_idx)); - DCHECK_EQ(current_dex_to_dex_methods_->NumSetBits(), 1u); + const size_t num_methods = dex_to_dex_compiler_.NumUniqueCodeItems(self); + if (num_methods != 0) { + DCHECK_EQ(num_methods, 1u); + compiling_dex_to_dex_ = true; CompileMethod(self, this, code_item, @@ -662,7 +644,8 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t dex_to_dex_compilation_level, true, dex_cache); - current_dex_to_dex_methods_ = nullptr; + compiling_dex_to_dex_ = false; + dex_to_dex_compiler_.ClearState(); } FreeThreadPools(); @@ -697,7 +680,8 @@ void CompilerDriver::Resolve(jobject class_loader, // TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a // stable order. -static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache, +static void ResolveConstStrings(ClassLinker* class_linker, + Handle<mirror::DexCache> dex_cache, const DexFile& dex_file, const DexFile::CodeItem* code_item) REQUIRES_SHARED(Locks::mutator_lock_) { @@ -706,7 +690,6 @@ static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache, return; } - ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(dex_file, code_item)) { switch (inst->Opcode()) { case Instruction::CONST_STRING: @@ -754,22 +737,105 @@ static void ResolveConstStrings(CompilerDriver* driver, dex_file->StringByTypeIdx(class_def.class_idx_)); if (!compilation_enabled) { // Compilation is skipped, do not resolve const-string in code of this class. - // TODO: Make sure that inlining honors this. + // FIXME: Make sure that inlining honors this. b/26687569 continue; } // Direct and virtual methods. - int64_t previous_method_idx = -1; while (it.HasNextMethod()) { - uint32_t method_idx = it.GetMemberIndex(); - if (method_idx == previous_method_idx) { - // smali can create dex files with two encoded_methods sharing the same method_idx - // http://code.google.com/p/smali/issues/detail?id=119 - it.Next(); - continue; + ResolveConstStrings(class_linker, dex_cache, *dex_file, it.GetMethodCodeItem()); + it.Next(); + } + DCHECK(!it.HasNext()); + } + } +} + +// Initialize type check bit strings for check-cast and instance-of in the code. Done to have +// deterministic allocation behavior. Right now this is single-threaded for simplicity. +// TODO: Collect the relevant type indices in parallel, then process them sequentially in a +// stable order. + +static void InitializeTypeCheckBitstrings(CompilerDriver* driver, + ClassLinker* class_linker, + Handle<mirror::DexCache> dex_cache, + const DexFile& dex_file, + const DexFile::CodeItem* code_item) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (code_item == nullptr) { + // Abstract or native method. + return; + } + + for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(dex_file, code_item)) { + switch (inst->Opcode()) { + case Instruction::CHECK_CAST: + case Instruction::INSTANCE_OF: { + dex::TypeIndex type_index( + (inst->Opcode() == Instruction::CHECK_CAST) ? 
inst->VRegB_21c() : inst->VRegC_22c()); + const char* descriptor = dex_file.StringByTypeIdx(type_index); + // We currently do not use the bitstring type check for array or final (including + // primitive) classes. We may reconsider this in future if it's deemed to be beneficial. + // And we cannot use it for classes outside the boot image as we do not know the runtime + // value of their bitstring when compiling (it may not even get assigned at runtime). + if (descriptor[0] == 'L' && driver->IsImageClass(descriptor)) { + ObjPtr<mirror::Class> klass = + class_linker->LookupResolvedType(type_index, + dex_cache.Get(), + /* class_loader */ nullptr); + CHECK(klass != nullptr) << descriptor << " should have been previously resolved."; + // Now assign the bitstring if the class is not final. Keep this in sync with sharpening. + if (!klass->IsFinal()) { + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass); + } } - previous_method_idx = method_idx; - ResolveConstStrings(dex_cache, *dex_file, it.GetMethodCodeItem()); + break; + } + + default: + break; + } + } +} + +static void InitializeTypeCheckBitstrings(CompilerDriver* driver, + const std::vector<const DexFile*>& dex_files, + TimingLogger* timings) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr)); + + for (const DexFile* dex_file : dex_files) { + dex_cache.Assign(class_linker->FindDexCache(soa.Self(), *dex_file)); + TimingLogger::ScopedTiming t("Initialize type check bitstrings", timings); + + size_t class_def_count = dex_file->NumClassDefs(); + for (size_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) { + const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index); + + const uint8_t* class_data = dex_file->GetClassData(class_def); + if (class_data == nullptr) { + // empty class, probably a marker interface + continue; + } + + ClassDataItemIterator it(*dex_file, class_data); + it.SkipAllFields(); + + bool compilation_enabled = driver->IsClassToCompile( + dex_file->StringByTypeIdx(class_def.class_idx_)); + if (!compilation_enabled) { + // Compilation is skipped, do not look for type checks in code of this class. + // FIXME: Make sure that inlining honors this. b/26687569 + continue; + } + + // Direct and virtual methods. + while (it.HasNextMethod()) { + InitializeTypeCheckBitstrings( + driver, class_linker, dex_cache, *dex_file, it.GetMethodCodeItem()); it.Next(); } DCHECK(!it.HasNext()); @@ -871,6 +937,13 @@ void CompilerDriver::PreCompile(jobject class_loader, UpdateImageClasses(timings); VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString(false); + + if (GetCompilerOptions().IsForceDeterminism() && GetCompilerOptions().IsBootImage()) { + // Initialize type check bit string used by check-cast and instanceof. + // Do this now to have a deterministic image. + // Note: This is done after UpdateImageClasses() at it relies on the image classes to be final. 
+ InitializeTypeCheckBitstrings(this, dex_files, timings); + } } bool CompilerDriver::IsImageClass(const char* descriptor) const { @@ -1280,17 +1353,6 @@ bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) { return IsImageClass(descriptor); } -void CompilerDriver::MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref) { - MutexLock lock(self, dex_to_dex_references_lock_); - // Since we're compiling one dex file at a time, we need to look for the - // current dex file entry only at the end of dex_to_dex_references_. - if (dex_to_dex_references_.empty() || - &dex_to_dex_references_.back().GetDexFile() != method_ref.dex_file) { - dex_to_dex_references_.emplace_back(*method_ref.dex_file); - } - dex_to_dex_references_.back().GetMethodIndexes().SetBit(method_ref.index); -} - bool CompilerDriver::CanAccessTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class, ObjPtr<mirror::Class> resolved_class) { if (resolved_class == nullptr) { @@ -2612,14 +2674,8 @@ void CompilerDriver::Compile(jobject class_loader, : profile_compilation_info_->DumpInfo(&dex_files)); } - current_dex_to_dex_methods_ = nullptr; - Thread* const self = Thread::Current(); - { - // Clear in case we aren't the first call to Compile. - MutexLock mu(self, dex_to_dex_references_lock_); - dex_to_dex_references_.clear(); - } - + dex_to_dex_compiler_.ClearState(); + compiling_dex_to_dex_ = false; for (const DexFile* dex_file : dex_files) { CHECK(dex_file != nullptr); CompileDexFile(class_loader, @@ -2634,23 +2690,21 @@ void CompilerDriver::Compile(jobject class_loader, Runtime::Current()->ReclaimArenaPoolMemory(); } - ArrayRef<DexFileMethodSet> dex_to_dex_references; - { - // From this point on, we shall not modify dex_to_dex_references_, so - // just grab a reference to it that we use without holding the mutex. - MutexLock lock(self, dex_to_dex_references_lock_); - dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_); - } - for (const auto& method_set : dex_to_dex_references) { - current_dex_to_dex_methods_ = &method_set.GetMethodIndexes(); - CompileDexFile(class_loader, - method_set.GetDexFile(), - dex_files, - parallel_thread_pool_.get(), - parallel_thread_count_, - timings); + if (dex_to_dex_compiler_.NumUniqueCodeItems(Thread::Current()) > 0u) { + compiling_dex_to_dex_ = true; + // TODO: Not visit all of the dex files, its probably rare that only one would have quickened + // methods though. + for (const DexFile* dex_file : dex_files) { + CompileDexFile(class_loader, + *dex_file, + dex_files, + parallel_thread_pool_.get(), + parallel_thread_count_, + timings); + } + dex_to_dex_compiler_.ClearState(); + compiling_dex_to_dex_ = false; } - current_dex_to_dex_methods_ = nullptr; VLOG(compiler) << "Compile: " << GetMemoryUsageString(false); } @@ -2701,7 +2755,7 @@ class CompileClassVisitor : public CompilationVisitor { CompilerDriver* const driver = manager_->GetCompiler(); // Can we run DEX-to-DEX compiler on this class ? 
- optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level = + optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level = GetDexToDexCompilationLevel(soa.Self(), *driver, jclass_loader, dex_file, class_def); ClassDataItemIterator it(dex_file, class_data); diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index ef16212fb7..18b1e0ea3c 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -35,8 +35,8 @@ #include "compiler.h" #include "dex/dex_file.h" #include "dex/dex_file_types.h" +#include "dex/dex_to_dex_compiler.h" #include "driver/compiled_method_storage.h" -#include "jit/profile_compilation_info.h" #include "method_reference.h" #include "os.h" #include "safe_map.h" @@ -69,6 +69,7 @@ enum InvokeType : uint32_t; class MemberOffset; template<class MirrorType> class ObjPtr; class ParallelCompilationManager; +class ProfileCompilationInfo; class ScopedObjectAccess; template <class Allocator> class SrcMap; class TimingLogger; @@ -76,6 +77,9 @@ class VdexFile; class VerificationResults; class VerifiedMethod; +// Compile-time flag to enable/disable bitstring type checks. +static constexpr bool kUseBitstringTypeCheck = true; + enum EntryPointCallingConvention { // ABI of invocations to a method's interpreter entry point. kInterpreterAbi, @@ -106,13 +110,13 @@ class CompilerDriver { ~CompilerDriver(); - // Set dex files that will be stored in the oat file after being compiled. + // Set dex files associated with the oat file being compiled. void SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files); // Set dex files classpath. void SetClasspathDexFiles(const std::vector<const DexFile*>& dex_files); - // Get dex file that will be stored in the oat file after being compiled. + // Get dex files associated with the oat file being compiled. ArrayRef<const DexFile* const> GetDexFilesForOatFile() const { return ArrayRef<const DexFile* const>(dex_files_for_oat_file_); } @@ -120,12 +124,11 @@ class CompilerDriver { void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files, TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_, !dex_to_dex_references_lock_); + REQUIRES(!Locks::mutator_lock_); // Compile a single Method.
void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings) - REQUIRES_SHARED(Locks::mutator_lock_) - REQUIRES(!dex_to_dex_references_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); VerificationResults* GetVerificationResults() const; @@ -362,13 +365,6 @@ class CompilerDriver { return true; } - void MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref) - REQUIRES(!dex_to_dex_references_lock_); - - const BitVector* GetCurrentDexToDexMethods() const { - return current_dex_to_dex_methods_; - } - const ProfileCompilationInfo* GetProfileCompilationInfo() const { return profile_compilation_info_; } @@ -381,6 +377,14 @@ class CompilerDriver { || android::base::EndsWith(boot_image_filename, "core-optimizing.art"); } + bool GetCompilingDexToDex() const { + return compiling_dex_to_dex_; + } + + optimizer::DexToDexCompiler& GetDexToDexCompiler() { + return dex_to_dex_compiler_; + } + private: void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files, @@ -447,7 +451,7 @@ class CompilerDriver { void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) REQUIRES(!dex_to_dex_references_lock_); + TimingLogger* timings); void CompileDexFile(jobject class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, @@ -529,7 +533,7 @@ class CompilerDriver { bool support_boot_image_fixup_; - // List of dex files that will be stored in the oat file. + // List of dex files associated with the oat file. std::vector<const DexFile*> dex_files_for_oat_file_; CompiledMethodStorage compiled_method_storage_; @@ -539,14 +543,9 @@ class CompilerDriver { size_t max_arena_alloc_; - // Data for delaying dex-to-dex compilation. - Mutex dex_to_dex_references_lock_; - // In the first phase, dex_to_dex_references_ collects methods for dex-to-dex compilation. - class DexFileMethodSet; - std::vector<DexFileMethodSet> dex_to_dex_references_ GUARDED_BY(dex_to_dex_references_lock_); - // In the second phase, current_dex_to_dex_methods_ points to the BitVector with method - // indexes for dex-to-dex compilation in the current dex file. - const BitVector* current_dex_to_dex_methods_; + // Compiler for dex to dex (quickening).
+ bool compiling_dex_to_dex_; + optimizer::DexToDexCompiler dex_to_dex_compiler_; friend class CompileClassVisitor; friend class DexToDexDecompilerTest; diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 88e3e5b230..2c62095458 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -76,6 +76,7 @@ extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t cou const ArrayRef<mirror::Class*> types_array(types, count); std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForClasses( kRuntimeISA, jit_compiler->GetCompilerDriver()->GetInstructionSetFeatures(), types_array); + MutexLock mu(Thread::Current(), g_jit_debug_mutex); CreateJITCodeEntry(std::move(elf_file)); } } diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc index cedbe5d97f..6e0286afac 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.cc +++ b/compiler/linker/arm/relative_patcher_arm_base.cc @@ -250,12 +250,12 @@ std::vector<debug::MethodDebugInfo> ArmBaseRelativePatcher::GenerateThunkDebugIn for (size_t i = start, num = data.NumberOfThunks(); i != num; ++i) { debug::MethodDebugInfo info = {}; if (i == 0u) { - info.trampoline_name = base_name; + info.custom_name = base_name; } else { // Add a disambiguating tag for subsequent identical thunks. Since the `thunks_` // keeps records also for thunks in previous oat files, names based on the thunk // index shall be unique across the whole multi-oat output. - info.trampoline_name = base_name + "_" + std::to_string(i); + info.custom_name = base_name + "_" + std::to_string(i); } info.isa = instruction_set_; info.is_code_address_text_relative = true; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 147df1e3e8..d893cc88c4 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -836,9 +836,23 @@ class BCEVisitor : public HGraphVisitor { ValueRange array_range(&allocator_, lower, upper); // Try index range obtained by dominator-based analysis. ValueRange* index_range = LookupValueRange(index, block); - if (index_range != nullptr && index_range->FitsIn(&array_range)) { - ReplaceInstruction(bounds_check, index); - return; + if (index_range != nullptr) { + if (index_range->FitsIn(&array_range)) { + ReplaceInstruction(bounds_check, index); + return; + } else if (index_range->IsConstantValueRange()) { + // If the non-constant index turns out to have a constant range, + // make one more attempt to get a constant in the array range. + ValueRange* existing_range = LookupValueRange(array_length, block); + if (existing_range != nullptr && + existing_range->IsConstantValueRange()) { + ValueRange constant_array_range(&allocator_, lower, existing_range->GetLower()); + if (index_range->FitsIn(&constant_array_range)) { + ReplaceInstruction(bounds_check, index); + return; + } + } + } } // Try index range obtained by induction variable analysis. // Disables dynamic bce if OOB is certain. 
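The per-backend GenerateBitstringTypeCheckCompare() implementations added in the code generator changes below all emit the same underlying comparison: load the class status word, keep only the bitstring bits selected by the mask, and compare them against the target class's encoded path-to-root. A minimal standalone C++ sketch of that semantics (illustrative only; BitstringInstanceOf and its parameters are not part of this patch, and the values are assumed to come from the HTypeCheckInstruction accessors used below):

#include <cstdint>

// A candidate object passes the bitstring check iff the bitstring bits of its
// class's status word equal the target class's path-to-root. `mask + 1` is a
// power of two, so `status_word & mask` extracts exactly the bits that the
// generated UBFX / SUB+LSL / shll sequences isolate.
bool BitstringInstanceOf(uint32_t status_word,    // obj->klass_->status_
                         uint32_t path_to_root,   // check->GetBitstringPathToRoot()
                         uint32_t mask) {         // check->GetBitstringMask()
  return (status_word & mask) == path_to_root;
}

Each backend maps this to a load (LDRH/LDR, LoadFromOffset, movl), a bit-extract or shift, and a flags-setting compare; HInstanceOf materializes the result in a register, while HCheckCast branches to the type check slow path on mismatch.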
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 3c5a37f958..2dafbf7f6d 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -438,6 +438,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: return false; + case TypeCheckKind::kBitstringCheck: + return true; } LOG(FATAL) << "Unreachable"; UNREACHABLE(); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 13bbffa1e3..b47a5cf3c4 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -2112,6 +2112,26 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* check, vixl::aarch64::Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset())); + } else { + // /* uint32_t */ temp = temp->status_ + __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset())); + // Extract the bitstring bits. + __ Ubfx(temp, temp, 0, mask_bits); + } + // Compare the bitstring bits to `path_to_root`. + __ Cmp(temp, path_to_root); +} + void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { BarrierType type = BarrierAll; @@ -3840,6 +3860,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -3848,7 +3870,13 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -3861,7 +3889,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = InputRegisterAt(instruction, 1); + Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? 
Register() + : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -4047,6 +4077,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Cset(out, eq); + if (zero.IsLinked()) { + __ B(&done); + } + break; + } } if (zero.IsLinked()) { @@ -4069,7 +4116,13 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -4079,7 +4132,9 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = InputRegisterAt(instruction, 1); + Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? 
Register() + : InputRegisterAt(instruction, 1); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); DCHECK_GE(num_temps, 1u); DCHECK_LE(num_temps, 3u); @@ -4260,6 +4315,20 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ B(ne, &start_loop); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ B(ne, type_check_slow_path->GetEntryLabel()); + break; + } } __ Bind(&done); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index f92c94fda7..cc369de983 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -264,6 +264,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::aarch64::Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + vixl::aarch64::Register temp); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 577fe00dcd..504c6479cc 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -2490,8 +2490,12 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { } if (!skip_overflow_check) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); + // Using r4 instead of IP saves 2 bytes. Start by asserting that r4 is available here. + for (vixl32::Register reg : kParameterCoreRegistersVIXL) { + DCHECK(!reg.Is(r4)); + } + DCHECK(!kCoreCalleeSaves.Includes(r4)); + vixl32::Register temp = r4; __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm))); // The load must immediately precede RecordPcInfo. ExactAssemblyScope aas(GetVIXLAssembler(), @@ -7191,6 +7195,67 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* check, + vixl32::Register temp, + vixl32::FlagsUpdate flags_update) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs + // the Z flag for BNE. This is indicated by the `flags_update` parameter. + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); + // Check if the bitstring bits are equal to `path_to_root`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root); + } else { + __ Sub(temp, temp, path_to_root); + } + } else { + // /* uint32_t */ temp = temp->status_ + __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); + if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) { + // Compare the bitstring bits using SUB. + __ Sub(temp, temp, path_to_root); + // Shift out bits that do not contribute to the comparison. 
+ __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + } else if (IsUint<16>(path_to_root)) { + if (temp.IsLow()) { + // Note: Optimized for size but contains one more dependent instruction than necessary. + // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the + // macro assembler would use the high reg IP for the constant by default. + // Compare the bitstring bits using SUB. + __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2 + __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3 + // Shift out bits that do not contribute to the comparison. + __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + } else { + // Extract the bitstring bits. + __ Ubfx(temp, temp, 0, mask_bits); + // Check if the bitstring bits are equal to `path_to_root`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root); + } else { + __ Sub(temp, temp, path_to_root); + } + } + } else { + // Shift out bits that do not contribute to the comparison. + __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root << (32u - mask_bits)); + } else { + __ Sub(temp, temp, path_to_root << (32u - mask_bits)); + } + } + } +} + HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { @@ -7382,6 +7447,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -7390,7 +7457,13 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7405,7 +7478,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = InputRegisterAt(instruction, 1); + vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? 
vixl32::Register() + : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7645,6 +7720,26 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ B(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out, DontCare); + // If `out` is a low reg and we would have another low reg temp, we could + // optimize this as RSBS+ADC, see GenerateConditionWithZero(). + // + // Also, in some cases when `out` is a low reg and we're loading a constant to IP + // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size + // would be the same and we would have fewer direct data dependencies. + codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR + break; + } } if (done.IsReferenced()) { @@ -7662,7 +7757,13 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -7671,7 +7772,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = InputRegisterAt(instruction, 1); + vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? 
vixl32::Register() + : InputRegisterAt(instruction, 1); Location temp_loc = locations->GetTemp(0); vixl32::Register temp = RegisterFrom(temp_loc); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -7856,6 +7959,20 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { __ B(ne, &start_loop, /* far_target */ false); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags); + __ B(ne, type_check_slow_path->GetEntryLabel()); + break; + } } if (done.IsReferenced()) { __ Bind(&done); diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 38570bb0fe..bd815f45b3 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -322,6 +322,9 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + vixl::aarch32::Register temp, + vixl::aarch32::FlagsUpdate flags_update); void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 5c8e46ed19..2ed0ab79a1 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1929,6 +1929,34 @@ void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCode __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ LoadFromOffset( + kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + if (IsUint<16>(path_to_root)) { + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + __ LoadConst32(TMP, path_to_root); + __ Xor(temp, temp, TMP); + } + // Shift out bits that do not contribute to the comparison. + __ Sll(temp, temp, 32 - mask_bits); + } +} + void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // Only stype 0 is supported. 
} @@ -3289,12 +3317,20 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -3303,7 +3339,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Register cls = locations->InAt(1).AsRegister<Register>(); + Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -3353,7 +3389,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ Bne(temp, cls, slow_path->GetEntryLabel()); + __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); break; } @@ -3379,7 +3415,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqz(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bne(temp, cls, &loop); + __ Bne(temp, cls.AsRegister<Register>(), &loop); break; } @@ -3394,7 +3430,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. MipsLabel loop; __ Bind(&loop); - __ Beq(temp, cls, &done); + __ Beq(temp, cls.AsRegister<Register>(), &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -3417,7 +3453,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beq(temp, cls, &done); + __ Beq(temp, cls.AsRegister<Register>(), &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3476,7 +3512,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Go to next interface. __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. 
- __ Bne(AT, cls, &loop); + __ Bne(AT, cls.AsRegister<Register>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnez(temp, slow_path->GetEntryLabel()); break; } } @@ -7207,6 +7257,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -7215,7 +7267,13 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7227,7 +7285,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Register cls = locations->InAt(1).AsRegister<Register>(); + Location cls = locations->InAt(1); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7257,7 +7315,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, kCompilerReadBarrierOption); // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls); + __ Xor(out, out, cls.AsRegister<Register>()); __ Sltiu(out, out, 1); break; } @@ -7282,7 +7340,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); - __ Bne(out, cls, &loop); + __ Bne(out, cls.AsRegister<Register>(), &loop); __ LoadConst32(out, 1); break; } @@ -7298,7 +7356,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { // Walk over the class hierarchy to find a match. MipsLabel loop, success; __ Bind(&loop); - __ Beq(out, cls, &success); + __ Beq(out, cls.AsRegister<Register>(), &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -7323,7 +7381,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { kCompilerReadBarrierOption); // Do an exact check. MipsLabel success; - __ Beq(out, cls, &success); + __ Beq(out, cls.AsRegister<Register>(), &success); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -7355,7 +7413,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); - __ Bne(out, cls, slow_path->GetEntryLabel()); + __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -7387,6 +7445,20 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { __ B(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Sltiu(out, out, 1); + break; + } } __ Bind(&done); diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 32b3e4221f..ffeb3b00a7 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -237,6 +237,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index bcfe051c90..3ae8a30754 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1775,6 +1775,34 @@ void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCo __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + GpuRegister temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ LoadFromOffset( + kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + if (IsUint<16>(path_to_root)) { + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + __ LoadConst32(TMP, path_to_root); + __ Xor(temp, temp, TMP); + } + // Shift out bits that do not contribute to the comparison. 
+ __ Sll(temp, temp, 32 - mask_bits); + } +} + void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // only stype 0 is supported } @@ -2844,12 +2872,20 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -2858,7 +2894,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); GpuRegister temp = temp_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -2908,7 +2944,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ Bnec(temp, cls, slow_path->GetEntryLabel()); + __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); break; } @@ -2934,7 +2970,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqzc(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bnec(temp, cls, &loop); + __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop); break; } @@ -2949,7 +2985,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. Mips64Label loop; __ Bind(&loop); - __ Beqc(temp, cls, &done); + __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -2972,7 +3008,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beqc(temp, cls, &done); + __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3031,7 +3067,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { __ Daddiu(temp, temp, 2 * kHeapReferenceSize); __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. 
- __ Bnec(AT, cls, &loop); + __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnezc(temp, slow_path->GetEntryLabel()); break; } } @@ -5524,6 +5574,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -5532,7 +5584,13 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -5544,7 +5602,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + Location cls = locations->InAt(1); Location out_loc = locations->Out(); GpuRegister out = out_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -5574,7 +5632,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, kCompilerReadBarrierOption); // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls); + __ Xor(out, out, cls.AsRegister<GpuRegister>()); __ Sltiu(out, out, 1); break; } @@ -5599,7 +5657,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. __ Beqzc(out, &done); - __ Bnec(out, cls, &loop); + __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop); __ LoadConst32(out, 1); break; } @@ -5615,7 +5673,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { // Walk over the class hierarchy to find a match. Mips64Label loop, success; __ Bind(&loop); - __ Beqc(out, cls, &success); + __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -5640,7 +5698,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { kCompilerReadBarrierOption); // Do an exact check. Mips64Label success; - __ Beqc(out, cls, &success); + __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -5672,7 +5730,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); - __ Bnec(out, cls, slow_path->GetEntryLabel()); + __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -5704,6 +5762,20 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { __ Bc(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Sltiu(out, out, 1); + break; + } } __ Bind(&done); diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index d479410f07..87d5a9c15a 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -233,6 +233,7 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index cbe9e0a35c..e85f9001b1 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -6234,6 +6234,27 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( // No need for memory fence, thanks to the X86 memory model. } +void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if ((false) && mask_bits == 16u) { + // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552 + // Compare the bitstring in memory. + __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ movl(temp, Address(temp, mirror::Class::StatusOffset())); + // Compare the bitstring bits using SUB. + __ subl(temp, Immediate(path_to_root)); + // Shift out bits that do not contribute to the comparison. + __ shll(temp, Immediate(32u - mask_bits)); + } +} + HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { @@ -6426,6 +6447,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -6434,7 +6457,13 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::Any()); + } // Note that TypeCheckSlowPathX86 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for some cases. @@ -6655,6 +6684,21 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + break; + } } if (zero.IsLinked()) { @@ -6681,6 +6725,10 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. locations->SetInAt(1, Location::RequiresRegister()); + } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } @@ -6900,6 +6948,19 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); + break; + } } __ Bind(&done); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 0082853184..2d14d4cdc2 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -211,6 +211,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { // the suspend call. void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void DivRemOneOrMinusOne(HBinaryOperation* instruction); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 510eec4f30..9f8b1bb038 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -5440,6 +5440,27 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( // No need for memory fence, thanks to the x86-64 memory model. 
} +void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + CpuRegister temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if ((false) && mask_bits == 16u) { + // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552 + // Compare the bitstring in memory. + __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ movl(temp, Address(temp, mirror::Class::StatusOffset())); + // Compare the bitstring bits using SUB. + __ subl(temp, Immediate(path_to_root)); + // Shift out bits that do not contribute to the comparison. + __ shll(temp, Immediate(32u - mask_bits)); + } +} + HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { switch (desired_class_load_kind) { @@ -5812,6 +5833,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -5820,7 +5843,13 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::Any()); + } // Note that TypeCheckSlowPathX86_64 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for @@ -6049,6 +6078,27 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + if (zero.IsLinked()) { + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + } else { + __ setcc(kEqual, out); + // setcc only sets the low byte. + __ andl(out, Immediate(1)); + } + break; + } } if (zero.IsLinked()) { @@ -6075,6 +6125,10 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. 
locations->SetInAt(1, Location::RequiresRegister()); + } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } @@ -6261,7 +6315,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { break; } - case TypeCheckKind::kInterfaceCheck: + case TypeCheckKind::kInterfaceCheck: { // Fast path for the interface check. Try to avoid read barriers to improve the fast path. // We can not get false positives by doing this. // /* HeapReference<Class> */ temp = obj->klass_ @@ -6297,6 +6351,20 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // If `cls` was poisoned above, unpoison it. __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); + break; + } } if (done.IsLinked()) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index e86123ef01..97f8ec7ae0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -208,6 +208,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { // the suspend call. void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp); void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateRemFP(HRem* rem); void DivRemOneOrMinusOne(HBinaryOperation* instruction); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index c88baa8610..fbcbe3608e 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -25,6 +25,11 @@ #include "base/bit_vector-inl.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" +#include "handle.h" +#include "mirror/class.h" +#include "obj_ptr-inl.h" +#include "scoped_thread_state_change-inl.h" +#include "subtype_check.h" namespace art { @@ -548,30 +553,85 @@ void GraphChecker::VisitReturnVoid(HReturnVoid* ret) { } } -void GraphChecker::VisitCheckCast(HCheckCast* check) { - VisitInstruction(check); - HInstruction* input = check->InputAt(1); - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", +void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, + size_t input_pos, + bool check_value, + uint32_t expected_value, + const char* name) { + if (!check->InputAt(input_pos)->IsIntConstant()) { + AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.", check->DebugName(), check->GetId(), - input->DebugName(), - input->GetId())); + input_pos, + name, + check->InputAt(2)->DebugName(), + check->InputAt(2)->GetId())); + } else if (check_value) { + uint32_t actual_value = + 
static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue()); + if (actual_value != expected_value) { + AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.", + check->DebugName(), + check->GetId(), + name, + actual_value, + expected_value)); + } } } -void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { - VisitInstruction(instruction); - HInstruction* input = instruction->InputAt(1); - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", - instruction->DebugName(), - instruction->GetId(), - input->DebugName(), - input->GetId())); +void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) { + VisitInstruction(check); + HInstruction* input = check->InputAt(1); + if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + if (!input->IsNullConstant()) { + AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.", + check->DebugName(), + check->GetId(), + input->DebugName(), + input->GetId())); + } + bool check_values = false; + BitString::StorageType expected_path_to_root = 0u; + BitString::StorageType expected_mask = 0u; + { + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> klass = check->GetClass().Get(); + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); + if (state == SubtypeCheckInfo::kAssigned) { + expected_path_to_root = + SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass); + expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass); + check_values = true; + } else { + AddError(StringPrintf("%s:%d (bitstring) references a class with unassigned bitstring.", + check->DebugName(), + check->GetId())); + } + } + CheckTypeCheckBitstringInput( + check, /* input_pos */ 2, check_values, expected_path_to_root, "path_to_root"); + CheckTypeCheckBitstringInput(check, /* input_pos */ 3, check_values, expected_mask, "mask"); + } else { + if (!input->IsLoadClass()) { + AddError(StringPrintf("%s:%d (classic) expects a HLoadClass as second input, not %s:%d.", + check->DebugName(), + check->GetId(), + input->DebugName(), + input->GetId())); + } } } +void GraphChecker::VisitCheckCast(HCheckCast* check) { + HandleTypeCheckInstruction(check); +} + +void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { + HandleTypeCheckInstruction(instruction); +} + void GraphChecker::HandleLoop(HBasicBlock* loop_header) { int id = loop_header->GetBlockId(); HLoopInformation* loop_information = loop_header->GetLoopInformation(); diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 0f0b49d240..dbedc40518 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -71,6 +71,12 @@ class GraphChecker : public HGraphDelegateVisitor { void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE; void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; + void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, + size_t input_pos, + bool check_value, + uint32_t expected_value, + const char* name); + void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction); void HandleLoop(HBasicBlock* loop_header); void HandleBooleanInput(HInstruction* instruction, size_t input_index); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 
12c69889ab..55191214a6 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -389,16 +389,23 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("load_kind") << load_string->GetLoadKind(); } - void VisitCheckCast(HCheckCast* check_cast) OVERRIDE { - StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind(); + void HandleTypeCheckInstruction(HTypeCheckInstruction* check) { + StartAttributeStream("check_kind") << check->GetTypeCheckKind(); StartAttributeStream("must_do_null_check") << std::boolalpha - << check_cast->MustDoNullCheck() << std::noboolalpha; + << check->MustDoNullCheck() << std::noboolalpha; + if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + StartAttributeStream("path_to_root") << std::hex + << "0x" << check->GetBitstringPathToRoot() << std::dec; + StartAttributeStream("mask") << std::hex << "0x" << check->GetBitstringMask() << std::dec; + } + } + + void VisitCheckCast(HCheckCast* check_cast) OVERRIDE { + HandleTypeCheckInstruction(check_cast); } void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE { - StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind(); - StartAttributeStream("must_do_null_check") << std::boolalpha - << instance_of->MustDoNullCheck() << std::noboolalpha; + HandleTypeCheckInstruction(instance_of); } void VisitArrayLength(HArrayLength* array_length) OVERRIDE { @@ -648,20 +655,32 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha; } + // For the builder and the inliner, we want to add extra information on HInstructions + // that have reference types, and also HInstanceOf/HCheckcast. if ((IsPass(HGraphBuilder::kBuilderPassName) || IsPass(HInliner::kInlinerPassName)) - && (instruction->GetType() == DataType::Type::kReference)) { - ReferenceTypeInfo info = instruction->IsLoadClass() - ? instruction->AsLoadClass()->GetLoadedClassRTI() - : instruction->GetReferenceTypeInfo(); + && (instruction->GetType() == DataType::Type::kReference || + instruction->IsInstanceOf() || + instruction->IsCheckCast())) { + ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference) + ? instruction->IsLoadClass() + ? instruction->AsLoadClass()->GetLoadedClassRTI() + : instruction->GetReferenceTypeInfo() + : instruction->IsInstanceOf() + ? 
instruction->AsInstanceOf()->GetTargetClassRTI() + : instruction->AsCheckCast()->GetTargetClassRTI(); ScopedObjectAccess soa(Thread::Current()); if (info.IsValid()) { StartAttributeStream("klass") << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get()); - StartAttributeStream("can_be_null") - << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; + if (instruction->GetType() == DataType::Type::kReference) { + StartAttributeStream("can_be_null") + << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; + } StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; - } else if (instruction->IsLoadClass()) { + } else if (instruction->IsLoadClass() || + instruction->IsInstanceOf() || + instruction->IsCheckCast()) { StartAttributeStream("klass") << "unresolved"; } else { // The NullConstant may be added to the graph during other passes that happen between diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 64a1eccf60..0205c6a4d3 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -1811,29 +1811,6 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object, } } -static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (cls == nullptr) { - return TypeCheckKind::kUnresolvedCheck; - } else if (cls->IsInterface()) { - return TypeCheckKind::kInterfaceCheck; - } else if (cls->IsArrayClass()) { - if (cls->GetComponentType()->IsObjectClass()) { - return TypeCheckKind::kArrayObjectCheck; - } else if (cls->CannotBeAssignedFromOtherTypes()) { - return TypeCheckKind::kExactCheck; - } else { - return TypeCheckKind::kArrayCheck; - } - } else if (cls->IsFinal()) { - return TypeCheckKind::kExactCheck; - } else if (cls->IsAbstract()) { - return TypeCheckKind::kAbstractClassCheck; - } else { - return TypeCheckKind::kClassHierarchyCheck; - } -} - void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) { HLoadString* load_string = new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc); @@ -1848,22 +1825,8 @@ void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) { ScopedObjectAccess soa(Thread::Current()); const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); - Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); - Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass( - soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_)); - - bool needs_access_check = true; - if (klass != nullptr) { - if (klass->IsPublic()) { - needs_access_check = false; - } else { - ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); - if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) { - needs_access_check = false; - } - } - } - + Handle<mirror::Class> klass = ResolveClass(soa, type_index); + bool needs_access_check = LoadClassNeedsAccessCheck(klass); return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); } @@ -1908,25 +1871,83 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, return load_class; } +Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa, + dex::TypeIndex type_index) { + 
Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); + ObjPtr<mirror::Class> klass = compiler_driver_->ResolveClass( + soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_); + // TODO: Avoid creating excessive handles if the method references the same class repeatedly. + // (Use a map on the local_allocator_.) + return handles_->NewHandle(klass); +} + +bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) { + if (klass == nullptr) { + return true; + } else if (klass->IsPublic()) { + return false; + } else { + ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); + return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get()); + } +} + void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, uint8_t destination, uint8_t reference, dex::TypeIndex type_index, uint32_t dex_pc) { HInstruction* object = LoadLocal(reference, DataType::Type::kReference); - HLoadClass* cls = BuildLoadClass(type_index, dex_pc); ScopedObjectAccess soa(Thread::Current()); - TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass()); + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); + Handle<mirror::Class> klass = ResolveClass(soa, type_index); + bool needs_access_check = LoadClassNeedsAccessCheck(klass); + TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind( + klass.Get(), code_generator_, compiler_driver_, needs_access_check); + + HInstruction* class_or_null = nullptr; + HIntConstant* bitstring_path_to_root = nullptr; + HIntConstant* bitstring_mask = nullptr; + if (check_kind == TypeCheckKind::kBitstringCheck) { + // TODO: Allow using the bitstring check also if we need an access check. + DCHECK(!needs_access_check); + class_or_null = graph_->GetNullConstant(dex_pc); + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + uint32_t path_to_root = + SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get()); + uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get()); + bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc); + bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc); + } else { + class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); + } + DCHECK(class_or_null != nullptr); + if (instruction.Opcode() == Instruction::INSTANCE_OF) { - AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc)); + AppendInstruction(new (allocator_) HInstanceOf(object, + class_or_null, + check_kind, + klass, + dex_pc, + allocator_, + bitstring_path_to_root, + bitstring_mask)); UpdateLocal(destination, current_block_->GetLastInstruction()); } else { DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST); // We emit a CheckCast followed by a BoundType. CheckCast is a statement // which may throw. If it succeeds BoundType sets the new type of `object` // for all subsequent uses. 
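For illustration only, a minimal standalone sketch (not part of the patch) of the predicate that the bitstring_path_to_root and bitstring_mask constants prepared above encode; the struct and field below are illustrative stand-ins, since in ART the bitstring actually lives in bits of the class status word:

  #include <cstdint>

  // Toy stand-in for a class object with its type-check bitstring exposed as a plain field.
  struct ToyClass {
    uint32_t type_check_bitstring;
  };

  // A kBitstringCheck succeeds iff masking the object's class bitstring with the target's mask
  // yields exactly the target's encoded path-to-root.
  inline bool BitstringTypeCheck(const ToyClass* klass, uint32_t path_to_root, uint32_t mask) {
    return (klass->type_check_bitstring & mask) == path_to_root;
  }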
- AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc)); + AppendInstruction( + new (allocator_) HCheckCast(object, + class_or_null, + check_kind, + klass, + dex_pc, + allocator_, + bitstring_path_to_root, + bitstring_mask)); AppendInstruction(new (allocator_) HBoundType(object, dex_pc)); UpdateLocal(reference, current_block_->GetLastInstruction()); } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 4428c53277..f78829232d 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -39,6 +39,7 @@ class DexCompilationUnit; class HBasicBlockBuilder; class Instruction; class OptimizingCompilerStats; +class ScopedObjectAccess; class SsaBuilder; class VariableSizedHandleScope; @@ -232,6 +233,12 @@ class HInstructionBuilder : public ValueObject { bool needs_access_check) REQUIRES_SHARED(Locks::mutator_lock_); + Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index) + REQUIRES_SHARED(Locks::mutator_lock_); + + bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) + REQUIRES_SHARED(Locks::mutator_lock_); + // Returns the outer-most compiling method's class. ObjPtr<mirror::Class> GetOutermostCompilingClass() const; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index a42a85dc1d..2538fa38f1 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -576,7 +576,9 @@ bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInst // Returns whether doing a type test between the class of `object` against `klass` has // a statically known outcome. The result of the test is stored in `outcome`. -static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { +static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti, + HInstruction* object, + /*out*/bool* outcome) { DCHECK(!object->IsNullConstant()) << "Null constants should be special cased"; ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); ScopedObjectAccess soa(Thread::Current()); @@ -586,7 +588,6 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo return false; } - ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); if (!class_rti.IsValid()) { // Happens when the loaded class is unresolved. return false; @@ -611,8 +612,8 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HInstruction* object = check_cast->InputAt(0); - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - if (load_class->NeedsAccessCheck()) { + if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && + check_cast->GetTargetClass()->NeedsAccessCheck()) { // If we need to perform an access check we cannot remove the instruction. return; } @@ -630,15 +631,18 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { // Note: The `outcome` is initialized to please valgrind - the compiler can reorder // the return value check with the `outcome` check, b/27651442 . 
bool outcome = false; - if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { + if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) { if (outcome) { check_cast->GetBlock()->RemoveInstruction(check_cast); MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast); - if (!load_class->HasUses()) { - // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. - // However, here we know that it cannot because the checkcast was successfull, hence - // the class was already loaded. - load_class->GetBlock()->RemoveInstruction(load_class); + if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { + HLoadClass* load_class = check_cast->GetTargetClass(); + if (!load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the checkcast was successfull, hence + // the class was already loaded. + load_class->GetBlock()->RemoveInstruction(load_class); + } } } else { // Don't do anything for exceptional cases for now. Ideally we should remove @@ -649,8 +653,8 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { HInstruction* object = instruction->InputAt(0); - HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass(); - if (load_class->NeedsAccessCheck()) { + if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && + instruction->GetTargetClass()->NeedsAccessCheck()) { // If we need to perform an access check we cannot remove the instruction. return; } @@ -673,7 +677,7 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { // Note: The `outcome` is initialized to please valgrind - the compiler can reorder // the return value check with the `outcome` check, b/27651442 . bool outcome = false; - if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { + if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) { MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf); if (outcome && can_be_null) { // Type test will succeed, we just need a null test. @@ -686,11 +690,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { } RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); - if (outcome && !load_class->HasUses()) { - // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. - // However, here we know that it cannot because the instanceof check was successfull, hence - // the class was already loaded. - load_class->GetBlock()->RemoveInstruction(load_class); + if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { + HLoadClass* load_class = instruction->GetTargetClass(); + if (!load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the instanceof check was successfull, hence + // the class was already loaded. 
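As a side note, a hedged sketch of the reasoning a TypeCheckHasKnownOutcome-style helper can apply once the target RTI is taken from the type-check instruction itself; the boolean parameters abstract over ART's ReferenceTypeInfo queries and are assumptions for illustration, not the patch's exact conditions:

  // Returns whether the result of `obj instanceof/checkcast target` is statically known and,
  // if so, stores it in *outcome (mirroring the out-parameter style used above).
  bool StaticTypeCheckOutcome(bool target_is_supertype_of_obj,
                              bool obj_type_is_exact,
                              bool classes_unrelated,  // Only meaningful for non-interface targets.
                              /*out*/ bool* outcome) {
    if (target_is_supertype_of_obj) {
      *outcome = true;   // Every possible runtime type of obj passes the check.
      return true;
    }
    if (obj_type_is_exact || classes_unrelated) {
      *outcome = false;  // No runtime type of obj can pass the check.
      return true;
    }
    return false;        // A subclass of obj's static type might still pass; not known statically.
  }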
+ load_class->GetBlock()->RemoveInstruction(load_class); + } } } } diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index ca1b451e6b..2f8e33f941 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2011,6 +2011,14 @@ void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 99b8b5df74..830d0403e4 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -2811,6 +2811,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 113c9de5a2..cafa5228d9 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2835,6 +2835,15 @@ void IntrinsicCodeGeneratorMIPS::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, codegen_, kQuickAtan2); } +// static double java.lang.Math.pow(double y, double x) +void IntrinsicLocationsBuilderMIPS::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, codegen_, kQuickPow); +} + // static double java.lang.Math.cbrt(double a) void IntrinsicLocationsBuilderMIPS::VisitMathCbrt(HInvoke* invoke) { CreateFPToFPCallLocations(allocator_, invoke); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 521bad27e2..89f1818be2 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -2416,6 +2416,15 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, codegen_, kQuickAtan2); } +// static double java.lang.Math.pow(double y, double x) +void IntrinsicLocationsBuilderMIPS64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, codegen_, kQuickPow); +} + // static double java.lang.Math.cbrt(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathCbrt(HInvoke* invoke) { CreateFPToFPCallLocations(allocator_, invoke); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index baa410b884..46b7f3f1ce 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -1105,6 +1105,14 @@ void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) { 
GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 6dd8b8e1f5..6483b7cb2a 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -897,6 +897,14 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 727431a493..5587f87dae 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -865,6 +865,15 @@ void HLoopInformation::Populate() { graph->SetHasLoops(true); } +void HLoopInformation::PopulateInnerLoopUpwards(HLoopInformation* inner_loop) { + DCHECK(inner_loop->GetPreHeader()->GetLoopInformation() == this); + blocks_.Union(&inner_loop->blocks_); + HLoopInformation* outer_loop = GetPreHeader()->GetLoopInformation(); + if (outer_loop != nullptr) { + outer_loop->PopulateInnerLoopUpwards(this); + } +} + HBasicBlock* HLoopInformation::GetPreHeader() const { HBasicBlock* block = header_->GetPredecessors()[0]; DCHECK(irreducible_ || (block == header_->GetDominator())); @@ -3096,6 +3105,8 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) { return os << "array_object_check"; case TypeCheckKind::kArrayCheck: return os << "array_check"; + case TypeCheckKind::kBitstringCheck: + return os << "bitstring_check"; default: LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs); UNREACHABLE(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 2047954207..b0657d6f1c 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -826,6 +826,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { // Finds blocks that are part of this loop. void Populate(); + // Updates blocks population of the loop and all of its outer' ones recursively after the + // population of the inner loop is updated. + void PopulateInnerLoopUpwards(HLoopInformation* inner_loop); + // Returns whether this loop information contains `block`. // Note that this loop information *must* be populated before entering this function. bool Contains(const HBasicBlock& block) const; @@ -856,6 +860,12 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { bool HasExitEdge() const; + // Resets back edge and blocks-in-loop data. + void ResetBasicBlockData() { + back_edges_.clear(); + ClearAllBlocks(); + } + private: // Internal recursive implementation of `Populate`. 
void PopulateRecursive(HBasicBlock* block); @@ -998,6 +1008,18 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { loop_information_->AddBackEdge(back_edge); } + // Registers a back edge; if the block was not a loop header before the call associates a newly + // created loop info with it. + // + // Used in SuperblockCloner to preserve LoopInformation object instead of reseting loop + // info for all blocks during back edges recalculation. + void AddBackEdgeWhileUpdating(HBasicBlock* back_edge) { + if (loop_information_ == nullptr || loop_information_->GetHeader() != this) { + loop_information_ = new (graph_->GetAllocator()) HLoopInformation(this, graph_); + } + loop_information_->AddBackEdge(back_edge); + } + HGraph* GetGraph() const { return graph_; } void SetGraph(HGraph* graph) { graph_ = graph; } @@ -5929,8 +5951,7 @@ class HLoadClass FINAL : public HInstruction { special_input_(HUserRecord<HInstruction*>(current_method)), type_index_(type_index), dex_file_(dex_file), - klass_(klass), - loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { + klass_(klass) { // Referrers class should not need access check. We never inline unverified // methods so we can't possibly end up in this situation. DCHECK(!is_referrers_class || !needs_access_check); @@ -5940,6 +5961,7 @@ class HLoadClass FINAL : public HInstruction { SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagIsInBootImage>(false); SetPackedFlag<kFlagGenerateClInitCheck>(false); + SetPackedFlag<kFlagValidLoadedClassRTI>(false); } bool IsClonable() const OVERRIDE { return true; } @@ -5988,13 +6010,18 @@ class HLoadClass FINAL : public HInstruction { } ReferenceTypeInfo GetLoadedClassRTI() { - return loaded_class_rti_; + if (GetPackedFlag<kFlagValidLoadedClassRTI>()) { + // Note: The is_exact flag from the return value should not be used. + return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true); + } else { + return ReferenceTypeInfo::CreateInvalid(); + } } - void SetLoadedClassRTI(ReferenceTypeInfo rti) { - // Make sure we only set exact types (the loaded class should never be merged). - DCHECK(rti.IsExact()); - loaded_class_rti_ = rti; + // Loaded class RTI is marked as valid by RTP if the klass_ is admissible. + void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(klass_ != nullptr); + SetPackedFlag<kFlagValidLoadedClassRTI>(true); } dex::TypeIndex GetTypeIndex() const { return type_index_; } @@ -6047,7 +6074,8 @@ class HLoadClass FINAL : public HInstruction { static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1; static constexpr size_t kFieldLoadKindSize = MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast)); - static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize; + static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize; + static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1; static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields."); using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>; @@ -6075,8 +6103,6 @@ class HLoadClass FINAL : public HInstruction { const DexFile& dex_file_; Handle<mirror::Class> klass_; - - ReferenceTypeInfo loaded_class_rti_; }; std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs); @@ -6604,71 +6630,156 @@ enum class TypeCheckKind { kInterfaceCheck, // No optimization yet when checking against an interface. 
kArrayObjectCheck, // Can just check if the array is not primitive. kArrayCheck, // No optimization yet when checking against a generic array. + kBitstringCheck, // Compare the type check bitstring. kLast = kArrayCheck }; std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs); -class HInstanceOf FINAL : public HExpression<2> { +// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an +// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.) +class HTypeCheckInstruction : public HVariableInputSizeInstruction { public: - HInstanceOf(HInstruction* object, - HLoadClass* target_class, - TypeCheckKind check_kind, - uint32_t dex_pc) - : HExpression(DataType::Type::kBool, - SideEffectsForArchRuntimeCalls(check_kind), - dex_pc) { + HTypeCheckInstruction(HInstruction* object, + HInstruction* target_class_or_null, + TypeCheckKind check_kind, + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask, + SideEffects side_effects) + : HVariableInputSizeInstruction( + side_effects, + dex_pc, + allocator, + /* number_of_inputs */ check_kind == TypeCheckKind::kBitstringCheck ? 4u : 2u, + kArenaAllocTypeCheckInputs), + klass_(klass) { SetPackedField<TypeCheckKindField>(check_kind); SetPackedFlag<kFlagMustDoNullCheck>(true); + SetPackedFlag<kFlagValidTargetClassRTI>(false); SetRawInputAt(0, object); - SetRawInputAt(1, target_class); + SetRawInputAt(1, target_class_or_null); + DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr); + DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr); + if (check_kind == TypeCheckKind::kBitstringCheck) { + DCHECK(target_class_or_null->IsNullConstant()); + SetRawInputAt(2, bitstring_path_to_root); + SetRawInputAt(3, bitstring_mask); + } else { + DCHECK(target_class_or_null->IsLoadClass()); + } } HLoadClass* GetTargetClass() const { + DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); HInstruction* load_class = InputAt(1); DCHECK(load_class->IsLoadClass()); return load_class->AsLoadClass(); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } + uint32_t GetBitstringPathToRoot() const { + DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); + HInstruction* path_to_root = InputAt(2); + DCHECK(path_to_root->IsIntConstant()); + return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue()); + } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; + uint32_t GetBitstringMask() const { + DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); + HInstruction* mask = InputAt(3); + DCHECK(mask->IsIntConstant()); + return static_cast<uint32_t>(mask->AsIntConstant()->GetValue()); } - bool NeedsEnvironment() const OVERRIDE { - return CanCallRuntime(GetTypeCheckKind()); + bool IsClonable() const OVERRIDE { return true; } + bool CanBeMoved() const OVERRIDE { return true; } + + bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName(); + return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields(); } - // Used only in code generation. 
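A self-contained illustration of the packed-field layout that the new HTypeCheckInstruction uses in this hunk, with a multi-bit kind field followed by single-bit flags sharing one 32-bit word; the shifts start at zero here instead of kNumberOfGenericPackedBits, and this is not ART's BitField template:

  #include <cstdint>

  enum class Kind : uint32_t {  // Mirrors TypeCheckKind; eight values fit in three bits.
    kExact, kAbstractClass, kClassHierarchy, kUnresolved,
    kInterface, kArrayObject, kArray, kBitstring
  };

  constexpr uint32_t kKindShift = 0;   // kFieldTypeCheckKind analogue (base offset elided).
  constexpr uint32_t kKindSize = 3;    // MinimumBitsToStore(kBitstring) analogue.
  constexpr uint32_t kMustDoNullCheckShift = kKindShift + kKindSize;
  constexpr uint32_t kValidTargetClassRtiShift = kMustDoNullCheckShift + 1;

  constexpr uint32_t Pack(Kind kind, bool must_do_null_check, bool valid_target_class_rti) {
    return (static_cast<uint32_t>(kind) << kKindShift) |
           (static_cast<uint32_t>(must_do_null_check) << kMustDoNullCheckShift) |
           (static_cast<uint32_t>(valid_target_class_rti) << kValidTargetClassRtiShift);
  }

  constexpr Kind UnpackKind(uint32_t packed_fields) {
    return static_cast<Kind>((packed_fields >> kKindShift) & ((1u << kKindSize) - 1u));
  }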
bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } - static bool CanCallRuntime(TypeCheckKind check_kind) { - // Mips currently does runtime calls for any other checks. - return check_kind != TypeCheckKind::kExactCheck; + ReferenceTypeInfo GetTargetClassRTI() { + if (GetPackedFlag<kFlagValidTargetClassRTI>()) { + // Note: The is_exact flag from the return value should not be used. + return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true); + } else { + return ReferenceTypeInfo::CreateInvalid(); + } } - static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) { - return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None(); + // Target class RTI is marked as valid by RTP if the klass_ is admissible. + void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(klass_ != nullptr); + SetPackedFlag<kFlagValidTargetClassRTI>(true); } - DECLARE_INSTRUCTION(InstanceOf); + Handle<mirror::Class> GetClass() const { + return klass_; + } protected: - DEFAULT_COPY_CONSTRUCTOR(InstanceOf); + DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction); private: - static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits; + static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; static constexpr size_t kFieldTypeCheckKindSize = MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; - static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1; + static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1; + static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1; static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; + + Handle<mirror::Class> klass_; +}; + +class HInstanceOf FINAL : public HTypeCheckInstruction { + public: + HInstanceOf(HInstruction* object, + HInstruction* target_class_or_null, + TypeCheckKind check_kind, + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask) + : HTypeCheckInstruction(object, + target_class_or_null, + check_kind, + klass, + dex_pc, + allocator, + bitstring_path_to_root, + bitstring_mask, + SideEffectsForArchRuntimeCalls(check_kind)) {} + + DataType::Type GetType() const OVERRIDE { return DataType::Type::kBool; } + + bool NeedsEnvironment() const OVERRIDE { + return CanCallRuntime(GetTypeCheckKind()); + } + + static bool CanCallRuntime(TypeCheckKind check_kind) { + // Mips currently does runtime calls for any other checks. + return check_kind != TypeCheckKind::kExactCheck; + } + + static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) { + return CanCallRuntime(check_kind) ? 
SideEffects::CanTriggerGC() : SideEffects::None(); + } + + DECLARE_INSTRUCTION(InstanceOf); + + protected: + DEFAULT_COPY_CONSTRUCTOR(InstanceOf); }; class HBoundType FINAL : public HExpression<1> { @@ -6718,31 +6829,25 @@ class HBoundType FINAL : public HExpression<1> { ReferenceTypeInfo upper_bound_; }; -class HCheckCast FINAL : public HTemplateInstruction<2> { +class HCheckCast FINAL : public HTypeCheckInstruction { public: HCheckCast(HInstruction* object, - HLoadClass* target_class, + HInstruction* target_class_or_null, TypeCheckKind check_kind, - uint32_t dex_pc) - : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { - SetPackedField<TypeCheckKindField>(check_kind); - SetPackedFlag<kFlagMustDoNullCheck>(true); - SetRawInputAt(0, object); - SetRawInputAt(1, target_class); - } - - HLoadClass* GetTargetClass() const { - HInstruction* load_class = InputAt(1); - DCHECK(load_class->IsLoadClass()); - return load_class->AsLoadClass(); - } - - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; - } + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask) + : HTypeCheckInstruction(object, + target_class_or_null, + check_kind, + klass, + dex_pc, + allocator, + bitstring_path_to_root, + bitstring_mask, + SideEffects::CanTriggerGC()) {} bool NeedsEnvironment() const OVERRIDE { // Instruction may throw a CheckCastError. @@ -6751,24 +6856,10 @@ class HCheckCast FINAL : public HTemplateInstruction<2> { bool CanThrow() const OVERRIDE { return true; } - bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } - void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } - TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } - bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } - DECLARE_INSTRUCTION(CheckCast); protected: DEFAULT_COPY_CONSTRUCTOR(CheckCast); - - private: - static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; - static constexpr size_t kFieldTypeCheckKindSize = - MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); - static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; - static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1; - static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; }; /** @@ -7309,19 +7400,19 @@ HInstruction* ReplaceInstrOrPhiByClone(HInstruction* instr); class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor { public: explicit CloneAndReplaceInstructionVisitor(HGraph* graph) - : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count(0) {} + : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {} void VisitInstruction(HInstruction* instruction) OVERRIDE { if (instruction->IsClonable()) { ReplaceInstrOrPhiByClone(instruction); - instr_replaced_by_clones_count++; + instr_replaced_by_clones_count_++; } } - size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count; } + size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count_; } private: - size_t instr_replaced_by_clones_count; + size_t 
instr_replaced_by_clones_count_; DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor); }; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index a3b1f0c5af..c35c490118 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -382,6 +382,8 @@ class OptimizingCompiler FINAL : public Compiler { PassObserver* pass_observer, VariableSizedHandleScope* handles) const; + void GenerateJitDebugInfo(debug::MethodDebugInfo method_debug_info); + std::unique_ptr<OptimizingCompilerStats> compilation_stats_; std::unique_ptr<std::ostream> visualizer_output_; @@ -1230,7 +1232,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); debug::MethodDebugInfo info = {}; - DCHECK(info.trampoline_name.empty()); + DCHECK(info.custom_name.empty()); info.dex_file = dex_file; info.class_def_index = class_def_idx; info.dex_method_index = method_idx; @@ -1246,14 +1248,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = nullptr; info.cfi = jni_compiled_method.GetCfi(); - // If both flags are passed, generate full debug info. - const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); - std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( - GetCompilerDriver()->GetInstructionSet(), - GetCompilerDriver()->GetInstructionSetFeatures(), - mini_debug_info, - info); - CreateJITCodeEntryForAddress(code_address, std::move(elf_file)); + GenerateJitDebugInfo(info); } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); @@ -1361,7 +1356,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); debug::MethodDebugInfo info = {}; - DCHECK(info.trampoline_name.empty()); + DCHECK(info.custom_name.empty()); info.dex_file = dex_file; info.class_def_index = class_def_idx; info.dex_method_index = method_idx; @@ -1377,14 +1372,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = stack_map_size == 0 ? nullptr : stack_map_data; info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); - // If both flags are passed, generate full debug info. - const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); - std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( - GetCompilerDriver()->GetInstructionSet(), - GetCompilerDriver()->GetInstructionSetFeatures(), - mini_debug_info, - info); - CreateJITCodeEntryForAddress(code_address, std::move(elf_file)); + GenerateJitDebugInfo(info); } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); @@ -1408,4 +1396,22 @@ bool OptimizingCompiler::JitCompile(Thread* self, return true; } +void OptimizingCompiler::GenerateJitDebugInfo(debug::MethodDebugInfo info) { + const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); + DCHECK(compiler_options.GenerateAnyDebugInfo()); + + // If both flags are passed, generate full debug info. + const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); + + // Create entry for the single method that we just compiled. 
+ std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( + GetCompilerDriver()->GetInstructionSet(), + GetCompilerDriver()->GetInstructionSetFeatures(), + mini_debug_info, + ArrayRef<const debug::MethodDebugInfo>(&info, 1)); + MutexLock mu(Thread::Current(), g_jit_debug_mutex); + JITCodeEntry* entry = CreateJITCodeEntry(elf_file); + IncrementJITCodeEntryRefcount(entry, info.code_address); +} + } // namespace art diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 0023265e50..a6a2f46d2e 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -99,6 +99,7 @@ enum class MethodCompilationStat { kConstructorFenceRemovedLSE, kConstructorFenceRemovedPFRA, kConstructorFenceRemovedCFRE, + kBitstringTypeCheck, kJitOutOfMemoryForCommit, kLastStat }; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index f843c008d8..59733397bf 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -34,6 +34,20 @@ void PrepareForRegisterAllocation::Run() { } } +void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) { + // Record only those bitstring type checks that make it to the codegen stage. + if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); + } +} + +void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) { + // Record only those bitstring type checks that make it to the codegen stage. + if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); + } +} + void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) { check->ReplaceWith(check->InputAt(0)); } diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 2c64f016c1..f6e4d3ef99 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -40,6 +40,8 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { "prepare_for_register_allocation"; private: + void VisitCheckCast(HCheckCast* check_cast) OVERRIDE; + void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE; void VisitNullCheck(HNullCheck* check) OVERRIDE; void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE; void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 8bb124e066..178d7fd0f0 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -87,6 +87,7 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE; void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; void VisitLoadClass(HLoadClass* load_class) OVERRIDE; + void VisitInstanceOf(HInstanceOf* load_class) OVERRIDE; void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE; void VisitLoadString(HLoadString* instr) OVERRIDE; void VisitLoadException(HLoadException* instr) OVERRIDE; @@ -171,6 +172,12 @@ void ReferenceTypePropagation::ValidateTypes() { << "NullCheck " << instr->GetReferenceTypeInfo() << "Input(0) " << 
instr->InputAt(0)->GetReferenceTypeInfo(); } + } else if (instr->IsInstanceOf()) { + HInstanceOf* iof = instr->AsInstanceOf(); + DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact()); + } else if (instr->IsCheckCast()) { + HCheckCast* check = instr->AsCheckCast(); + DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact()); } } } @@ -499,8 +506,7 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* return; } - HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI(); if (!class_rti.IsValid()) { // He have loaded an unresolved class. Don't bother bounding the type. return; @@ -644,15 +650,20 @@ void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet( void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); - Handle<mirror::Class> resolved_class = instr->GetClass(); - if (IsAdmissible(resolved_class.Get())) { - instr->SetLoadedClassRTI(ReferenceTypeInfo::Create( - resolved_class, /* is_exact */ true)); + if (IsAdmissible(instr->GetClass().Get())) { + instr->SetValidLoadedClassRTI(); } instr->SetReferenceTypeInfo( ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true)); } +void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) { + ScopedObjectAccess soa(Thread::Current()); + if (IsAdmissible(instr->GetClass().Get())) { + instr->SetValidTargetClassRTI(); + } +} + void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) { instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo()); } @@ -720,8 +731,6 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) { } void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); HBoundType* bound_type = check_cast->GetNext()->AsBoundType(); if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) { // The next instruction is not an uninitialized BoundType. This must be @@ -730,12 +739,14 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast } DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0)); - if (class_rti.IsValid()) { + ScopedObjectAccess soa(Thread::Current()); + Handle<mirror::Class> klass = check_cast->GetClass(); + if (IsAdmissible(klass.Get())) { DCHECK(is_first_run_); - ScopedObjectAccess soa(Thread::Current()); + check_cast->SetValidTargetClassRTI(); // This is the first run of RTP and class is resolved. - bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes(); - bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact), + bool is_exact = klass->CannotBeAssignedFromOtherTypes(); + bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact), /* CheckCast succeeds for nulls. */ true); } else { // This is the first run of RTP and class is unresolved. Remove the binding. 
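The reference type propagation hunks above set a single validity bit instead of caching a full ReferenceTypeInfo in the instruction; a standalone sketch of that store-a-bit-and-rebuild-lazily pattern, using illustrative toy types rather than ART's Handle or ReferenceTypeInfo:

  // Stand-ins for Handle<mirror::Class> and ReferenceTypeInfo.
  struct ToyClassHandle { const void* klass = nullptr; };
  struct ToyRti { ToyClassHandle type_handle; bool is_valid = false; };

  class ToyTypeCheckNode {
   public:
    explicit ToyTypeCheckNode(ToyClassHandle klass) : klass_(klass) {}

    // Called by reference type propagation once klass_ is known to be admissible.
    void SetValidTargetClassRTI() { valid_target_class_rti_ = true; }

    // The RTI is rebuilt on demand from the stored handle instead of being cached in the node.
    ToyRti GetTargetClassRTI() const {
      return valid_target_class_rti_ ? ToyRti{klass_, /*is_valid=*/ true} : ToyRti{};
    }

   private:
    ToyClassHandle klass_;
    bool valid_target_class_rti_ = false;  // One packed bit in the real instruction.
  };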
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 1e49411c72..dffef17587 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -236,6 +236,75 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( return load_kind; } +static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + CompilerDriver* compiler_driver) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(!klass->IsProxyClass()); + DCHECK(!klass->IsArrayClass()); + + if (Runtime::Current()->UseJitCompilation()) { + // If we're JITting, try to assign a type check bitstring (fall through). + } else if (codegen->GetCompilerOptions().IsBootImage()) { + const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex()); + if (!compiler_driver->IsImageClass(descriptor)) { + return false; + } + // If the target is a boot image class, try to assign a type check bitstring (fall through). + // (If --force-determinism, this was already done; repeating is OK and yields the same result.) + } else { + // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring + // already assigned in the boot image. + return false; + } + + // Try to assign a type check bitstring. + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + if ((false) && // FIXME: Inliner does not respect compiler_driver->IsClassToCompile() + // and we're hitting an unassigned bitstring in dex2oat_image_test. b/26687569 + kIsDebugBuild && + codegen->GetCompilerOptions().IsBootImage() && + codegen->GetCompilerOptions().IsForceDeterminism()) { + SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); + CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed) + << klass->PrettyDescriptor() << "/" << old_state + << " in " << codegen->GetGraph()->PrettyMethod(); + } + SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass); + return state == SubtypeCheckInfo::kAssigned; +} + +TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + bool needs_access_check) { + if (klass == nullptr) { + return TypeCheckKind::kUnresolvedCheck; + } else if (klass->IsInterface()) { + return TypeCheckKind::kInterfaceCheck; + } else if (klass->IsArrayClass()) { + if (klass->GetComponentType()->IsObjectClass()) { + return TypeCheckKind::kArrayObjectCheck; + } else if (klass->CannotBeAssignedFromOtherTypes()) { + return TypeCheckKind::kExactCheck; + } else { + return TypeCheckKind::kArrayCheck; + } + } else if (klass->IsFinal()) { // TODO: Consider using bitstring for final classes. + return TypeCheckKind::kExactCheck; + } else if (kUseBitstringTypeCheck && + !needs_access_check && + CanUseTypeCheckBitstring(klass, codegen, compiler_driver)) { + // TODO: We should not need the `!needs_access_check` check but getting rid of that + // requires rewriting some optimizations in instruction simplifier. 
+ return TypeCheckKind::kBitstringCheck; + } else if (klass->IsAbstract()) { + return TypeCheckKind::kAbstractClassCheck; + } else { + return TypeCheckKind::kClassHierarchyCheck; + } +} + void HSharpening::ProcessLoadString( HLoadString* load_string, CodeGenerator* codegen, diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 6df7d6d91e..fa3e948eeb 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -44,12 +44,10 @@ class HSharpening : public HOptimization { static constexpr const char* kSharpeningPassName = "sharpening"; - // Used by the builder. - static void ProcessLoadString(HLoadString* load_string, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit, - VariableSizedHandleScope* handles); + // Used by Sharpening and InstructionSimplifier. + static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, + CodeGenerator* codegen, + CompilerDriver* compiler_driver); // Used by the builder and the inliner. static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class, @@ -58,10 +56,19 @@ class HSharpening : public HOptimization { const DexCompilationUnit& dex_compilation_unit) REQUIRES_SHARED(Locks::mutator_lock_); - // Used by Sharpening and InstructionSimplifier. - static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen, - CompilerDriver* compiler_driver); + // Used by the builder. + static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + bool needs_access_check) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Used by the builder. + static void ProcessLoadString(HLoadString* load_string, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + const DexCompilationUnit& dex_compilation_unit, + VariableSizedHandleScope* handles); private: CodeGenerator* codegen_; diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc new file mode 100644 index 0000000000..a7c23bef7e --- /dev/null +++ b/compiler/optimizing/superblock_cloner.cc @@ -0,0 +1,704 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "superblock_cloner.h" + +#include "common_dominator.h" +#include "graph_checker.h" + +#include <iostream> + +namespace art { + +using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; +using HInstructionMap = SuperblockCloner::HInstructionMap; +using HBasicBlockSet = SuperblockCloner::HBasicBlockSet; +using HEdgeSet = SuperblockCloner::HEdgeSet; + +void HEdge::Dump(std::ostream& stream) const { + stream << "(" << from_ << "->" << to_ << ")"; +} + +// +// Static helper methods. +// + +// Returns whether instruction has any uses (regular or environmental) outside the region, +// defined by basic block set. 
+static bool IsUsedOutsideRegion(const HInstruction* instr, const HBasicBlockSet& bb_set) { + auto& uses = instr->GetUses(); + for (auto use_node = uses.begin(), e = uses.end(); use_node != e; ++use_node) { + HInstruction* user = use_node->GetUser(); + if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) { + return true; + } + } + + auto& env_uses = instr->GetEnvUses(); + for (auto use_node = env_uses.begin(), e = env_uses.end(); use_node != e; ++use_node) { + HInstruction* user = use_node->GetUser()->GetHolder(); + if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) { + return true; + } + } + + return false; +} + +// Returns whether the phi's inputs are the same HInstruction. +static bool ArePhiInputsTheSame(const HPhi* phi) { + HInstruction* first_input = phi->InputAt(0); + for (size_t i = 1, e = phi->InputCount(); i < e; i++) { + if (phi->InputAt(i) != first_input) { + return false; + } + } + + return true; +} + +// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole +// graph. +static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) { + if (loop1 == nullptr || loop2 == nullptr) { + return nullptr; + } + + if (loop1->IsIn(*loop2)) { + return loop2; + } else if (loop2->IsIn(*loop1)) { + return loop1; + } + HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader()); + return block->GetLoopInformation(); +} + +// Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph. +static void OrderLoopsHeadersPredecessors(HGraph* graph) { + for (HBasicBlock* block : graph->GetPostOrder()) { + if (block->IsLoopHeader()) { + graph->OrderLoopHeaderPredecessors(block); + } + } +} + +// +// Helpers for CloneBasicBlock. +// + +void SuperblockCloner::ReplaceInputsWithCopies(HInstruction* copy_instr) { + DCHECK(!copy_instr->IsPhi()); + for (size_t i = 0, e = copy_instr->InputCount(); i < e; i++) { + // Copy instruction holds the same input as the original instruction holds. + HInstruction* orig_input = copy_instr->InputAt(i); + if (!IsInOrigBBSet(orig_input->GetBlock())) { + // Defined outside the subgraph. + continue; + } + HInstruction* copy_input = GetInstrCopy(orig_input); + // copy_instr will be registered as a user of copy_inputs after returning from this function: + // 'copy_block->AddInstruction(copy_instr)'. + copy_instr->SetRawInputAt(i, copy_input); + } +} + +void SuperblockCloner::DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, + const HEnvironment* orig_env) { + if (orig_env->GetParent() != nullptr) { + DeepCloneEnvironmentWithRemapping(copy_instr, orig_env->GetParent()); + } + HEnvironment* copy_env = new (arena_) HEnvironment(arena_, *orig_env, copy_instr); + + for (size_t i = 0; i < orig_env->Size(); i++) { + HInstruction* env_input = orig_env->GetInstructionAt(i); + if (env_input != nullptr && IsInOrigBBSet(env_input->GetBlock())) { + env_input = GetInstrCopy(env_input); + DCHECK(env_input != nullptr && env_input->GetBlock() != nullptr); + } + copy_env->SetRawEnvAt(i, env_input); + if (env_input != nullptr) { + env_input->AddEnvUseAt(copy_env, i); + } + } + // InsertRawEnvironment assumes that the instruction already has an environment; that's why we use + // SetRawEnvironment in the 'else' case. + // As this function calls itself recursively with the same copy_instr - this copy_instr may + // have a partially copied chain of HEnvironments.
+ if (copy_instr->HasEnvironment()) { + copy_instr->InsertRawEnvironment(copy_env); + } else { + copy_instr->SetRawEnvironment(copy_env); + } +} + +// +// Helpers for RemapEdgesSuccessors. +// + +void SuperblockCloner::RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, + HBasicBlock* orig_succ) { + DCHECK(IsInOrigBBSet(orig_succ)); + HBasicBlock* copy_succ = GetBlockCopy(orig_succ); + + size_t this_index = orig_succ->GetPredecessorIndexOf(orig_block); + size_t phi_input_count = 0; + // This flag reflects whether the original successor has at least one phi and this phi + // has been already processed in the loop. Used for validation purposes in DCHECK to check that + // in the end all of the phis in the copy successor have the same number of inputs - the number + // of copy successor's predecessors. + bool first_phi_met = false; + for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) { + HPhi* orig_phi = it.Current()->AsPhi(); + HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); + HInstruction* orig_phi_input = orig_phi->InputAt(this_index); + // Remove corresponding input for original phi. + orig_phi->RemoveInputAt(this_index); + // Copy phi doesn't yet have either orig_block as predecessor or the input that corresponds + // to orig_block, so add the input at the end of the list. + copy_phi->AddInput(orig_phi_input); + if (!first_phi_met) { + phi_input_count = copy_phi->InputCount(); + first_phi_met = true; + } else { + DCHECK_EQ(phi_input_count, copy_phi->InputCount()); + } + } + // orig_block will be put at the end of the copy_succ's predecessors list; that corresponds + // to the previously added phi inputs position. + orig_block->ReplaceSuccessor(orig_succ, copy_succ); + DCHECK(!first_phi_met || copy_succ->GetPredecessors().size() == phi_input_count); +} + +void SuperblockCloner::AddCopyInternalEdge(HBasicBlock* orig_block, + HBasicBlock* orig_succ) { + DCHECK(IsInOrigBBSet(orig_succ)); + HBasicBlock* copy_block = GetBlockCopy(orig_block); + HBasicBlock* copy_succ = GetBlockCopy(orig_succ); + copy_block->AddSuccessor(copy_succ); + + size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block); + for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) { + HPhi* orig_phi = it.Current()->AsPhi(); + HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); + HInstruction* orig_phi_input = orig_phi->InputAt(orig_index); + copy_phi->AddInput(orig_phi_input); + } +} + +void SuperblockCloner::RemapCopyInternalEdge(HBasicBlock* orig_block, + HBasicBlock* orig_succ) { + DCHECK(IsInOrigBBSet(orig_succ)); + HBasicBlock* copy_block = GetBlockCopy(orig_block); + copy_block->AddSuccessor(orig_succ); + DCHECK(copy_block->HasSuccessor(orig_succ)); + + size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block); + for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) { + HPhi* orig_phi = it.Current()->AsPhi(); + HInstruction* orig_phi_input = orig_phi->InputAt(orig_index); + orig_phi->AddInput(orig_phi_input); + } +} + +// +// Local versions of CF calculation/adjustment routines. +// + +// TODO: merge with the original version in nodes.cc. The concern is that we don't want to affect +// the performance of the base version by checking the local set. +// TODO: this version works when updating the back edges info for natural loop-based local_set. +// Check which exactly types of subgraphs can be analysed or rename it to +// FindBackEdgesInTheNaturalLoop. 
+void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set) { + ArenaBitVector visited(arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks. + DCHECK_EQ(visited.GetHighestBitSet(), -1); + + // Nodes that we're currently visiting, indexed by block id. + ArenaBitVector visiting(arena_, graph_->GetBlocks().size(), false, kArenaAllocGraphBuilder); + // Number of successors visited from a given node, indexed by block id. + ArenaVector<size_t> successors_visited(graph_->GetBlocks().size(), + 0u, + arena_->Adapter(kArenaAllocGraphBuilder)); + // Stack of nodes that we're currently visiting (same as marked in "visiting" above). + ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder)); + constexpr size_t kDefaultWorklistSize = 8; + worklist.reserve(kDefaultWorklistSize); + + visited.SetBit(entry_block->GetBlockId()); + visiting.SetBit(entry_block->GetBlockId()); + worklist.push_back(entry_block); + + while (!worklist.empty()) { + HBasicBlock* current = worklist.back(); + uint32_t current_id = current->GetBlockId(); + if (successors_visited[current_id] == current->GetSuccessors().size()) { + visiting.ClearBit(current_id); + worklist.pop_back(); + } else { + HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++]; + uint32_t successor_id = successor->GetBlockId(); + if (!local_set->IsBitSet(successor_id)) { + continue; + } + + if (visiting.IsBitSet(successor_id)) { + DCHECK(ContainsElement(worklist, successor)); + successor->AddBackEdgeWhileUpdating(current); + } else if (!visited.IsBitSet(successor_id)) { + visited.SetBit(successor_id); + visiting.SetBit(successor_id); + worklist.push_back(successor); + } + } + } +} + +void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) { + // TODO: DCHECK that after the transformation the graph is connected. + HBasicBlock* block_entry = nullptr; + + if (outer_loop_ == nullptr) { + for (auto block : graph_->GetBlocks()) { + if (block != nullptr) { + outer_loop_bb_set->SetBit(block->GetBlockId()); + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr) { + info->ResetBasicBlockData(); + } + } + } + block_entry = graph_->GetEntryBlock(); + } else { + outer_loop_bb_set->Copy(&outer_loop_bb_set_); + block_entry = outer_loop_->GetHeader(); + + // Add newly created copy blocks. + for (auto entry : *bb_map_) { + outer_loop_bb_set->SetBit(entry.second->GetBlockId()); + } + + // Clear loop_info for the whole outer loop. + for (uint32_t idx : outer_loop_bb_set->Indexes()) { + HBasicBlock* block = GetBlockById(idx); + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr) { + info->ResetBasicBlockData(); + } + } + } + + FindBackEdgesLocal(block_entry, outer_loop_bb_set); + + for (uint32_t idx : outer_loop_bb_set->Indexes()) { + HBasicBlock* block = GetBlockById(idx); + HLoopInformation* info = block->GetLoopInformation(); + // Reset LoopInformation for regular blocks and old headers which are no longer loop headers. + if (info != nullptr && + (info->GetHeader() != block || info->NumberOfBackEdges() == 0)) { + block->SetLoopInformation(nullptr); + } + } +} + +// This is a modified version of HGraph::AnalyzeLoops. +GraphAnalysisResult SuperblockCloner::AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set) { + // We iterate post order to ensure we visit inner loops before outer loops. 
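FindBackEdgesLocal above is the classic iterative DFS back-edge search restricted to a block set: an edge is a back edge exactly when its target is still on the DFS stack at the moment the edge is examined. A compact standalone version on a plain adjacency-list graph, not ART code; the entry node is assumed to be inside the region:

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

static std::vector<std::pair<uint32_t, uint32_t>> FindBackEdges(
    const std::vector<std::vector<uint32_t>>& succs,  // Successor adjacency list.
    const std::vector<bool>& in_region,               // Only traverse nodes inside the region.
    uint32_t entry) {
  std::vector<bool> visited(succs.size(), false);
  std::vector<bool> visiting(succs.size(), false);   // Nodes currently on the DFS stack.
  std::vector<size_t> next_succ(succs.size(), 0u);   // Successors examined so far, per node.
  std::vector<uint32_t> worklist;
  std::vector<std::pair<uint32_t, uint32_t>> back_edges;

  visited[entry] = true;
  visiting[entry] = true;
  worklist.push_back(entry);

  while (!worklist.empty()) {
    uint32_t current = worklist.back();
    if (next_succ[current] == succs[current].size()) {
      visiting[current] = false;
      worklist.pop_back();
      continue;
    }
    uint32_t succ = succs[current][next_succ[current]++];
    if (!in_region[succ]) {
      continue;
    }
    if (visiting[succ]) {
      back_edges.emplace_back(current, succ);  // Target is on the stack: a back edge.
    } else if (!visited[succ]) {
      visited[succ] = true;
      visiting[succ] = true;
      worklist.push_back(succ);
    }
  }
  return back_edges;
}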
+ // `PopulateRecursive` needs this guarantee to know whether a natural loop + // contains an irreducible loop. + for (HBasicBlock* block : graph_->GetPostOrder()) { + if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) { + continue; + } + if (block->IsLoopHeader()) { + if (block->IsCatchBlock()) { + // TODO: Dealing with exceptional back edges could be tricky because + // they only approximate the real control flow. Bail out for now. + return kAnalysisFailThrowCatchLoop; + } + block->GetLoopInformation()->Populate(); + } + } + + for (HBasicBlock* block : graph_->GetPostOrder()) { + if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) { + continue; + } + if (block->IsLoopHeader()) { + HLoopInformation* cur_loop = block->GetLoopInformation(); + HLoopInformation* outer_loop = cur_loop->GetPreHeader()->GetLoopInformation(); + if (outer_loop != nullptr) { + outer_loop->PopulateInnerLoopUpwards(cur_loop); + } + } + } + + return kAnalysisSuccess; +} + +void SuperblockCloner::CleanUpControlFlow() { + // TODO: full control flow clean up for now, optimize it. + graph_->ClearDominanceInformation(); + + ArenaBitVector outer_loop_bb_set( + arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + RecalculateBackEdgesInfo(&outer_loop_bb_set); + + // TODO: do it locally. + graph_->SimplifyCFG(); + graph_->ComputeDominanceInformation(); + + // AnalyzeLoopsLocally requires a correct post-ordering information which was calculated just + // before in ComputeDominanceInformation. + GraphAnalysisResult result = AnalyzeLoopsLocally(&outer_loop_bb_set); + DCHECK_EQ(result, kAnalysisSuccess); + + // TODO: do it locally + OrderLoopsHeadersPredecessors(graph_); + + graph_->ComputeTryBlockInformation(); +} + +// +// Helpers for ResolveDataFlow +// + +void SuperblockCloner::ResolvePhi(HPhi* phi) { + HBasicBlock* phi_block = phi->GetBlock(); + for (size_t i = 0, e = phi->InputCount(); i < e; i++) { + HInstruction* input = phi->InputAt(i); + HBasicBlock* input_block = input->GetBlock(); + + // Originally defined outside the region. + if (!IsInOrigBBSet(input_block)) { + continue; + } + HBasicBlock* corresponding_block = phi_block->GetPredecessors()[i]; + if (!IsInOrigBBSet(corresponding_block)) { + phi->ReplaceInput(GetInstrCopy(input), i); + } + } +} + +// +// Main algorithm methods. +// + +void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) { + DCHECK(exits->empty()); + for (uint32_t block_id : orig_bb_set_.Indexes()) { + HBasicBlock* block = GetBlockById(block_id); + for (HBasicBlock* succ : block->GetSuccessors()) { + if (!IsInOrigBBSet(succ)) { + exits->push_back(succ); + } + } + } +} + +void SuperblockCloner::FindAndSetLocalAreaForAdjustments() { + DCHECK(outer_loop_ == nullptr); + ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner)); + SearchForSubgraphExits(&exits); + + // For a reducible graph we need to update back-edges and dominance information only for + // the outermost loop which is affected by the transformation - it can be found by picking + // the common most outer loop of loops to which the subgraph exits blocks belong. + // Note: it can a loop or the whole graph (outer_loop_ will be nullptr in this case). 
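SearchForSubgraphExits above collects the blocks that the code right below combines via FindCommonLoop. The underlying graph operation, sketched standalone (not ART code): every successor outside the region of a block inside the region is an exit.

#include <cstdint>
#include <vector>

// Blocks are numbered 0..n-1; succs is the successor adjacency list and in_region marks the
// blocks of the cloned region.
static std::vector<uint32_t> FindRegionExits(const std::vector<std::vector<uint32_t>>& succs,
                                             const std::vector<bool>& in_region) {
  std::vector<uint32_t> exits;
  for (uint32_t block = 0; block < succs.size(); ++block) {
    if (!in_region[block]) {
      continue;
    }
    for (uint32_t succ : succs[block]) {
      if (!in_region[succ]) {
        exits.push_back(succ);  // May contain duplicates, as in SearchForSubgraphExits.
      }
    }
  }
  return exits;
}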
+ for (HBasicBlock* exit : exits) { + HLoopInformation* loop_exit_loop_info = exit->GetLoopInformation(); + if (loop_exit_loop_info == nullptr) { + outer_loop_ = nullptr; + break; + } + outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info); + } + + if (outer_loop_ != nullptr) { + // Save the loop population info as it will be changed later. + outer_loop_bb_set_.Copy(&outer_loop_->GetBlocks()); + } +} + +void SuperblockCloner::RemapEdgesSuccessors() { + // Redirect incoming edges. + for (HEdge e : *remap_incoming_) { + HBasicBlock* orig_block = GetBlockById(e.GetFrom()); + HBasicBlock* orig_succ = GetBlockById(e.GetTo()); + RemapOrigInternalOrIncomingEdge(orig_block, orig_succ); + } + + // Redirect internal edges. + for (uint32_t orig_block_id : orig_bb_set_.Indexes()) { + HBasicBlock* orig_block = GetBlockById(orig_block_id); + + for (HBasicBlock* orig_succ : orig_block->GetSuccessors()) { + uint32_t orig_succ_id = orig_succ->GetBlockId(); + + // Check for outgoing edge. + if (!IsInOrigBBSet(orig_succ)) { + HBasicBlock* copy_block = GetBlockCopy(orig_block); + copy_block->AddSuccessor(orig_succ); + continue; + } + + auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id)); + auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id)); + + // Due to construction all successors of copied block were set to original. + if (copy_redir != remap_copy_internal_->end()) { + RemapCopyInternalEdge(orig_block, orig_succ); + } else { + AddCopyInternalEdge(orig_block, orig_succ); + } + + if (orig_redir != remap_orig_internal_->end()) { + RemapOrigInternalOrIncomingEdge(orig_block, orig_succ); + } + } + } +} + +void SuperblockCloner::AdjustControlFlowInfo() { + ArenaBitVector outer_loop_bb_set( + arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + RecalculateBackEdgesInfo(&outer_loop_bb_set); + + graph_->ClearDominanceInformation(); + // TODO: Do it locally. + graph_->ComputeDominanceInformation(); +} + +// TODO: Current FastCase restriction guarantees that instructions' inputs are already mapped to +// the valid values; only phis' inputs must be adjusted. +void SuperblockCloner::ResolveDataFlow() { + for (auto entry : *bb_map_) { + HBasicBlock* orig_block = entry.first; + + for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* orig_phi = it.Current()->AsPhi(); + HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); + ResolvePhi(orig_phi); + ResolvePhi(copy_phi); + } + if (kIsDebugBuild) { + // Inputs of instruction copies must be already mapped to correspondent inputs copies. + for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) { + CheckInstructionInputsRemapping(it.Current()); + } + } + } +} + +// +// Debug and logging methods. +// + +void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) { + DCHECK(!orig_instr->IsPhi()); + HInstruction* copy_instr = GetInstrCopy(orig_instr); + for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) { + HInstruction* orig_input = orig_instr->InputAt(i); + DCHECK(orig_input->GetBlock()->Dominates(orig_instr->GetBlock())); + + // If original input is defined outside the region then it will remain for both original + // instruction and the copy after the transformation. + if (!IsInOrigBBSet(orig_input->GetBlock())) { + continue; + } + HInstruction* copy_input = GetInstrCopy(orig_input); + DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock())); + } + + // Resolve environment. 
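As a concrete picture of the remapping sets that drive RemapEdgesSuccessors above, consider peeling one iteration of a simple natural loop with preheader P, header H and a single back edge (B, H): the incoming edge (P, H) is redirected to the copy of H, and the copy of the back edge is redirected back to the original H. A hedged sketch of how a client might fill the sets; the HEdgeSet construction and the Insert spelling follow the ArenaHashSet interface declared in the header below and are an assumption of this sketch, and loop peeling itself is not part of this change:

// Hypothetical client code: 'graph', 'preheader', 'header' and 'back_edge_block' are assumed
// to be the relevant blocks of the loop being peeled.
ArenaAllocator* allocator = graph->GetAllocator();
SuperblockCloner::HEdgeSet remap_orig_internal(allocator->Adapter(kArenaAllocSuperblockCloner));
SuperblockCloner::HEdgeSet remap_copy_internal(allocator->Adapter(kArenaAllocSuperblockCloner));
SuperblockCloner::HEdgeSet remap_incoming(allocator->Adapter(kArenaAllocSuperblockCloner));

// Keep all original internal edges: remap_orig_internal stays empty.
// (B_1, H_1) becomes (B_1, H): after the peeled copy, control enters the original loop.
remap_copy_internal.Insert(HEdge(back_edge_block, header));
// (P, H) becomes (P, H_1): the peeled copy executes first.
remap_incoming.Insert(HEdge(preheader, header));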
+ if (orig_instr->HasEnvironment()) { + HEnvironment* orig_env = orig_instr->GetEnvironment(); + + for (size_t i = 0, e = orig_env->Size(); i < e; ++i) { + HInstruction* orig_input = orig_env->GetInstructionAt(i); + + // If original input is defined outside the region then it will remain for both original + // instruction and the copy after the transformation. + if (orig_input == nullptr || !IsInOrigBBSet(orig_input->GetBlock())) { + continue; + } + + HInstruction* copy_input = GetInstrCopy(orig_input); + DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock())); + } + } +} + +// +// Public methods. +// + +SuperblockCloner::SuperblockCloner(HGraph* graph, + const HBasicBlockSet* orig_bb_set, + HBasicBlockMap* bb_map, + HInstructionMap* hir_map) + : graph_(graph), + arena_(graph->GetAllocator()), + orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner), + remap_orig_internal_(nullptr), + remap_copy_internal_(nullptr), + remap_incoming_(nullptr), + bb_map_(bb_map), + hir_map_(hir_map), + outer_loop_(nullptr), + outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) { + orig_bb_set_.Copy(orig_bb_set); +} + +void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, + const HEdgeSet* remap_copy_internal, + const HEdgeSet* remap_incoming) { + remap_orig_internal_ = remap_orig_internal; + remap_copy_internal_ = remap_copy_internal; + remap_incoming_ = remap_incoming; +} + +bool SuperblockCloner::IsSubgraphClonable() const { + // TODO: Support irreducible graphs and graphs with try-catch. + if (graph_->HasIrreducibleLoops() || graph_->HasTryCatch()) { + return false; + } + + // Check that there are no instructions defined in the subgraph and used outside. + // TODO: Improve this by accepting graph with such uses but only one exit. + for (uint32_t idx : orig_bb_set_.Indexes()) { + HBasicBlock* block = GetBlockById(idx); + + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (!instr->IsClonable() || + IsUsedOutsideRegion(instr, orig_bb_set_)) { + return false; + } + } + + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (!instr->IsClonable() || + IsUsedOutsideRegion(instr, orig_bb_set_)) { + return false; + } + } + } + + return true; +} + +void SuperblockCloner::Run() { + DCHECK(bb_map_ != nullptr); + DCHECK(hir_map_ != nullptr); + DCHECK(remap_orig_internal_ != nullptr && + remap_copy_internal_ != nullptr && + remap_incoming_ != nullptr); + DCHECK(IsSubgraphClonable()); + + // Find an area in the graph for which control flow information should be adjusted. + FindAndSetLocalAreaForAdjustments(); + // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be + // adjusted. + CloneBasicBlocks(); + // Connect the blocks together/remap successors and fix phis which are directly affected my the + // remapping. + RemapEdgesSuccessors(); + // Recalculate dominance and backedge information which is required by the next stage. + AdjustControlFlowInfo(); + // Fix data flow of the graph. + ResolveDataFlow(); +} + +void SuperblockCloner::CleanUp() { + CleanUpControlFlow(); + + // Remove phis which have all inputs being same. + // When a block has a single predecessor it must not have any phis. However after the + // transformation it could happen that there is such block with a phi with a single input. 
+ // As this is needed to be processed we also simplify phis with multiple same inputs here. + for (auto entry : *bb_map_) { + HBasicBlock* orig_block = entry.first; + for (HInstructionIterator inst_it(orig_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { + HPhi* phi = inst_it.Current()->AsPhi(); + if (ArePhiInputsTheSame(phi)) { + phi->ReplaceWith(phi->InputAt(0)); + orig_block->RemovePhi(phi); + } + } + + HBasicBlock* copy_block = GetBlockCopy(orig_block); + for (HInstructionIterator inst_it(copy_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { + HPhi* phi = inst_it.Current()->AsPhi(); + if (ArePhiInputsTheSame(phi)) { + phi->ReplaceWith(phi->InputAt(0)); + copy_block->RemovePhi(phi); + } + } + } +} + +HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) { + HGraph* graph = orig_block->GetGraph(); + HBasicBlock* copy_block = new (arena_) HBasicBlock(graph, orig_block->GetDexPc()); + graph->AddBlock(copy_block); + + // Clone all the phis and add them to the map. + for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* orig_instr = it.Current(); + HInstruction* copy_instr = orig_instr->Clone(arena_); + copy_block->AddPhi(copy_instr->AsPhi()); + copy_instr->AsPhi()->RemoveAllInputs(); + DCHECK(!orig_instr->HasEnvironment()); + hir_map_->Put(orig_instr, copy_instr); + } + + // Clone all the instructions and add them to the map. + for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* orig_instr = it.Current(); + HInstruction* copy_instr = orig_instr->Clone(arena_); + ReplaceInputsWithCopies(copy_instr); + copy_block->AddInstruction(copy_instr); + if (orig_instr->HasEnvironment()) { + DeepCloneEnvironmentWithRemapping(copy_instr, orig_instr->GetEnvironment()); + } + hir_map_->Put(orig_instr, copy_instr); + } + + return copy_block; +} + +void SuperblockCloner::CloneBasicBlocks() { + // By this time ReversePostOrder must be valid: in 'CloneBasicBlock' inputs of the copied + // instructions might be replaced by copies of the original inputs (depending where those inputs + // are defined). So the definitions of the original inputs must be visited before their original + // uses. The property of the reducible graphs "if 'A' dom 'B' then rpo_num('A') >= rpo_num('B')" + // guarantees that. + for (HBasicBlock* orig_block : graph_->GetReversePostOrder()) { + if (!IsInOrigBBSet(orig_block)) { + continue; + } + HBasicBlock* copy_block = CloneBasicBlock(orig_block); + bb_map_->Put(orig_block, copy_block); + if (kSuperblockClonerLogging) { + std::cout << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId() << + std::endl; + } + } +} + +} // namespace art diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h new file mode 100644 index 0000000000..23de692673 --- /dev/null +++ b/compiler/optimizing/superblock_cloner.h @@ -0,0 +1,323 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ +#define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ + +#include "base/arena_bit_vector.h" +#include "base/arena_containers.h" +#include "base/bit_vector-inl.h" +#include "nodes.h" + +namespace art { + +static const bool kSuperblockClonerLogging = false; +static const bool kSuperblockClonerVerify = false; + +// Represents an edge between two HBasicBlocks. +// +// Note: objects of this class are small - pass them by value. +class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> { + public: + HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) { + DCHECK_NE(to_, kInvalidBlockId); + DCHECK_NE(from_, kInvalidBlockId); + } + HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) { + DCHECK_NE(to_, kInvalidBlockId); + DCHECK_NE(from_, kInvalidBlockId); + } + HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {} + + uint32_t GetFrom() const { return from_; } + uint32_t GetTo() const { return to_; } + + bool operator==(const HEdge& other) const { + return this->from_ == other.from_ && this->to_ == other.to_; + } + + bool operator!=(const HEdge& other) const { return !operator==(other); } + void Dump(std::ostream& stream) const; + + // Returns whether an edge represents a valid edge in CF graph: whether the from_ block + // has to_ block as a successor. + bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; } + + private: + // Predecessor block id. + uint32_t from_; + // Successor block id. + uint32_t to_; +}; + +// Returns whether a HEdge edge corresponds to an existing edge in the graph. +inline bool IsEdgeValid(HEdge edge, HGraph* graph) { + if (!edge.IsValid()) { + return false; + } + uint32_t from = edge.GetFrom(); + uint32_t to = edge.GetTo(); + if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) { + return false; + } + + HBasicBlock* block_from = graph->GetBlocks()[from]; + HBasicBlock* block_to = graph->GetBlocks()[to]; + if (block_from == nullptr || block_to == nullptr) { + return false; + } + + return block_from->HasSuccessor(block_to, 0); +} + +// SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without +// fine grain manipulation with IR; data flow and graph properties are resolved/adjusted +// automatically. The clone transformation is defined by specifying a set of basic blocks to copy +// and a set of rules how to treat edges, remap their successors. By using this approach such +// optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented. +// +// The idea of the transformation is based on "Superblock cloning" technique described in the book +// "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University +// Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective +// Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al. +// J Supercomput (1993) 7: 229. doi:10.1007/BF01205185. +// +// There are two states of the IR graph: original graph (before the transformation) and +// copy graph (after). 
+// +// Before the transformation: +// Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original +// graph into 4 categories/sets (use the following notation for edges: "(pred, succ)", +// where pred, succ - basic blocks): +// - internal - pred, succ are members of ‘orig_bb_set’. +// - outside - pred, succ are not members of ‘orig_bb_set’. +// - incoming - pred is not a member of ‘orig_bb_set’, succ is. +// - outgoing - pred is a member of ‘orig_bb_set’, succ is not. +// +// Transformation: +// +// 1. Initial cloning: +// 1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks +// form ‘copy_bb_set’. +// 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the +// copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge +// set. +// 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the +// copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge +// set. +// 2. Successors remapping. +// 2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should +// be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)). +// 2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors +// should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)). +// 2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph +// whose successors should be remapped to copies nodes: ((X, Y) will be transformed into +// (X, Y_1)). +// 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc). +// 4. Fix/resolve data flow. +// 5. Do cleanups (DCE, critical edges splitting, etc). +// +class SuperblockCloner : public ValueObject { + public: + // TODO: Investigate optimal types for the containers. + using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>; + using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>; + using HBasicBlockSet = ArenaBitVector; + using HEdgeSet = ArenaHashSet<HEdge>; + + SuperblockCloner(HGraph* graph, + const HBasicBlockSet* orig_bb_set, + HBasicBlockMap* bb_map, + HInstructionMap* hir_map); + + // Sets edge successor remapping info specified by corresponding edge sets. + void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, + const HEdgeSet* remap_copy_internal, + const HEdgeSet* remap_incoming); + + // Returns whether the specified subgraph is copyable. + // TODO: Start from small range of graph patterns then extend it. + bool IsSubgraphClonable() const; + + // Runs the copy algorithm according to the description. + void Run(); + + // Cleans up the graph after transformation: splits critical edges, recalculates control flow + // information (back-edges, dominators, loop info, etc), eliminates redundant phis. + void CleanUp(); + + // Returns a clone of a basic block (orig_block). + // + // - The copy block will have no successors/predecessors; they should be set up manually. + // - For each instruction in the orig_block a copy is created and inserted into the copy block; + // this correspondence is recorded in the map (old instruction, new instruction). 
+ // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the + // same, as in the original block, PHIs do not reflect a correct correspondence between the + // value and predecessors (as the copy block has no predecessors by now), etc. + HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block); + + // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_ + // and hir_map_. + void CloneBasicBlocks(); + + HInstruction* GetInstrCopy(HInstruction* orig_instr) const { + auto copy_input_iter = hir_map_->find(orig_instr); + DCHECK(copy_input_iter != hir_map_->end()); + return copy_input_iter->second; + } + + HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const { + HBasicBlock* block = bb_map_->Get(orig_block); + DCHECK(block != nullptr); + return block; + } + + HInstruction* GetInstrOrig(HInstruction* copy_instr) const { + for (auto it : *hir_map_) { + if (it.second == copy_instr) { + return it.first; + } + } + return nullptr; + } + + bool IsInOrigBBSet(uint32_t block_id) const { + return orig_bb_set_.IsBitSet(block_id); + } + + bool IsInOrigBBSet(const HBasicBlock* block) const { + return IsInOrigBBSet(block->GetBlockId()); + } + + private: + // Fills the 'exits' vector with the subgraph exits. + void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits); + + // Finds and records information about the area in the graph for which control-flow (back edges, + // loops, dominators) needs to be adjusted. + void FindAndSetLocalAreaForAdjustments(); + + // Remaps edges' successors according to the info specified in the edges sets. + // + // Only edge successors/predecessors and phis' input records (to have a correspondence between + // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither + // phis' nor instructions' inputs values are resolved. + void RemapEdgesSuccessors(); + + // Adjusts control-flow (back edges, loops, dominators) for the local area defined by + // FindAndSetLocalAreaForAdjustments. + void AdjustControlFlowInfo(); + + // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in + // the SSA form. + void ResolveDataFlow(); + + // + // Helpers for CloneBasicBlock. + // + + // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the + // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original. + void ReplaceInputsWithCopies(HInstruction* copy_instr); + + // Recursively clones the environment for the copy instruction. If the input of the original + // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise + // leaves it the same as original. + void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env); + + // + // Helpers for RemapEdgesSuccessors. + // + + // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and + // copy_succ. + void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); + + // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ. + void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); + + // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ. + void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); + + // + // Local versions of control flow calculation/adjustment routines. 
+ // + + void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set); + void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set); + GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set); + void CleanUpControlFlow(); + + // + // Helpers for ResolveDataFlow + // + + // Resolves the inputs of the phi. + void ResolvePhi(HPhi* phi); + + // + // Debug and logging methods. + // + void CheckInstructionInputsRemapping(HInstruction* orig_instr); + + HBasicBlock* GetBlockById(uint32_t block_id) const { + DCHECK(block_id < graph_->GetBlocks().size()); + HBasicBlock* block = graph_->GetBlocks()[block_id]; + DCHECK(block != nullptr); + return block; + } + + HGraph* const graph_; + ArenaAllocator* const arena_; + + // Set of basic block in the original graph to be copied. + HBasicBlockSet orig_bb_set_; + + // Sets of edges which require successors remapping. + const HEdgeSet* remap_orig_internal_; + const HEdgeSet* remap_copy_internal_; + const HEdgeSet* remap_incoming_; + + // Correspondence map for blocks: (original block, copy block). + HBasicBlockMap* bb_map_; + // Correspondence map for instructions: (original HInstruction, copy HInstruction). + HInstructionMap* hir_map_; + // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted. + HLoopInformation* outer_loop_; + HBasicBlockSet outer_loop_bb_set_; + + ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo); + + DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); +}; + +} // namespace art + +namespace std { + +template <> +struct hash<art::HEdge> { + size_t operator()(art::HEdge const& x) const noexcept { + // Use Cantor pairing function as the hash function. + uint32_t a = x.GetFrom(); + uint32_t b = x.GetTo(); + return (a + b) * (a + b + 1) / 2 + b; + } +}; + +} // namespace std + +#endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc index fd77eb81fc..f1b7bffdf5 100644 --- a/compiler/optimizing/superblock_cloner_test.cc +++ b/compiler/optimizing/superblock_cloner_test.cc @@ -17,11 +17,15 @@ #include "graph_checker.h" #include "nodes.h" #include "optimizing_unit_test.h" +#include "superblock_cloner.h" #include "gtest/gtest.h" namespace art { +using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; +using HInstructionMap = SuperblockCloner::HInstructionMap; + // This class provides methods and helpers for testing various cloning and copying routines: // individual instruction cloning and cloning of the more coarse-grain structures. class SuperblockClonerTest : public OptimizingUnitTest { @@ -182,4 +186,121 @@ TEST_F(SuperblockClonerTest, IndividualInstrCloner) { EXPECT_NE(new_suspend_check, nullptr); } +// Tests SuperblockCloner::CloneBasicBlocks - check instruction cloning and initial remapping of +// instructions' inputs. 
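The std::hash&lt;HEdge&gt; specialization above relies on the Cantor pairing function, a bijection from pairs of naturals to naturals, so structurally different edges get distinct hash values (up to size_t overflow for very large block ids). A small self-contained check of that property:

#include <cassert>
#include <cstdint>
#include <set>

int main() {
  std::set<uint64_t> seen;
  for (uint32_t a = 0; a < 100; ++a) {
    for (uint32_t b = 0; b < 100; ++b) {
      // Same formula as in std::hash<art::HEdge>.
      uint64_t h = static_cast<uint64_t>(a + b) * (a + b + 1) / 2 + b;
      bool is_new = seen.insert(h).second;
      assert(is_new && "Cantor pairing must not collide");
      (void)is_new;
    }
  }
  return 0;
}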
+TEST_F(SuperblockClonerTest, CloneBasicBlocks) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + ArenaAllocator* arena = graph_->GetAllocator(); + + CreateBasicLoopControlFlow(&header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + ASSERT_TRUE(CheckGraph()); + + ArenaBitVector orig_bb_set( + arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner)); + + HLoopInformation* loop_info = header->GetLoopInformation(); + orig_bb_set.Union(&loop_info->GetBlocks()); + + SuperblockCloner cloner(graph_, + &orig_bb_set, + &bb_map, + &hir_map); + EXPECT_TRUE(cloner.IsSubgraphClonable()); + + cloner.CloneBasicBlocks(); + + EXPECT_EQ(bb_map.size(), 2u); + EXPECT_EQ(hir_map.size(), 12u); + + for (auto it : hir_map) { + HInstruction* orig_instr = it.first; + HInstruction* copy_instr = it.second; + + EXPECT_EQ(cloner.GetBlockCopy(orig_instr->GetBlock()), copy_instr->GetBlock()); + EXPECT_EQ(orig_instr->GetKind(), copy_instr->GetKind()); + EXPECT_EQ(orig_instr->GetType(), copy_instr->GetType()); + + if (orig_instr->IsPhi()) { + continue; + } + + EXPECT_EQ(orig_instr->InputCount(), copy_instr->InputCount()); + + // Check that inputs match. + for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) { + HInstruction* orig_input = orig_instr->InputAt(i); + HInstruction* copy_input = copy_instr->InputAt(i); + if (cloner.IsInOrigBBSet(orig_input->GetBlock())) { + EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input); + } else { + EXPECT_EQ(orig_input, copy_input); + } + } + + EXPECT_EQ(orig_instr->HasEnvironment(), copy_instr->HasEnvironment()); + + // Check that environments match. + if (orig_instr->HasEnvironment()) { + HEnvironment* orig_env = orig_instr->GetEnvironment(); + HEnvironment* copy_env = copy_instr->GetEnvironment(); + + EXPECT_EQ(copy_env->GetParent(), nullptr); + EXPECT_EQ(orig_env->Size(), copy_env->Size()); + + for (size_t i = 0, e = orig_env->Size(); i < e; i++) { + HInstruction* orig_input = orig_env->GetInstructionAt(i); + HInstruction* copy_input = copy_env->GetInstructionAt(i); + if (cloner.IsInOrigBBSet(orig_input->GetBlock())) { + EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input); + } else { + EXPECT_EQ(orig_input, copy_input); + } + } + } + } +} + +// SuperblockCloner::CleanUpControlFlow - checks algorithms of local adjustments of the control +// flow. 
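When an expectation in the CloneBasicBlocks test above fails, it helps to see the recorded correspondences. A possible debugging helper, hypothetical and not part of this change; it assumes the usual &lt;iostream&gt; include and the HInstruction::GetId/DebugName accessors:

static void DumpCloneMaps(const HBasicBlockMap& bb_map, const HInstructionMap& hir_map) {
  for (auto it : bb_map) {
    std::cout << "block " << it.first->GetBlockId()
              << " -> " << it.second->GetBlockId() << std::endl;
  }
  for (auto it : hir_map) {
    std::cout << "instr " << it.first->GetId() << " (" << it.first->DebugName() << ")"
              << " -> " << it.second->GetId() << std::endl;
  }
}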
+TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + ArenaAllocator* arena = graph_->GetAllocator(); + + CreateBasicLoopControlFlow(&header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + ASSERT_TRUE(CheckGraph()); + + ArenaBitVector orig_bb_set( + arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + + HLoopInformation* loop_info = header->GetLoopInformation(); + orig_bb_set.Union(&loop_info->GetBlocks()); + + SuperblockCloner cloner(graph_, + &orig_bb_set, + nullptr, + nullptr); + EXPECT_TRUE(cloner.IsSubgraphClonable()); + + cloner.FindAndSetLocalAreaForAdjustments(); + cloner.CleanUpControlFlow(); + + EXPECT_TRUE(CheckGraph()); + + EXPECT_TRUE(entry_block_->Dominates(header)); + EXPECT_TRUE(entry_block_->Dominates(exit_block_)); + + EXPECT_EQ(header->GetLoopInformation(), loop_info); + EXPECT_EQ(loop_info->GetHeader(), header); + EXPECT_TRUE(loop_info->Contains(*loop_body)); + EXPECT_TRUE(loop_info->IsBackEdge(*loop_body)); +} + } // namespace art diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 0a094352e4..674dc9a78b 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -153,7 +153,7 @@ const char* const VixlJniHelpersResults[] = { " 21c: f8d9 8034 ldr.w r8, [r9, #52] ; 0x34\n", " 220: 4770 bx lr\n", " 222: 4660 mov r0, ip\n", - " 224: f8d9 c2c0 ldr.w ip, [r9, #704] ; 0x2c0\n", + " 224: f8d9 c2c4 ldr.w ip, [r9, #708] ; 0x2c4\n", " 228: 47e0 blx ip\n", nullptr }; |
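Putting the pieces together, a follow-up test for a full clone could drive the public interface end to end. A hedged sketch that reuses the set-up style of the tests above and the peeling edge sets sketched earlier; the edge-set contents and the final expectations are assumptions of this sketch, not something asserted by the change:

ArenaAllocator* arena = graph_->GetAllocator();
HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));

SuperblockCloner cloner(graph_, &orig_bb_set, &bb_map, &hir_map);
cloner.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
ASSERT_TRUE(cloner.IsSubgraphClonable());
cloner.Run();      // Clone blocks, remap edges, adjust control flow, resolve data flow.
cloner.CleanUp();  // Recompute loops/dominators and strip redundant phis.
EXPECT_TRUE(CheckGraph());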