Diffstat (limited to 'compiler')
83 files changed, 3639 insertions, 1346 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index 3f61e8eb1b..7a257b649f 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -61,6 +61,7 @@ LIBART_COMPILER_SRC_FILES := \ driver/dex_compilation_unit.cc \ linker/buffered_output_stream.cc \ linker/file_output_stream.cc \ + linker/multi_oat_relative_patcher.cc \ linker/output_stream.cc \ linker/vector_output_stream.cc \ linker/relative_patcher.cc \ @@ -141,7 +142,9 @@ LIBART_COMPILER_SRC_FILES_arm64 := \ jni/quick/arm64/calling_convention_arm64.cc \ linker/arm64/relative_patcher_arm64.cc \ optimizing/code_generator_arm64.cc \ + optimizing/instruction_simplifier_arm.cc \ optimizing/instruction_simplifier_arm64.cc \ + optimizing/instruction_simplifier_shared.cc \ optimizing/intrinsics_arm64.cc \ utils/arm64/assembler_arm64.cc \ utils/arm64/managed_register_arm64.cc \ diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index e4bfac9ee7..239bc590e9 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -194,16 +194,15 @@ void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, InstructionSe kind, isa, instruction_set_features_.get(), - true, + /* boot_image */ true, GetImageClasses(), GetCompiledClasses(), GetCompiledMethods(), - 2, - true, - true, + /* thread_count */ 2, + /* dump_stats */ true, + /* dump_passes */ true, timer_.get(), - -1, - /* dex_to_oat_map */ nullptr, + /* swap_fd */ -1, GetProfileCompilationInfo())); // We typically don't generate an image in unit tests, disable this optimization by default. compiler_driver_->SetSupportBootImageFixup(false); diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h index 8fd20aa428..32f624acd3 100644 --- a/compiler/debug/elf_debug_loc_writer.h +++ b/compiler/debug/elf_debug_loc_writer.h @@ -232,8 +232,7 @@ static void WriteDebugLocEntry(const MethodDebugInfo* method_info, // kInStackLargeOffset and kConstantLargeValue are hidden by GetKind(). // kInRegisterHigh and kInFpuRegisterHigh should be handled by // the special cases above and they should not occur alone. 
- LOG(ERROR) << "Unexpected register location kind: " - << DexRegisterLocation::PrettyDescriptor(kind); + LOG(ERROR) << "Unexpected register location kind: " << kind; break; } if (is64bitValue) { diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc index 0cd41bbf4c..6c6c9cfb1e 100644 --- a/compiler/dex/quick/quick_cfi_test.cc +++ b/compiler/dex/quick/quick_cfi_test.cc @@ -84,17 +84,16 @@ class QuickCFITest : public CFITest { Compiler::kQuick, isa, isa_features.get(), - false, - nullptr, - nullptr, - nullptr, - 0, - false, - false, - 0, - -1, - nullptr, - nullptr); + /* boot_image */ false, + /* image_classes */ nullptr, + /* compiled_classes */ nullptr, + /* compiled_methods */ nullptr, + /* thread_count */ 0, + /* dump_stats */ false, + /* dump_passes */ false, + /* timer */ nullptr, + /* swap_fd */ -1, + /* profile_compilation_info */ nullptr); ClassLinker* linker = nullptr; CompilationUnit cu(&pool, isa, &driver, linker); DexFile::CodeItem code_item { 0, 0, 0, 0, 0, 0, { 0 } }; // NOLINT diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc index efdc333261..ff0ecea94c 100644 --- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc +++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc @@ -64,18 +64,17 @@ class QuickAssembleX86TestBase : public testing::Test { method_inliner_map_.get(), Compiler::kQuick, isa_, - nullptr, - false, - nullptr, - nullptr, - nullptr, - 0, - false, - false, - 0, - -1, - nullptr, - nullptr)); + /* instruction_set_features*/ nullptr, + /* boot_image */ false, + /* image_classes */ nullptr, + /* compiled_classes */ nullptr, + /* compiled_methods */ nullptr, + /* thread_count */ 0, + /* dump_stats */ false, + /* dump_passes */ false, + /* timer */ nullptr, + /* swap_fd */ -1, + /* profile_compilation_info */ nullptr)); cu_.reset(new CompilationUnit(pool_.get(), isa_, compiler_driver_.get(), nullptr)); DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>( cu_->arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc)); diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc index 2e2d1f99f3..0695cb56b3 100644 --- a/compiler/driver/compiled_method_storage_test.cc +++ b/compiler/driver/compiled_method_storage_test.cc @@ -32,19 +32,19 @@ TEST(CompiledMethodStorage, Deduplicate) { CompilerDriver driver(&compiler_options, &verification_results, &method_inliner_map, - Compiler::kOptimizing, kNone, - nullptr, - false, - nullptr, - nullptr, - nullptr, - 1u, - false, - false, - nullptr, - -1, - nullptr, - nullptr); + Compiler::kOptimizing, + /* instruction_set_ */ kNone, + /* instruction_set_features */ nullptr, + /* boot_image */ false, + /* image_classes */ nullptr, + /* compiled_classes */ nullptr, + /* compiled_methods */ nullptr, + /* thread_count */ 1u, + /* dump_stats */ false, + /* dump_passes */ false, + /* timer */ nullptr, + /* swap_fd */ -1, + /* profile_compilation_info */ nullptr); CompiledMethodStorage* storage = driver.GetCompiledMethodStorage(); ASSERT_TRUE(storage->DedupeEnabled()); // The default. 
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index db8c3abccf..a9fec30bfe 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -342,12 +342,15 @@ CompilerDriver::CompilerDriver( Compiler::Kind compiler_kind, InstructionSet instruction_set, const InstructionSetFeatures* instruction_set_features, - bool boot_image, std::unordered_set<std::string>* image_classes, + bool boot_image, + std::unordered_set<std::string>* image_classes, std::unordered_set<std::string>* compiled_classes, std::unordered_set<std::string>* compiled_methods, - size_t thread_count, bool dump_stats, bool dump_passes, - CumulativeLogger* timer, int swap_fd, - const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map, + size_t thread_count, + bool dump_stats, + bool dump_passes, + CumulativeLogger* timer, + int swap_fd, const ProfileCompilationInfo* profile_compilation_info) : compiler_options_(compiler_options), verification_results_(verification_results), @@ -374,7 +377,6 @@ CompilerDriver::CompilerDriver( compiler_context_(nullptr), support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64), dex_files_for_oat_file_(nullptr), - dex_file_oat_filename_map_(dex_to_oat_map), compiled_method_storage_(swap_fd), profile_compilation_info_(profile_compilation_info) { DCHECK(compiler_options_ != nullptr); @@ -1077,10 +1079,8 @@ static void MaybeAddToImageClasses(Handle<mirror::Class> c, image_classes); } for (auto& m : c->GetVirtualMethods(pointer_size)) { - if (m.IsMiranda() || (true)) { - StackHandleScope<1> hs2(self); - MaybeAddToImageClasses(hs2.NewHandle(m.GetDeclaringClass()), image_classes); - } + StackHandleScope<1> hs2(self); + MaybeAddToImageClasses(hs2.NewHandle(m.GetDeclaringClass()), image_classes); } if (klass->IsArrayClass()) { StackHandleScope<1> hs2(self); @@ -1677,12 +1677,6 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType use_dex_cache = true; } } - if (!use_dex_cache && IsBootImage()) { - if (!AreInSameOatFile(&(const_cast<mirror::Class*>(referrer_class)->GetDexFile()), - &declaring_class->GetDexFile())) { - use_dex_cache = true; - } - } // The method is defined not within this dex file. We need a dex cache slot within the current // dex file or direct pointers. bool must_use_direct_pointers = false; diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index d8f23f7a73..42a5bc15e4 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -94,9 +94,11 @@ class CompilerDriver { bool boot_image, std::unordered_set<std::string>* image_classes, std::unordered_set<std::string>* compiled_classes, std::unordered_set<std::string>* compiled_methods, - size_t thread_count, bool dump_stats, bool dump_passes, - CumulativeLogger* timer, int swap_fd, - const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map, + size_t thread_count, + bool dump_stats, + bool dump_passes, + CumulativeLogger* timer, + int swap_fd, const ProfileCompilationInfo* profile_compilation_info); ~CompilerDriver(); @@ -113,20 +115,6 @@ class CompilerDriver { : ArrayRef<const DexFile* const>(); } - // Are the given dex files compiled into the same oat file? Should only be called after - // GetDexFilesForOatFile, as the conservative answer (when we don't have a map) is true. 
- bool AreInSameOatFile(const DexFile* d1, const DexFile* d2) { - if (dex_file_oat_filename_map_ == nullptr) { - // TODO: Check for this wrt/ apps and boot image calls. - return true; - } - auto it1 = dex_file_oat_filename_map_->find(d1); - DCHECK(it1 != dex_file_oat_filename_map_->end()); - auto it2 = dex_file_oat_filename_map_->find(d2); - DCHECK(it2 != dex_file_oat_filename_map_->end()); - return it1->second == it2->second; - } - void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files, TimingLogger* timings) @@ -701,9 +689,6 @@ class CompilerDriver { // List of dex files that will be stored in the oat file. const std::vector<const DexFile*>* dex_files_for_oat_file_; - // Map from dex files to the oat file (name) they will be compiled into. - const std::unordered_map<const DexFile*, const char*>* dex_file_oat_filename_map_; - CompiledMethodStorage compiled_method_storage_; // Info for profile guided compilation. diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h index b673eeb3b6..f7da609e5d 100644 --- a/compiler/elf_builder.h +++ b/compiler/elf_builder.h @@ -86,12 +86,24 @@ class ElfBuilder FINAL { // Base class of all sections. class Section : public OutputStream { public: - Section(ElfBuilder<ElfTypes>* owner, const std::string& name, - Elf_Word type, Elf_Word flags, const Section* link, - Elf_Word info, Elf_Word align, Elf_Word entsize) - : OutputStream(name), owner_(owner), header_(), - section_index_(0), name_(name), link_(link), - started_(false), finished_(false), phdr_flags_(PF_R), phdr_type_(0) { + Section(ElfBuilder<ElfTypes>* owner, + const std::string& name, + Elf_Word type, + Elf_Word flags, + const Section* link, + Elf_Word info, + Elf_Word align, + Elf_Word entsize) + : OutputStream(name), + owner_(owner), + header_(), + section_index_(0), + name_(name), + link_(link), + started_(false), + finished_(false), + phdr_flags_(PF_R), + phdr_type_(0) { DCHECK_GE(align, 1u); header_.sh_type = type; header_.sh_flags = flags; @@ -228,12 +240,84 @@ class ElfBuilder FINAL { DISALLOW_COPY_AND_ASSIGN(Section); }; - // Writer of .dynstr .strtab and .shstrtab sections. + class CachedSection : public Section { + public: + CachedSection(ElfBuilder<ElfTypes>* owner, + const std::string& name, + Elf_Word type, + Elf_Word flags, + const Section* link, + Elf_Word info, + Elf_Word align, + Elf_Word entsize) + : Section(owner, name, type, flags, link, info, align, entsize), cache_() { } + + Elf_Word Add(const void* data, size_t length) { + Elf_Word offset = cache_.size(); + const uint8_t* d = reinterpret_cast<const uint8_t*>(data); + cache_.insert(cache_.end(), d, d + length); + return offset; + } + + Elf_Word GetCacheSize() { + return cache_.size(); + } + + void Write() { + this->WriteFully(cache_.data(), cache_.size()); + cache_.clear(); + cache_.shrink_to_fit(); + } + + void WriteCachedSection() { + this->Start(); + Write(); + this->End(); + } + + private: + std::vector<uint8_t> cache_; + }; + + // Writer of .dynstr section. 
+ class CachedStringSection FINAL : public CachedSection { + public: + CachedStringSection(ElfBuilder<ElfTypes>* owner, + const std::string& name, + Elf_Word flags, + Elf_Word align) + : CachedSection(owner, + name, + SHT_STRTAB, + flags, + /* link */ nullptr, + /* info */ 0, + align, + /* entsize */ 0) { } + + Elf_Word Add(const std::string& name) { + if (CachedSection::GetCacheSize() == 0u) { + DCHECK(name.empty()); + } + return CachedSection::Add(name.c_str(), name.length() + 1); + } + }; + + // Writer of .strtab and .shstrtab sections. class StringSection FINAL : public Section { public: - StringSection(ElfBuilder<ElfTypes>* owner, const std::string& name, - Elf_Word flags, Elf_Word align) - : Section(owner, name, SHT_STRTAB, flags, nullptr, 0, align, 0), + StringSection(ElfBuilder<ElfTypes>* owner, + const std::string& name, + Elf_Word flags, + Elf_Word align) + : Section(owner, + name, + SHT_STRTAB, + flags, + /* link */ nullptr, + /* info */ 0, + align, + /* entsize */ 0), current_offset_(0) { } @@ -252,42 +336,60 @@ class ElfBuilder FINAL { }; // Writer of .dynsym and .symtab sections. - class SymbolSection FINAL : public Section { + class SymbolSection FINAL : public CachedSection { public: - SymbolSection(ElfBuilder<ElfTypes>* owner, const std::string& name, - Elf_Word type, Elf_Word flags, StringSection* strtab) - : Section(owner, name, type, flags, strtab, 0, - sizeof(Elf_Off), sizeof(Elf_Sym)) { + SymbolSection(ElfBuilder<ElfTypes>* owner, + const std::string& name, + Elf_Word type, + Elf_Word flags, + Section* strtab) + : CachedSection(owner, + name, + type, + flags, + strtab, + /* info */ 0, + sizeof(Elf_Off), + sizeof(Elf_Sym)) { + // The symbol table always has to start with NULL symbol. + Elf_Sym null_symbol = Elf_Sym(); + CachedSection::Add(&null_symbol, sizeof(null_symbol)); } // Buffer symbol for this section. It will be written later. // If the symbol's section is null, it will be considered absolute (SHN_ABS). // (we use this in JIT to reference code which is stored outside the debug ELF file) - void Add(Elf_Word name, const Section* section, - Elf_Addr addr, bool is_relative, Elf_Word size, - uint8_t binding, uint8_t type, uint8_t other = 0) { + void Add(Elf_Word name, + const Section* section, + Elf_Addr addr, + bool is_relative, + Elf_Word size, + uint8_t binding, + uint8_t type, + uint8_t other = 0) { + DCHECK(section != nullptr || !is_relative); + Elf_Addr abs_addr = addr + (is_relative ? section->GetAddress() : 0); + Elf_Word section_index = + (section != nullptr) ? section->GetSectionIndex() : static_cast<Elf_Word>(SHN_ABS); + Add(name, section_index, abs_addr, size, binding, type, other); + } + + void Add(Elf_Word name, + Elf_Word section_index, + Elf_Addr addr, + Elf_Word size, + uint8_t binding, + uint8_t type, + uint8_t other = 0) { Elf_Sym sym = Elf_Sym(); sym.st_name = name; - sym.st_value = addr + (is_relative ? section->GetAddress() : 0); + sym.st_value = addr; sym.st_size = size; sym.st_other = other; - sym.st_shndx = (section != nullptr ? section->GetSectionIndex() - : static_cast<Elf_Word>(SHN_ABS)); + sym.st_shndx = section_index; sym.st_info = (binding << 4) + (type & 0xf); - symbols_.push_back(sym); - } - - void Write() { - // The symbol table always has to start with NULL symbol. 
- Elf_Sym null_symbol = Elf_Sym(); - this->WriteFully(&null_symbol, sizeof(null_symbol)); - this->WriteFully(symbols_.data(), symbols_.size() * sizeof(symbols_[0])); - symbols_.clear(); - symbols_.shrink_to_fit(); + CachedSection::Add(&sym, sizeof(sym)); } - - private: - std::vector<Elf_Sym> symbols_; }; ElfBuilder(InstructionSet isa, OutputStream* output) @@ -309,6 +411,8 @@ class ElfBuilder FINAL { debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0), shstrtab_(this, ".shstrtab", 0, 1), started_(false), + write_program_headers_(false), + loaded_size_(0u), virtual_address_(0) { text_.phdr_flags_ = PF_R | PF_X; bss_.phdr_flags_ = PF_R | PF_W; @@ -380,6 +484,14 @@ class ElfBuilder FINAL { void End() { DCHECK(started_); + // Note: loaded_size_ == 0 for tests that don't write .rodata, .text, .bss, + // .dynstr, dynsym, .hash and .dynamic. These tests should not read loaded_size_. + // TODO: Either refactor the .eh_frame creation so that it counts towards loaded_size_, + // or remove all support for .eh_frame. (The currently unused .eh_frame counts towards + // the virtual_address_ but we don't consider it for loaded_size_.) + CHECK(loaded_size_ == 0 || loaded_size_ == RoundUp(virtual_address_, kPageSize)) + << loaded_size_ << " " << virtual_address_; + // Write section names and finish the section headers. shstrtab_.Start(); shstrtab_.Write(""); @@ -434,45 +546,58 @@ class ElfBuilder FINAL { // information like the address and size of .rodata and .text. // It also contains other metadata like the SONAME. // The .dynamic section is found using the PT_DYNAMIC program header. - void WriteDynamicSection(const std::string& elf_file_path) { + void PrepareDynamicSection(const std::string& elf_file_path, + Elf_Word rodata_size, + Elf_Word text_size, + Elf_Word bss_size) { std::string soname(elf_file_path); size_t directory_separator_pos = soname.rfind('/'); if (directory_separator_pos != std::string::npos) { soname = soname.substr(directory_separator_pos + 1); } - dynstr_.Start(); - dynstr_.Write(""); // dynstr should start with empty string. - dynsym_.Add(dynstr_.Write("oatdata"), &rodata_, 0, true, - rodata_.GetSize(), STB_GLOBAL, STT_OBJECT); - if (text_.GetSize() != 0u) { - dynsym_.Add(dynstr_.Write("oatexec"), &text_, 0, true, - text_.GetSize(), STB_GLOBAL, STT_OBJECT); - dynsym_.Add(dynstr_.Write("oatlastword"), &text_, text_.GetSize() - 4, - true, 4, STB_GLOBAL, STT_OBJECT); - } else if (rodata_.GetSize() != 0) { + // Calculate addresses of .text, .bss and .dynstr. + DCHECK_EQ(rodata_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize)); + DCHECK_EQ(text_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize)); + DCHECK_EQ(bss_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize)); + DCHECK_EQ(dynstr_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize)); + Elf_Word rodata_address = rodata_.GetAddress(); + Elf_Word text_address = RoundUp(rodata_address + rodata_size, kPageSize); + Elf_Word bss_address = RoundUp(text_address + text_size, kPageSize); + Elf_Word dynstr_address = RoundUp(bss_address + bss_size, kPageSize); + + // Cache .dynstr, .dynsym and .hash data. + dynstr_.Add(""); // dynstr should start with empty string. 
+ Elf_Word rodata_index = rodata_.GetSectionIndex(); + Elf_Word oatdata = dynstr_.Add("oatdata"); + dynsym_.Add(oatdata, rodata_index, rodata_address, rodata_size, STB_GLOBAL, STT_OBJECT); + if (text_size != 0u) { + Elf_Word text_index = rodata_index + 1u; + Elf_Word oatexec = dynstr_.Add("oatexec"); + dynsym_.Add(oatexec, text_index, text_address, text_size, STB_GLOBAL, STT_OBJECT); + Elf_Word oatlastword = dynstr_.Add("oatlastword"); + Elf_Word oatlastword_address = text_address + text_size - 4; + dynsym_.Add(oatlastword, text_index, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT); + } else if (rodata_size != 0) { // rodata_ can be size 0 for dwarf_test. - dynsym_.Add(dynstr_.Write("oatlastword"), &rodata_, rodata_.GetSize() - 4, - true, 4, STB_GLOBAL, STT_OBJECT); + Elf_Word oatlastword = dynstr_.Add("oatlastword"); + Elf_Word oatlastword_address = rodata_address + rodata_size - 4; + dynsym_.Add(oatlastword, rodata_index, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT); } - if (bss_.finished_) { - dynsym_.Add(dynstr_.Write("oatbss"), &bss_, - 0, true, bss_.GetSize(), STB_GLOBAL, STT_OBJECT); - dynsym_.Add(dynstr_.Write("oatbsslastword"), &bss_, - bss_.GetSize() - 4, true, 4, STB_GLOBAL, STT_OBJECT); + if (bss_size != 0u) { + Elf_Word bss_index = rodata_index + 1u + (text_size != 0 ? 1u : 0u); + Elf_Word oatbss = dynstr_.Add("oatbss"); + dynsym_.Add(oatbss, bss_index, bss_address, bss_size, STB_GLOBAL, STT_OBJECT); + Elf_Word oatbsslastword = dynstr_.Add("oatbsslastword"); + Elf_Word bsslastword_address = bss_address + bss_size - 4; + dynsym_.Add(oatbsslastword, bss_index, bsslastword_address, 4, STB_GLOBAL, STT_OBJECT); } - Elf_Word soname_offset = dynstr_.Write(soname); - dynstr_.End(); - - dynsym_.Start(); - dynsym_.Write(); - dynsym_.End(); + Elf_Word soname_offset = dynstr_.Add(soname); // We do not really need a hash-table since there is so few entries. // However, the hash-table is the only way the linker can actually // determine the number of symbols in .dynsym so it is required. - hash_.Start(); - int count = dynsym_.GetSize() / sizeof(Elf_Sym); // Includes NULL. + int count = dynsym_.GetCacheSize() / sizeof(Elf_Sym); // Includes NULL. std::vector<Elf_Word> hash; hash.push_back(1); // Number of buckets. hash.push_back(count); // Number of chains. @@ -484,21 +609,44 @@ class ElfBuilder FINAL { hash.push_back(i + 1); // Each symbol points to the next one. } hash.push_back(0); // Last symbol terminates the chain. - hash_.WriteFully(hash.data(), hash.size() * sizeof(hash[0])); - hash_.End(); + hash_.Add(hash.data(), hash.size() * sizeof(hash[0])); + + // Calculate addresses of .dynsym, .hash and .dynamic. 
+ DCHECK_EQ(dynstr_.header_.sh_flags, dynsym_.header_.sh_flags); + DCHECK_EQ(dynsym_.header_.sh_flags, hash_.header_.sh_flags); + Elf_Word dynsym_address = + RoundUp(dynstr_address + dynstr_.GetCacheSize(), dynsym_.header_.sh_addralign); + Elf_Word hash_address = + RoundUp(dynsym_address + dynsym_.GetCacheSize(), hash_.header_.sh_addralign); + DCHECK_EQ(dynamic_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize)); + Elf_Word dynamic_address = RoundUp(hash_address + dynsym_.GetCacheSize(), kPageSize); - dynamic_.Start(); Elf_Dyn dyns[] = { - { DT_HASH, { hash_.GetAddress() } }, - { DT_STRTAB, { dynstr_.GetAddress() } }, - { DT_SYMTAB, { dynsym_.GetAddress() } }, + { DT_HASH, { hash_address } }, + { DT_STRTAB, { dynstr_address } }, + { DT_SYMTAB, { dynsym_address } }, { DT_SYMENT, { sizeof(Elf_Sym) } }, - { DT_STRSZ, { dynstr_.GetSize() } }, + { DT_STRSZ, { dynstr_.GetCacheSize() } }, { DT_SONAME, { soname_offset } }, { DT_NULL, { 0 } }, }; - dynamic_.WriteFully(&dyns, sizeof(dyns)); - dynamic_.End(); + dynamic_.Add(&dyns, sizeof(dyns)); + + loaded_size_ = RoundUp(dynamic_address + dynamic_.GetCacheSize(), kPageSize); + } + + void WriteDynamicSection() { + dynstr_.WriteCachedSection(); + dynsym_.WriteCachedSection(); + hash_.WriteCachedSection(); + dynamic_.WriteCachedSection(); + + CHECK_EQ(loaded_size_, RoundUp(dynamic_.GetAddress() + dynamic_.GetSize(), kPageSize)); + } + + Elf_Word GetLoadedSize() { + CHECK_NE(loaded_size_, 0u); + return loaded_size_; } // Returns true if all writes and seeks on the output stream succeeded. @@ -676,10 +824,10 @@ class ElfBuilder FINAL { Section rodata_; Section text_; Section bss_; - StringSection dynstr_; + CachedStringSection dynstr_; SymbolSection dynsym_; - Section hash_; - Section dynamic_; + CachedSection hash_; + CachedSection dynamic_; Section eh_frame_; Section eh_frame_hdr_; StringSection strtab_; @@ -694,12 +842,14 @@ class ElfBuilder FINAL { std::vector<Section*> sections_; bool started_; + bool write_program_headers_; + + // The size of the memory taken by the ELF file when loaded. + size_t loaded_size_; // Used for allocation of virtual address space. Elf_Addr virtual_address_; - size_t write_program_headers_; - DISALLOW_COPY_AND_ASSIGN(ElfBuilder); }; diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h index d50a08cb20..c9ea0083d5 100644 --- a/compiler/elf_writer.h +++ b/compiler/elf_writer.h @@ -52,14 +52,12 @@ class ElfWriter { virtual ~ElfWriter() {} virtual void Start() = 0; - virtual void PrepareDebugInfo(size_t rodata_section_size, - size_t text_section_size, - const ArrayRef<const debug::MethodDebugInfo>& method_infos) = 0; + virtual void SetLoadedSectionSizes(size_t rodata_size, size_t text_size, size_t bss_size) = 0; + virtual void PrepareDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) = 0; virtual OutputStream* StartRoData() = 0; virtual void EndRoData(OutputStream* rodata) = 0; virtual OutputStream* StartText() = 0; virtual void EndText(OutputStream* text) = 0; - virtual void SetBssSize(size_t bss_size) = 0; virtual void WriteDynamicSection() = 0; virtual void WriteDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) = 0; virtual void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) = 0; @@ -70,6 +68,9 @@ class ElfWriter { // should Seek() back to the position where the stream was before this operation. virtual OutputStream* GetStream() = 0; + // Get the size that the loaded ELF file will occupy in memory. 
+ virtual size_t GetLoadedSize() = 0; + protected: ElfWriter() = default; }; diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index 1d71e572d7..19346ecc2b 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -88,14 +88,12 @@ class ElfWriterQuick FINAL : public ElfWriter { ~ElfWriterQuick(); void Start() OVERRIDE; - void PrepareDebugInfo(size_t rodata_section_size, - size_t text_section_size, - const ArrayRef<const debug::MethodDebugInfo>& method_infos) OVERRIDE; + void SetLoadedSectionSizes(size_t rodata_size, size_t text_size, size_t bss_size) OVERRIDE; + void PrepareDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) OVERRIDE; OutputStream* StartRoData() OVERRIDE; void EndRoData(OutputStream* rodata) OVERRIDE; OutputStream* StartText() OVERRIDE; void EndText(OutputStream* text) OVERRIDE; - void SetBssSize(size_t bss_size) OVERRIDE; void WriteDynamicSection() OVERRIDE; void WriteDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) OVERRIDE; void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) OVERRIDE; @@ -103,12 +101,17 @@ class ElfWriterQuick FINAL : public ElfWriter { virtual OutputStream* GetStream() OVERRIDE; + size_t GetLoadedSize() OVERRIDE; + static void EncodeOatPatches(const std::vector<uintptr_t>& locations, std::vector<uint8_t>* buffer); private: const CompilerOptions* const compiler_options_; File* const elf_file_; + size_t rodata_size_; + size_t text_size_; + size_t bss_size_; std::unique_ptr<BufferedOutputStream> output_stream_; std::unique_ptr<ElfBuilder<ElfTypes>> builder_; std::unique_ptr<DebugInfoTask> debug_info_task_; @@ -134,6 +137,9 @@ ElfWriterQuick<ElfTypes>::ElfWriterQuick(InstructionSet instruction_set, : ElfWriter(), compiler_options_(compiler_options), elf_file_(elf_file), + rodata_size_(0u), + text_size_(0u), + bss_size_(0u), output_stream_(MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file))), builder_(new ElfBuilder<ElfTypes>(instruction_set, output_stream_.get())) {} @@ -146,6 +152,19 @@ void ElfWriterQuick<ElfTypes>::Start() { } template <typename ElfTypes> +void ElfWriterQuick<ElfTypes>::SetLoadedSectionSizes(size_t rodata_size, + size_t text_size, + size_t bss_size) { + DCHECK_EQ(rodata_size_, 0u); + rodata_size_ = rodata_size; + DCHECK_EQ(text_size_, 0u); + text_size_ = text_size; + DCHECK_EQ(bss_size_, 0u); + bss_size_ = bss_size; + builder_->PrepareDynamicSection(elf_file_->GetPath(), rodata_size_, text_size_, bss_size_); +} + +template <typename ElfTypes> OutputStream* ElfWriterQuick<ElfTypes>::StartRoData() { auto* rodata = builder_->GetRoData(); rodata->Start(); @@ -172,31 +191,21 @@ void ElfWriterQuick<ElfTypes>::EndText(OutputStream* text) { } template <typename ElfTypes> -void ElfWriterQuick<ElfTypes>::SetBssSize(size_t bss_size) { - auto* bss = builder_->GetBss(); - if (bss_size != 0u) { - bss->WriteNoBitsSection(bss_size); - } -} - -template <typename ElfTypes> void ElfWriterQuick<ElfTypes>::WriteDynamicSection() { - builder_->WriteDynamicSection(elf_file_->GetPath()); + if (bss_size_ != 0u) { + builder_->GetBss()->WriteNoBitsSection(bss_size_); + } + builder_->WriteDynamicSection(); } template <typename ElfTypes> void ElfWriterQuick<ElfTypes>::PrepareDebugInfo( - size_t rodata_section_size, - size_t text_section_size, const ArrayRef<const debug::MethodDebugInfo>& method_infos) { if (!method_infos.empty() && compiler_options_->GetGenerateMiniDebugInfo()) { // Prepare the mini-debug-info in background while we do 
other I/O. Thread* self = Thread::Current(); debug_info_task_ = std::unique_ptr<DebugInfoTask>( - new DebugInfoTask(builder_->GetIsa(), - rodata_section_size, - text_section_size, - method_infos)); + new DebugInfoTask(builder_->GetIsa(), rodata_size_, text_size_, method_infos)); debug_info_thread_pool_ = std::unique_ptr<ThreadPool>( new ThreadPool("Mini-debug-info writer", 1)); debug_info_thread_pool_->AddTask(self, debug_info_task_.get()); @@ -245,6 +254,11 @@ OutputStream* ElfWriterQuick<ElfTypes>::GetStream() { return builder_->GetStream(); } +template <typename ElfTypes> +size_t ElfWriterQuick<ElfTypes>::GetLoadedSize() { + return builder_->GetLoadedSize(); +} + // Explicit instantiations template class ElfWriterQuick<ElfTypes32>; template class ElfWriterQuick<ElfTypes64>; diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 4920f9baa5..992af29545 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -28,6 +28,7 @@ #include "elf_writer_quick.h" #include "gc/space/image_space.h" #include "image_writer.h" +#include "linker/multi_oat_relative_patcher.h" #include "lock_word.h" #include "mirror/object-inl.h" #include "oat_writer.h" @@ -72,10 +73,10 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { ScratchFile oat_file(OS::CreateEmptyFile(oat_filename.c_str())); const uintptr_t requested_image_base = ART_BASE_ADDRESS; - std::unordered_map<const DexFile*, const char*> dex_file_to_oat_filename_map; + std::unordered_map<const DexFile*, size_t> dex_file_to_oat_index_map; std::vector<const char*> oat_filename_vector(1, oat_filename.c_str()); for (const DexFile* dex_file : class_linker->GetBootClassPath()) { - dex_file_to_oat_filename_map.emplace(dex_file, oat_filename.c_str()); + dex_file_to_oat_index_map.emplace(dex_file, 0); } std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_, requested_image_base, @@ -83,7 +84,7 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { /*compile_app_image*/false, storage_mode, oat_filename_vector, - dex_file_to_oat_filename_map)); + dex_file_to_oat_index_map)); // TODO: compile_pic should be a test argument. 
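The image_test.cc flow above now calls elf_writer->SetLoadedSectionSizes(...) before any section is written, which drives ElfBuilder::PrepareDynamicSection: the .dynstr/.dynsym/.hash/.dynamic contents are cached in memory so page-aligned section addresses and the total loaded size can be computed up front, and WriteDynamicSection() only flushes the cached bytes later. A small standalone sketch of that address arithmetic, with made-up section sizes and a single combined size for the dynamic tail (the real builder aligns each of those sections individually):

#include <cstdint>
#include <iostream>

constexpr uint32_t kPageSize = 4096;

// Round x up to the next multiple of n (n is a power of two).
constexpr uint32_t RoundUp(uint32_t x, uint32_t n) {
  return (x + n - 1) & ~(n - 1);
}

int main() {
  // Example sizes; in PrepareDynamicSection these come from the OatWriter
  // and from the cached .dynstr/.dynsym/.hash/.dynamic buffers.
  uint32_t rodata_address = kPageSize;  // first loaded section after the ELF headers
  uint32_t rodata_size = 0x3450;
  uint32_t text_size = 0x8124;
  uint32_t bss_size = 0x0200;
  uint32_t tail_size = 0x0170;          // .dynstr + .dynsym + .hash + .dynamic combined

  uint32_t text_address = RoundUp(rodata_address + rodata_size, kPageSize);
  uint32_t bss_address = RoundUp(text_address + text_size, kPageSize);
  uint32_t dynstr_address = RoundUp(bss_address + bss_size, kPageSize);
  uint32_t loaded_size = RoundUp(dynstr_address + tail_size, kPageSize);

  std::cout << std::hex
            << ".text at 0x" << text_address
            << ", .bss at 0x" << bss_address
            << ", .dynstr at 0x" << dynstr_address
            << ", loaded size 0x" << loaded_size << "\n";
  return 0;
}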
{ { @@ -123,10 +124,22 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { &opened_dex_files_map, &opened_dex_files); ASSERT_TRUE(dex_files_ok); - oat_writer.PrepareLayout(compiler_driver_.get(), writer.get(), dex_files); + bool image_space_ok = writer->PrepareImageAddressSpace(); ASSERT_TRUE(image_space_ok); + linker::MultiOatRelativePatcher patcher(compiler_driver_->GetInstructionSet(), + instruction_set_features_.get()); + oat_writer.PrepareLayout(compiler_driver_.get(), writer.get(), dex_files, &patcher); + size_t rodata_size = oat_writer.GetOatHeader().GetExecutableOffset(); + size_t text_size = oat_writer.GetSize() - rodata_size; + elf_writer->SetLoadedSectionSizes(rodata_size, text_size, oat_writer.GetBssSize()); + + writer->UpdateOatFileLayout(/* oat_index */ 0u, + elf_writer->GetLoadedSize(), + oat_writer.GetOatDataOffset(), + oat_writer.GetSize()); + bool rodata_ok = oat_writer.WriteRodata(rodata); ASSERT_TRUE(rodata_ok); elf_writer->EndRoData(rodata); @@ -139,13 +152,13 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { bool header_ok = oat_writer.WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u); ASSERT_TRUE(header_ok); - elf_writer->SetBssSize(oat_writer.GetBssSize()); + writer->UpdateOatFileHeader(/* oat_index */ 0u, oat_writer.GetOatHeader()); + elf_writer->WriteDynamicSection(); elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo()); elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations()); bool success = elf_writer->End(); - ASSERT_TRUE(success); } } @@ -158,12 +171,10 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { std::vector<const char*> dup_image_filename(1, image_file.GetFilename().c_str()); bool success_image = writer->Write(kInvalidFd, dup_image_filename, - kInvalidFd, - dup_oat_filename, - dup_oat_filename[0]); + dup_oat_filename); ASSERT_TRUE(success_image); bool success_fixup = ElfWriter::Fixup(dup_oat.get(), - writer->GetOatDataBegin(dup_oat_filename[0])); + writer->GetOatDataBegin(0)); ASSERT_TRUE(success_fixup); ASSERT_EQ(dup_oat->FlushCloseOrErase(), 0) << "Could not flush and close oat file " diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index d50528edee..5eff8f37ec 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -162,9 +162,7 @@ bool ImageWriter::PrepareImageAddressSpace() { bool ImageWriter::Write(int image_fd, const std::vector<const char*>& image_filenames, - int oat_fd, - const std::vector<const char*>& oat_filenames, - const std::string& oat_location) { + const std::vector<const char*>& oat_filenames) { // If image_fd or oat_fd are not kInvalidFd then we may have empty strings in image_filenames or // oat_filenames. 
CHECK(!image_filenames.empty()); @@ -172,95 +170,13 @@ bool ImageWriter::Write(int image_fd, CHECK_EQ(image_filenames.size(), 1u); } CHECK(!oat_filenames.empty()); - if (oat_fd != kInvalidFd) { - CHECK_EQ(oat_filenames.size(), 1u); - } CHECK_EQ(image_filenames.size(), oat_filenames.size()); - size_t oat_file_offset = 0; - - for (size_t i = 0; i < oat_filenames.size(); ++i) { - const char* oat_filename = oat_filenames[i]; - std::unique_ptr<File> oat_file; - - if (oat_fd != -1) { - if (strlen(oat_filename) == 0u) { - oat_file.reset(new File(oat_fd, false)); - } else { - oat_file.reset(new File(oat_fd, oat_filename, false)); - } - int length = oat_file->GetLength(); - if (length < 0) { - PLOG(ERROR) << "Oat file has negative length " << length; - return false; - } else { - // Leave the fd open since dex2oat still needs to write out the oat file with the fd. - oat_file->DisableAutoClose(); - } - } else { - oat_file.reset(OS::OpenFileReadWrite(oat_filename)); - } - if (oat_file == nullptr) { - PLOG(ERROR) << "Failed to open oat file " << oat_filename; - return false; - } - std::string error_msg; - oat_file_ = OatFile::OpenReadable(oat_file.get(), oat_filename, nullptr, &error_msg); - if (oat_file_ == nullptr) { - PLOG(ERROR) << "Failed to open writable oat file " << oat_filename; - oat_file->Erase(); - return false; - } - Runtime::Current()->GetOatFileManager().RegisterOatFile( - std::unique_ptr<const OatFile>(oat_file_)); - - const OatHeader& oat_header = oat_file_->GetOatHeader(); - ImageInfo& image_info = GetImageInfo(oat_filename); - - size_t oat_loaded_size = 0; - size_t oat_data_offset = 0; - ElfWriter::GetOatElfInformation(oat_file.get(), &oat_loaded_size, &oat_data_offset); - - DCHECK_EQ(image_info.oat_offset_, oat_file_offset); - oat_file_offset += oat_loaded_size; - - if (i == 0) { - // Primary oat file, read the trampolines. - image_info.oat_address_offsets_[kOatAddressInterpreterToInterpreterBridge] = - oat_header.GetInterpreterToInterpreterBridgeOffset(); - image_info.oat_address_offsets_[kOatAddressInterpreterToCompiledCodeBridge] = - oat_header.GetInterpreterToCompiledCodeBridgeOffset(); - image_info.oat_address_offsets_[kOatAddressJNIDlsymLookup] = - oat_header.GetJniDlsymLookupOffset(); - image_info.oat_address_offsets_[kOatAddressQuickGenericJNITrampoline] = - oat_header.GetQuickGenericJniTrampolineOffset(); - image_info.oat_address_offsets_[kOatAddressQuickIMTConflictTrampoline] = - oat_header.GetQuickImtConflictTrampolineOffset(); - image_info.oat_address_offsets_[kOatAddressQuickResolutionTrampoline] = - oat_header.GetQuickResolutionTrampolineOffset(); - image_info.oat_address_offsets_[kOatAddressQuickToInterpreterBridge] = - oat_header.GetQuickToInterpreterBridgeOffset(); - } - - - { - ScopedObjectAccess soa(Thread::Current()); - CreateHeader(oat_loaded_size, oat_data_offset); - CopyAndFixupNativeData(); - } - - SetOatChecksumFromElfFile(oat_file.get()); - - if (oat_fd != -1) { - // Leave fd open for caller. 
- if (oat_file->Flush() != 0) { - LOG(ERROR) << "Failed to flush oat file " << oat_filename << " for " << oat_location; - return false; - } - } else if (oat_file->FlushCloseOrErase() != 0) { - LOG(ERROR) << "Failed to flush and close oat file " << oat_filename - << " for " << oat_location; - return false; + { + ScopedObjectAccess soa(Thread::Current()); + for (size_t i = 0; i < oat_filenames.size(); ++i) { + CreateHeader(i); + CopyAndFixupNativeData(i); } } @@ -273,8 +189,7 @@ bool ImageWriter::Write(int image_fd, for (size_t i = 0; i < image_filenames.size(); ++i) { const char* image_filename = image_filenames[i]; - const char* oat_filename = oat_filenames[i]; - ImageInfo& image_info = GetImageInfo(oat_filename); + ImageInfo& image_info = GetImageInfo(i); std::unique_ptr<File> image_file; if (image_fd != kInvalidFd) { if (strlen(image_filename) == 0u) { @@ -396,8 +311,8 @@ void ImageWriter::AssignImageOffset(mirror::Object* object, ImageWriter::BinSlot DCHECK(object != nullptr); DCHECK_NE(image_objects_offset_begin_, 0u); - const char* oat_filename = GetOatFilename(object); - ImageInfo& image_info = GetImageInfo(oat_filename); + size_t oat_index = GetOatIndex(object); + ImageInfo& image_info = GetImageInfo(oat_index); size_t bin_slot_offset = image_info.bin_slot_offsets_[bin_slot.GetBin()]; size_t new_offset = bin_slot_offset + bin_slot.GetIndex(); DCHECK_ALIGNED(new_offset, kObjectAlignment); @@ -417,8 +332,8 @@ size_t ImageWriter::GetImageOffset(mirror::Object* object) const { DCHECK(IsImageOffsetAssigned(object)); LockWord lock_word = object->GetLockWord(false); size_t offset = lock_word.ForwardingAddress(); - const char* oat_filename = GetOatFilename(object); - const ImageInfo& image_info = GetConstImageInfo(oat_filename); + size_t oat_index = GetOatIndex(object); + const ImageInfo& image_info = GetImageInfo(oat_index); DCHECK_LT(offset, image_info.image_end_); return offset; } @@ -461,8 +376,8 @@ void ImageWriter::PrepareDexCacheArraySlots() { // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned() // when AssignImageBinSlot() assigns their indexes out or order. 
for (const DexFile* dex_file : compiler_driver_.GetDexFilesForOatFile()) { - auto it = dex_file_oat_filename_map_.find(dex_file); - DCHECK(it != dex_file_oat_filename_map_.end()) << dex_file->GetLocation(); + auto it = dex_file_oat_index_map_.find(dex_file); + DCHECK(it != dex_file_oat_index_map_.end()) << dex_file->GetLocation(); ImageInfo& image_info = GetImageInfo(it->second); image_info.dex_cache_array_starts_.Put(dex_file, image_info.bin_slot_sizes_[kBinDexCacheArray]); DexCacheArraysLayout layout(target_ptr_size_, dex_file); @@ -481,8 +396,8 @@ void ImageWriter::PrepareDexCacheArraySlots() { const DexFile* dex_file = dex_cache->GetDexFile(); DexCacheArraysLayout layout(target_ptr_size_, dex_file); DCHECK(layout.Valid()); - const char* oat_filename = GetOatFilenameForDexCache(dex_cache); - ImageInfo& image_info = GetImageInfo(oat_filename); + size_t oat_index = GetOatIndexForDexCache(dex_cache); + ImageInfo& image_info = GetImageInfo(oat_index); uint32_t start = image_info.dex_cache_array_starts_.Get(dex_file); DCHECK_EQ(dex_file->NumTypeIds() != 0u, dex_cache->GetResolvedTypes() != nullptr); AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), @@ -504,9 +419,9 @@ void ImageWriter::PrepareDexCacheArraySlots() { void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset, DexCache* dex_cache) { if (array != nullptr) { DCHECK(!IsInBootImage(array)); - const char* oat_filename = GetOatFilenameForDexCache(dex_cache); + size_t oat_index = GetOatIndexForDexCache(dex_cache); native_object_relocations_.emplace(array, - NativeObjectRelocation { oat_filename, offset, kNativeObjectRelocationTypeDexCacheArray }); + NativeObjectRelocation { oat_index, offset, kNativeObjectRelocationTypeDexCacheArray }); } } @@ -621,8 +536,8 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { } // else bin = kBinRegular } - const char* oat_filename = GetOatFilename(object); - ImageInfo& image_info = GetImageInfo(oat_filename); + size_t oat_index = GetOatIndex(object); + ImageInfo& image_info = GetImageInfo(oat_index); size_t offset_delta = RoundUp(object_size, kObjectAlignment); // 64-bit alignment current_offset = image_info.bin_slot_sizes_[bin]; // How many bytes the current bin is at (aligned). 
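The surrounding ImageWriter changes consistently replace const char* oat_filename keys with size_t oat_index values: dex_file_oat_index_map_ maps each DexFile to an index, and GetImageInfo(oat_index) indexes a vector of per-oat-file state instead of a string-keyed map. A simplified sketch of that lookup pattern (the Writer, ImageInfo, and DexFile types below are trimmed stand-ins, not the real ART classes):

#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Simplified stand-ins for the real ART types.
struct DexFile { std::string location; };
struct ImageInfo { size_t image_end = 0; };

class Writer {
 public:
  Writer(std::vector<const DexFile*> dex_files, size_t oat_file_count)
      : image_infos_(oat_file_count) {
    // Here every dex file maps to oat file 0; dex2oat builds the real map
    // from its dex-file/oat-file pairing.
    for (const DexFile* dex_file : dex_files) {
      dex_file_oat_index_map_.emplace(dex_file, 0u);
    }
  }

  size_t GetOatIndexForDexFile(const DexFile* dex_file) const {
    auto it = dex_file_oat_index_map_.find(dex_file);
    return it != dex_file_oat_index_map_.end() ? it->second : 0u;  // 0 = default oat index
  }

  ImageInfo& GetImageInfo(size_t oat_index) { return image_infos_[oat_index]; }

 private:
  std::unordered_map<const DexFile*, size_t> dex_file_oat_index_map_;
  std::vector<ImageInfo> image_infos_;
};

int main() {
  DexFile core{"core.dex"};
  Writer writer({&core}, /* oat_file_count */ 1u);
  writer.GetImageInfo(writer.GetOatIndexForDexFile(&core)).image_end = 64;
  std::cout << writer.GetImageInfo(0).image_end << "\n";
  return 0;
}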
@@ -658,8 +573,8 @@ bool ImageWriter::IsImageBinSlotAssigned(mirror::Object* object) const { LockWord lock_word = object->GetLockWord(false); size_t offset = lock_word.ForwardingAddress(); BinSlot bin_slot(offset); - const char* oat_filename = GetOatFilename(object); - const ImageInfo& image_info = GetConstImageInfo(oat_filename); + size_t oat_index = GetOatIndex(object); + const ImageInfo& image_info = GetImageInfo(oat_index); DCHECK_LT(bin_slot.GetIndex(), image_info.bin_slot_sizes_[bin_slot.GetBin()]) << "bin slot offset should not exceed the size of that bin"; } @@ -675,16 +590,15 @@ ImageWriter::BinSlot ImageWriter::GetImageBinSlot(mirror::Object* object) const DCHECK_LE(offset, std::numeric_limits<uint32_t>::max()); BinSlot bin_slot(static_cast<uint32_t>(offset)); - const char* oat_filename = GetOatFilename(object); - const ImageInfo& image_info = GetConstImageInfo(oat_filename); + size_t oat_index = GetOatIndex(object); + const ImageInfo& image_info = GetImageInfo(oat_index); DCHECK_LT(bin_slot.GetIndex(), image_info.bin_slot_sizes_[bin_slot.GetBin()]); return bin_slot; } bool ImageWriter::AllocMemory() { - for (const char* oat_filename : oat_filenames_) { - ImageInfo& image_info = GetImageInfo(oat_filename); + for (ImageInfo& image_info : image_infos_) { ImageSection unused_sections[ImageHeader::kSectionCount]; const size_t length = RoundUp( image_info.CreateImageSections(target_ptr_size_, unused_sections), @@ -917,7 +831,7 @@ void ImageWriter::PruneNonImageClasses() { // Copied methods may be held live by a class which was not an image class but have a // declaring class which is an image class. Set it to the resolution method to be safe and // prevent dangling pointers. - if (method->MightBeCopied() || !KeepClass(declaring_class)) { + if (method->IsCopied() || !KeepClass(declaring_class)) { mirror::DexCache::SetElementPtrSize(resolved_methods, i, resolution_method, @@ -977,8 +891,7 @@ void ImageWriter::DumpImageClasses() { mirror::String* ImageWriter::FindInternedString(mirror::String* string) { Thread* const self = Thread::Current(); - for (auto& pair : image_info_map_) { - const ImageInfo& image_info = pair.second; + for (const ImageInfo& image_info : image_infos_) { mirror::String* const found = image_info.intern_table_->LookupStrong(self, string); DCHECK(image_info.intern_table_->LookupWeak(self, string) == nullptr) << string->ToModifiedUtf8(); @@ -1005,8 +918,8 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) { DCHECK(obj != nullptr); // if it is a string, we want to intern it if its not interned. 
if (obj->GetClass()->IsStringClass()) { - const char* oat_filename = GetOatFilename(obj); - ImageInfo& image_info = GetImageInfo(oat_filename); + size_t oat_index = GetOatIndex(obj); + ImageInfo& image_info = GetImageInfo(oat_index); // we must be an interned string that was forward referenced and already assigned if (IsImageBinSlotAssigned(obj)) { @@ -1035,7 +948,7 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) { AssignImageBinSlot(obj); } -ObjectArray<Object>* ImageWriter::CreateImageRoots(const char* oat_filename) const { +ObjectArray<Object>* ImageWriter::CreateImageRoots(size_t oat_index) const { Runtime* runtime = Runtime::Current(); ClassLinker* class_linker = runtime->GetClassLinker(); Thread* self = Thread::Current(); @@ -1044,10 +957,10 @@ ObjectArray<Object>* ImageWriter::CreateImageRoots(const char* oat_filename) con class_linker->FindSystemClass(self, "[Ljava/lang/Object;"))); std::unordered_set<const DexFile*> image_dex_files; - for (auto& pair : dex_file_oat_filename_map_) { + for (auto& pair : dex_file_oat_index_map_) { const DexFile* image_dex_file = pair.first; - const char* image_oat_filename = pair.second; - if (strcmp(oat_filename, image_oat_filename) == 0) { + size_t image_oat_index = pair.second; + if (oat_index == image_oat_index) { image_dex_files.insert(image_dex_file); } } @@ -1172,8 +1085,8 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { LengthPrefixedArray<ArtField>* fields[] = { as_klass->GetSFieldsPtr(), as_klass->GetIFieldsPtr(), }; - const char* oat_file = GetOatFilenameForDexCache(dex_cache); - ImageInfo& image_info = GetImageInfo(oat_file); + size_t oat_index = GetOatIndexForDexCache(dex_cache); + ImageInfo& image_info = GetImageInfo(oat_index); { // Note: This table is only accessed from the image writer, so the lock is technically // unnecessary. @@ -1191,8 +1104,11 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { << " already forwarded"; size_t& offset = image_info.bin_slot_sizes_[kBinArtField]; DCHECK(!IsInBootImage(cur_fields)); - native_object_relocations_.emplace(cur_fields, - NativeObjectRelocation {oat_file, offset, kNativeObjectRelocationTypeArtFieldArray }); + native_object_relocations_.emplace( + cur_fields, + NativeObjectRelocation { + oat_index, offset, kNativeObjectRelocationTypeArtFieldArray + }); offset += header_size; // Forward individual fields so that we can quickly find where they belong. for (size_t i = 0, count = cur_fields->size(); i < count; ++i) { @@ -1202,8 +1118,9 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { CHECK(it2 == native_object_relocations_.end()) << "Field at index=" << i << " already assigned " << PrettyField(field) << " static=" << field->IsStatic(); DCHECK(!IsInBootImage(field)); - native_object_relocations_.emplace(field, - NativeObjectRelocation {oat_file, offset, kNativeObjectRelocationTypeArtField }); + native_object_relocations_.emplace( + field, + NativeObjectRelocation { oat_index, offset, kNativeObjectRelocationTypeArtField }); offset += sizeof(ArtField); } } @@ -1236,13 +1153,13 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { DCHECK(!IsInBootImage(array)); native_object_relocations_.emplace(array, NativeObjectRelocation { - oat_file, + oat_index, offset, any_dirty ? 
kNativeObjectRelocationTypeArtMethodArrayDirty : kNativeObjectRelocationTypeArtMethodArrayClean }); offset += header_size; for (auto& m : as_klass->GetMethods(target_ptr_size_)) { - AssignMethodOffset(&m, type, oat_file); + AssignMethodOffset(&m, type, oat_index); } (any_dirty ? dirty_methods_ : clean_methods_) += num_methods; } @@ -1270,14 +1187,14 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { void ImageWriter::AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type, - const char* oat_filename) { + size_t oat_index) { DCHECK(!IsInBootImage(method)); auto it = native_object_relocations_.find(method); CHECK(it == native_object_relocations_.end()) << "Method " << method << " already assigned " << PrettyMethod(method); - ImageInfo& image_info = GetImageInfo(oat_filename); + ImageInfo& image_info = GetImageInfo(oat_index); size_t& offset = image_info.bin_slot_sizes_[BinTypeForNativeRelocationType(type)]; - native_object_relocations_.emplace(method, NativeObjectRelocation { oat_filename, offset, type }); + native_object_relocations_.emplace(method, NativeObjectRelocation { oat_index, offset, type }); offset += ArtMethod::Size(target_ptr_size_); } @@ -1312,9 +1229,8 @@ void ImageWriter::CalculateNewObjectOffsets() { Thread* const self = Thread::Current(); StackHandleScopeCollection handles(self); std::vector<Handle<ObjectArray<Object>>> image_roots; - for (const char* oat_filename : oat_filenames_) { - std::string image_filename = oat_filename; - image_roots.push_back(handles.NewHandle(CreateImageRoots(image_filename.c_str()))); + for (size_t i = 0, size = oat_filenames_.size(); i != size; ++i) { + image_roots.push_back(handles.NewHandle(CreateImageRoots(i))); } auto* runtime = Runtime::Current(); @@ -1340,12 +1256,12 @@ void ImageWriter::CalculateNewObjectOffsets() { const auto image_method_type = kNativeObjectRelocationTypeArtMethodArrayClean; auto it = native_object_relocations_.find(&image_method_array_); CHECK(it == native_object_relocations_.end()); - ImageInfo& default_image_info = GetImageInfo(default_oat_filename_); + ImageInfo& default_image_info = GetImageInfo(GetDefaultOatIndex()); size_t& offset = default_image_info.bin_slot_sizes_[BinTypeForNativeRelocationType(image_method_type)]; if (!compile_app_image_) { native_object_relocations_.emplace(&image_method_array_, - NativeObjectRelocation { default_oat_filename_, offset, image_method_type }); + NativeObjectRelocation { GetDefaultOatIndex(), offset, image_method_type }); } size_t method_alignment = ArtMethod::Alignment(target_ptr_size_); const size_t array_size = LengthPrefixedArray<ArtMethod>::ComputeSize( @@ -1357,15 +1273,14 @@ void ImageWriter::CalculateNewObjectOffsets() { CHECK(m->IsRuntimeMethod()); DCHECK_EQ(compile_app_image_, IsInBootImage(m)) << "Trampolines should be in boot image"; if (!IsInBootImage(m)) { - AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean, default_oat_filename_); + AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean, GetDefaultOatIndex()); } } // Calculate size of the dex cache arrays slot and prepare offsets. PrepareDexCacheArraySlots(); // Calculate the sizes of the intern tables and class tables. - for (const char* oat_filename : oat_filenames_) { - ImageInfo& image_info = GetImageInfo(oat_filename); + for (ImageInfo& image_info : image_infos_) { // Calculate how big the intern table will be after being serialized. 
InternTable* const intern_table = image_info.intern_table_.get(); CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings"; @@ -1376,8 +1291,7 @@ void ImageWriter::CalculateNewObjectOffsets() { } // Calculate bin slot offsets. - for (const char* oat_filename : oat_filenames_) { - ImageInfo& image_info = GetImageInfo(oat_filename); + for (ImageInfo& image_info : image_infos_) { size_t bin_offset = image_objects_offset_begin_; for (size_t i = 0; i != kBinSize; ++i) { image_info.bin_slot_offsets_[i] = bin_offset; @@ -1397,8 +1311,7 @@ void ImageWriter::CalculateNewObjectOffsets() { // Calculate image offsets. size_t image_offset = 0; - for (const char* oat_filename : oat_filenames_) { - ImageInfo& image_info = GetImageInfo(oat_filename); + for (ImageInfo& image_info : image_infos_) { image_info.image_begin_ = global_image_begin_ + image_offset; image_info.image_offset_ = image_offset; ImageSection unused_sections[ImageHeader::kSectionCount]; @@ -1415,8 +1328,7 @@ void ImageWriter::CalculateNewObjectOffsets() { // DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_); size_t i = 0; - for (const char* oat_filename : oat_filenames_) { - ImageInfo& image_info = GetImageInfo(oat_filename); + for (ImageInfo& image_info : image_infos_) { image_info.image_roots_address_ = PointerToLowMemUInt32(GetImageAddress(image_roots[i].Get())); i++; } @@ -1425,7 +1337,7 @@ void ImageWriter::CalculateNewObjectOffsets() { for (auto& pair : native_object_relocations_) { NativeObjectRelocation& relocation = pair.second; Bin bin_type = BinTypeForNativeRelocationType(relocation.type); - ImageInfo& image_info = GetImageInfo(relocation.oat_filename); + ImageInfo& image_info = GetImageInfo(relocation.oat_index); relocation.offset += image_info.bin_slot_offsets_[bin_type]; } @@ -1474,15 +1386,11 @@ size_t ImageWriter::ImageInfo::CreateImageSections(size_t target_ptr_size, return cur_pos; } -void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { - CHECK_NE(0U, oat_loaded_size); - const char* oat_filename = oat_file_->GetLocation().c_str(); - ImageInfo& image_info = GetImageInfo(oat_filename); - const uint8_t* oat_file_begin = GetOatFileBegin(oat_filename); - const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size; - image_info.oat_data_begin_ = const_cast<uint8_t*>(oat_file_begin) + oat_data_offset; - const uint8_t* oat_data_end = image_info.oat_data_begin_ + oat_file_->Size(); - image_info.oat_size_ = oat_file_->Size(); +void ImageWriter::CreateHeader(size_t oat_index) { + ImageInfo& image_info = GetImageInfo(oat_index); + const uint8_t* oat_file_begin = image_info.oat_file_begin_; + const uint8_t* oat_file_end = oat_file_begin + image_info.oat_loaded_size_; + const uint8_t* oat_data_end = image_info.oat_data_begin_ + image_info.oat_size_; // Create the image sections. 
ImageSection sections[ImageHeader::kSectionCount]; @@ -1493,7 +1401,7 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { auto* bitmap_section = &sections[ImageHeader::kSectionImageBitmap]; *bitmap_section = ImageSection(RoundUp(image_end, kPageSize), RoundUp(bitmap_bytes, kPageSize)); if (VLOG_IS_ON(compiler)) { - LOG(INFO) << "Creating header for " << oat_filename; + LOG(INFO) << "Creating header for " << oat_filenames_[oat_index]; size_t idx = 0; for (const ImageSection& section : sections) { LOG(INFO) << static_cast<ImageHeader::ImageSections>(idx) << " " << section; @@ -1522,7 +1430,7 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { image_end, sections, image_info.image_roots_address_, - oat_file_->GetOatHeader().GetChecksum(), + image_info.oat_checksum_, PointerToLowMemUInt32(oat_file_begin), PointerToLowMemUInt32(image_info.oat_data_begin_), PointerToLowMemUInt32(oat_data_end), @@ -1541,8 +1449,8 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) { auto it = native_object_relocations_.find(method); CHECK(it != native_object_relocations_.end()) << PrettyMethod(method) << " @ " << method; - const char* oat_filename = GetOatFilename(method->GetDexCache()); - ImageInfo& image_info = GetImageInfo(oat_filename); + size_t oat_index = GetOatIndex(method->GetDexCache()); + ImageInfo& image_info = GetImageInfo(oat_index); CHECK_GE(it->second.offset, image_info.image_end_) << "ArtMethods should be after Objects"; return reinterpret_cast<ArtMethod*>(image_info.image_begin_ + it->second.offset); } @@ -1571,14 +1479,13 @@ class FixupRootVisitor : public RootVisitor { ImageWriter* const image_writer_; }; -void ImageWriter::CopyAndFixupNativeData() { - const char* oat_filename = oat_file_->GetLocation().c_str(); - ImageInfo& image_info = GetImageInfo(oat_filename); +void ImageWriter::CopyAndFixupNativeData(size_t oat_index) { + ImageInfo& image_info = GetImageInfo(oat_index); // Copy ArtFields and methods to their locations and update the array for convenience. for (auto& pair : native_object_relocations_) { NativeObjectRelocation& relocation = pair.second; // Only work with fields and methods that are in the current oat file. - if (strcmp(relocation.oat_filename, oat_filename) != 0) { + if (relocation.oat_index != oat_index) { continue; } auto* dest = image_info.image_->Begin() + relocation.offset; @@ -1624,7 +1531,7 @@ void ImageWriter::CopyAndFixupNativeData() { ArtMethod* method = image_methods_[i]; CHECK(method != nullptr); // Only place runtime methods in the image of the default oat file.
- if (method->IsRuntimeMethod() && strcmp(default_oat_filename_, oat_filename) != 0) { + if (method->IsRuntimeMethod() && oat_index != GetDefaultOatIndex()) { continue; } if (!IsInBootImage(method)) { @@ -1729,7 +1636,7 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a } UNREACHABLE(); } else { - ImageInfo& image_info = GetImageInfo(it->second.oat_filename); + ImageInfo& image_info = GetImageInfo(it->second.oat_index); elem = image_info.image_begin_ + it->second.offset; } } @@ -1742,8 +1649,8 @@ void ImageWriter::CopyAndFixupObject(Object* obj) { return; } size_t offset = GetImageOffset(obj); - const char* oat_filename = GetOatFilename(obj); - ImageInfo& image_info = GetImageInfo(oat_filename); + size_t oat_index = GetOatIndex(obj); + ImageInfo& image_info = GetImageInfo(oat_index); auto* dst = reinterpret_cast<Object*>(image_info.image_->Begin() + offset); DCHECK_LT(offset, image_info.image_end_); const auto* src = reinterpret_cast<const uint8_t*>(obj); @@ -1835,7 +1742,7 @@ T* ImageWriter::NativeLocationInImage(T* obj) { CHECK(it != native_object_relocations_.end()) << obj << " spaces " << Runtime::Current()->GetHeap()->DumpSpaces(); const NativeObjectRelocation& relocation = it->second; - ImageInfo& image_info = GetImageInfo(relocation.oat_filename); + ImageInfo& image_info = GetImageInfo(relocation.oat_index); return reinterpret_cast<T*>(image_info.image_begin_ + relocation.offset); } } @@ -1845,8 +1752,8 @@ T* ImageWriter::NativeCopyLocation(T* obj, mirror::DexCache* dex_cache) { if (obj == nullptr || IsInBootImage(obj)) { return obj; } else { - const char* oat_filename = GetOatFilenameForDexCache(dex_cache); - ImageInfo& image_info = GetImageInfo(oat_filename); + size_t oat_index = GetOatIndexForDexCache(dex_cache); + ImageInfo& image_info = GetImageInfo(oat_index); return reinterpret_cast<T*>(image_info.image_->Begin() + NativeOffsetInImage(obj)); } } @@ -2044,9 +1951,19 @@ const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method, // trampoline. // Quick entrypoint: - uint32_t quick_oat_code_offset = PointerToLowMemUInt32( - method->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_)); - const uint8_t* quick_code = GetOatAddressForOffset(quick_oat_code_offset, image_info); + const void* quick_oat_entry_point = + method->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_); + const uint8_t* quick_code; + + if (UNLIKELY(IsInBootImage(method->GetDeclaringClass()))) { + DCHECK(method->IsCopied()); + // If the code is not in the oat file corresponding to this image (e.g. 
default methods) + quick_code = reinterpret_cast<const uint8_t*>(quick_oat_entry_point); + } else { + uint32_t quick_oat_code_offset = PointerToLowMemUInt32(quick_oat_entry_point); + quick_code = GetOatAddressForOffset(quick_oat_code_offset, image_info); + } + *quick_is_interpreted = false; if (quick_code != nullptr && (!method->IsStatic() || method->IsConstructor() || method->GetDeclaringClass()->IsInitialized())) { @@ -2129,34 +2046,6 @@ void ImageWriter::CopyAndFixupMethod(ArtMethod* orig, } } -static OatHeader* GetOatHeaderFromElf(ElfFile* elf) { - uint64_t data_sec_offset; - bool has_data_sec = elf->GetSectionOffsetAndSize(".rodata", &data_sec_offset, nullptr); - if (!has_data_sec) { - return nullptr; - } - return reinterpret_cast<OatHeader*>(elf->Begin() + data_sec_offset); -} - -void ImageWriter::SetOatChecksumFromElfFile(File* elf_file) { - std::string error_msg; - std::unique_ptr<ElfFile> elf(ElfFile::Open(elf_file, - PROT_READ | PROT_WRITE, - MAP_SHARED, - &error_msg)); - if (elf.get() == nullptr) { - LOG(FATAL) << "Unable open oat file: " << error_msg; - return; - } - OatHeader* oat_header = GetOatHeaderFromElf(elf.get()); - CHECK(oat_header != nullptr); - CHECK(oat_header->IsValid()); - - ImageInfo& image_info = GetImageInfo(oat_file_->GetLocation().c_str()); - ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_info.image_->Begin()); - image_header->SetOatChecksum(oat_header->GetChecksum()); -} - size_t ImageWriter::GetBinSizeSum(ImageWriter::ImageInfo& image_info, ImageWriter::Bin up_to) const { DCHECK_LE(up_to, kBinSize); return std::accumulate(&image_info.bin_slot_sizes_[0], @@ -2187,19 +2076,6 @@ uint32_t ImageWriter::BinSlot::GetIndex() const { return lockword_ & ~kBinMask; } -uint8_t* ImageWriter::GetOatFileBegin(const char* oat_filename) const { - uintptr_t last_image_end = 0; - for (const char* oat_fn : oat_filenames_) { - const ImageInfo& image_info = GetConstImageInfo(oat_fn); - DCHECK(image_info.image_begin_ != nullptr); - uintptr_t this_end = reinterpret_cast<uintptr_t>(image_info.image_begin_) + - image_info.image_size_; - last_image_end = std::max(this_end, last_image_end); - } - const ImageInfo& image_info = GetConstImageInfo(oat_filename); - return reinterpret_cast<uint8_t*>(last_image_end) + image_info.oat_offset_; -} - ImageWriter::Bin ImageWriter::BinTypeForNativeRelocationType(NativeObjectRelocationType type) { switch (type) { case kNativeObjectRelocationTypeArtField: @@ -2217,91 +2093,110 @@ ImageWriter::Bin ImageWriter::BinTypeForNativeRelocationType(NativeObjectRelocat UNREACHABLE(); } -const char* ImageWriter::GetOatFilename(mirror::Object* obj) const { +size_t ImageWriter::GetOatIndex(mirror::Object* obj) const { if (compile_app_image_) { - return default_oat_filename_; + return GetDefaultOatIndex(); } else { - return GetOatFilenameForDexCache(obj->IsDexCache() ? obj->AsDexCache() : - obj->IsClass() ? obj->AsClass()->GetDexCache() : obj->GetClass()->GetDexCache()); + mirror::DexCache* dex_cache = + obj->IsDexCache() ? obj->AsDexCache() + : obj->IsClass() ? 
obj->AsClass()->GetDexCache() + : obj->GetClass()->GetDexCache(); + return GetOatIndexForDexCache(dex_cache); } } -const char* ImageWriter::GetOatFilenameForDexCache(mirror::DexCache* dex_cache) const { - if (compile_app_image_ || dex_cache == nullptr) { - return default_oat_filename_; +size_t ImageWriter::GetOatIndexForDexFile(const DexFile* dex_file) const { + if (compile_app_image_) { + return GetDefaultOatIndex(); } else { - auto it = dex_file_oat_filename_map_.find(dex_cache->GetDexFile()); - DCHECK(it != dex_file_oat_filename_map_.end()) << dex_cache->GetDexFile()->GetLocation(); + auto it = dex_file_oat_index_map_.find(dex_file); + DCHECK(it != dex_file_oat_index_map_.end()) << dex_file->GetLocation(); return it->second; } } -ImageWriter::ImageInfo& ImageWriter::GetImageInfo(const char* oat_filename) { - auto it = image_info_map_.find(oat_filename); - DCHECK(it != image_info_map_.end()); - return it->second; +size_t ImageWriter::GetOatIndexForDexCache(mirror::DexCache* dex_cache) const { + if (dex_cache == nullptr) { + return GetDefaultOatIndex(); + } else { + return GetOatIndexForDexFile(dex_cache->GetDexFile()); + } } -const ImageWriter::ImageInfo& ImageWriter::GetConstImageInfo(const char* oat_filename) const { - auto it = image_info_map_.find(oat_filename); - DCHECK(it != image_info_map_.end()); - return it->second; -} +void ImageWriter::UpdateOatFileLayout(size_t oat_index, + size_t oat_loaded_size, + size_t oat_data_offset, + size_t oat_data_size) { + const uint8_t* images_end = image_infos_.back().image_begin_ + image_infos_.back().image_size_; + for (const ImageInfo& info : image_infos_) { + DCHECK_LE(info.image_begin_ + info.image_size_, images_end); + } + DCHECK(images_end != nullptr); // Image space must be ready. -const ImageWriter::ImageInfo& ImageWriter::GetImageInfo(size_t index) const { - DCHECK_LT(index, oat_filenames_.size()); - return GetConstImageInfo(oat_filenames_[index]); -} + ImageInfo& cur_image_info = GetImageInfo(oat_index); + cur_image_info.oat_file_begin_ = images_end + cur_image_info.oat_offset_; + cur_image_info.oat_loaded_size_ = oat_loaded_size; + cur_image_info.oat_data_begin_ = cur_image_info.oat_file_begin_ + oat_data_offset; + cur_image_info.oat_size_ = oat_data_size; -void ImageWriter::UpdateOatFile(File* oat_file, const char* oat_filename) { - DCHECK(oat_file != nullptr); if (compile_app_image_) { CHECK_EQ(oat_filenames_.size(), 1u) << "App image should have no next image."; return; } - ImageInfo& cur_image_info = GetImageInfo(oat_filename); // Update the oat_offset of the next image info. - auto it = std::find(oat_filenames_.begin(), oat_filenames_.end(), oat_filename); - DCHECK(it != oat_filenames_.end()); - - it++; - if (it != oat_filenames_.end()) { - size_t oat_loaded_size = 0; - size_t oat_data_offset = 0; - ElfWriter::GetOatElfInformation(oat_file, &oat_loaded_size, &oat_data_offset); + if (oat_index + 1u != oat_filenames_.size()) { // There is a following one. - ImageInfo& next_image_info = GetImageInfo(*it); + ImageInfo& next_image_info = GetImageInfo(oat_index + 1u); next_image_info.oat_offset_ = cur_image_info.oat_offset_ + oat_loaded_size; } } +void ImageWriter::UpdateOatFileHeader(size_t oat_index, const OatHeader& oat_header) { + ImageInfo& cur_image_info = GetImageInfo(oat_index); + cur_image_info.oat_checksum_ = oat_header.GetChecksum(); + + if (oat_index == GetDefaultOatIndex()) { + // Primary oat file, read the trampolines. 
+ cur_image_info.oat_address_offsets_[kOatAddressInterpreterToInterpreterBridge] = + oat_header.GetInterpreterToInterpreterBridgeOffset(); + cur_image_info.oat_address_offsets_[kOatAddressInterpreterToCompiledCodeBridge] = + oat_header.GetInterpreterToCompiledCodeBridgeOffset(); + cur_image_info.oat_address_offsets_[kOatAddressJNIDlsymLookup] = + oat_header.GetJniDlsymLookupOffset(); + cur_image_info.oat_address_offsets_[kOatAddressQuickGenericJNITrampoline] = + oat_header.GetQuickGenericJniTrampolineOffset(); + cur_image_info.oat_address_offsets_[kOatAddressQuickIMTConflictTrampoline] = + oat_header.GetQuickImtConflictTrampolineOffset(); + cur_image_info.oat_address_offsets_[kOatAddressQuickResolutionTrampoline] = + oat_header.GetQuickResolutionTrampolineOffset(); + cur_image_info.oat_address_offsets_[kOatAddressQuickToInterpreterBridge] = + oat_header.GetQuickToInterpreterBridgeOffset(); + } +} + ImageWriter::ImageWriter( const CompilerDriver& compiler_driver, uintptr_t image_begin, bool compile_pic, bool compile_app_image, ImageHeader::StorageMode image_storage_mode, - const std::vector<const char*> oat_filenames, - const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map) + const std::vector<const char*>& oat_filenames, + const std::unordered_map<const DexFile*, size_t>& dex_file_oat_index_map) : compiler_driver_(compiler_driver), global_image_begin_(reinterpret_cast<uint8_t*>(image_begin)), image_objects_offset_begin_(0), - oat_file_(nullptr), compile_pic_(compile_pic), compile_app_image_(compile_app_image), target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), + image_infos_(oat_filenames.size()), image_method_array_(ImageHeader::kImageMethodsCount), dirty_methods_(0u), clean_methods_(0u), image_storage_mode_(image_storage_mode), - dex_file_oat_filename_map_(dex_file_oat_filename_map), oat_filenames_(oat_filenames), - default_oat_filename_(oat_filenames[0]) { + dex_file_oat_index_map_(dex_file_oat_index_map) { CHECK_NE(image_begin, 0U); - for (const char* oat_filename : oat_filenames) { - image_info_map_.emplace(oat_filename, ImageInfo()); - } std::fill_n(image_methods_, arraysize(image_methods_), nullptr); CHECK_EQ(compile_app_image, !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty()) << "Compiling a boot image should occur iff there are no boot image spaces loaded"; diff --git a/compiler/image_writer.h b/compiler/image_writer.h index ee204c5081..dba9dd71fc 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -27,6 +27,7 @@ #include <ostream> #include "base/bit_utils.h" +#include "base/dchecked_vector.h" #include "base/length_prefixed_array.h" #include "base/macros.h" #include "driver/compiler_driver.h" @@ -59,20 +60,19 @@ class ImageWriter FINAL { bool compile_pic, bool compile_app_image, ImageHeader::StorageMode image_storage_mode, - const std::vector<const char*> oat_filenames, - const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map); + const std::vector<const char*>& oat_filenames, + const std::unordered_map<const DexFile*, size_t>& dex_file_oat_index_map); bool PrepareImageAddressSpace(); bool IsImageAddressSpaceReady() const { - bool ready = !image_info_map_.empty(); - for (auto& pair : image_info_map_) { - const ImageInfo& image_info = pair.second; + DCHECK(!image_infos_.empty()); + for (const ImageInfo& image_info : image_infos_) { if (image_info.image_roots_address_ == 0u) { return false; } } - return ready; + return true; } template <typename T> @@ -80,8 +80,8 @@ 
class ImageWriter FINAL { if (object == nullptr || IsInBootImage(object)) { return object; } else { - const char* oat_filename = GetOatFilename(object); - const ImageInfo& image_info = GetConstImageInfo(oat_filename); + size_t oat_index = GetOatIndex(object); + const ImageInfo& image_info = GetImageInfo(oat_index); return reinterpret_cast<T*>(image_info.image_begin_ + GetImageOffset(object)); } } @@ -91,9 +91,9 @@ class ImageWriter FINAL { template <typename PtrType> PtrType GetDexCacheArrayElementImageAddress(const DexFile* dex_file, uint32_t offset) const SHARED_REQUIRES(Locks::mutator_lock_) { - auto oat_it = dex_file_oat_filename_map_.find(dex_file); - DCHECK(oat_it != dex_file_oat_filename_map_.end()); - const ImageInfo& image_info = GetConstImageInfo(oat_it->second); + auto oat_it = dex_file_oat_index_map_.find(dex_file); + DCHECK(oat_it != dex_file_oat_index_map_.end()); + const ImageInfo& image_info = GetImageInfo(oat_it->second); auto it = image_info.dex_cache_array_starts_.find(dex_file); DCHECK(it != image_info.dex_cache_array_starts_.end()); return reinterpret_cast<PtrType>( @@ -101,7 +101,13 @@ class ImageWriter FINAL { it->second + offset); } - uint8_t* GetOatFileBegin(const char* oat_filename) const; + size_t GetOatFileOffset(size_t oat_index) const { + return GetImageInfo(oat_index).oat_offset_; + } + + const uint8_t* GetOatFileBegin(size_t oat_index) const { + return GetImageInfo(oat_index).oat_file_begin_; + } // If image_fd is not kInvalidFd, then we use that for the image file. Otherwise we open // the names in image_filenames. @@ -109,21 +115,32 @@ class ImageWriter FINAL { // the names in oat_filenames. bool Write(int image_fd, const std::vector<const char*>& image_filenames, - int oat_fd, - const std::vector<const char*>& oat_filenames, - const std::string& oat_location) + const std::vector<const char*>& oat_filenames) REQUIRES(!Locks::mutator_lock_); - uintptr_t GetOatDataBegin(const char* oat_filename) { - return reinterpret_cast<uintptr_t>(GetImageInfo(oat_filename).oat_data_begin_); + uintptr_t GetOatDataBegin(size_t oat_index) { + return reinterpret_cast<uintptr_t>(GetImageInfo(oat_index).oat_data_begin_); } - const char* GetOatFilenameForDexCache(mirror::DexCache* dex_cache) const + // Get the index of the oat file containing the dex file. + // + // This "oat_index" is used to retrieve information about the memory layout + // of the oat file and its associated image file, needed for link-time patching + // of references to the image or across oat files. + size_t GetOatIndexForDexFile(const DexFile* dex_file) const; + + // Get the index of the oat file containing the dex file served by the dex cache. + size_t GetOatIndexForDexCache(mirror::DexCache* dex_cache) const SHARED_REQUIRES(Locks::mutator_lock_); - // Update the oat size for the given oat file. This will make the oat_offset for the next oat - // file valid. - void UpdateOatFile(File* oat_file, const char* oat_filename); + // Update the oat layout for the given oat file. + // This will make the oat_offset for the next oat file valid. + void UpdateOatFileLayout(size_t oat_index, + size_t oat_loaded_size, + size_t oat_data_offset, + size_t oat_data_size); + // Update information about the oat header, i.e. checksum and trampoline offsets. + void UpdateOatFileHeader(size_t oat_index, const OatHeader& oat_header); private: bool AllocMemory(); @@ -247,10 +264,13 @@ class ImageWriter FINAL { // Offset of the oat file for this image from start of oat files. 
This is // valid when the previous oat file has been written. size_t oat_offset_ = 0; - // Start of oatdata in the corresponding oat file. This is - // valid when the images have been layed out. - uint8_t* oat_data_begin_ = nullptr; + // Layout of the loaded ELF file containing the oat file, valid after UpdateOatFileLayout(). + const uint8_t* oat_file_begin_ = nullptr; + size_t oat_loaded_size_ = 0; + const uint8_t* oat_data_begin_ = nullptr; size_t oat_size_ = 0; // Size of the corresponding oat data. + // The oat header checksum, valid after UpdateOatFileHeader(). + uint32_t oat_checksum_ = 0u; // Image bitmap which lets us know where the objects inside of the image reside. std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_; @@ -310,8 +330,8 @@ class ImageWriter FINAL { mirror::Object* GetLocalAddress(mirror::Object* object) const SHARED_REQUIRES(Locks::mutator_lock_) { size_t offset = GetImageOffset(object); - const char* oat_filename = GetOatFilename(object); - const ImageInfo& image_info = GetConstImageInfo(oat_filename); + size_t oat_index = GetOatIndex(object); + const ImageInfo& image_info = GetImageInfo(oat_index); uint8_t* dst = image_info.image_->Begin() + offset; return reinterpret_cast<mirror::Object*>(dst); } @@ -348,9 +368,9 @@ class ImageWriter FINAL { // Lays out where the image objects will be at runtime. void CalculateNewObjectOffsets() SHARED_REQUIRES(Locks::mutator_lock_); - void CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) + void CreateHeader(size_t oat_index) SHARED_REQUIRES(Locks::mutator_lock_); - mirror::ObjectArray<mirror::Object>* CreateImageRoots(const char* oat_filename) const + mirror::ObjectArray<mirror::Object>* CreateImageRoots(size_t oat_index) const SHARED_REQUIRES(Locks::mutator_lock_); void CalculateObjectBinSlots(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_); @@ -367,7 +387,7 @@ class ImageWriter FINAL { SHARED_REQUIRES(Locks::mutator_lock_); // Creates the contiguous image in memory and adjusts pointers. - void CopyAndFixupNativeData() SHARED_REQUIRES(Locks::mutator_lock_); + void CopyAndFixupNativeData(size_t oat_index) SHARED_REQUIRES(Locks::mutator_lock_); void CopyAndFixupObjects() SHARED_REQUIRES(Locks::mutator_lock_); static void CopyAndFixupObjectsCallback(mirror::Object* obj, void* arg) SHARED_REQUIRES(Locks::mutator_lock_); @@ -392,9 +412,6 @@ class ImageWriter FINAL { bool* quick_is_interpreted) SHARED_REQUIRES(Locks::mutator_lock_); - // Patches references in OatFile to expect runtime addresses. - void SetOatChecksumFromElfFile(File* elf_file); - // Calculate the sum total of the bin slot sizes in [0, up_to). Defaults to all bins. size_t GetBinSizeSum(ImageInfo& image_info, Bin up_to = kBinSize) const; @@ -404,7 +421,7 @@ class ImageWriter FINAL { // Assign the offset for an ArtMethod. void AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type, - const char* oat_filename) + size_t oat_index) SHARED_REQUIRES(Locks::mutator_lock_); // Return true if klass is loaded by the boot class loader but not in the boot image. @@ -443,15 +460,21 @@ class ImageWriter FINAL { // Return true if ptr is within the boot oat file. bool IsInBootOatFile(const void* ptr) const; - const char* GetOatFilename(mirror::Object* object) const SHARED_REQUIRES(Locks::mutator_lock_); + // Get the index of the oat file associated with the object. 
+ size_t GetOatIndex(mirror::Object* object) const SHARED_REQUIRES(Locks::mutator_lock_); - const char* GetDefaultOatFilename() const { - return default_oat_filename_; + // The oat index for shared data in multi-image and all data in single-image compilation. + size_t GetDefaultOatIndex() const { + return 0u; } - ImageInfo& GetImageInfo(const char* oat_filename); - const ImageInfo& GetConstImageInfo(const char* oat_filename) const; - const ImageInfo& GetImageInfo(size_t index) const; + ImageInfo& GetImageInfo(size_t oat_index) { + return image_infos_[oat_index]; + } + + const ImageInfo& GetImageInfo(size_t oat_index) const { + return image_infos_[oat_index]; + } // Find an already strong interned string in the other images or in the boot image. Used to // remove duplicates in the multi image and app image case. @@ -465,9 +488,6 @@ class ImageWriter FINAL { // Offset from image_begin_ to where the first object is in image_. size_t image_objects_offset_begin_; - // oat file with code for this image - OatFile* oat_file_; - // Pointer arrays that need to be updated. Since these are only some int and long arrays, we need // to keep track. These include vtable arrays, iftable arrays, and dex caches. std::unordered_map<mirror::PointerArray*, Bin> pointer_arrays_; @@ -483,14 +503,14 @@ class ImageWriter FINAL { // Size of pointers on the target architecture. size_t target_ptr_size_; - // Mapping of oat filename to image data. - std::unordered_map<std::string, ImageInfo> image_info_map_; + // Image data indexed by the oat file index. + dchecked_vector<ImageInfo> image_infos_; // ArtField, ArtMethod relocating map. These are allocated as array of structs but we want to // have one entry per art field for convenience. ArtFields are placed right after the end of the // image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields. struct NativeObjectRelocation { - const char* oat_filename; + size_t oat_index; uintptr_t offset; NativeObjectRelocationType type; @@ -522,10 +542,11 @@ class ImageWriter FINAL { // Which mode the image is stored as, see image.h const ImageHeader::StorageMode image_storage_mode_; - // Map of dex files to the oat filenames that they were compiled into. - const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map_; - const std::vector<const char*> oat_filenames_; - const char* default_oat_filename_; + // The file names of oat files. + const std::vector<const char*>& oat_filenames_; + + // Map of dex files to the indexes of oat files that they were compiled into. + const std::unordered_map<const DexFile*, size_t>& dex_file_oat_index_map_; friend class ContainsBootClassLoaderNonImageClassVisitor; friend class FixupClassVisitor; diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 909d6822a8..23601c39e4 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -163,7 +163,6 @@ JitCompiler::JitCompiler() : total_time_(0) { /* dump_passes */ false, cumulative_logger_.get(), /* swap_fd */ -1, - /* dex to oat map */ nullptr, /* profile_compilation_info */ nullptr)); // Disable dedupe so we can remove compiled methods. 
compiler_driver_->SetDedupeEnabled(false); diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc index 73b0facf4b..682b008219 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.cc +++ b/compiler/linker/arm/relative_patcher_arm_base.cc @@ -40,6 +40,11 @@ uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { MethodReference(nullptr, 0u), aligned_offset); if (needs_thunk) { + // All remaining patches will be handled by this thunk. + DCHECK(!unprocessed_patches_.empty()); + DCHECK_LE(aligned_offset - unprocessed_patches_.front().second, max_positive_displacement_); + unprocessed_patches_.clear(); + thunk_locations_.push_back(aligned_offset); offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_); } diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h index f80dd962ce..25fd35e1d6 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.h +++ b/compiler/linker/arm/relative_patcher_arm_base.h @@ -27,18 +27,23 @@ namespace linker { class ArmBaseRelativePatcher : public RelativePatcher { public: - uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, + uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, MethodReference method_ref) OVERRIDE; uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; protected: ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, - InstructionSet instruction_set, std::vector<uint8_t> thunk_code, - uint32_t max_positive_displacement, uint32_t max_negative_displacement); + InstructionSet instruction_set, + std::vector<uint8_t> thunk_code, + uint32_t max_positive_displacement, + uint32_t max_negative_displacement); - uint32_t ReserveSpaceInternal(uint32_t offset, const CompiledMethod* compiled_method, - MethodReference method_ref, uint32_t max_extra_space); + uint32_t ReserveSpaceInternal(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref, + uint32_t max_extra_space); uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset); private: diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index 5f4f760c14..c090dffc55 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -28,8 +28,10 @@ Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* prov kMaxPositiveDisplacement, kMaxNegativeDisplacement) { } -void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, - uint32_t patch_offset, uint32_t target_offset) { +void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, + uint32_t literal_offset, + uint32_t patch_offset, + uint32_t target_offset) { DCHECK_LE(literal_offset + 4u, code->size()); DCHECK_EQ(literal_offset & 1u, 0u); DCHECK_EQ(patch_offset & 1u, 0u); diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h index 006d6fb9d5..0d903c0b41 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -26,10 +26,14 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { public: explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); - void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, - uint32_t 
patch_offset, uint32_t target_offset) OVERRIDE; - void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, - uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchCall(std::vector<uint8_t>* code, + uint32_t literal_offset, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE; + void PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE; private: static std::vector<uint8_t> CompileThunkCode(); diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 3d4c2184f1..a81c85c707 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -131,8 +131,10 @@ uint32_t Arm64RelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { return ArmBaseRelativePatcher::WriteThunks(out, offset); } -void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, - uint32_t patch_offset, uint32_t target_offset) { +void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code, + uint32_t literal_offset, + uint32_t patch_offset, + uint32_t target_offset) { DCHECK_LE(literal_offset + 4u, code->size()); DCHECK_EQ(literal_offset & 3u, 0u); DCHECK_EQ(patch_offset & 3u, 0u); diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h index 2d07e75c85..f9b76e6250 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.h +++ b/compiler/linker/arm64/relative_patcher_arm64.h @@ -28,14 +28,19 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { Arm64RelativePatcher(RelativePatcherTargetProvider* provider, const Arm64InstructionSetFeatures* features); - uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, + uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, MethodReference method_ref) OVERRIDE; uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, - uint32_t patch_offset, uint32_t target_offset) OVERRIDE; - void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, - uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchCall(std::vector<uint8_t>* code, + uint32_t literal_offset, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE; + void PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE; private: static std::vector<uint8_t> CompileThunkCode(); diff --git a/compiler/linker/multi_oat_relative_patcher.cc b/compiler/linker/multi_oat_relative_patcher.cc new file mode 100644 index 0000000000..e9e242b658 --- /dev/null +++ b/compiler/linker/multi_oat_relative_patcher.cc @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "multi_oat_relative_patcher.h" + +#include "globals.h" +#include "base/bit_utils.h" +#include "base/logging.h" + +namespace art { +namespace linker { + +MultiOatRelativePatcher::MultiOatRelativePatcher(InstructionSet instruction_set, + const InstructionSetFeatures* features) + : method_offset_map_(), + relative_patcher_( + linker::RelativePatcher::Create(instruction_set, features, &method_offset_map_)), + adjustment_(0u), + instruction_set_(instruction_set), + start_size_code_alignment_(0u), + start_size_relative_call_thunks_(0u), + start_size_misc_thunks_(0u) { +} + +void MultiOatRelativePatcher::StartOatFile(uint32_t adjustment) { + DCHECK_ALIGNED(adjustment, kPageSize); + adjustment_ = adjustment; + + start_size_code_alignment_ = relative_patcher_->CodeAlignmentSize(); + start_size_relative_call_thunks_ = relative_patcher_->RelativeCallThunksSize(); + start_size_misc_thunks_ = relative_patcher_->MiscThunksSize(); +} + +uint32_t MultiOatRelativePatcher::CodeAlignmentSize() const { + DCHECK_GE(relative_patcher_->CodeAlignmentSize(), start_size_code_alignment_); + return relative_patcher_->CodeAlignmentSize() - start_size_code_alignment_; +} + +uint32_t MultiOatRelativePatcher::RelativeCallThunksSize() const { + DCHECK_GE(relative_patcher_->RelativeCallThunksSize(), start_size_relative_call_thunks_); + return relative_patcher_->RelativeCallThunksSize() - start_size_relative_call_thunks_; +} + +uint32_t MultiOatRelativePatcher::MiscThunksSize() const { + DCHECK_GE(relative_patcher_->MiscThunksSize(), start_size_misc_thunks_); + return relative_patcher_->MiscThunksSize() - start_size_misc_thunks_; +} + +std::pair<bool, uint32_t> MultiOatRelativePatcher::MethodOffsetMap::FindMethodOffset( + MethodReference ref) { + auto it = map.find(ref); + if (it == map.end()) { + return std::pair<bool, uint32_t>(false, 0u); + } else { + return std::pair<bool, uint32_t>(true, it->second); + } +} +} // namespace linker +} // namespace art diff --git a/compiler/linker/multi_oat_relative_patcher.h b/compiler/linker/multi_oat_relative_patcher.h new file mode 100644 index 0000000000..1727d529fc --- /dev/null +++ b/compiler/linker/multi_oat_relative_patcher.h @@ -0,0 +1,146 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_MULTI_OAT_RELATIVE_PATCHER_H_ +#define ART_COMPILER_LINKER_MULTI_OAT_RELATIVE_PATCHER_H_ + +#include "arch/instruction_set.h" +#include "method_reference.h" +#include "relative_patcher.h" +#include "safe_map.h" + +namespace art { + +class CompiledMethod; +class LinkerPatch; +class InstructionSetFeatures; + +namespace linker { + +// MultiOatRelativePatcher is a helper class for handling patching across +// any number of oat files. 
It provides storage for method code offsets + // and wraps RelativePatcher calls, adjusting relative offsets according + // to the value set by StartOatFile(). +class MultiOatRelativePatcher FINAL { + public: + using const_iterator = + SafeMap<MethodReference, uint32_t, MethodReferenceComparator>::const_iterator; + + MultiOatRelativePatcher(InstructionSet instruction_set, const InstructionSetFeatures* features); + + // Mark the start of a new oat file (for statistics retrieval) and set the + // adjustment for a new oat file to apply to all relative offsets that are + // passed to the MultiOatRelativePatcher. + // + // The adjustment should be the global offset of the base from which relative + // offsets are calculated, such as the start of .rodata for the current oat file. + // It must never point directly to a method's code to avoid relative offsets + // with value 0 because this value is used as a missing offset indication in + // GetOffset() and an error indication in WriteThunks(). Additionally, it must be + // page-aligned, so that it does not skew alignment calculations, say arm64 ADRP. + void StartOatFile(uint32_t adjustment); + + // Get relative offset. Returns 0 when the offset has not been set yet. + uint32_t GetOffset(MethodReference method_ref) { + auto it = method_offset_map_.map.find(method_ref); + return (it != method_offset_map_.map.end()) ? it->second - adjustment_ : 0u; + } + + // Set the offset. + void SetOffset(MethodReference method_ref, uint32_t offset) { + method_offset_map_.map.Put(method_ref, offset + adjustment_); + } + + // Wrapper around RelativePatcher::ReserveSpace(), doing offset adjustment. + uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref) { + offset += adjustment_; + offset = relative_patcher_->ReserveSpace(offset, compiled_method, method_ref); + offset -= adjustment_; + return offset; + } + + // Wrapper around RelativePatcher::ReserveSpaceEnd(), doing offset adjustment. + uint32_t ReserveSpaceEnd(uint32_t offset) { + offset += adjustment_; + offset = relative_patcher_->ReserveSpaceEnd(offset); + offset -= adjustment_; + return offset; + } + + // Wrapper around RelativePatcher::WriteThunks(), doing offset adjustment. + uint32_t WriteThunks(OutputStream* out, uint32_t offset) { + offset += adjustment_; + offset = relative_patcher_->WriteThunks(out, offset); + if (offset != 0u) { // 0u indicates write error. + offset -= adjustment_; + } + return offset; + } + + // Wrapper around RelativePatcher::PatchCall(), doing offset adjustment. + void PatchCall(std::vector<uint8_t>* code, + uint32_t literal_offset, + uint32_t patch_offset, + uint32_t target_offset) { + patch_offset += adjustment_; + target_offset += adjustment_; + relative_patcher_->PatchCall(code, literal_offset, patch_offset, target_offset); + } + + // Wrapper around RelativePatcher::PatchDexCacheReference(), doing offset adjustment. + void PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + patch_offset += adjustment_; + target_offset += adjustment_; + relative_patcher_->PatchDexCacheReference(code, patch, patch_offset, target_offset); + } + + // Wrappers around RelativePatcher for statistics retrieval. + uint32_t CodeAlignmentSize() const; + uint32_t RelativeCallThunksSize() const; + uint32_t MiscThunksSize() const; + + private: + // Map method reference to assigned offset. + // Wrap the map in a class implementing linker::RelativePatcherTargetProvider. 
+ class MethodOffsetMap : public linker::RelativePatcherTargetProvider { + public: + std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE; + SafeMap<MethodReference, uint32_t, MethodReferenceComparator> map; + }; + + MethodOffsetMap method_offset_map_; + std::unique_ptr<RelativePatcher> relative_patcher_; + uint32_t adjustment_; + InstructionSet instruction_set_; + + uint32_t start_size_code_alignment_; + uint32_t start_size_relative_call_thunks_; + uint32_t start_size_misc_thunks_; + + friend class MultiOatRelativePatcherTest; + + DISALLOW_COPY_AND_ASSIGN(MultiOatRelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_MULTI_OAT_RELATIVE_PATCHER_H_ diff --git a/compiler/linker/multi_oat_relative_patcher_test.cc b/compiler/linker/multi_oat_relative_patcher_test.cc new file mode 100644 index 0000000000..792cdfe8e9 --- /dev/null +++ b/compiler/linker/multi_oat_relative_patcher_test.cc @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compiled_method.h" +#include "gtest/gtest.h" +#include "multi_oat_relative_patcher.h" +#include "vector_output_stream.h" + +namespace art { +namespace linker { + +static const MethodReference kNullMethodRef = MethodReference(nullptr, 0u); + +static bool EqualRef(MethodReference lhs, MethodReference rhs) { + return lhs.dex_file == rhs.dex_file && lhs.dex_method_index == rhs.dex_method_index; +} + +class MultiOatRelativePatcherTest : public testing::Test { + protected: + class MockPatcher : public RelativePatcher { + public: + MockPatcher() { } + + uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, + MethodReference method_ref) OVERRIDE { + last_reserve_offset_ = offset; + last_reserve_method_ = method_ref; + offset += next_reserve_adjustment_; + next_reserve_adjustment_ = 0u; + return offset; + } + + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE { + last_reserve_offset_ = offset; + last_reserve_method_ = kNullMethodRef; + offset += next_reserve_adjustment_; + next_reserve_adjustment_ = 0u; + return offset; + } + + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE { + last_write_offset_ = offset; + if (next_write_alignment_ != 0u) { + offset += next_write_alignment_; + bool success = WriteCodeAlignment(out, next_write_alignment_); + CHECK(success); + next_write_alignment_ = 0u; + } + if (next_write_call_thunk_ != 0u) { + offset += next_write_call_thunk_; + std::vector<uint8_t> thunk(next_write_call_thunk_, 'c'); + bool success = WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk)); + CHECK(success); + next_write_call_thunk_ = 0u; + } + if (next_write_misc_thunk_ != 0u) { + offset += next_write_misc_thunk_; + std::vector<uint8_t> thunk(next_write_misc_thunk_, 'm'); + bool success = WriteMiscThunk(out, ArrayRef<const uint8_t>(thunk)); + CHECK(success); + next_write_misc_thunk_ = 0u; + } + return offset; + } + + void 
PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + uint32_t literal_offset, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE { + last_literal_offset_ = literal_offset; + last_patch_offset_ = patch_offset; + last_target_offset_ = target_offset; + } + + void PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE { + last_literal_offset_ = patch.LiteralOffset(); + last_patch_offset_ = patch_offset; + last_target_offset_ = target_offset; + } + + uint32_t last_reserve_offset_ = 0u; + MethodReference last_reserve_method_ = kNullMethodRef; + uint32_t next_reserve_adjustment_ = 0u; + + uint32_t last_write_offset_ = 0u; + uint32_t next_write_alignment_ = 0u; + uint32_t next_write_call_thunk_ = 0u; + uint32_t next_write_misc_thunk_ = 0u; + + uint32_t last_literal_offset_ = 0u; + uint32_t last_patch_offset_ = 0u; + uint32_t last_target_offset_ = 0u; + }; + + MultiOatRelativePatcherTest() + : instruction_set_features_(InstructionSetFeatures::FromCppDefines()), + patcher_(kRuntimeISA, instruction_set_features_.get()) { + std::unique_ptr<MockPatcher> mock(new MockPatcher()); + mock_ = mock.get(); + patcher_.relative_patcher_ = std::move(mock); + } + + std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; + MultiOatRelativePatcher patcher_; + MockPatcher* mock_; +}; + +TEST_F(MultiOatRelativePatcherTest, Offsets) { + const DexFile* dex_file = reinterpret_cast<const DexFile*>(1); + MethodReference ref1(dex_file, 1u); + MethodReference ref2(dex_file, 2u); + EXPECT_EQ(0u, patcher_.GetOffset(ref1)); + EXPECT_EQ(0u, patcher_.GetOffset(ref2)); + + uint32_t adjustment1 = 0x1000; + patcher_.StartOatFile(adjustment1); + EXPECT_EQ(0u, patcher_.GetOffset(ref1)); + EXPECT_EQ(0u, patcher_.GetOffset(ref2)); + + uint32_t off1 = 0x1234; + patcher_.SetOffset(ref1, off1); + EXPECT_EQ(off1, patcher_.GetOffset(ref1)); + EXPECT_EQ(0u, patcher_.GetOffset(ref2)); + + uint32_t adjustment2 = 0x30000; + patcher_.StartOatFile(adjustment2); + EXPECT_EQ(off1 + adjustment1 - adjustment2, patcher_.GetOffset(ref1)); + EXPECT_EQ(0u, patcher_.GetOffset(ref2)); + + uint32_t off2 = 0x4321; + patcher_.SetOffset(ref2, off2); + EXPECT_EQ(off1 + adjustment1 - adjustment2, patcher_.GetOffset(ref1)); + EXPECT_EQ(off2, patcher_.GetOffset(ref2)); + + uint32_t adjustment3 = 0x78000; + patcher_.StartOatFile(adjustment3); + EXPECT_EQ(off1 + adjustment1 - adjustment3, patcher_.GetOffset(ref1)); + EXPECT_EQ(off2 + adjustment2 - adjustment3, patcher_.GetOffset(ref2)); +} + +TEST_F(MultiOatRelativePatcherTest, OffsetsInReserve) { + const DexFile* dex_file = reinterpret_cast<const DexFile*>(1); + MethodReference ref1(dex_file, 1u); + MethodReference ref2(dex_file, 2u); + MethodReference ref3(dex_file, 3u); + const CompiledMethod* method = reinterpret_cast<const CompiledMethod*>(-1); + + uint32_t adjustment1 = 0x1000; + patcher_.StartOatFile(adjustment1); + + uint32_t method1_offset = 0x100; + uint32_t method1_offset_check = patcher_.ReserveSpace(method1_offset, method, ref1); + ASSERT_EQ(adjustment1 + method1_offset, mock_->last_reserve_offset_); + ASSERT_TRUE(EqualRef(ref1, mock_->last_reserve_method_)); + ASSERT_EQ(method1_offset, method1_offset_check); + + uint32_t method2_offset = 0x1230; + uint32_t method2_reserve_adjustment = 0x10; + mock_->next_reserve_adjustment_ = method2_reserve_adjustment; + uint32_t method2_offset_adjusted = patcher_.ReserveSpace(method2_offset, method, ref2); + ASSERT_EQ(adjustment1 + 
method2_offset, mock_->last_reserve_offset_); + ASSERT_TRUE(EqualRef(ref2, mock_->last_reserve_method_)); + ASSERT_EQ(method2_offset + method2_reserve_adjustment, method2_offset_adjusted); + + uint32_t end1_offset = 0x4320; + uint32_t end1_offset_check = patcher_.ReserveSpaceEnd(end1_offset); + ASSERT_EQ(adjustment1 + end1_offset, mock_->last_reserve_offset_); + ASSERT_TRUE(EqualRef(kNullMethodRef, mock_->last_reserve_method_)); + ASSERT_EQ(end1_offset, end1_offset_check); + + uint32_t adjustment2 = 0xd000; + patcher_.StartOatFile(adjustment2); + + uint32_t method3_offset = 0xf00; + uint32_t method3_offset_check = patcher_.ReserveSpace(method3_offset, method, ref3); + ASSERT_EQ(adjustment2 + method3_offset, mock_->last_reserve_offset_); + ASSERT_TRUE(EqualRef(ref3, mock_->last_reserve_method_)); + ASSERT_EQ(method3_offset, method3_offset_check); + + uint32_t end2_offset = 0x2400; + uint32_t end2_reserve_adjustment = 0x20; + mock_->next_reserve_adjustment_ = end2_reserve_adjustment; + uint32_t end2_offset_adjusted = patcher_.ReserveSpaceEnd(end2_offset); + ASSERT_EQ(adjustment2 + end2_offset, mock_->last_reserve_offset_); + ASSERT_TRUE(EqualRef(kNullMethodRef, mock_->last_reserve_method_)); + ASSERT_EQ(end2_offset + end2_reserve_adjustment, end2_offset_adjusted); +} + +TEST_F(MultiOatRelativePatcherTest, Write) { + std::vector<uint8_t> output; + VectorOutputStream vos("output", &output); + + uint32_t adjustment1 = 0x1000; + patcher_.StartOatFile(adjustment1); + + uint32_t method1_offset = 0x100; + uint32_t method1_offset_check = patcher_.WriteThunks(&vos, method1_offset); + ASSERT_EQ(adjustment1 + method1_offset, mock_->last_write_offset_); + ASSERT_EQ(method1_offset, method1_offset_check); + vos.WriteFully("1", 1); // Mark method1. + + uint32_t method2_offset = 0x1230; + uint32_t method2_alignment_size = 1; + uint32_t method2_call_thunk_size = 2; + mock_->next_write_alignment_ = method2_alignment_size; + mock_->next_write_call_thunk_ = method2_call_thunk_size; + uint32_t method2_offset_adjusted = patcher_.WriteThunks(&vos, method2_offset); + ASSERT_EQ(adjustment1 + method2_offset, mock_->last_write_offset_); + ASSERT_EQ(method2_offset + method2_alignment_size + method2_call_thunk_size, + method2_offset_adjusted); + vos.WriteFully("2", 1); // Mark method2. + + EXPECT_EQ(method2_alignment_size, patcher_.CodeAlignmentSize()); + EXPECT_EQ(method2_call_thunk_size, patcher_.RelativeCallThunksSize()); + + uint32_t adjustment2 = 0xd000; + patcher_.StartOatFile(adjustment2); + + uint32_t method3_offset = 0xf00; + uint32_t method3_alignment_size = 2; + uint32_t method3_misc_thunk_size = 1; + mock_->next_write_alignment_ = method3_alignment_size; + mock_->next_write_misc_thunk_ = method3_misc_thunk_size; + uint32_t method3_offset_adjusted = patcher_.WriteThunks(&vos, method3_offset); + ASSERT_EQ(adjustment2 + method3_offset, mock_->last_write_offset_); + ASSERT_EQ(method3_offset + method3_alignment_size + method3_misc_thunk_size, + method3_offset_adjusted); + vos.WriteFully("3", 1); // Mark method3. 
+ + EXPECT_EQ(method3_alignment_size, patcher_.CodeAlignmentSize()); + EXPECT_EQ(method3_misc_thunk_size, patcher_.MiscThunksSize()); + + uint8_t expected_output[] = { + '1', + 0, 'c', 'c', '2', + 0, 0, 'm', '3', + }; + ASSERT_EQ(arraysize(expected_output), output.size()); + for (size_t i = 0; i != arraysize(expected_output); ++i) { + ASSERT_EQ(expected_output[i], output[i]) << i; + } +} + +TEST_F(MultiOatRelativePatcherTest, Patch) { + std::vector<uint8_t> code(16); + + uint32_t adjustment1 = 0x1000; + patcher_.StartOatFile(adjustment1); + + uint32_t method1_literal_offset = 4u; + uint32_t method1_patch_offset = 0x1234u; + uint32_t method1_target_offset = 0x8888u; + patcher_.PatchCall(&code, method1_literal_offset, method1_patch_offset, method1_target_offset); + DCHECK_EQ(method1_literal_offset, mock_->last_literal_offset_); + DCHECK_EQ(method1_patch_offset + adjustment1, mock_->last_patch_offset_); + DCHECK_EQ(method1_target_offset + adjustment1, mock_->last_target_offset_); + + uint32_t method2_literal_offset = 12u; + uint32_t method2_patch_offset = 0x7654u; + uint32_t method2_target_offset = 0xccccu; + LinkerPatch method2_patch = + LinkerPatch::DexCacheArrayPatch(method2_literal_offset, nullptr, 0u, 1234u); + patcher_.PatchDexCacheReference( + &code, method2_patch, method2_patch_offset, method2_target_offset); + DCHECK_EQ(method2_literal_offset, mock_->last_literal_offset_); + DCHECK_EQ(method2_patch_offset + adjustment1, mock_->last_patch_offset_); + DCHECK_EQ(method2_target_offset + adjustment1, mock_->last_target_offset_); + + uint32_t adjustment2 = 0xd000; + patcher_.StartOatFile(adjustment2); + + uint32_t method3_literal_offset = 8u; + uint32_t method3_patch_offset = 0x108u; + uint32_t method3_target_offset = 0x200u; + patcher_.PatchCall(&code, method3_literal_offset, method3_patch_offset, method3_target_offset); + DCHECK_EQ(method3_literal_offset, mock_->last_literal_offset_); + DCHECK_EQ(method3_patch_offset + adjustment2, mock_->last_patch_offset_); + DCHECK_EQ(method3_target_offset + adjustment2, mock_->last_target_offset_); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc index 82702dcf25..6727c17583 100644 --- a/compiler/linker/relative_patcher.cc +++ b/compiler/linker/relative_patcher.cc @@ -34,7 +34,8 @@ namespace art { namespace linker { std::unique_ptr<RelativePatcher> RelativePatcher::Create( - InstructionSet instruction_set, const InstructionSetFeatures* features, + InstructionSet instruction_set, + const InstructionSetFeatures* features, RelativePatcherTargetProvider* provider) { class RelativePatcherNone FINAL : public RelativePatcher { public: diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h index 8a9f3f8364..ba374512a1 100644 --- a/compiler/linker/relative_patcher.h +++ b/compiler/linker/relative_patcher.h @@ -83,23 +83,31 @@ class RelativePatcher { } // Reserve space for thunks if needed before a method, return adjusted offset. - virtual uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, + virtual uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, MethodReference method_ref) = 0; // Reserve space for thunks if needed after the last method, return adjusted offset. + // The caller may use this method to preemptively force thunk space reservation and + // then resume reservation for more methods. 
This is useful when there is a gap in + // the .text segment, for example when going to the next oat file for multi-image. virtual uint32_t ReserveSpaceEnd(uint32_t offset) = 0; - // Write relative call thunks if needed, return adjusted offset. + // Write relative call thunks if needed, return adjusted offset. Returns 0 on write failure. virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0; // Patch method code. The input displacement is relative to the patched location, // the patcher may need to adjust it if the correct base is different. - virtual void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, - uint32_t patch_offset, uint32_t target_offset) = 0; + virtual void PatchCall(std::vector<uint8_t>* code, + uint32_t literal_offset, + uint32_t patch_offset, + uint32_t target_offset) = 0; // Patch a reference to a dex cache location. - virtual void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, - uint32_t patch_offset, uint32_t target_offset) = 0; + virtual void PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) = 0; protected: RelativePatcher() diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h index bf8e786f64..704135a7b5 100644 --- a/compiler/linker/relative_patcher_test.h +++ b/compiler/linker/relative_patcher_test.h @@ -44,10 +44,22 @@ class RelativePatcherTest : public testing::Test { : compiler_options_(), verification_results_(&compiler_options_), inliner_map_(), - driver_(&compiler_options_, &verification_results_, &inliner_map_, - Compiler::kQuick, instruction_set, nullptr, - false, nullptr, nullptr, nullptr, 1u, - false, false, nullptr, -1, nullptr, nullptr), + driver_(&compiler_options_, + &verification_results_, + &inliner_map_, + Compiler::kQuick, + instruction_set, + /* instruction_set_features*/ nullptr, + /* boot_image */ false, + /* image_classes */ nullptr, + /* compiled_classes */ nullptr, + /* compiled_methods */ nullptr, + /* thread_count */ 1u, + /* dump_stats */ false, + /* dump_passes */ false, + /* timer */ nullptr, + /* swap_fd */ -1, + /* profile_compilation_info */ nullptr), error_msg_(), instruction_set_(instruction_set), features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)), @@ -138,8 +150,10 @@ class RelativePatcherTest : public testing::Test { offset + patch.LiteralOffset(), target_offset); } else if (patch.Type() == kLinkerPatchDexCacheArray) { uint32_t target_offset = dex_cache_arrays_begin_ + patch.TargetDexCacheElementOffset(); - patcher_->PatchDexCacheReference(&patched_code_, patch, - offset + patch.LiteralOffset(), target_offset); + patcher_->PatchDexCacheReference(&patched_code_, + patch, + offset + patch.LiteralOffset(), + target_offset); } else { LOG(FATAL) << "Bad patch type."; } diff --git a/compiler/linker/x86/relative_patcher_x86.h b/compiler/linker/x86/relative_patcher_x86.h index 0c881f00ba..ddc244c269 100644 --- a/compiler/linker/x86/relative_patcher_x86.h +++ b/compiler/linker/x86/relative_patcher_x86.h @@ -26,8 +26,10 @@ class X86RelativePatcher FINAL : public X86BaseRelativePatcher { public: X86RelativePatcher() { } - void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, - uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE; }; } // namespace linker 
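Illustrative sketch (not part of the patch): the adjustment contract documented in multi_oat_relative_patcher.h above can be shown with a minimal fragment. It mirrors the Offsets test in multi_oat_relative_patcher_test.cc; the dex_file and features arguments are assumed to be supplied by the caller, and the page-aligned adjustment values are hypothetical.

#include "base/logging.h"
#include "linker/multi_oat_relative_patcher.h"
#include "method_reference.h"

// Sketch only: two oat files laid out back to back, one method recorded in the first.
static void SketchAdjustment(const art::DexFile* dex_file,
                             const art::InstructionSetFeatures* features) {
  art::linker::MultiOatRelativePatcher patcher(art::kRuntimeISA, features);
  art::MethodReference method_ref(dex_file, /* dex_method_index */ 1u);

  patcher.StartOatFile(/* adjustment */ 0x1000u);    // Base of oat file #1 (page-aligned).
  patcher.SetOffset(method_ref, 0x100u);             // Stored globally as 0x1000 + 0x100.
  CHECK_EQ(patcher.GetOffset(method_ref), 0x100u);   // Reported relative to oat file #1.

  patcher.StartOatFile(/* adjustment */ 0x30000u);   // Base of oat file #2.
  // The stored global offset is re-expressed relative to the new base; the unsigned
  // wrap-around is intentional and matches the expectation in the Offsets test.
  CHECK_EQ(patcher.GetOffset(method_ref), 0x100u + 0x1000u - 0x30000u);
}
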
diff --git a/compiler/linker/x86/relative_patcher_x86_base.cc b/compiler/linker/x86/relative_patcher_x86_base.cc index bc285a7849..bf3a648218 100644 --- a/compiler/linker/x86/relative_patcher_x86_base.cc +++ b/compiler/linker/x86/relative_patcher_x86_base.cc @@ -34,8 +34,10 @@ uint32_t X86BaseRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, return offset; // No thunks added; no limit on relative call distance. } -void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, - uint32_t patch_offset, uint32_t target_offset) { +void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code, + uint32_t literal_offset, + uint32_t patch_offset, + uint32_t target_offset) { DCHECK_LE(literal_offset + 4u, code->size()); // Unsigned arithmetic with its well-defined overflow behavior is just fine here. uint32_t displacement = target_offset - patch_offset; diff --git a/compiler/linker/x86/relative_patcher_x86_base.h b/compiler/linker/x86/relative_patcher_x86_base.h index 9200709398..ca83a72f48 100644 --- a/compiler/linker/x86/relative_patcher_x86_base.h +++ b/compiler/linker/x86/relative_patcher_x86_base.h @@ -29,8 +29,10 @@ class X86BaseRelativePatcher : public RelativePatcher { MethodReference method_ref) OVERRIDE; uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, - uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchCall(std::vector<uint8_t>* code, + uint32_t literal_offset, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE; protected: X86BaseRelativePatcher() { } diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc index 598f3ac4a8..e571f50d2f 100644 --- a/compiler/linker/x86_64/relative_patcher_x86_64.cc +++ b/compiler/linker/x86_64/relative_patcher_x86_64.cc @@ -23,7 +23,8 @@ namespace linker { void X86_64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, - uint32_t patch_offset, uint32_t target_offset) { + uint32_t patch_offset, + uint32_t target_offset) { DCHECK_LE(patch.LiteralOffset() + 4u, code->size()); // Unsigned arithmetic with its well-defined overflow behavior is just fine here. 
uint32_t displacement = target_offset - patch_offset; diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.h b/compiler/linker/x86_64/relative_patcher_x86_64.h index af687b4a2f..feecb3a2ad 100644 --- a/compiler/linker/x86_64/relative_patcher_x86_64.h +++ b/compiler/linker/x86_64/relative_patcher_x86_64.h @@ -26,8 +26,10 @@ class X86_64RelativePatcher FINAL : public X86BaseRelativePatcher { public: X86_64RelativePatcher() { } - void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, - uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE; }; } // namespace linker diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index d3b404a3b6..14fd1054c3 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -31,6 +31,7 @@ #include "elf_writer.h" #include "elf_writer_quick.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "linker/multi_oat_relative_patcher.h" #include "linker/vector_output_stream.h" #include "mirror/class-inl.h" #include "mirror/object-inl.h" @@ -111,17 +112,16 @@ class OatTest : public CommonCompilerTest { compiler_kind, insn_set, insn_features_.get(), - false, - nullptr, - nullptr, - nullptr, - 2, - true, - true, + /* boot_image */ false, + /* image_classes */ nullptr, + /* compiled_classes */ nullptr, + /* compiled_methods */ nullptr, + /* thread_count */ 2, + /* dump_stats */ true, + /* dump_passes */ true, timer_.get(), - -1, - nullptr, - nullptr)); + /* swap_fd */ -1, + /* profile_compilation_info */ nullptr)); } bool WriteElf(File* file, @@ -200,7 +200,13 @@ class OatTest : public CommonCompilerTest { ScopedObjectAccess soa(Thread::Current()); class_linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc()); } - oat_writer.PrepareLayout(compiler_driver_.get(), nullptr, dex_files); + linker::MultiOatRelativePatcher patcher(compiler_driver_->GetInstructionSet(), + instruction_set_features_.get()); + oat_writer.PrepareLayout(compiler_driver_.get(), nullptr, dex_files, &patcher); + size_t rodata_size = oat_writer.GetOatHeader().GetExecutableOffset(); + size_t text_size = oat_writer.GetSize() - rodata_size; + elf_writer->SetLoadedSectionSizes(rodata_size, text_size, oat_writer.GetBssSize()); + if (!oat_writer.WriteRodata(rodata)) { return false; } @@ -216,7 +222,6 @@ class OatTest : public CommonCompilerTest { return false; } - elf_writer->SetBssSize(oat_writer.GetBssSize()); elf_writer->WriteDynamicSection(); elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo()); elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations()); @@ -416,7 +421,7 @@ TEST_F(OatTest, WriteRead) { // TODO We should also check copied methods in this test. 
for (auto& m : klass->GetDeclaredVirtualMethods(pointer_size)) { if (!klass->IsInterface()) { - EXPECT_FALSE(m.MightBeCopied()); + EXPECT_FALSE(m.IsCopied()); } CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file); ++method_index; diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 47dcfd56f8..c60b02a227 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -38,8 +38,8 @@ #include "gc/space/space.h" #include "handle_scope-inl.h" #include "image_writer.h" +#include "linker/multi_oat_relative_patcher.h" #include "linker/output_stream.h" -#include "linker/relative_patcher.h" #include "mirror/array.h" #include "mirror/class_loader.h" #include "mirror/dex_cache-inl.h" @@ -292,7 +292,8 @@ OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings) size_oat_class_status_(0), size_oat_class_method_bitmaps_(0), size_oat_class_method_offsets_(0), - method_offset_map_() { + relative_patcher_(nullptr), + absolute_patch_locations_() { } bool OatWriter::AddDexFileSource(const char* filename, @@ -438,21 +439,21 @@ bool OatWriter::WriteAndOpenDexFiles( void OatWriter::PrepareLayout(const CompilerDriver* compiler, ImageWriter* image_writer, - const std::vector<const DexFile*>& dex_files) { + const std::vector<const DexFile*>& dex_files, + linker::MultiOatRelativePatcher* relative_patcher) { CHECK(write_state_ == WriteState::kPrepareLayout); - dex_files_ = &dex_files; - compiler_driver_ = compiler; image_writer_ = image_writer; + dex_files_ = &dex_files; + relative_patcher_ = relative_patcher; + SetMultiOatRelativePatcherAdjustment(); + if (compiling_boot_image_) { CHECK(image_writer_ != nullptr); } InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); CHECK_EQ(instruction_set, oat_header_->GetInstructionSet()); - const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures(); - relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features, - &method_offset_map_); uint32_t offset = size_; { @@ -727,13 +728,11 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { // Deduplicate code arrays if we are not producing debuggable code. bool deduped = false; MethodReference method_ref(dex_file_, it.GetMemberIndex()); - auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref); if (debuggable_) { - if (method_lb != writer_->method_offset_map_.map.end() && - !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) { + quick_code_offset = writer_->relative_patcher_->GetOffset(method_ref); + if (quick_code_offset != 0u) { // Duplicate methods, we want the same code for both of them so that the oat writer puts // the same code in both ArtMethods so that we do not get different oat code at runtime. - quick_code_offset = method_lb->second; deduped = true; } else { quick_code_offset = NewQuickCodeOffset(compiled_method, it, thumb_offset); @@ -750,14 +749,14 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { } if (code_size != 0) { - if (method_lb != writer_->method_offset_map_.map.end() && - !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) { + if (writer_->relative_patcher_->GetOffset(method_ref) != 0u) { // TODO: Should this be a hard failure? 
LOG(WARNING) << "Multiple definitions of " << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file) - << " offsets " << method_lb->second << " " << quick_code_offset; + << " offsets " << writer_->relative_patcher_->GetOffset(method_ref) + << " " << quick_code_offset; } else { - writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset); + writer_->relative_patcher_->SetOffset(method_ref, quick_code_offset); } } @@ -1106,27 +1105,29 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { patched_code_.assign(quick_code.begin(), quick_code.end()); quick_code = ArrayRef<const uint8_t>(patched_code_); for (const LinkerPatch& patch : compiled_method->GetPatches()) { + uint32_t literal_offset = patch.LiteralOffset(); if (patch.Type() == kLinkerPatchCallRelative) { // NOTE: Relative calls across oat files are not supported. uint32_t target_offset = GetTargetOffset(patch); - uint32_t literal_offset = patch.LiteralOffset(); - writer_->relative_patcher_->PatchCall(&patched_code_, literal_offset, - offset_ + literal_offset, target_offset); + writer_->relative_patcher_->PatchCall(&patched_code_, + literal_offset, + offset_ + literal_offset, + target_offset); } else if (patch.Type() == kLinkerPatchDexCacheArray) { uint32_t target_offset = GetDexCacheOffset(patch); - uint32_t literal_offset = patch.LiteralOffset(); - writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, patch, + writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, + patch, offset_ + literal_offset, target_offset); } else if (patch.Type() == kLinkerPatchCall) { uint32_t target_offset = GetTargetOffset(patch); - PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset); + PatchCodeAddress(&patched_code_, literal_offset, target_offset); } else if (patch.Type() == kLinkerPatchMethod) { ArtMethod* method = GetTargetMethod(patch); - PatchMethodAddress(&patched_code_, patch.LiteralOffset(), method); + PatchMethodAddress(&patched_code_, literal_offset, method); } else if (patch.Type() == kLinkerPatchType) { mirror::Class* type = GetTargetType(patch); - PatchObjectAddress(&patched_code_, patch.LiteralOffset(), type); + PatchObjectAddress(&patched_code_, literal_offset, type); } } } @@ -1172,16 +1173,16 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } uint32_t GetTargetOffset(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) { - auto target_it = writer_->method_offset_map_.map.find(patch.TargetMethod()); - uint32_t target_offset = - (target_it != writer_->method_offset_map_.map.end()) ? target_it->second : 0u; - // If there's no compiled code, point to the correct trampoline. + uint32_t target_offset = writer_->relative_patcher_->GetOffset(patch.TargetMethod()); + // If there's no new compiled code, either we're compiling an app and the target method + // is in the boot image, or we need to point to the correct trampoline. 
if (UNLIKELY(target_offset == 0)) { ArtMethod* target = GetTargetMethod(patch); DCHECK(target != nullptr); size_t size = GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet()); const void* oat_code_offset = target->GetEntryPointFromQuickCompiledCodePtrSize(size); if (oat_code_offset != 0) { + DCHECK(!writer_->HasBootImage()); DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickResolutionStub(oat_code_offset)); DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(oat_code_offset)); DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickGenericJniStub(oat_code_offset)); @@ -1206,11 +1207,10 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) { if (writer_->HasBootImage()) { - auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<const uint8_t*>( - patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset()); - const char* oat_filename = writer_->image_writer_->GetOatFilenameForDexCache(dex_cache_); - const uint8_t* oat_data = - writer_->image_writer_->GetOatFileBegin(oat_filename) + file_offset_; + uintptr_t element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<uintptr_t>( + patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset()); + size_t oat_index = writer_->image_writer_->GetOatIndexForDexCache(dex_cache_); + uintptr_t oat_data = writer_->image_writer_->GetOatDataBegin(oat_index); return element - oat_data; } else { size_t start = writer_->dex_cache_arrays_offsets_.Get(patch.TargetDexCacheDexFile()); @@ -1270,9 +1270,13 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { SHARED_REQUIRES(Locks::mutator_lock_) { uint32_t address = target_offset; if (writer_->HasBootImage()) { - const char* oat_filename = writer_->image_writer_->GetOatFilenameForDexCache(dex_cache_); - address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin(oat_filename) + - writer_->oat_data_offset_ + target_offset); + size_t oat_index = writer_->image_writer_->GetOatIndexForDexCache(dex_cache_); + // TODO: Clean up offset types. + // The target_offset must be treated as signed for cross-oat patching. + const void* target = reinterpret_cast<const void*>( + writer_->image_writer_->GetOatDataBegin(oat_index) + + static_cast<int32_t>(target_offset)); + address = PointerToLowMemUInt32(target); } DCHECK_LE(offset + 4, code->size()); uint8_t* data = &(*code)[offset]; @@ -1540,6 +1544,8 @@ bool OatWriter::WriteRodata(OutputStream* out) { bool OatWriter::WriteCode(OutputStream* out) { CHECK(write_state_ == WriteState::kWriteText); + SetMultiOatRelativePatcherAdjustment(); + const size_t file_offset = oat_data_offset_; size_t relative_offset = oat_header_->GetExecutableOffset(); DCHECK_OFFSET(); @@ -1781,7 +1787,7 @@ size_t OatWriter::WriteCodeDexFiles(OutputStream* out, return relative_offset; } -bool OatWriter::GetOatDataOffset(OutputStream* out) { +bool OatWriter::RecordOatDataOffset(OutputStream* out) { // Get the elf file offset of the oat file. const off_t raw_file_offset = out->Seek(0, kSeekCurrent); if (raw_file_offset == static_cast<off_t>(-1)) { @@ -1833,7 +1839,7 @@ bool OatWriter::WriteDexFiles(OutputStream* rodata, File* file) { TimingLogger::ScopedTiming split("WriteDexFiles", timings_); // Get the elf file offset of the oat file. 
- if (!GetOatDataOffset(rodata)) { + if (!RecordOatDataOffset(rodata)) { return false; } @@ -2261,12 +2267,15 @@ bool OatWriter::WriteData(OutputStream* out, const void* data, size_t size) { return out->WriteFully(data, size); } -std::pair<bool, uint32_t> OatWriter::MethodOffsetMap::FindMethodOffset(MethodReference ref) { - auto it = map.find(ref); - if (it == map.end()) { - return std::pair<bool, uint32_t>(false, 0u); - } else { - return std::pair<bool, uint32_t>(true, it->second); +void OatWriter::SetMultiOatRelativePatcherAdjustment() { + DCHECK(dex_files_ != nullptr); + DCHECK(relative_patcher_ != nullptr); + DCHECK_NE(oat_data_offset_, 0u); + if (image_writer_ != nullptr && !dex_files_->empty()) { + // The oat data begin may not be initialized yet but the oat file offset is ready. + size_t oat_index = image_writer_->GetOatIndexForDexFile(dex_files_->front()); + size_t elf_file_offset = image_writer_->GetOatFileOffset(oat_index); + relative_patcher_->StartOatFile(elf_file_offset + oat_data_offset_); } } diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index 5a55fc6c95..74aab4efd0 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -47,6 +47,10 @@ namespace debug { struct MethodDebugInfo; } // namespace debug +namespace linker { +class MultiOatRelativePatcher; +} // namespace linker + // OatHeader variable length with count of D OatDexFiles // // OatDexFile[0] one variable sized OatDexFile with offsets to Dex and OatClasses @@ -153,7 +157,8 @@ class OatWriter { // Prepare layout of remaining data. void PrepareLayout(const CompilerDriver* compiler, ImageWriter* image_writer, - const std::vector<const DexFile*>& dex_files); + const std::vector<const DexFile*>& dex_files, + linker::MultiOatRelativePatcher* relative_patcher); // Write the rest of .rodata section (ClassOffsets[], OatClass[], maps). bool WriteRodata(OutputStream* out); // Write the code to the .text section. @@ -187,6 +192,10 @@ class OatWriter { return bss_size_; } + size_t GetOatDataOffset() const { + return oat_data_offset_; + } + ArrayRef<const uintptr_t> GetAbsolutePatchLocations() const { return ArrayRef<const uintptr_t>(absolute_patch_locations_); } @@ -249,7 +258,7 @@ class OatWriter { size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset); size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset); - bool GetOatDataOffset(OutputStream* out); + bool RecordOatDataOffset(OutputStream* out); bool ReadDexFileHeader(File* file, OatDexFile* oat_dex_file); bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location); bool WriteDexFiles(OutputStream* rodata, File* file); @@ -268,6 +277,7 @@ class OatWriter { const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files); bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta); bool WriteData(OutputStream* out, const void* data, size_t size); + void SetMultiOatRelativePatcherAdjustment(); enum class WriteState { kAddingDexFileSources, @@ -358,20 +368,12 @@ class OatWriter { uint32_t size_oat_class_method_bitmaps_; uint32_t size_oat_class_method_offsets_; - std::unique_ptr<linker::RelativePatcher> relative_patcher_; + // The helper for processing relative patches is external so that we can patch across oat files. + linker::MultiOatRelativePatcher* relative_patcher_; // The locations of absolute patches relative to the start of the executable section. dchecked_vector<uintptr_t> absolute_patch_locations_; - // Map method reference to assigned offset. 
- // Wrap the map in a class implementing linker::RelativePatcherTargetProvider. - class MethodOffsetMap FINAL : public linker::RelativePatcherTargetProvider { - public: - std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE; - SafeMap<MethodReference, uint32_t, MethodReferenceComparator> map; - }; - MethodOffsetMap method_offset_map_; - DISALLOW_COPY_AND_ASSIGN(OatWriter); }; diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 05e1356ed8..35ec7d41ff 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -368,7 +368,6 @@ GraphAnalysisResult HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item if (native_debuggable) { const uint32_t num_instructions = code_item.insns_size_in_code_units_; native_debug_info_locations = new (arena_) ArenaBitVector (arena_, num_instructions, false); - native_debug_info_locations->ClearAllBits(); FindNativeDebugInfoLocations(code_item, native_debug_info_locations); } @@ -443,23 +442,15 @@ void HGraphBuilder::FindNativeDebugInfoLocations(const DexFile::CodeItem& code_i } }; dex_file_->DecodeDebugPositionInfo(&code_item, Callback::Position, locations); - // Add native debug info at the start of every basic block. - for (uint32_t pc = 0; pc < code_item.insns_size_in_code_units_; pc++) { - if (FindBlockStartingAt(pc) != nullptr) { - locations->SetBit(pc); - } - } // Instruction-specific tweaks. const Instruction* const begin = Instruction::At(code_item.insns_); const Instruction* const end = begin->RelativeAt(code_item.insns_size_in_code_units_); for (const Instruction* inst = begin; inst < end; inst = inst->Next()) { switch (inst->Opcode()) { - case Instruction::MOVE_EXCEPTION: - case Instruction::MOVE_RESULT: - case Instruction::MOVE_RESULT_WIDE: - case Instruction::MOVE_RESULT_OBJECT: { - // The compiler checks that there are no instructions before those. - // So generate HNativeDebugInfo after them instead. + case Instruction::MOVE_EXCEPTION: { + // Stop in native debugger after the exception has been moved. + // The compiler also expects the move at the start of basic block so + // we do not want to interfere by inserting native-debug-info before it. locations->ClearBit(inst->GetDexPc(code_item.insns_)); const Instruction* next = inst->Next(); if (next < end) { diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index c2c8ccfc56..967d156cf6 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -195,6 +195,8 @@ void CodeGenerator::GenerateSlowPaths() { if (disasm_info_ != nullptr) { code_start = GetAssembler()->CodeSize(); } + // Record the dex pc at start of slow path (required for java line number mapping). + MaybeRecordNativeDebugInfo(nullptr /* instruction */, slow_path->GetDexPc()); slow_path->EmitNativeCode(this); if (disasm_info_ != nullptr) { disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize()); @@ -226,6 +228,10 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { // errors where we reference that label. if (block->IsSingleJump()) continue; Bind(block); + // This ensures that we have correct native line mapping for all native instructions. + // It is necessary to make stepping over a statement work. Otherwise, any initial + // instructions (e.g. moves) would be assumed to be the start of next statement. 
+ MaybeRecordNativeDebugInfo(nullptr /* instruction */, block->GetDexPc()); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); DisassemblyScope disassembly_scope(current, *this); @@ -733,7 +739,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t native_pc = GetAssembler()->CodeSize(); if (instruction == nullptr) { - // For stack overflow checks. + // For stack overflow checks and native-debug-info entries without dex register + // mapping (i.e. start of basic block or start of slow path). stack_map_stream_.BeginStackMapEntry(outer_dex_pc, native_pc, 0, 0, 0, 0); stack_map_stream_.EndStackMapEntry(); return; @@ -808,6 +815,16 @@ bool CodeGenerator::HasStackMapAtCurrentPc() { return count > 0 && stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc; } +void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction, uint32_t dex_pc) { + if (GetCompilerOptions().GetNativeDebuggable() && dex_pc != kNoDexPc) { + if (HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + GenerateNop(); + } + RecordPcInfo(instruction, dex_pc); + } +} + void CodeGenerator::RecordCatchBlockInfo() { ArenaAllocator* arena = graph_->GetArena(); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 49c193e7bf..9297fc956f 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -69,7 +69,7 @@ class CodeAllocator { class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { public: - SlowPathCode() { + explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) { for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) { saved_core_stack_offsets_[i] = kRegisterNotSaved; saved_fpu_stack_offsets_[i] = kRegisterNotSaved; @@ -106,9 +106,15 @@ class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { Label* GetEntryLabel() { return &entry_label_; } Label* GetExitLabel() { return &exit_label_; } + uint32_t GetDexPc() const { + return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc; + } + protected: static constexpr size_t kMaximumNumberOfExpectedRegisters = 32; static constexpr uint32_t kRegisterNotSaved = -1; + // The instruction where this slow path is happening. + HInstruction* instruction_; uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters]; uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters]; @@ -267,6 +273,8 @@ class CodeGenerator { void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); // Check whether we have already recorded mapping at this PC. bool HasStackMapAtCurrentPc(); + // Record extra stack maps if we support native debugging. + void MaybeRecordNativeDebugInfo(HInstruction* instruction, uint32_t dex_pc); bool CanMoveNullCheckToUser(HNullCheck* null_check); void MaybeRecordImplicitNullCheck(HInstruction* instruction); @@ -440,6 +448,8 @@ class CodeGenerator { // Copy the result of a call into the given target. virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0; + virtual void GenerateNop() = 0; + protected: // Method patch info used for recording locations of required linker patches and // target methods. 
The target method can be used for various purposes, whether for diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 87f52c6f21..10d3426a58 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -64,7 +64,7 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; class NullCheckSlowPathARM : public SlowPathCode { public: - explicit NullCheckSlowPathARM(HNullCheck* instruction) : instruction_(instruction) {} + explicit NullCheckSlowPathARM(HNullCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); @@ -83,13 +83,12 @@ class NullCheckSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM"; } private: - HNullCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM); }; class DivZeroCheckSlowPathARM : public SlowPathCode { public: - explicit DivZeroCheckSlowPathARM(HDivZeroCheck* instruction) : instruction_(instruction) {} + explicit DivZeroCheckSlowPathARM(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); @@ -108,14 +107,13 @@ class DivZeroCheckSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM"; } private: - HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM); }; class SuspendCheckSlowPathARM : public SlowPathCode { public: SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor) - : instruction_(instruction), successor_(successor) {} + : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); @@ -144,7 +142,6 @@ class SuspendCheckSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM"; } private: - HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. 
HBasicBlock* const successor_; @@ -157,7 +154,7 @@ class SuspendCheckSlowPathARM : public SlowPathCode { class BoundsCheckSlowPathARM : public SlowPathCode { public: explicit BoundsCheckSlowPathARM(HBoundsCheck* instruction) - : instruction_(instruction) {} + : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); @@ -188,8 +185,6 @@ class BoundsCheckSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM"; } private: - HBoundsCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM); }; @@ -199,7 +194,7 @@ class LoadClassSlowPathARM : public SlowPathCode { HInstruction* at, uint32_t dex_pc, bool do_clinit) - : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -253,7 +248,7 @@ class LoadClassSlowPathARM : public SlowPathCode { class LoadStringSlowPathARM : public SlowPathCode { public: - explicit LoadStringSlowPathARM(HLoadString* instruction) : instruction_(instruction) {} + explicit LoadStringSlowPathARM(HLoadString* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -264,7 +259,8 @@ class LoadStringSlowPathARM : public SlowPathCode { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ LoadImmediate(calling_convention.GetRegisterAt(0), string_index); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); @@ -277,15 +273,13 @@ class LoadStringSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM"; } private: - HLoadString* const instruction_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM); }; class TypeCheckSlowPathARM : public SlowPathCode { public: TypeCheckSlowPathARM(HInstruction* instruction, bool is_fatal) - : instruction_(instruction), is_fatal_(is_fatal) {} + : SlowPathCode(instruction), is_fatal_(is_fatal) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -340,7 +334,6 @@ class TypeCheckSlowPathARM : public SlowPathCode { bool IsFatal() const OVERRIDE { return is_fatal_; } private: - HInstruction* const instruction_; const bool is_fatal_; DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM); @@ -349,7 +342,7 @@ class TypeCheckSlowPathARM : public SlowPathCode { class DeoptimizationSlowPathARM : public SlowPathCode { public: explicit DeoptimizationSlowPathARM(HDeoptimize* instruction) - : instruction_(instruction) {} + : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); @@ -365,13 +358,12 @@ class DeoptimizationSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } private: - HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); }; class ArraySetSlowPathARM : public SlowPathCode { public: - explicit 
ArraySetSlowPathARM(HInstruction* instruction) : instruction_(instruction) {} + explicit ArraySetSlowPathARM(HInstruction* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -410,8 +402,6 @@ class ArraySetSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM"; } private: - HInstruction* const instruction_; - DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; @@ -419,7 +409,7 @@ class ArraySetSlowPathARM : public SlowPathCode { class ReadBarrierMarkSlowPathARM : public SlowPathCode { public: ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj) - : instruction_(instruction), out_(out), obj_(obj) { + : SlowPathCode(instruction), out_(out), obj_(obj) { DCHECK(kEmitCompilerReadBarrier); } @@ -458,7 +448,6 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { } private: - HInstruction* const instruction_; const Location out_; const Location obj_; @@ -474,7 +463,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { Location obj, uint32_t offset, Location index) - : instruction_(instruction), + : SlowPathCode(instruction), out_(out), ref_(ref), obj_(obj), @@ -629,7 +618,6 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { UNREACHABLE(); } - HInstruction* const instruction_; const Location out_; const Location ref_; const Location obj_; @@ -646,7 +634,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { class ReadBarrierForRootSlowPathARM : public SlowPathCode { public: ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) { + : SlowPathCode(instruction), out_(out), root_(root) { DCHECK(kEmitCompilerReadBarrier); } @@ -679,7 +667,6 @@ class ReadBarrierForRootSlowPathARM : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM"; } private: - HInstruction* const instruction_; const Location out_; const Location root_; @@ -1557,11 +1544,11 @@ void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { - if (codegen_->HasStackMapAtCurrentPc()) { - // Ensure that we do not collide with the stack map of the previous instruction. 
- __ nop(); - } - codegen_->RecordPcInfo(info, info->GetDexPc()); + codegen_->MaybeRecordNativeDebugInfo(info, info->GetDexPc()); +} + +void CodeGeneratorARM::GenerateNop() { + __ nop(); } void LocationsBuilderARM::HandleCondition(HCondition* cond) { @@ -6426,6 +6413,33 @@ Literal* CodeGeneratorARM::DeduplicateMethodCodeLiteral(MethodReference target_m return DeduplicateMethodLiteral(target_method, &call_patches_); } +void LocationsBuilderARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); + locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex, + Location::RequiresRegister()); + locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); + locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { + LocationSummary* locations = instr->GetLocations(); + Register res = locations->Out().AsRegister<Register>(); + Register accumulator = + locations->InAt(HMultiplyAccumulate::kInputAccumulatorIndex).AsRegister<Register>(); + Register mul_left = + locations->InAt(HMultiplyAccumulate::kInputMulLeftIndex).AsRegister<Register>(); + Register mul_right = + locations->InAt(HMultiplyAccumulate::kInputMulRightIndex).AsRegister<Register>(); + + if (instr->GetOpKind() == HInstruction::kAdd) { + __ mla(res, mul_left, mul_right, accumulator); + } else { + __ mls(res, mul_left, mul_right, accumulator); + } +} + void LocationsBuilderARM::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index cfd7a3bc14..06e7c0015c 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -159,6 +159,7 @@ class LocationsBuilderARM : public HGraphVisitor { FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -197,6 +198,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -510,6 +512,8 @@ class CodeGeneratorARM : public CodeGenerator { // artReadBarrierForRootSlow. void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + void GenerateNop(); + private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. 
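The ARM hunks above carry two changes that repeat in the other backends: each SlowPathCode now remembers the HInstruction it was created for, so the code generator can record a native-debug-info stack map at the slow-path entry, and the nop-on-collision logic previously duplicated in every VisitNativeDebugInfo moves into the shared MaybeRecordNativeDebugInfo helper, which calls the new virtual GenerateNop(). A minimal standalone sketch of that guard follows; SketchCodegen and its members are illustrative stand-ins, not ART's real CodeGenerator API.

#include <cstdint>
#include <utility>
#include <vector>

// Minimal sketch, assuming simplified stand-in types; it only illustrates the
// nop-on-collision guard that MaybeRecordNativeDebugInfo introduces above.
class SketchCodegen {
 public:
  void EmitNop() { code_size_ += 4; }  // one 32-bit ARM instruction

  bool HasStackMapAtCurrentPc() const {
    return !maps_.empty() && maps_.back().second == code_size_;
  }

  void MaybeRecordNativeDebugInfo(uint32_t dex_pc) {
    if (!native_debuggable_ || dex_pc == kNoDexPc) {
      return;
    }
    if (HasStackMapAtCurrentPc()) {
      // Two stack maps must not share a native PC; pad with a nop so the
      // extra native-debug-info entry gets its own offset.
      EmitNop();
    }
    maps_.emplace_back(dex_pc, code_size_);
  }

 private:
  static constexpr uint32_t kNoDexPc = 0xFFFFFFFFu;  // hypothetical sentinel
  bool native_debuggable_ = true;
  uint32_t code_size_ = 0;
  std::vector<std::pair<uint32_t, uint32_t>> maps_;  // (dex_pc, native_pc)
};

Hoisting the instruction pointer into the SlowPathCode base class is also what lets every per-architecture slow path drop its duplicated "HFoo* const instruction_" member, which accounts for most of the deletions in these hunks.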
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 435ae5e954..25487d2fad 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -219,7 +219,7 @@ void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSum class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : instruction_(instruction) {} + explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -246,14 +246,12 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; } private: - HBoundsCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64); }; class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : instruction_(instruction) {} + explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -272,7 +270,6 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; } private: - HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64); }; @@ -282,7 +279,7 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { HInstruction* at, uint32_t dex_pc, bool do_clinit) - : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCodeARM64(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -337,7 +334,7 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit LoadStringSlowPathARM64(HLoadString* instruction) : instruction_(instruction) {} + explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -348,7 +345,8 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - __ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex()); + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ Mov(calling_convention.GetRegisterAt(0).W(), string_index); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); @@ -362,14 +360,12 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; } private: - HLoadString* const instruction_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64); }; class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit NullCheckSlowPathARM64(HNullCheck* instr) : instruction_(instr) {} + explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -388,15 +384,13 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; } private: - HNullCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64); }; class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { public: SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor) - : instruction_(instruction), successor_(successor) {} + : SlowPathCodeARM64(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -425,7 +419,6 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; } private: - HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. HBasicBlock* const successor_; @@ -438,7 +431,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { public: TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal) - : instruction_(instruction), is_fatal_(is_fatal) {} + : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -487,7 +480,6 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { bool IsFatal() const { return is_fatal_; } private: - HInstruction* const instruction_; const bool is_fatal_; DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64); @@ -496,7 +488,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { public: explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction) - : instruction_(instruction) {} + : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -512,13 +504,12 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } private: - HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); }; class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit ArraySetSlowPathARM64(HInstruction* instruction) : instruction_(instruction) {} + explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -557,8 +548,6 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; } private: - HInstruction* const instruction_; - DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64); }; @@ -588,7 +577,7 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj) - : instruction_(instruction), out_(out), obj_(obj) { + : SlowPathCodeARM64(instruction), out_(out), obj_(obj) { DCHECK(kEmitCompilerReadBarrier); } @@ -627,7 +616,6 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { } private: - HInstruction* const instruction_; const 
Location out_; const Location obj_; @@ -643,7 +631,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { Location obj, uint32_t offset, Location index) - : instruction_(instruction), + : SlowPathCodeARM64(instruction), out_(out), ref_(ref), obj_(obj), @@ -804,7 +792,6 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { UNREACHABLE(); } - HInstruction* const instruction_; const Location out_; const Location ref_; const Location obj_; @@ -821,7 +808,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) { + : SlowPathCodeARM64(instruction), out_(out), root_(root) { DCHECK(kEmitCompilerReadBarrier); } @@ -865,7 +852,6 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; } private: - HInstruction* const instruction_; const Location out_; const Location root_; @@ -1876,6 +1862,36 @@ void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) { HandleBinaryOp(instruction); } +void LocationsBuilderARM64::VisitArm64BitwiseNegatedRight(HArm64BitwiseNegatedRight* instr) { + DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); + locations->SetInAt(0, Location::RequiresRegister()); + // There is no immediate variant of negated bitwise instructions in AArch64. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64BitwiseNegatedRight( + HArm64BitwiseNegatedRight* instr) { + Register dst = OutputRegister(instr); + Register lhs = InputRegisterAt(instr, 0); + Register rhs = InputRegisterAt(instr, 1); + + switch (instr->GetOpKind()) { + case HInstruction::kAnd: + __ Bic(dst, lhs, rhs); + break; + case HInstruction::kOr: + __ Orn(dst, lhs, rhs); + break; + case HInstruction::kXor: + __ Eon(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unreachable"; + } +} + void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp( HArm64DataProcWithShifterOp* instruction) { DCHECK(instruction->GetType() == Primitive::kPrimInt || @@ -1973,21 +1989,27 @@ void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress( Operand(InputOperandAt(instruction, 1))); } -void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { +void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); - locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex, - Location::RequiresRegister()); - locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); - locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); + HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); + if (instr->GetOpKind() == HInstruction::kSub && + accumulator->IsConstant() && + accumulator->AsConstant()->IsZero()) { + // Don't allocate register for Mneg instruction. 
+ } else { + locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex, + Location::RequiresRegister()); + } + locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); + locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } -void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { +void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { Register res = OutputRegister(instr); - Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex); - Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex); - Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex); + Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex); + Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex); // Avoid emitting code that could trigger Cortex A53's erratum 835769. // This fixup should be carried out for all multiply-accumulate instructions: @@ -2007,10 +2029,17 @@ void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyA } if (instr->GetOpKind() == HInstruction::kAdd) { + Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); __ Madd(res, mul_left, mul_right, accumulator); } else { DCHECK(instr->GetOpKind() == HInstruction::kSub); - __ Msub(res, mul_left, mul_right, accumulator); + HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); + if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsZero()) { + __ Mneg(res, mul_left, mul_right); + } else { + Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); + __ Msub(res, mul_left, mul_right, accumulator); + } } } @@ -3057,11 +3086,11 @@ void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - if (codegen_->HasStackMapAtCurrentPc()) { - // Ensure that we do not collide with the stack map of the previous instruction. 
- __ Nop(); - } - codegen_->RecordPcInfo(info, info->GetDexPc()); + codegen_->MaybeRecordNativeDebugInfo(info, info->GetDexPc()); +} + +void CodeGeneratorARM64::GenerateNop() { + __ Nop(); } void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 360488eb4a..10f1e7f008 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -66,7 +66,8 @@ Location ARM64ReturnLocation(Primitive::Type return_type); class SlowPathCodeARM64 : public SlowPathCode { public: - SlowPathCodeARM64() : entry_label_(), exit_label_() {} + explicit SlowPathCodeARM64(HInstruction* instruction) + : SlowPathCode(instruction), entry_label_(), exit_label_() {} vixl::Label* GetEntryLabel() { return &entry_label_; } vixl::Label* GetExitLabel() { return &exit_label_; } @@ -195,6 +196,7 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -284,6 +286,7 @@ class LocationsBuilderARM64 : public HGraphVisitor { FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -532,6 +535,8 @@ class CodeGeneratorARM64 : public CodeGenerator { // artReadBarrierForRootSlow. void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + void GenerateNop(); + private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index c500ea4408..8d3d94b79d 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -39,9 +39,6 @@ namespace mips { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = A0; -// We need extra temporary/scratch registers (in addition to AT) in some cases. 
-static constexpr FRegister FTMP = F8; - Location MipsReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -149,7 +146,7 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit BoundsCheckSlowPathMIPS(HBoundsCheck* instruction) : instruction_(instruction) {} + explicit BoundsCheckSlowPathMIPS(HBoundsCheck* instruction) : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -181,14 +178,12 @@ class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS"; } private: - HBoundsCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS); }; class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit DivZeroCheckSlowPathMIPS(HDivZeroCheck* instruction) : instruction_(instruction) {} + explicit DivZeroCheckSlowPathMIPS(HDivZeroCheck* instruction) : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); @@ -210,7 +205,6 @@ class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS"; } private: - HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS); }; @@ -220,7 +214,7 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { HInstruction* at, uint32_t dex_pc, bool do_clinit) - : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCodeMIPS(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -279,7 +273,7 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit LoadStringSlowPathMIPS(HLoadString* instruction) : instruction_(instruction) {} + explicit LoadStringSlowPathMIPS(HLoadString* instruction) : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -290,7 +284,8 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - __ LoadConst32(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), string_index); mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), @@ -309,14 +304,12 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS"; } private: - HLoadString* const instruction_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS); }; class NullCheckSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit NullCheckSlowPathMIPS(HNullCheck* instr) : instruction_(instr) {} + explicit NullCheckSlowPathMIPS(HNullCheck* instr) : SlowPathCodeMIPS(instr) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); @@ -338,15 +331,13 @@ class NullCheckSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { 
return "NullCheckSlowPathMIPS"; } private: - HNullCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS); }; class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { public: SuspendCheckSlowPathMIPS(HSuspendCheck* instruction, HBasicBlock* successor) - : instruction_(instruction), successor_(successor) {} + : SlowPathCodeMIPS(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); @@ -374,7 +365,6 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS"; } private: - HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. HBasicBlock* const successor_; @@ -386,7 +376,7 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit TypeCheckSlowPathMIPS(HInstruction* instruction) : instruction_(instruction) {} + explicit TypeCheckSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -437,15 +427,13 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS"; } private: - HInstruction* const instruction_; - DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS); }; class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { public: explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction) - : instruction_(instruction) {} + : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); @@ -462,7 +450,6 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } private: - HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS); }; @@ -3407,11 +3394,11 @@ void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { - if (codegen_->HasStackMapAtCurrentPc()) { - // Ensure that we do not collide with the stack map of the previous instruction. 
- __ Nop(); - } - codegen_->RecordPcInfo(info, info->GetDexPc()); + codegen_->MaybeRecordNativeDebugInfo(info, info->GetDexPc()); +} + +void CodeGeneratorMIPS::GenerateNop() { + __ Nop(); } void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index dd0641c7ca..605c794421 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -152,7 +152,8 @@ class ParallelMoveResolverMIPS : public ParallelMoveResolverWithSwap { class SlowPathCodeMIPS : public SlowPathCode { public: - SlowPathCodeMIPS() : entry_label_(), exit_label_() {} + explicit SlowPathCodeMIPS(HInstruction* instruction) + : SlowPathCode(instruction), entry_label_(), exit_label_() {} MipsLabel* GetEntryLabel() { return &entry_label_; } MipsLabel* GetExitLabel() { return &exit_label_; } @@ -360,6 +361,8 @@ class CodeGeneratorMIPS : public CodeGenerator { UNIMPLEMENTED(FATAL) << "Not implemented on MIPS"; } + void GenerateNop(); + private: // Labels for each block that will be compiled. MipsLabel* block_labels_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index e3a44f1c96..c2b84b4335 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -37,9 +37,6 @@ namespace mips64 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr GpuRegister kMethodRegisterArgument = A0; -// We need extra temporary/scratch registers (in addition to AT) in some cases. -static constexpr FpuRegister FTMP = F8; - Location Mips64ReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -110,7 +107,7 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction) : instruction_(instruction) {} + explicit BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction) : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -141,14 +138,12 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS64"; } private: - HBoundsCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS64); }; class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : instruction_(instruction) {} + explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); @@ -169,7 +164,6 @@ class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS64"; } private: - HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS64); }; @@ -179,7 +173,7 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { HInstruction* at, uint32_t dex_pc, bool do_clinit) - : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCodeMIPS64(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || 
at->IsClinitCheck()); } @@ -234,7 +228,7 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit LoadStringSlowPathMIPS64(HLoadString* instruction) : instruction_(instruction) {} + explicit LoadStringSlowPathMIPS64(HLoadString* instruction) : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -245,7 +239,8 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - __ LoadConst32(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), string_index); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), @@ -263,14 +258,12 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; } private: - HLoadString* const instruction_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS64); }; class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit NullCheckSlowPathMIPS64(HNullCheck* instr) : instruction_(instr) {} + explicit NullCheckSlowPathMIPS64(HNullCheck* instr) : SlowPathCodeMIPS64(instr) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); @@ -291,15 +284,13 @@ class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS64"; } private: - HNullCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS64); }; class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: SuspendCheckSlowPathMIPS64(HSuspendCheck* instruction, HBasicBlock* successor) - : instruction_(instruction), successor_(successor) {} + : SlowPathCodeMIPS64(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); @@ -326,7 +317,6 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS64"; } private: - HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. 
HBasicBlock* const successor_; @@ -338,7 +328,7 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit TypeCheckSlowPathMIPS64(HInstruction* instruction) : instruction_(instruction) {} + explicit TypeCheckSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -384,15 +374,13 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; } private: - HInstruction* const instruction_; - DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS64); }; class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction) - : instruction_(instruction) {} + : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); @@ -408,7 +396,6 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } private: - HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64); }; @@ -2732,11 +2719,11 @@ void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - if (codegen_->HasStackMapAtCurrentPc()) { - // Ensure that we do not collide with the stack map of the previous instruction. - __ Nop(); - } - codegen_->RecordPcInfo(info, info->GetDexPc()); + codegen_->MaybeRecordNativeDebugInfo(info, info->GetDexPc()); +} + +void CodeGeneratorMIPS64::GenerateNop() { + __ Nop(); } void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index eb7315aa7a..ba9eaff46f 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -152,7 +152,8 @@ class ParallelMoveResolverMIPS64 : public ParallelMoveResolverWithSwap { class SlowPathCodeMIPS64 : public SlowPathCode { public: - SlowPathCodeMIPS64() : entry_label_(), exit_label_() {} + explicit SlowPathCodeMIPS64(HInstruction* instruction) + : SlowPathCode(instruction), entry_label_(), exit_label_() {} Mips64Label* GetEntryLabel() { return &entry_label_; } Mips64Label* GetExitLabel() { return &exit_label_; } @@ -352,6 +353,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { UNIMPLEMENTED(FATAL) << "Not implemented on MIPS64"; } + void GenerateNop(); + private: // Labels for each block that will be compiled. Mips64Label* block_labels_; // Indexed by block id. 
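The AArch64, MIPS and MIPS64 hunks above apply the same slow-path and GenerateNop() refactoring, and the AArch64 backend additionally lowers the new simplifier nodes: HMultiplyAccumulate becomes madd or msub (or mneg when the accumulator is the zero constant), and HArm64BitwiseNegatedRight becomes bic, orn or eon. The opcode selection is sketched schematically below; OpKind and SelectA64Opcode are hypothetical names introduced for illustration, not the HInstruction kinds used in the diff.

#include <string>

// Hypothetical stand-ins for HInstruction::kAdd / kSub / kAnd / kOr / kXor.
enum class OpKind { kAdd, kSub, kAnd, kOr, kXor };

// Mirrors the selection logic in the AArch64 visitors above:
//   acc + a * b   -> madd
//   acc - a * b   -> msub
//   0 - a * b     -> mneg  (no accumulator register needed)
//   a & ~b        -> bic
//   a | ~b        -> orn
//   a ^ ~b        -> eon
std::string SelectA64Opcode(OpKind kind, bool accumulator_is_zero) {
  switch (kind) {
    case OpKind::kAdd: return "madd";
    case OpKind::kSub: return accumulator_is_zero ? "mneg" : "msub";
    case OpKind::kAnd: return "bic";
    case OpKind::kOr:  return "orn";
    case OpKind::kXor: return "eon";
  }
  return "unknown";
}

For example, SelectA64Opcode(OpKind::kSub, /* accumulator_is_zero */ true) yields "mneg", matching the special case added to VisitMultiplyAccumulate so that a zero accumulator never occupies a register.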
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index f032f51649..88e42f3faf 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -52,7 +52,7 @@ static constexpr int kFakeReturnRegister = Register(8); class NullCheckSlowPathX86 : public SlowPathCode { public: - explicit NullCheckSlowPathX86(HNullCheck* instruction) : instruction_(instruction) {} + explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); @@ -73,13 +73,12 @@ class NullCheckSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86"; } private: - HNullCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86); }; class DivZeroCheckSlowPathX86 : public SlowPathCode { public: - explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : instruction_(instruction) {} + explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); @@ -100,13 +99,13 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86"; } private: - HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86); }; class DivRemMinusOneSlowPathX86 : public SlowPathCode { public: - DivRemMinusOneSlowPathX86(Register reg, bool is_div) : reg_(reg), is_div_(is_div) {} + DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div) + : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); @@ -128,7 +127,7 @@ class DivRemMinusOneSlowPathX86 : public SlowPathCode { class BoundsCheckSlowPathX86 : public SlowPathCode { public: - explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : instruction_(instruction) {} + explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -160,15 +159,13 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86"; } private: - HBoundsCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86); }; class SuspendCheckSlowPathX86 : public SlowPathCode { public: SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor) - : instruction_(instruction), successor_(successor) {} + : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); @@ -199,7 +196,6 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86"; } private: - HSuspendCheck* const instruction_; HBasicBlock* const successor_; Label return_label_; @@ -208,7 +204,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { class LoadStringSlowPathX86 : public SlowPathCode { public: - explicit LoadStringSlowPathX86(HLoadString* instruction) : instruction_(instruction) {} + explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {} 
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -219,7 +215,8 @@ class LoadStringSlowPathX86 : public SlowPathCode { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex())); + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index)); x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), @@ -234,8 +231,6 @@ class LoadStringSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; } private: - HLoadString* const instruction_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86); }; @@ -245,7 +240,7 @@ class LoadClassSlowPathX86 : public SlowPathCode { HInstruction* at, uint32_t dex_pc, bool do_clinit) - : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -299,7 +294,7 @@ class LoadClassSlowPathX86 : public SlowPathCode { class TypeCheckSlowPathX86 : public SlowPathCode { public: TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal) - : instruction_(instruction), is_fatal_(is_fatal) {} + : SlowPathCode(instruction), is_fatal_(is_fatal) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -356,7 +351,6 @@ class TypeCheckSlowPathX86 : public SlowPathCode { bool IsFatal() const OVERRIDE { return is_fatal_; } private: - HInstruction* const instruction_; const bool is_fatal_; DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86); @@ -365,7 +359,7 @@ class TypeCheckSlowPathX86 : public SlowPathCode { class DeoptimizationSlowPathX86 : public SlowPathCode { public: explicit DeoptimizationSlowPathX86(HDeoptimize* instruction) - : instruction_(instruction) {} + : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); @@ -381,13 +375,12 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } private: - HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); }; class ArraySetSlowPathX86 : public SlowPathCode { public: - explicit ArraySetSlowPathX86(HInstruction* instruction) : instruction_(instruction) {} + explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -426,8 +419,6 @@ class ArraySetSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; } private: - HInstruction* const instruction_; - DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); }; @@ -435,7 +426,7 @@ class ArraySetSlowPathX86 : public SlowPathCode { class ReadBarrierMarkSlowPathX86 : public SlowPathCode { public: ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location out, Location obj) - : instruction_(instruction), out_(out), obj_(obj) { + : SlowPathCode(instruction), out_(out), obj_(obj) { DCHECK(kEmitCompilerReadBarrier); } @@ -474,7 +465,6 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { } private: - 
HInstruction* const instruction_; const Location out_; const Location obj_; @@ -490,7 +480,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { Location obj, uint32_t offset, Location index) - : instruction_(instruction), + : SlowPathCode(instruction), out_(out), ref_(ref), obj_(obj), @@ -645,7 +635,6 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { UNREACHABLE(); } - HInstruction* const instruction_; const Location out_; const Location ref_; const Location obj_; @@ -662,7 +651,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { class ReadBarrierForRootSlowPathX86 : public SlowPathCode { public: ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) { + : SlowPathCode(instruction), out_(out), root_(root) { DCHECK(kEmitCompilerReadBarrier); } @@ -695,7 +684,6 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; } private: - HInstruction* const instruction_; const Location out_; const Location root_; @@ -1649,11 +1637,11 @@ void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { - if (codegen_->HasStackMapAtCurrentPc()) { - // Ensure that we do not collide with the stack map of the previous instruction. - __ nop(); - } - codegen_->RecordPcInfo(info, info->GetDexPc()); + codegen_->MaybeRecordNativeDebugInfo(info, info->GetDexPc()); +} + +void CodeGeneratorX86::GenerateNop() { + __ nop(); } void LocationsBuilderX86::VisitLocal(HLocal* local) { @@ -3453,9 +3441,8 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr GenerateDivRemWithAnyConstant(instruction); } } else { - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(), - is_div); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86( + instruction, out.AsRegister<Register>(), is_div); codegen_->AddSlowPath(slow_path); Register second_reg = second.AsRegister<Register>(); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 63e9b2fc9c..0795f3b530 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -540,6 +540,7 @@ class CodeGeneratorX86 : public CodeGenerator { } } + void GenerateNop(); private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index f3c40b109f..bb24c6f59c 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -56,7 +56,7 @@ static constexpr int kC2ConditionMask = 0x400; class NullCheckSlowPathX86_64 : public SlowPathCode { public: - explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {} + explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); @@ -77,13 +77,12 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; } private: - HNullCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64); }; 
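One visible consequence of hoisting instruction_ into SlowPathCode, both in the x86 hunks above and in the x86-64 LoadString/DivRem hunks below: the field is now a plain HInstruction*, so slow paths that need type-specific data re-derive the concrete type through a downcast, as in instruction_->AsLoadString()->GetStringIndex(). A small self-contained sketch of that access pattern follows; the toy HInstruction/HLoadString classes only mirror the ART naming and are not the real HIR definitions.

#include <cstdint>

class HLoadString;

class HInstruction {
 public:
  virtual ~HInstruction() {}
  // In ART such casts exist for every instruction kind; here it is hand-written.
  virtual HLoadString* AsLoadString() { return nullptr; }
};

class HLoadString : public HInstruction {
 public:
  explicit HLoadString(uint32_t string_index) : string_index_(string_index) {}
  HLoadString* AsLoadString() override { return this; }
  uint32_t GetStringIndex() const { return string_index_; }

 private:
  const uint32_t string_index_;
};

class LoadStringSlowPath {
 public:
  explicit LoadStringSlowPath(HLoadString* instruction) : instruction_(instruction) {}

  uint32_t StringIndexForRuntimeCall() const {
    // instruction_ is held as the generic base type, so the slow path
    // re-derives the concrete type before reading the string index,
    // mirroring instruction_->AsLoadString()->GetStringIndex() in the diff.
    return instruction_->AsLoadString()->GetStringIndex();
  }

 private:
  HInstruction* const instruction_;  // generic pointer, as in the new base class
};

Storing the generic pointer once keeps every slow path constructor uniform; the cast is confined to the one place that actually needs the string index.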
class DivZeroCheckSlowPathX86_64 : public SlowPathCode { public: - explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : instruction_(instruction) {} + explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); @@ -104,14 +103,13 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; } private: - HDivZeroCheck* const instruction_; DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64); }; class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { public: - DivRemMinusOneSlowPathX86_64(Register reg, Primitive::Type type, bool is_div) - : cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {} + DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div) + : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); @@ -145,7 +143,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { class SuspendCheckSlowPathX86_64 : public SlowPathCode { public: SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor) - : instruction_(instruction), successor_(successor) {} + : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); @@ -176,7 +174,6 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; } private: - HSuspendCheck* const instruction_; HBasicBlock* const successor_; Label return_label_; @@ -186,7 +183,7 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { class BoundsCheckSlowPathX86_64 : public SlowPathCode { public: explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction) - : instruction_(instruction) {} + : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -218,8 +215,6 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; } private: - HBoundsCheck* const instruction_; - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64); }; @@ -229,7 +224,7 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { HInstruction* at, uint32_t dex_pc, bool do_clinit) - : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -286,7 +281,7 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { class LoadStringSlowPathX86_64 : public SlowPathCode { public: - explicit LoadStringSlowPathX86_64(HLoadString* instruction) : instruction_(instruction) {} + explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -297,8 +292,8 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), - Immediate(instruction_->GetStringIndex())); + const 
uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index)); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), @@ -312,15 +307,13 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; } private: - HLoadString* const instruction_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64); }; class TypeCheckSlowPathX86_64 : public SlowPathCode { public: TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal) - : instruction_(instruction), is_fatal_(is_fatal) {} + : SlowPathCode(instruction), is_fatal_(is_fatal) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -379,7 +372,6 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { bool IsFatal() const OVERRIDE { return is_fatal_; } private: - HInstruction* const instruction_; const bool is_fatal_; DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64); @@ -388,7 +380,7 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { class DeoptimizationSlowPathX86_64 : public SlowPathCode { public: explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction) - : instruction_(instruction) {} + : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); @@ -404,13 +396,12 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } private: - HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); }; class ArraySetSlowPathX86_64 : public SlowPathCode { public: - explicit ArraySetSlowPathX86_64(HInstruction* instruction) : instruction_(instruction) {} + explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -449,8 +440,6 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; } private: - HInstruction* const instruction_; - DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); }; @@ -458,7 +447,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { public: ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj) - : instruction_(instruction), out_(out), obj_(obj) { + : SlowPathCode(instruction), out_(out), obj_(obj) { DCHECK(kEmitCompilerReadBarrier); } @@ -497,7 +486,6 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { } private: - HInstruction* const instruction_; const Location out_; const Location obj_; @@ -513,7 +501,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { Location obj, uint32_t offset, Location index) - : instruction_(instruction), + : SlowPathCode(instruction), out_(out), ref_(ref), obj_(obj), @@ -667,7 +655,6 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { UNREACHABLE(); } - HInstruction* const instruction_; const Location out_; const Location ref_; const Location obj_; @@ -684,7 +671,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { public: 
ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) { + : SlowPathCode(instruction), out_(out), root_(root) { DCHECK(kEmitCompilerReadBarrier); } @@ -716,7 +703,6 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; } private: - HInstruction* const instruction_; const Location out_; const Location root_; @@ -1632,11 +1618,11 @@ void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - if (codegen_->HasStackMapAtCurrentPc()) { - // Ensure that we do not collide with the stack map of the previous instruction. - __ nop(); - } - codegen_->RecordPcInfo(info, info->GetDexPc()); + codegen_->MaybeRecordNativeDebugInfo(info, info->GetDexPc()); +} + +void CodeGeneratorX86_64::GenerateNop() { + __ nop(); } void LocationsBuilderX86_64::VisitLocal(HLocal* local) { @@ -3546,7 +3532,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in } else { SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64( - out.AsRegister(), type, is_div); + instruction, out.AsRegister(), type, is_div); codegen_->AddSlowPath(slow_path); CpuRegister second_reg = second.AsRegister<CpuRegister>(); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 97f6f84236..b3d27e194a 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -513,6 +513,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { } } + void GenerateNop(); + private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index c0263e4e5b..b9638f2027 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -436,17 +436,23 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << (try_boundary->IsEntry() ? 
"entry" : "exit"); } +#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) + void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetOpKind(); + } +#endif + #ifdef ART_ENABLE_CODEGEN_arm64 + void VisitArm64BitwiseNegatedRight(HArm64BitwiseNegatedRight* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetOpKind(); + } + void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind(); if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) { StartAttributeStream("shift") << instruction->GetShiftAmount(); } } - - void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE { - StartAttributeStream("kind") << instruction->GetOpKind(); - } #endif bool IsPass(const char* name) { diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 37f2d79536..82a898a9f1 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -379,7 +379,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferShl(Inducti Primitive::Type type) { // Transfer over a shift left: treat shift by restricted constant as equivalent multiplication. int64_t value = -1; - if (a != nullptr && IsIntAndGet(b, &value)) { + if (a != nullptr && IsExact(b, &value)) { // Obtain the constant needed for the multiplication. This yields an existing instruction // if the constants is already there. Otherwise, this has a side effect on the HIR. // The restriction on the shift factor avoids generating a negative constant @@ -546,14 +546,17 @@ void HInductionVarAnalysis::VisitCondition(HLoopInformation* loop, // Analyze condition with induction at left-hand-side (e.g. i < U). InductionInfo* lower_expr = a->op_b; InductionInfo* upper_expr = b; - InductionInfo* stride = a->op_a; + InductionInfo* stride_expr = a->op_a; + // Constant stride? int64_t stride_value = 0; - if (!IsIntAndGet(stride, &stride_value)) { + if (!IsExact(stride_expr, &stride_value)) { return; } - // Rewrite condition i != U into i < U or i > U if end condition is reached exactly. - if (cmp == kCondNE && ((stride_value == +1 && IsTaken(lower_expr, upper_expr, kCondLT)) || - (stride_value == -1 && IsTaken(lower_expr, upper_expr, kCondGT)))) { + // Rewrite condition i != U into strict end condition i < U or i > U if this end condition + // is reached exactly (tested by verifying if the loop has a unit stride and the non-strict + // condition would be always taken). + if (cmp == kCondNE && ((stride_value == +1 && IsTaken(lower_expr, upper_expr, kCondLE)) || + (stride_value == -1 && IsTaken(lower_expr, upper_expr, kCondGE)))) { cmp = stride_value > 0 ? 
kCondLT : kCondGT; } // Normalize a linear loop control with a nonzero stride: @@ -561,7 +564,7 @@ void HInductionVarAnalysis::VisitCondition(HLoopInformation* loop, // stride < 0, either i > U or i >= U if ((stride_value > 0 && (cmp == kCondLT || cmp == kCondLE)) || (stride_value < 0 && (cmp == kCondGT || cmp == kCondGE))) { - VisitTripCount(loop, lower_expr, upper_expr, stride, stride_value, type, cmp); + VisitTripCount(loop, lower_expr, upper_expr, stride_expr, stride_value, type, cmp); } } } @@ -569,7 +572,7 @@ void HInductionVarAnalysis::VisitCondition(HLoopInformation* loop, void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop, InductionInfo* lower_expr, InductionInfo* upper_expr, - InductionInfo* stride, + InductionInfo* stride_expr, int64_t stride_value, Primitive::Type type, IfCondition cmp) { @@ -612,9 +615,10 @@ void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop, trip_count = CreateInvariantOp(kAdd, trip_count, CreateConstant(1, type)); } // Compensate for stride. - trip_count = CreateInvariantOp(kAdd, trip_count, stride); + trip_count = CreateInvariantOp(kAdd, trip_count, stride_expr); } - trip_count = CreateInvariantOp(kDiv, CreateInvariantOp(kSub, trip_count, lower_expr), stride); + trip_count = CreateInvariantOp( + kDiv, CreateInvariantOp(kSub, trip_count, lower_expr), stride_expr); // Assign the trip-count expression to the loop control. Clients that use the information // should be aware that the expression is only valid under the conditions listed above. InductionOp tcKind = kTripCountInBodyUnsafe; // needs both tests @@ -644,14 +648,25 @@ bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr, IfCondition cmp) { int64_t lower_value; int64_t upper_value; - if (IsIntAndGet(lower_expr, &lower_value) && IsIntAndGet(upper_expr, &upper_value)) { - switch (cmp) { - case kCondLT: return lower_value < upper_value; - case kCondLE: return lower_value <= upper_value; - case kCondGT: return lower_value > upper_value; - case kCondGE: return lower_value >= upper_value; - default: LOG(FATAL) << "CONDITION UNREACHABLE"; - } + switch (cmp) { + case kCondLT: + return IsAtMost(lower_expr, &lower_value) + && IsAtLeast(upper_expr, &upper_value) + && lower_value < upper_value; + case kCondLE: + return IsAtMost(lower_expr, &lower_value) + && IsAtLeast(upper_expr, &upper_value) + && lower_value <= upper_value; + case kCondGT: + return IsAtLeast(lower_expr, &lower_value) + && IsAtMost(upper_expr, &upper_value) + && lower_value > upper_value; + case kCondGE: + return IsAtLeast(lower_expr, &lower_value) + && IsAtMost(upper_expr, &upper_value) + && lower_value >= upper_value; + default: + LOG(FATAL) << "CONDITION UNREACHABLE"; } return false; // not certain, may be untaken } @@ -660,25 +675,23 @@ bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr, int64_t stride_value, Primitive::Type type, IfCondition cmp) { - const int64_t min = type == Primitive::kPrimInt - ? std::numeric_limits<int32_t>::min() - : std::numeric_limits<int64_t>::min(); - const int64_t max = type == Primitive::kPrimInt - ? std::numeric_limits<int32_t>::max() - : std::numeric_limits<int64_t>::max(); + const int64_t min = type == Primitive::kPrimInt ? std::numeric_limits<int32_t>::min() + : std::numeric_limits<int64_t>::min(); + const int64_t max = type == Primitive::kPrimInt ? std::numeric_limits<int32_t>::max() + : std::numeric_limits<int64_t>::max(); // Some rules under which it is certain at compile-time that the loop is finite. 
int64_t value; switch (cmp) { case kCondLT: return stride_value == 1 || - (IsIntAndGet(upper_expr, &value) && value <= (max - stride_value + 1)); + (IsAtMost(upper_expr, &value) && value <= (max - stride_value + 1)); case kCondLE: - return (IsIntAndGet(upper_expr, &value) && value <= (max - stride_value)); + return (IsAtMost(upper_expr, &value) && value <= (max - stride_value)); case kCondGT: return stride_value == -1 || - (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value - 1)); + (IsAtLeast(upper_expr, &value) && value >= (min - stride_value - 1)); case kCondGE: - return (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value)); + return (IsAtLeast(upper_expr, &value) && value >= (min - stride_value)); default: LOG(FATAL) << "CONDITION UNREACHABLE"; } @@ -733,7 +746,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv // More exhaustive simplifications are done by later phases once induction nodes are // translated back into HIR code (e.g. by loop optimizations or BCE). int64_t value = -1; - if (IsIntAndGet(a, &value)) { + if (IsExact(a, &value)) { if (value == 0) { // Simplify 0 + b = b, 0 * b = 0. if (op == kAdd) { @@ -750,7 +763,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv } } } - if (IsIntAndGet(b, &value)) { + if (IsExact(b, &value)) { if (value == 0) { // Simplify a + 0 = a, a - 0 = a, a * 0 = 0, -0 = 0. if (op == kAdd || op == kSub) { @@ -784,29 +797,16 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr); } -bool HInductionVarAnalysis::IsIntAndGet(InductionInfo* info, int64_t* value) { - if (info != nullptr && info->induction_class == kInvariant) { - // A direct constant fetch. - if (info->operation == kFetch) { - DCHECK(info->fetch); - if (info->fetch->IsIntConstant()) { - *value = info->fetch->AsIntConstant()->GetValue(); - return true; - } else if (info->fetch->IsLongConstant()) { - *value = info->fetch->AsLongConstant()->GetValue(); - return true; - } - } - // Use range analysis to resolve compound values. - InductionVarRange range(this); - int32_t min_val = 0; - int32_t max_val = 0; - if (range.IsConstantRange(info, &min_val, &max_val) && min_val == max_val) { - *value = min_val; - return true; - } - } - return false; +bool HInductionVarAnalysis::IsExact(InductionInfo* info, int64_t* value) { + return InductionVarRange(this).IsConstant(info, InductionVarRange::kExact, value); +} + +bool HInductionVarAnalysis::IsAtMost(InductionInfo* info, int64_t* value) { + return InductionVarRange(this).IsConstant(info, InductionVarRange::kAtMost, value); +} + +bool HInductionVarAnalysis::IsAtLeast(InductionInfo* info, int64_t* value) { + return InductionVarRange(this).IsConstant(info, InductionVarRange::kAtLeast, value); } bool HInductionVarAnalysis::InductionEqual(InductionInfo* info1, diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index 84d5d82568..94d2646aec 100644 --- a/compiler/optimizing/induction_var_analysis.h +++ b/compiler/optimizing/induction_var_analysis.h @@ -189,7 +189,9 @@ class HInductionVarAnalysis : public HOptimization { InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b); // Constants. 
- bool IsIntAndGet(InductionInfo* info, int64_t* value); + bool IsExact(InductionInfo* info, /*out*/ int64_t* value); + bool IsAtMost(InductionInfo* info, /*out*/ int64_t* value); + bool IsAtLeast(InductionInfo* info, /*out*/ int64_t* value); // Helpers. static bool InductionEqual(InductionInfo* info1, InductionInfo* info2); diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 9566c29adf..f9b6910acd 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -45,17 +45,14 @@ static bool IsSafeDiv(int32_t c1, int32_t c2) { return c2 != 0 && CanLongValueFitIntoInt(static_cast<int64_t>(c1) / static_cast<int64_t>(c2)); } -/** Returns true for 32/64-bit integral constant. */ -static bool IsIntAndGet(HInstruction* instruction, int32_t* value) { +/** Returns true for 32/64-bit constant instruction. */ +static bool IsIntAndGet(HInstruction* instruction, int64_t* value) { if (instruction->IsIntConstant()) { *value = instruction->AsIntConstant()->GetValue(); return true; } else if (instruction->IsLongConstant()) { - const int64_t c = instruction->AsLongConstant()->GetValue(); - if (CanLongValueFitIntoInt(c)) { - *value = static_cast<int32_t>(c); - return true; - } + *value = instruction->AsLongConstant()->GetValue(); + return true; } return false; } @@ -65,8 +62,9 @@ static bool IsIntAndGet(HInstruction* instruction, int32_t* value) { * because length >= 0 is true. This makes it more likely the bound is useful to clients. */ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) { - int32_t value; - if (v.a_constant > 1 && + int64_t value; + if (v.is_known && + v.a_constant > 1 && v.instruction->IsDiv() && v.instruction->InputAt(0)->IsArrayLength() && IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) { @@ -75,6 +73,16 @@ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) { return v; } +/** Helper method to test for a constant value. */ +static bool IsConstantValue(InductionVarRange::Value v) { + return v.is_known && v.a_constant == 0; +} + +/** Helper method to test for same constant value. */ +static bool IsSameConstantValue(InductionVarRange::Value v1, InductionVarRange::Value v2) { + return IsConstantValue(v1) && IsConstantValue(v2) && v1.b_constant == v2.b_constant; +} + /** Helper method to insert an instruction. */ static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { DCHECK(block != nullptr); @@ -99,29 +107,45 @@ bool InductionVarRange::GetInductionRange(HInstruction* context, /*out*/Value* max_val, /*out*/bool* needs_finite_test) { HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop != nullptr) { - // Set up loop information. - HBasicBlock* header = loop->GetHeader(); - bool in_body = context->GetBlock() != header; - HInductionVarAnalysis::InductionInfo* info = - induction_analysis_->LookupInfo(loop, instruction); - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); - // Find range. 
- *min_val = GetVal(info, trip, in_body, /* is_min */ true); - *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false)); - *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); - return true; + if (loop == nullptr) { + return false; // no loop + } + HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction); + if (info == nullptr) { + return false; // no induction information } - return false; // Nothing known + // Set up loop information. + HBasicBlock* header = loop->GetHeader(); + bool in_body = context->GetBlock() != header; + HInductionVarAnalysis::InductionInfo* trip = + induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); + // Find range. + *min_val = GetVal(info, trip, in_body, /* is_min */ true); + *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false)); + *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); + return true; } -bool InductionVarRange::RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const { - Value v1 = RefineOuter(*min_val, /* is_min */ true); - Value v2 = RefineOuter(*max_val, /* is_min */ false); - if (v1.instruction != min_val->instruction || v2.instruction != max_val->instruction) { - *min_val = v1; - *max_val = v2; +bool InductionVarRange::RefineOuter(/*in-out*/ Value* min_val, + /*in-out*/ Value* max_val) const { + Value v1_min = RefineOuter(*min_val, /* is_min */ true); + Value v2_max = RefineOuter(*max_val, /* is_min */ false); + // The refined range is safe if both sides refine the same instruction. Otherwise, since two + // different ranges are combined, the new refined range is safe to pass back to the client if + // the extremes of the computed ranges ensure no arithmetic wrap-around anomalies occur. + if (min_val->instruction != max_val->instruction) { + Value v1_max = RefineOuter(*min_val, /* is_min */ false); + Value v2_min = RefineOuter(*max_val, /* is_min */ true); + if (!IsConstantValue(v1_max) || + !IsConstantValue(v2_min) || + v1_max.b_constant > v2_min.b_constant) { + return false; + } + } + // Did something change? + if (v1_min.instruction != min_val->instruction || v2_max.instruction != max_val->instruction) { + *min_val = v1_min; + *max_val = v2_max; return true; } return false; @@ -164,6 +188,46 @@ void InductionVarRange::GenerateTakenTest(HInstruction* context, // Private class methods. // +bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, + ConstantRequest request, + /*out*/ int64_t *value) const { + if (info != nullptr) { + // A direct 32-bit or 64-bit constant fetch. This immediately satisfies + // any of the three requests (kExact, kAtMost, and KAtLeast). + if (info->induction_class == HInductionVarAnalysis::kInvariant && + info->operation == HInductionVarAnalysis::kFetch) { + if (IsIntAndGet(info->fetch, value)) { + return true; + } + } + // Try range analysis while traversing outward on loops. + bool in_body = true; // no known trip count + Value v_min = GetVal(info, nullptr, in_body, /* is_min */ true); + Value v_max = GetVal(info, nullptr, in_body, /* is_min */ false); + do { + // Make sure *both* extremes are known to avoid arithmetic wrap-around anomalies. 
+ if (IsConstantValue(v_min) && IsConstantValue(v_max) && v_min.b_constant <= v_max.b_constant) { + if ((request == kExact && v_min.b_constant == v_max.b_constant) || request == kAtMost) { + *value = v_max.b_constant; + return true; + } else if (request == kAtLeast) { + *value = v_min.b_constant; + return true; + } + } + } while (RefineOuter(&v_min, &v_max)); + // Exploit array length + c >= c, with c <= 0 to avoid arithmetic wrap-around anomalies + // (e.g. array length == maxint and c == 1 would yield minint). + if (request == kAtLeast) { + if (v_min.a_constant == 1 && v_min.b_constant <= 0 && v_min.instruction->IsArrayLength()) { + *value = v_min.b_constant; + return true; + } + } + } + return false; +} + bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const { if (info != nullptr) { if (info->induction_class == HInductionVarAnalysis::kLinear) { @@ -206,12 +270,10 @@ InductionVarRange::Value InductionVarRange::GetLinear(HInductionVarAnalysis::Ind if (trip != nullptr) { HInductionVarAnalysis::InductionInfo* trip_expr = trip->op_a; if (trip_expr->operation == HInductionVarAnalysis::kSub) { - int32_t min_value = 0; - int32_t stride_value = 0; - if (IsConstantRange(info->op_a, &min_value, &stride_value) && min_value == stride_value) { + int64_t stride_value = 0; + if (IsConstant(info->op_a, kExact, &stride_value)) { if (!is_min && stride_value == 1) { - // Test original trip's negative operand (trip_expr->op_b) against - // the offset of the linear induction. + // Test original trip's negative operand (trip_expr->op_b) against offset of induction. if (HInductionVarAnalysis::InductionEqual(trip_expr->op_b, info->op_b)) { // Analyze cancelled trip with just the positive operand (trip_expr->op_a). HInductionVarAnalysis::InductionInfo cancelled_trip( @@ -219,8 +281,7 @@ InductionVarRange::Value InductionVarRange::GetLinear(HInductionVarAnalysis::Ind return GetVal(&cancelled_trip, trip, in_body, is_min); } } else if (is_min && stride_value == -1) { - // Test original trip's positive operand (trip_expr->op_a) against - // the offset of the linear induction. + // Test original trip's positive operand (trip_expr->op_a) against offset of induction. if (HInductionVarAnalysis::InductionEqual(trip_expr->op_a, info->op_b)) { // Analyze cancelled trip with just the negative operand (trip_expr->op_b). HInductionVarAnalysis::InductionInfo neg( @@ -248,14 +309,16 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, bool is_min) const { // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes // more likely range analysis will compare the same instructions as terminal nodes. 
- int32_t value; - if (IsIntAndGet(instruction, &value)) { - return Value(value); + int64_t value; + if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) { + return Value(static_cast<int32_t>(value)); } else if (instruction->IsAdd()) { - if (IsIntAndGet(instruction->InputAt(0), &value)) { - return AddValue(Value(value), GetFetch(instruction->InputAt(1), trip, in_body, is_min)); - } else if (IsIntAndGet(instruction->InputAt(1), &value)) { - return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(value)); + if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) { + return AddValue(Value(static_cast<int32_t>(value)), + GetFetch(instruction->InputAt(1), trip, in_body, is_min)); + } else if (IsIntAndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) { + return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), + Value(static_cast<int32_t>(value))); } } else if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min); @@ -331,29 +394,30 @@ InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::Induct Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); - // Try to refine certain failure. - if (v1_min.a_constant && v1_max.a_constant) { - v1_min = RefineOuter(v1_min, /* is_min */ true); - v1_max = RefineOuter(v1_max, /* is_min */ false); - } - // Positive or negative range? - if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) { - // Positive range vs. positive or negative range. - if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { - return is_min ? MulValue(v1_min, v2_min) - : MulValue(v1_max, v2_max); - } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) { - return is_min ? MulValue(v1_max, v2_min) - : MulValue(v1_min, v2_max); + // Try to refine first operand. + if (!IsConstantValue(v1_min) && !IsConstantValue(v1_max)) { + RefineOuter(&v1_min, &v1_max); + } + // Constant times range. + if (IsSameConstantValue(v1_min, v1_max)) { + return MulRangeAndConstant(v2_min, v2_max, v1_min, is_min); + } else if (IsSameConstantValue(v2_min, v2_max)) { + return MulRangeAndConstant(v1_min, v1_max, v2_min, is_min); + } + // Positive range vs. positive or negative range. + if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) { + if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { + return is_min ? MulValue(v1_min, v2_min) : MulValue(v1_max, v2_max); + } else if (IsConstantValue(v2_max) && v2_max.b_constant <= 0) { + return is_min ? MulValue(v1_max, v2_min) : MulValue(v1_min, v2_max); } - } else if (v1_max.is_known && v1_max.a_constant == 0 && v1_max.b_constant <= 0) { - // Negative range vs. positive or negative range. - if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { - return is_min ? MulValue(v1_min, v2_max) - : MulValue(v1_max, v2_min); - } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) { - return is_min ? MulValue(v1_max, v2_max) - : MulValue(v1_min, v2_min); + } + // Negative range vs. positive or negative range. + if (IsConstantValue(v1_max) && v1_max.b_constant <= 0) { + if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { + return is_min ? 
MulValue(v1_min, v2_max) : MulValue(v1_max, v2_min); + } else if (IsConstantValue(v2_max) && v2_max.b_constant <= 0) { + return is_min ? MulValue(v1_max, v2_max) : MulValue(v1_min, v2_min); } } return Value(); @@ -368,43 +432,41 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); - // Positive or negative range? - if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) { - // Positive range vs. positive or negative range. - if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { - return is_min ? DivValue(v1_min, v2_max) - : DivValue(v1_max, v2_min); - } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) { - return is_min ? DivValue(v1_max, v2_max) - : DivValue(v1_min, v2_min); + // Range divided by constant. + if (IsSameConstantValue(v2_min, v2_max)) { + return DivRangeAndConstant(v1_min, v1_max, v2_min, is_min); + } + // Positive range vs. positive or negative range. + if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) { + if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { + return is_min ? DivValue(v1_min, v2_max) : DivValue(v1_max, v2_min); + } else if (IsConstantValue(v2_max) && v2_max.b_constant <= 0) { + return is_min ? DivValue(v1_max, v2_max) : DivValue(v1_min, v2_min); } - } else if (v1_max.is_known && v1_max.a_constant == 0 && v1_max.b_constant <= 0) { - // Negative range vs. positive or negative range. - if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { - return is_min ? DivValue(v1_min, v2_min) - : DivValue(v1_max, v2_max); - } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) { - return is_min ? DivValue(v1_max, v2_min) - : DivValue(v1_min, v2_max); + } + // Negative range vs. positive or negative range. + if (IsConstantValue(v1_max) && v1_max.b_constant <= 0) { + if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { + return is_min ? DivValue(v1_min, v2_min) : DivValue(v1_max, v2_max); + } else if (IsConstantValue(v2_max) && v2_max.b_constant <= 0) { + return is_min ? DivValue(v1_max, v2_min) : DivValue(v1_min, v2_max); } } return Value(); } -bool InductionVarRange::IsConstantRange(HInductionVarAnalysis::InductionInfo* info, - int32_t *min_value, - int32_t *max_value) const { - bool in_body = true; // no known trip count - Value v_min = GetVal(info, nullptr, in_body, /* is_min */ true); - Value v_max = GetVal(info, nullptr, in_body, /* is_min */ false); - do { - if (v_min.is_known && v_min.a_constant == 0 && v_max.is_known && v_max.a_constant == 0) { - *min_value = v_min.b_constant; - *max_value = v_max.b_constant; - return true; - } - } while (RefineOuter(&v_min, &v_max)); - return false; +InductionVarRange::Value InductionVarRange::MulRangeAndConstant(Value v_min, + Value v_max, + Value c, + bool is_min) const { + return is_min == (c.b_constant >= 0) ? MulValue(v_min, c) : MulValue(v_max, c); +} + +InductionVarRange::Value InductionVarRange::DivRangeAndConstant(Value v_min, + Value v_max, + Value c, + bool is_min) const { + return is_min == (c.b_constant >= 0) ? 
DivValue(v_min, c) : DivValue(v_max, c); } InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const { @@ -471,22 +533,25 @@ InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is } InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) const { - if (v.instruction != nullptr) { - HLoopInformation* loop = - v.instruction->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop != nullptr) { - // Set up loop information. - bool in_body = true; // use is always in body of outer loop - HInductionVarAnalysis::InductionInfo* info = - induction_analysis_->LookupInfo(loop, v.instruction); - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, loop->GetHeader()->GetLastInstruction()); - // Try to refine "a x instruction + b" with outer loop range information on instruction. - return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)), - Value(v.b_constant)); - } + if (v.instruction == nullptr) { + return v; // nothing to refine } - return v; + HLoopInformation* loop = + v.instruction->GetBlock()->GetLoopInformation(); // closest enveloping loop + if (loop == nullptr) { + return v; // no loop + } + HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, v.instruction); + if (info == nullptr) { + return v; // no induction information + } + // Set up loop information. + HBasicBlock* header = loop->GetHeader(); + bool in_body = true; // inner always in more outer + HInductionVarAnalysis::InductionInfo* trip = + induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); + // Try to refine "a x instruction + b" with outer loop range information on instruction. + return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)), Value(v.b_constant)); } bool InductionVarRange::GenerateCode(HInstruction* context, @@ -499,44 +564,45 @@ bool InductionVarRange::GenerateCode(HInstruction* context, /*out*/bool* needs_finite_test, /*out*/bool* needs_taken_test) const { HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop != nullptr) { - // Set up loop information. - HBasicBlock* header = loop->GetHeader(); - bool in_body = context->GetBlock() != header; - HInductionVarAnalysis::InductionInfo* info = - induction_analysis_->LookupInfo(loop, instruction); - if (info == nullptr) { - return false; // nothing to analyze - } - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); - // Determine what tests are needed. A finite test is needed if the evaluation code uses the - // trip-count and the loop maybe unsafe (because in such cases, the index could "overshoot" - // the computed range). A taken test is needed for any unknown trip-count, even if evaluation - // code does not use the trip-count explicitly (since there could be an implicit relation - // between e.g. an invariant subscript and a not-taken condition). - *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); - *needs_taken_test = IsBodyTripCount(trip); - // Code generation for taken test: generate the code when requested or otherwise analyze - // if code generation is feasible when taken test is needed. 
- if (taken_test != nullptr) { - return GenerateCode( - trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false); - } else if (*needs_taken_test) { - if (!GenerateCode( - trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) { - return false; - } + if (loop == nullptr) { + return false; // no loop + } + HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction); + if (info == nullptr) { + return false; // no induction information + } + // Set up loop information. + HBasicBlock* header = loop->GetHeader(); + bool in_body = context->GetBlock() != header; + HInductionVarAnalysis::InductionInfo* trip = + induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); + if (trip == nullptr) { + return false; // codegen relies on trip count + } + // Determine what tests are needed. A finite test is needed if the evaluation code uses the + // trip-count and the loop maybe unsafe (because in such cases, the index could "overshoot" + // the computed range). A taken test is needed for any unknown trip-count, even if evaluation + // code does not use the trip-count explicitly (since there could be an implicit relation + // between e.g. an invariant subscript and a not-taken condition). + *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); + *needs_taken_test = IsBodyTripCount(trip); + // Code generation for taken test: generate the code when requested or otherwise analyze + // if code generation is feasible when taken test is needed. + if (taken_test != nullptr) { + return GenerateCode(trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false); + } else if (*needs_taken_test) { + if (!GenerateCode( + trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) { + return false; } - // Code generation for lower and upper. - return - // Success on lower if invariant (not set), or code can be generated. - ((info->induction_class == HInductionVarAnalysis::kInvariant) || - GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) && - // And success on upper. - GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false); } - return false; + // Code generation for lower and upper. + return + // Success on lower if invariant (not set), or code can be generated. + ((info->induction_class == HInductionVarAnalysis::kInvariant) || + GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) && + // And success on upper. + GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false); } bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, @@ -639,9 +705,8 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, case HInductionVarAnalysis::kLinear: { // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only // to avoid arithmetic wrap-around situations that are hard to guard against. - int32_t min_value = 0; - int32_t stride_value = 0; - if (IsConstantRange(info->op_a, &min_value, &stride_value) && min_value == stride_value) { + int64_t stride_value = 0; + if (IsConstant(info->op_a, kExact, &stride_value)) { if (stride_value == 1 || stride_value == -1) { const bool is_min_a = stride_value == 1 ? 
is_min : !is_min; if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && @@ -666,7 +731,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, // Wrap-around and periodic inductions are restricted to constants only, so that extreme // values are easy to test at runtime without complications of arithmetic wrap-around. Value extreme = GetVal(info, trip, in_body, is_min); - if (extreme.is_known && extreme.a_constant == 0) { + if (IsConstantValue(extreme)) { if (graph != nullptr) { *result = graph->GetIntConstant(extreme.b_constant); } diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 3cb7b4bfd5..0af41560ff 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -69,7 +69,8 @@ class InductionVarRange { /*out*/ bool* needs_finite_test); /** Refines the values with induction of next outer loop. Returns true on change. */ - bool RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const; + bool RefineOuter(/*in-out*/ Value* min_val, + /*in-out*/ Value* max_val) const; /** * Returns true if range analysis is able to generate code for the lower and upper @@ -116,6 +117,23 @@ class InductionVarRange { /*out*/ HInstruction** taken_test); private: + /* + * Enum used in IsConstant() request. + */ + enum ConstantRequest { + kExact, + kAtMost, + kAtLeast + }; + + /** + * Returns true if exact or upper/lower bound on the given induction + * information is known as a 64-bit constant, which is returned in value. + */ + bool IsConstant(HInductionVarAnalysis::InductionInfo* info, + ConstantRequest request, + /*out*/ int64_t *value) const; + bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const; bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const; bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const; @@ -143,9 +161,8 @@ class InductionVarRange { bool in_body, bool is_min) const; - bool IsConstantRange(HInductionVarAnalysis::InductionInfo* info, - int32_t *min_value, - int32_t *max_value) const; + Value MulRangeAndConstant(Value v1, Value v2, Value c, bool is_min) const; + Value DivRangeAndConstant(Value v1, Value v2, Value c, bool is_min) const; Value AddValue(Value v1, Value v2) const; Value SubValue(Value v1, Value v2) const; diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index 55a654e301..c5c33bd9bc 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -215,10 +215,16 @@ class InductionVarRangeTest : public CommonCompilerTest { return range_.GetDiv(info1, info2, nullptr, /* in_body */ true, is_min); } - bool IsConstantRange(HInductionVarAnalysis::InductionInfo* info, - int32_t* min_value, - int32_t* max_value) { - return range_.IsConstantRange(info, min_value, max_value); + bool IsExact(HInductionVarAnalysis::InductionInfo* info, int64_t* value) { + return range_.IsConstant(info, InductionVarRange::kExact, value); + } + + bool IsAtMost(HInductionVarAnalysis::InductionInfo* info, int64_t* value) { + return range_.IsConstant(info, InductionVarRange::kAtMost, value); + } + + bool IsAtLeast(HInductionVarAnalysis::InductionInfo* info, int64_t* value) { + return range_.IsConstant(info, InductionVarRange::kAtLeast, value); } Value AddValue(Value v1, Value v2) { return range_.AddValue(v1, v2); } @@ -249,6 +255,34 @@ class InductionVarRangeTest : public CommonCompilerTest { // 
Tests on private methods. // +TEST_F(InductionVarRangeTest, IsConstant) { + int64_t value; + // Constant. + EXPECT_TRUE(IsExact(CreateConst(12345), &value)); + EXPECT_EQ(12345, value); + EXPECT_TRUE(IsAtMost(CreateConst(12345), &value)); + EXPECT_EQ(12345, value); + EXPECT_TRUE(IsAtLeast(CreateConst(12345), &value)); + EXPECT_EQ(12345, value); + // Constant trivial range. + EXPECT_TRUE(IsExact(CreateRange(111, 111), &value)); + EXPECT_EQ(111, value); + EXPECT_TRUE(IsAtMost(CreateRange(111, 111), &value)); + EXPECT_EQ(111, value); + EXPECT_TRUE(IsAtLeast(CreateRange(111, 111), &value)); + EXPECT_EQ(111, value); + // Constant non-trivial range. + EXPECT_FALSE(IsExact(CreateRange(11, 22), &value)); + EXPECT_TRUE(IsAtMost(CreateRange(11, 22), &value)); + EXPECT_EQ(22, value); + EXPECT_TRUE(IsAtLeast(CreateRange(11, 22), &value)); + EXPECT_EQ(11, value); + // Symbolic. + EXPECT_FALSE(IsExact(CreateFetch(x_), &value)); + EXPECT_FALSE(IsAtMost(CreateFetch(x_), &value)); + EXPECT_FALSE(IsAtLeast(CreateFetch(x_), &value)); +} + TEST_F(InductionVarRangeTest, TripCountProperties) { EXPECT_FALSE(NeedsTripCount(nullptr)); EXPECT_FALSE(NeedsTripCount(CreateConst(1))); @@ -367,6 +401,10 @@ TEST_F(InductionVarRangeTest, GetMinMaxPeriodic) { } TEST_F(InductionVarRangeTest, GetMulMin) { + ExpectEqual(Value(-14), GetMul(CreateConst(2), CreateRange(-7, 8), true)); + ExpectEqual(Value(-16), GetMul(CreateConst(-2), CreateRange(-7, 8), true)); + ExpectEqual(Value(-14), GetMul(CreateRange(-7, 8), CreateConst(2), true)); + ExpectEqual(Value(-16), GetMul(CreateRange(-7, 8), CreateConst(-2), true)); ExpectEqual(Value(6), GetMul(CreateRange(2, 10), CreateRange(3, 5), true)); ExpectEqual(Value(-50), GetMul(CreateRange(2, 10), CreateRange(-5, -3), true)); ExpectEqual(Value(), GetMul(CreateRange(2, 10), CreateRange(-1, 1), true)); @@ -379,6 +417,10 @@ TEST_F(InductionVarRangeTest, GetMulMin) { } TEST_F(InductionVarRangeTest, GetMulMax) { + ExpectEqual(Value(16), GetMul(CreateConst(2), CreateRange(-7, 8), false)); + ExpectEqual(Value(14), GetMul(CreateConst(-2), CreateRange(-7, 8), false)); + ExpectEqual(Value(16), GetMul(CreateRange(-7, 8), CreateConst(2), false)); + ExpectEqual(Value(14), GetMul(CreateRange(-7, 8), CreateConst(-2), false)); ExpectEqual(Value(50), GetMul(CreateRange(2, 10), CreateRange(3, 5), false)); ExpectEqual(Value(-6), GetMul(CreateRange(2, 10), CreateRange(-5, -3), false)); ExpectEqual(Value(), GetMul(CreateRange(2, 10), CreateRange(-1, 1), false)); @@ -391,6 +433,8 @@ TEST_F(InductionVarRangeTest, GetMulMax) { } TEST_F(InductionVarRangeTest, GetDivMin) { + ExpectEqual(Value(-5), GetDiv(CreateRange(-10, 20), CreateConst(2), true)); + ExpectEqual(Value(-10), GetDiv(CreateRange(-10, 20), CreateConst(-2), true)); ExpectEqual(Value(10), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), true)); ExpectEqual(Value(-500), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), true)); ExpectEqual(Value(), GetDiv(CreateRange(40, 1000), CreateRange(-1, 1), true)); @@ -403,6 +447,8 @@ TEST_F(InductionVarRangeTest, GetDivMin) { } TEST_F(InductionVarRangeTest, GetDivMax) { + ExpectEqual(Value(10), GetDiv(CreateRange(-10, 20), CreateConst(2), false)); + ExpectEqual(Value(5), GetDiv(CreateRange(-10, 20), CreateConst(-2), false)); ExpectEqual(Value(500), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), false)); ExpectEqual(Value(-10), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), false)); ExpectEqual(Value(), GetDiv(CreateRange(40, 1000), CreateRange(-1, 1), false)); @@ -414,18 +460,6 @@ 
TEST_F(InductionVarRangeTest, GetDivMax) { ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1, 1), false)); } -TEST_F(InductionVarRangeTest, IsConstantRange) { - int32_t min_value; - int32_t max_value; - ASSERT_TRUE(IsConstantRange(CreateConst(12345), &min_value, &max_value)); - EXPECT_EQ(12345, min_value); - EXPECT_EQ(12345, max_value); - ASSERT_TRUE(IsConstantRange(CreateRange(1, 2), &min_value, &max_value)); - EXPECT_EQ(1, min_value); - EXPECT_EQ(2, max_value); - EXPECT_FALSE(IsConstantRange(CreateFetch(x_), &min_value, &max_value)); -} - TEST_F(InductionVarRangeTest, AddValue) { ExpectEqual(Value(110), AddValue(Value(10), Value(100))); ExpectEqual(Value(-5), AddValue(Value(x_, 1, -4), Value(x_, -1, -1))); @@ -459,6 +493,24 @@ TEST_F(InductionVarRangeTest, MulValue) { ExpectEqual(Value(), MulValue(Value(90000), Value(-90000))); // unsafe } +TEST_F(InductionVarRangeTest, MulValueSpecial) { + const int32_t min_value = std::numeric_limits<int32_t>::min(); + const int32_t max_value = std::numeric_limits<int32_t>::max(); + + // Unsafe. + ExpectEqual(Value(), MulValue(Value(min_value), Value(min_value))); + ExpectEqual(Value(), MulValue(Value(min_value), Value(-1))); + ExpectEqual(Value(), MulValue(Value(min_value), Value(max_value))); + ExpectEqual(Value(), MulValue(Value(max_value), Value(max_value))); + + // Safe. + ExpectEqual(Value(min_value), MulValue(Value(min_value), Value(1))); + ExpectEqual(Value(max_value), MulValue(Value(max_value), Value(1))); + ExpectEqual(Value(-max_value), MulValue(Value(max_value), Value(-1))); + ExpectEqual(Value(-1), MulValue(Value(1), Value(-1))); + ExpectEqual(Value(1), MulValue(Value(-1), Value(-1))); +} + TEST_F(InductionVarRangeTest, DivValue) { ExpectEqual(Value(25), DivValue(Value(100), Value(4))); ExpectEqual(Value(), DivValue(Value(x_, 1, -4), Value(x_, 1, -1))); @@ -468,6 +520,23 @@ TEST_F(InductionVarRangeTest, DivValue) { ExpectEqual(Value(), DivValue(Value(1), Value(0))); // unsafe } +TEST_F(InductionVarRangeTest, DivValueSpecial) { + const int32_t min_value = std::numeric_limits<int32_t>::min(); + const int32_t max_value = std::numeric_limits<int32_t>::max(); + + // Unsafe. + ExpectEqual(Value(), DivValue(Value(min_value), Value(-1))); + + // Safe. + ExpectEqual(Value(1), DivValue(Value(min_value), Value(min_value))); + ExpectEqual(Value(1), DivValue(Value(max_value), Value(max_value))); + ExpectEqual(Value(min_value), DivValue(Value(min_value), Value(1))); + ExpectEqual(Value(max_value), DivValue(Value(max_value), Value(1))); + ExpectEqual(Value(-max_value), DivValue(Value(max_value), Value(-1))); + ExpectEqual(Value(-1), DivValue(Value(1), Value(-1))); + ExpectEqual(Value(1), DivValue(Value(-1), Value(-1))); +} + TEST_F(InductionVarRangeTest, MinValue) { ExpectEqual(Value(10), MinValue(Value(10), Value(100))); ExpectEqual(Value(x_, 1, -4), MinValue(Value(x_, 1, -4), Value(x_, 1, -1))); diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc new file mode 100644 index 0000000000..db1f9a79aa --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
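The MulValueSpecial and DivValueSpecial tests above pin down which extreme 32-bit products and quotients the analysis must refuse to fold: INT32_MIN * -1 and INT32_MIN / -1 overflow, while multiplying or dividing by 1, and the symmetric -1 cases, stay representable. A standalone sketch of that safety check, performing the multiplication in 64 bits to detect overflow, is shown below; SafeMul and SafeDiv are illustrative names, not the ART helpers.

#include <cassert>
#include <cstdint>
#include <limits>

// Returns true and writes the product only when the result fits in int32_t;
// otherwise reports "unsafe", like the empty Value() results in the tests.
bool SafeMul(int32_t a, int32_t b, int32_t* out) {
  int64_t wide = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  if (wide < std::numeric_limits<int32_t>::min() ||
      wide > std::numeric_limits<int32_t>::max()) {
    return false;  // e.g. INT32_MIN * -1 or INT32_MAX * INT32_MAX
  }
  *out = static_cast<int32_t>(wide);
  return true;
}

bool SafeDiv(int32_t a, int32_t b, int32_t* out) {
  if (b == 0) return false;  // division by zero
  if (a == std::numeric_limits<int32_t>::min() && b == -1) {
    return false;            // the only overflowing int32 division
  }
  *out = a / b;
  return true;
}

int main() {
  const int32_t min = std::numeric_limits<int32_t>::min();
  const int32_t max = std::numeric_limits<int32_t>::max();
  int32_t r;
  assert(!SafeMul(min, -1, &r));                 // unsafe
  assert(SafeMul(max, -1, &r) && r == -max);     // safe
  assert(!SafeDiv(min, -1, &r));                 // unsafe
  assert(SafeDiv(-1, -1, &r) && r == 1);         // safe
  return 0;
}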
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "instruction_simplifier_arm.h" +#include "instruction_simplifier_shared.h" + +namespace art { +namespace arm { + +void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) { + if (TryCombineMultiplyAccumulate(instruction, kArm)) { + RecordSimplification(); + } +} + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h new file mode 100644 index 0000000000..379b95d6ae --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { +namespace arm { + +class InstructionSimplifierArmVisitor : public HGraphVisitor { + public: + InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), stats_(stats) {} + + private: + void RecordSimplification() { + if (stats_ != nullptr) { + stats_->RecordStat(kInstructionSimplificationsArch); + } + } + + void VisitMul(HMul* instruction) OVERRIDE; + + OptimizingCompilerStats* stats_; +}; + + +class InstructionSimplifierArm : public HOptimization { + public: + InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, "instruction_simplifier_arm", stats) {} + + void Run() OVERRIDE { + InstructionSimplifierArmVisitor visitor(graph_, stats_); + visitor.VisitReversePostOrder(); + } +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 4bcfc54791..c2bbdccc29 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -17,6 +17,7 @@ #include "instruction_simplifier_arm64.h" #include "common_arm64.h" +#include "instruction_simplifier_shared.h" #include "mirror/array-inl.h" namespace art { @@ -179,67 +180,53 @@ bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruc return true; } -bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns( - HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) { - DCHECK(Primitive::IsIntOrLongType(mul->GetType())); - DCHECK(input_binop->IsAdd() || input_binop->IsSub()); - 
DCHECK_NE(input_binop, input_other); - if (!input_binop->HasOnlyOneNonEnvironmentUse()) { - return false; - } - - // Try to interpret patterns like - // a * (b <+/-> 1) - // as - // (a * b) <+/-> a - HInstruction* input_a = input_other; - HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. - HInstruction::InstructionKind op_kind; - - if (input_binop->IsAdd()) { - if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) { - // Interpret - // a * (b + 1) - // as - // (a * b) + a - input_b = input_binop->GetLeastConstantLeft(); - op_kind = HInstruction::kAdd; - } - } else { - DCHECK(input_binop->IsSub()); - if (input_binop->GetRight()->IsConstant() && - input_binop->GetRight()->AsConstant()->IsMinusOne()) { - // Interpret - // a * (b - (-1)) - // as - // a + (a * b) - input_b = input_binop->GetLeft(); - op_kind = HInstruction::kAdd; - } else if (input_binop->GetLeft()->IsConstant() && - input_binop->GetLeft()->AsConstant()->IsOne()) { - // Interpret - // a * (1 - b) - // as - // a - (a * b) - input_b = input_binop->GetRight(); - op_kind = HInstruction::kSub; +bool InstructionSimplifierArm64Visitor::TryMergeNegatedInput(HBinaryOperation* op) { + DCHECK(op->IsAnd() || op->IsOr() || op->IsXor()) << op->DebugName(); + HInstruction* left = op->GetLeft(); + HInstruction* right = op->GetRight(); + + // Only consider the case where there is exactly one Not, with 2 Not's De + // Morgan's laws should be applied instead. + if (left->IsNot() ^ right->IsNot()) { + HInstruction* hnot = (left->IsNot() ? left : right); + HInstruction* hother = (left->IsNot() ? right : left); + + // Only do the simplification if the Not has only one use and can thus be + // safely removed. Even though ARM64 negated bitwise operations do not have + // an immediate variant (only register), we still do the simplification when + // `hother` is a constant, because it removes an instruction if the constant + // cannot be encoded as an immediate: + // mov r0, #large_constant + // neg r2, r1 + // and r0, r0, r2 + // becomes: + // mov r0, #large_constant + // bic r0, r0, r1 + if (hnot->HasOnlyOneNonEnvironmentUse()) { + // Replace code looking like + // NOT tmp, mask + // AND dst, src, tmp (respectively ORR, EOR) + // with + // BIC dst, src, mask (respectively ORN, EON) + HInstruction* src = hnot->AsNot()->GetInput(); + + HArm64BitwiseNegatedRight* neg_op = new (GetGraph()->GetArena()) + HArm64BitwiseNegatedRight(op->GetType(), op->GetKind(), hother, src, op->GetDexPc()); + + op->GetBlock()->ReplaceAndRemoveInstructionWith(op, neg_op); + hnot->GetBlock()->RemoveInstruction(hnot); + RecordSimplification(); + return true; } } - if (input_b == nullptr) { - // We did not find a pattern we can optimize. 
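TryMergeNegatedInput above folds a single Not into its And/Or/Xor user, the pattern the ARM64 BIC/ORN/EON forms compute directly; when both operands are negated, De Morgan's laws are the better rewrite, which is why exactly one Not is required. A small self-contained check of the arithmetic involved (plain C++ with illustrative helper names) follows.

#include <cassert>
#include <cstdint>

// Two-instruction form the simplifier starts from: a NOT feeding a bitwise op.
uint32_t NotThenAnd(uint32_t src, uint32_t mask) {
  uint32_t tmp = ~mask;  // NOT tmp, mask
  return src & tmp;      // AND dst, src, tmp
}

// The fused form it rewrites to (what a BIC-style instruction computes).
uint32_t AndNot(uint32_t src, uint32_t mask) {
  return src & ~mask;    // BIC dst, src, mask
}

int main() {
  const uint32_t src = 0x12345678u;
  const uint32_t mask = 0x0000ffffu;
  assert(NotThenAnd(src, mask) == AndNot(src, mask));

  // With two negated inputs, De Morgan's laws apply instead of BIC/ORN:
  assert((~src & ~mask) == ~(src | mask));
  assert((~src | ~mask) == ~(src & mask));
  return 0;
}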
- return false; - } - - HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate( - mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); - - mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); - input_binop->GetBlock()->RemoveInstruction(input_binop); - return false; } +void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) { + TryMergeNegatedInput(instruction); +} + void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { TryExtractArrayAccessAddress(instruction, instruction->GetArray(), @@ -255,76 +242,13 @@ void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { } void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) { - Primitive::Type type = instruction->GetType(); - if (!Primitive::IsIntOrLongType(type)) { - return; - } - - HInstruction* use = instruction->HasNonEnvironmentUses() - ? instruction->GetUses().GetFirst()->GetUser() - : nullptr; - - if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) { - // Replace code looking like - // MUL tmp, x, y - // SUB dst, acc, tmp - // with - // MULSUB dst, acc, x, y - // Note that we do not want to (unconditionally) perform the merge when the - // multiplication has multiple uses and it can be merged in all of them. - // Multiple uses could happen on the same control-flow path, and we would - // then increase the amount of work. In the future we could try to evaluate - // whether all uses are on different control-flow paths (using dominance and - // reverse-dominance information) and only perform the merge when they are. - HInstruction* accumulator = nullptr; - HBinaryOperation* binop = use->AsBinaryOperation(); - HInstruction* binop_left = binop->GetLeft(); - HInstruction* binop_right = binop->GetRight(); - // Be careful after GVN. This should not happen since the `HMul` has only - // one use. - DCHECK_NE(binop_left, binop_right); - if (binop_right == instruction) { - accumulator = binop_left; - } else if (use->IsAdd()) { - DCHECK_EQ(binop_left, instruction); - accumulator = binop_right; - } - - if (accumulator != nullptr) { - HArm64MultiplyAccumulate* mulacc = - new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type, - binop->GetKind(), - accumulator, - instruction->GetLeft(), - instruction->GetRight()); - - binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); - DCHECK(!instruction->HasUses()); - instruction->GetBlock()->RemoveInstruction(instruction); - RecordSimplification(); - return; - } - } - - // Use multiply accumulate instruction for a few simple patterns. - // We prefer not applying the following transformations if the left and - // right inputs perform the same operation. - // We rely on GVN having squashed the inputs if appropriate. However the - // results are still correct even if that did not happen. 
- if (instruction->GetLeft() == instruction->GetRight()) { - return; + if (TryCombineMultiplyAccumulate(instruction, kArm64)) { + RecordSimplification(); } +} - HInstruction* left = instruction->GetLeft(); - HInstruction* right = instruction->GetRight(); - if ((right->IsAdd() || right->IsSub()) && - TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) { - return; - } - if ((left->IsAdd() || left->IsSub()) && - TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) { - return; - } +void InstructionSimplifierArm64Visitor::VisitOr(HOr* instruction) { + TryMergeNegatedInput(instruction); } void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) { @@ -359,5 +283,9 @@ void InstructionSimplifierArm64Visitor::VisitUShr(HUShr* instruction) { } } +void InstructionSimplifierArm64Visitor::VisitXor(HXor* instruction) { + TryMergeNegatedInput(instruction); +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index b7f490bb8c..cf8458713f 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -51,18 +51,21 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { return TryMergeIntoShifterOperand(use, bitfield_op, true); } - bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, - HBinaryOperation* input_binop, - HInstruction* input_other); + // For bitwise operations (And/Or/Xor) with a negated input, try to use + // a negated bitwise instruction. + bool TryMergeNegatedInput(HBinaryOperation* op); // HInstruction visitors, sorted alphabetically. + void VisitAnd(HAnd* instruction) OVERRIDE; void VisitArrayGet(HArrayGet* instruction) OVERRIDE; void VisitArraySet(HArraySet* instruction) OVERRIDE; void VisitMul(HMul* instruction) OVERRIDE; + void VisitOr(HOr* instruction) OVERRIDE; void VisitShl(HShl* instruction) OVERRIDE; void VisitShr(HShr* instruction) OVERRIDE; void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; void VisitUShr(HUShr* instruction) OVERRIDE; + void VisitXor(HXor* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc new file mode 100644 index 0000000000..45d196fa6d --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -0,0 +1,189 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "instruction_simplifier_shared.h" + +namespace art { + +namespace { + +bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, + HBinaryOperation* input_binop, + HInstruction* input_other) { + DCHECK(Primitive::IsIntOrLongType(mul->GetType())); + DCHECK(input_binop->IsAdd() || input_binop->IsSub()); + DCHECK_NE(input_binop, input_other); + if (!input_binop->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + // Try to interpret patterns like + // a * (b <+/-> 1) + // as + // (a * b) <+/-> a + HInstruction* input_a = input_other; + HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. + HInstruction::InstructionKind op_kind; + + if (input_binop->IsAdd()) { + if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) { + // Interpret + // a * (b + 1) + // as + // (a * b) + a + input_b = input_binop->GetLeastConstantLeft(); + op_kind = HInstruction::kAdd; + } + } else { + DCHECK(input_binop->IsSub()); + if (input_binop->GetRight()->IsConstant() && + input_binop->GetRight()->AsConstant()->IsMinusOne()) { + // Interpret + // a * (b - (-1)) + // as + // a + (a * b) + input_b = input_binop->GetLeft(); + op_kind = HInstruction::kAdd; + } else if (input_binop->GetLeft()->IsConstant() && + input_binop->GetLeft()->AsConstant()->IsOne()) { + // Interpret + // a * (1 - b) + // as + // a - (a * b) + input_b = input_binop->GetRight(); + op_kind = HInstruction::kSub; + } + } + + if (input_b == nullptr) { + // We did not find a pattern we can optimize. + return false; + } + + ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); + HMultiplyAccumulate* mulacc = new(arena) HMultiplyAccumulate( + mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); + + mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); + input_binop->GetBlock()->RemoveInstruction(input_binop); + + return true; +} + +} // namespace + +bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) { + Primitive::Type type = mul->GetType(); + switch (isa) { + case kArm: + case kThumb2: + if (type != Primitive::kPrimInt) { + return false; + } + break; + case kArm64: + if (!Primitive::IsIntOrLongType(type)) { + return false; + } + break; + default: + return false; + } + + HInstruction* use = mul->HasNonEnvironmentUses() + ? mul->GetUses().GetFirst()->GetUser() + : nullptr; + + ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); + + if (mul->HasOnlyOneNonEnvironmentUse()) { + if (use->IsAdd() || use->IsSub()) { + // Replace code looking like + // MUL tmp, x, y + // SUB dst, acc, tmp + // with + // MULSUB dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HBinaryOperation* binop = use->AsBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // Be careful after GVN. This should not happen since the `HMul` has only + // one use. 
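TrySimpleMultiplyAccumulatePatterns, now hosted in the shared file above, rewrites a multiply by an add or subtract involving the constant one into a single multiply plus an accumulate. The identities it relies on are easy to verify in isolation; the sketch below is plain C++ over small operand ranges, not the HIR rewrite itself.

#include <cassert>
#include <cstdint>

int main() {
  for (int64_t a = -3; a <= 3; ++a) {
    for (int64_t b = -3; b <= 3; ++b) {
      assert(a * (b + 1) == a * b + a);     // a * (b + 1)    -> (a * b) + a
      assert(a * (b - (-1)) == a + a * b);  // a * (b - (-1)) -> a + (a * b)
      assert(a * (1 - b) == a - a * b);     // a * (1 - b)    -> a - (a * b)
    }
  }
  return 0;
}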
+ DCHECK_NE(binop_left, binop_right); + if (binop_right == mul) { + accumulator = binop_left; + } else if (use->IsAdd()) { + DCHECK_EQ(binop_left, mul); + accumulator = binop_right; + } + + if (accumulator != nullptr) { + HMultiplyAccumulate* mulacc = + new (arena) HMultiplyAccumulate(type, + binop->GetKind(), + accumulator, + mul->GetLeft(), + mul->GetRight()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!mul->HasUses()); + mul->GetBlock()->RemoveInstruction(mul); + return true; + } + } else if (use->IsNeg() && isa != kArm) { + HMultiplyAccumulate* mulacc = + new (arena) HMultiplyAccumulate(type, + HInstruction::kSub, + mul->GetBlock()->GetGraph()->GetConstant(type, 0), + mul->GetLeft(), + mul->GetRight()); + + use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc); + DCHECK(!mul->HasUses()); + mul->GetBlock()->RemoveInstruction(mul); + return true; + } + } + + // Use multiply accumulate instruction for a few simple patterns. + // We prefer not applying the following transformations if the left and + // right inputs perform the same operation. + // We rely on GVN having squashed the inputs if appropriate. However the + // results are still correct even if that did not happen. + if (mul->GetLeft() == mul->GetRight()) { + return false; + } + + HInstruction* left = mul->GetLeft(); + HInstruction* right = mul->GetRight(); + if ((right->IsAdd() || right->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(mul, right->AsBinaryOperation(), left)) { + return true; + } + if ((left->IsAdd() || left->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(mul, left->AsBinaryOperation(), right)) { + return true; + } + return false; +} + +} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h new file mode 100644 index 0000000000..9832ecc058 --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
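TryCombineMultiplyAccumulate merges a multiply into its single Add, Sub or Neg user, with the Neg case expressed as a subtraction from a zero accumulator. A standalone reference of those fused results (an illustrative helper; on ARM64 they correspond to MADD/MSUB-style outcomes) is sketched below.

#include <cassert>
#include <cstdint>

// Reference semantics of a fused multiply-accumulate: acc (+|-) x * y.
int64_t MultiplyAccumulate(bool is_sub, int64_t acc, int64_t x, int64_t y) {
  return is_sub ? acc - x * y : acc + x * y;
}

int main() {
  // MUL tmp, x, y ; ADD dst, acc, tmp  ->  one accumulate with kAdd.
  assert(MultiplyAccumulate(/* is_sub */ false, 100, 6, 7) == 100 + 6 * 7);
  // MUL tmp, x, y ; SUB dst, acc, tmp  ->  one accumulate with kSub.
  assert(MultiplyAccumulate(/* is_sub */ true, 100, 6, 7) == 100 - 6 * 7);
  // NEG dst, (x * y)  ->  an accumulate with kSub and a zero accumulator.
  assert(MultiplyAccumulate(/* is_sub */ true, 0, 6, 7) == -(6 * 7));
  return 0;
}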
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ + +#include "nodes.h" + +namespace art { + +bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa); + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index ea8669fa18..8cbdcbbcaf 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1825,6 +1825,90 @@ void IntrinsicCodeGeneratorARM::VisitMathNextAfter(HInvoke* invoke) { GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter); } +void IntrinsicLocationsBuilderARM::VisitIntegerReverse(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitIntegerReverse(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register out = locations->Out().AsRegister<Register>(); + Register in = locations->InAt(0).AsRegister<Register>(); + + __ rbit(out, in); +} + +void IntrinsicLocationsBuilderARM::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARM::VisitLongReverse(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>(); + + __ rbit(out_reg_lo, in_reg_hi); + __ rbit(out_reg_hi, in_reg_lo); +} + +void IntrinsicLocationsBuilderARM::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitIntegerReverseBytes(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register out = locations->Out().AsRegister<Register>(); + Register in = locations->InAt(0).AsRegister<Register>(); + + __ rev(out, in); +} + +void IntrinsicLocationsBuilderARM::VisitLongReverseBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARM::VisitLongReverseBytes(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>(); + + __ rev(out_reg_lo, in_reg_hi); + __ rev(out_reg_hi, in_reg_lo); +} + +void IntrinsicLocationsBuilderARM::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitShortReverseBytes(HInvoke* invoke) { + ArmAssembler* 
assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register out = locations->Out().AsRegister<Register>(); + Register in = locations->InAt(0).AsRegister<Register>(); + + __ revsh(out, in); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1834,12 +1918,7 @@ void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) } UNIMPLEMENTED_INTRINSIC(IntegerBitCount) -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) UNIMPLEMENTED_INTRINSIC(LongBitCount) -UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(LongReverseBytes) -UNIMPLEMENTED_INTRINSIC(ShortReverseBytes) UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble) UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat) UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 8741fd284f..b5f15fe22d 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -99,7 +99,8 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) { // restored! class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit IntrinsicSlowPathARM64(HInvoke* invoke) : invoke_(invoke) { } + explicit IntrinsicSlowPathARM64(HInvoke* invoke) + : SlowPathCodeARM64(invoke), invoke_(invoke) { } void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index c8629644b6..2f183c3a62 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -99,7 +99,7 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorMIPS* codegen) { // restored! 
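The new ARM intrinsics above map Integer/Long.reverse and the reverseBytes variants onto RBIT/REV/REVSH, with the long versions additionally swapping the low and high words of the register pair. A portable reference for the 32- and 64-bit results (illustrative helpers, not the generated code) is sketched below.

#include <cassert>
#include <cstdint>

// What RBIT computes: bit i of the input becomes bit (31 - i) of the output.
uint32_t ReverseBits32(uint32_t x) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i) {
    r = (r << 1) | ((x >> i) & 1u);
  }
  return r;
}

// What REV computes: the four bytes in reversed order.
uint32_t ReverseBytes32(uint32_t x) {
  return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
         ((x << 8) & 0x00ff0000u) | (x << 24);
}

// The 64-bit reversal applies the 32-bit operation to each half and swaps the
// halves, matching the paired rbit/rev on the low/high registers above.
uint64_t ReverseBits64(uint64_t x) {
  uint64_t lo = ReverseBits32(static_cast<uint32_t>(x));
  uint64_t hi = ReverseBits32(static_cast<uint32_t>(x >> 32));
  return (lo << 32) | hi;
}

int main() {
  assert(ReverseBits32(0x80000000u) == 0x00000001u);
  assert(ReverseBytes32(0x12345678u) == 0x78563412u);
  assert(ReverseBits64(0x8000000000000000ull) == 1ull);
  return 0;
}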
class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit IntrinsicSlowPathMIPS(HInvoke* invoke) : invoke_(invoke) { } + explicit IntrinsicSlowPathMIPS(HInvoke* invoke) : SlowPathCodeMIPS(invoke), invoke_(invoke) { } void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { CodeGeneratorMIPS* codegen = down_cast<CodeGeneratorMIPS*>(codegen_in); @@ -407,7 +407,7 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { Primitive::kPrimInt, IsR2OrNewer(), IsR6(), - false, + /* reverseBits */ false, GetAssembler()); } @@ -421,7 +421,7 @@ void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) { Primitive::kPrimLong, IsR2OrNewer(), IsR6(), - false, + /* reverseBits */ false, GetAssembler()); } @@ -435,7 +435,7 @@ void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) { Primitive::kPrimShort, IsR2OrNewer(), IsR6(), - false, + /* reverseBits */ false, GetAssembler()); } @@ -475,7 +475,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* in } void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), false, IsR6(), GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, IsR6(), GetAssembler()); } // int java.lang.Long.numberOfLeadingZeros(long i) @@ -484,7 +484,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invok } void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), true, IsR6(), GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, IsR6(), GetAssembler()); } static void GenNumberOfTrailingZeroes(LocationSummary* locations, @@ -497,7 +497,6 @@ static void GenNumberOfTrailingZeroes(LocationSummary* locations, Register in; if (is64bit) { - MipsLabel done; Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); @@ -588,7 +587,11 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* i } void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), false, IsR6(), IsR2OrNewer(), GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), + /* is64bit */ false, + IsR6(), + IsR2OrNewer(), + GetAssembler()); } // int java.lang.Long.numberOfTrailingZeros(long i) @@ -597,7 +600,11 @@ void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invo } void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), true, IsR6(), IsR2OrNewer(), GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), + /* is64bit */ true, + IsR6(), + IsR2OrNewer(), + GetAssembler()); } enum RotationDirection { @@ -806,7 +813,7 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) { Primitive::kPrimInt, IsR2OrNewer(), IsR6(), - true, + /* reverseBits */ true, GetAssembler()); } @@ -820,10 +827,561 @@ void IntrinsicCodeGeneratorMIPS::VisitLongReverse(HInvoke* invoke) { Primitive::kPrimLong, IsR2OrNewer(), IsR6(), - true, + /* reverseBits */ true, GetAssembler()); } +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, 
Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +static void MathAbsFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + if (is64bit) { + __ AbsD(out, in); + } else { + __ AbsS(out, in); + } +} + +// double java.lang.Math.abs(double) +void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); +} + +// float java.lang.Math.abs(float) +void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); +} + +static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { + if (is64bit) { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + // The comments in this section show the analogous operations which would + // be performed if we had 64-bit registers "in", and "out". + // __ Dsra32(AT, in, 31); + __ Sra(AT, in_hi, 31); + // __ Xor(out, in, AT); + __ Xor(TMP, in_lo, AT); + __ Xor(out_hi, in_hi, AT); + // __ Dsubu(out, out, AT); + __ Subu(out_lo, TMP, AT); + __ Sltu(TMP, out_lo, TMP); + __ Addu(out_hi, out_hi, TMP); + } else { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + } +} + +// int java.lang.Math.abs(int) +void IntrinsicLocationsBuilderMIPS::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); +} + +// long java.lang.Math.abs(long) +void IntrinsicLocationsBuilderMIPS::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); +} + +static void GenMinMaxFP(LocationSummary* locations, + bool is_min, + Primitive::Type type, + bool is_R6, + MipsAssembler* assembler) { + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); + + if (is_R6) { + MipsLabel noNaNs; + MipsLabel done; + FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. 
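The comment above spells out why min.fmt/max.fmt cannot be used alone: Java's Math.min and Math.max propagate a NaN input, and for min they also prefer -0.0 over +0.0 (the pre-R6 path below handles the zero case explicitly). A minimal reference of the Java-level behavior, written as plain C++ with std::isnan and std::signbit, follows.

#include <cassert>
#include <cmath>
#include <limits>

// Java-style Math.min for doubles: a NaN input wins, and -0.0 counts as
// smaller than +0.0 even though the two compare equal.
double JavaMinDouble(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    return std::numeric_limits<double>::quiet_NaN();
  }
  if (a == b) {  // covers -0.0 == +0.0; prefer the negative zero for min.
    return std::signbit(a) ? a : b;
  }
  return a < b ? a : b;
}

int main() {
  const double nan = std::nan("");
  assert(std::isnan(JavaMinDouble(1.0, nan)));
  assert(std::isnan(JavaMinDouble(nan, 1.0)));
  assert(std::signbit(JavaMinDouble(-0.0, +0.0)));  // -0.0 prevails for min
  assert(JavaMinDouble(2.0, 3.0) == 2.0);
  return 0;
}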
+ if (type == Primitive::kPrimDouble) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, Primitive::kPrimFloat); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); + } else { + MipsLabel ordered; + MipsLabel compare; + MipsLabel select; + MipsLabel done; + + if (type == Primitive::kPrimDouble) { + __ CunD(a, b); + } else { + DCHECK_EQ(type, Primitive::kPrimFloat); + __ CunS(a, b); + } + __ Bc1f(&ordered); + + // a or b (or both) is a NaN. Return one, which is a NaN. + if (type == Primitive::kPrimDouble) { + __ CeqD(b, b); + } else { + __ CeqS(b, b); + } + __ B(&select); + + __ Bind(&ordered); + + // Neither is a NaN. + // a == b? (-0.0 compares equal with +0.0) + // If equal, handle zeroes, else compare further. + if (type == Primitive::kPrimDouble) { + __ CeqD(a, b); + } else { + __ CeqS(a, b); + } + __ Bc1f(&compare); + + // a == b either bit for bit or one is -0.0 and the other is +0.0. + if (type == Primitive::kPrimDouble) { + __ MoveFromFpuHigh(TMP, a); + __ MoveFromFpuHigh(AT, b); + } else { + __ Mfc1(TMP, a); + __ Mfc1(AT, b); + } + + if (is_min) { + // -0.0 prevails over +0.0. + __ Or(TMP, TMP, AT); + } else { + // +0.0 prevails over -0.0. + __ And(TMP, TMP, AT); + } + + if (type == Primitive::kPrimDouble) { + __ Mfc1(AT, a); + __ Mtc1(AT, out); + __ MoveToFpuHigh(TMP, out); + } else { + __ Mtc1(TMP, out); + } + __ B(&done); + + __ Bind(&compare); + + if (type == Primitive::kPrimDouble) { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeD(a, b); + } else { + // return (a >= b) ? a : b; + __ ColeD(b, a); // b <= a + } + } else { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeS(a, b); + } else { + // return (a >= b) ? 
a : b; + __ ColeS(b, a); // b <= a + } + } + + __ Bind(&select); + + if (type == Primitive::kPrimDouble) { + __ MovtD(out, a); + __ MovfD(out, b); + } else { + __ MovtS(out, a); + __ MovfS(out, b); + } + + __ Bind(&done); + } +} + +static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); +} + +// double java.lang.Math.min(double, double) +void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), + /* is_min */ true, + Primitive::kPrimDouble, + IsR6(), + GetAssembler()); +} + +// float java.lang.Math.min(float, float) +void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), + /* is_min */ true, + Primitive::kPrimFloat, + IsR6(), + GetAssembler()); +} + +// double java.lang.Math.max(double, double) +void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), + /* is_min */ false, + Primitive::kPrimDouble, + IsR6(), + GetAssembler()); +} + +// float java.lang.Math.max(float, float) +void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), + /* is_min */ false, + Primitive::kPrimFloat, + IsR6(), + GetAssembler()); +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void GenMinMax(LocationSummary* locations, + bool is_min, + Primitive::Type type, + bool is_R6, + MipsAssembler* assembler) { + if (is_R6) { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions + // always change the target (output) register. If the condition is + // true the output register gets the contents of the "rs" register; + // otherwise, the output register is set to zero. One consequence + // of this is that to implement something like "rd = c==0 ? rs : rt" + // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. + // After executing this pair of instructions one of the output + // registers from the pair will necessarily contain zero. Then the + // code ORs the output registers from the SELEQZ/SELNEZ instructions + // to get the final result. 
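The comment above describes how MIPS R6 composes a conditional move from SELEQZ/SELNEZ plus an OR, since each select on its own either passes the source through or produces zero. The composition is easy to model in isolation; the helpers below are illustrative, not the emitted code.

#include <cassert>
#include <cstdint>

// R6 select semantics: the output is either the source register or zero.
uint32_t Seleqz(uint32_t rs, uint32_t cond) { return cond == 0 ? rs : 0; }
uint32_t Selnez(uint32_t rs, uint32_t cond) { return cond != 0 ? rs : 0; }

// "rd = cond ? rs : rt" built from the pair plus an OR: exactly one of the two
// selects contributes the chosen value, the other contributes zero.
uint32_t Select(uint32_t cond, uint32_t rs, uint32_t rt) {
  return Selnez(rs, cond) | Seleqz(rt, cond);
}

int main() {
  assert(Select(/* cond */ 1u, 0xAAAAu, 0xBBBBu) == 0xAAAAu);
  assert(Select(/* cond */ 0u, 0xAAAAu, 0xBBBBu) == 0xBBBBu);
  return 0;
}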
+ // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (type == Primitive::kPrimLong) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, b_hi, a_hi); + __ Bne(b_hi, a_hi, &compare_done); + + __ Sltu(TMP, b_lo, a_lo); + + __ Bind(&compare_done); + + if (is_min) { + __ Seleqz(AT, a_lo, TMP); + __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo + // because at this point we're + // done using a_lo/b_lo. + } else { + __ Selnez(AT, a_lo, TMP); + __ Seleqz(out_lo, b_lo, TMP); // ditto + } + __ Or(out_lo, out_lo, AT); + if (is_min) { + __ Seleqz(AT, a_hi, TMP); + __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } else { + __ Selnez(AT, a_hi, TMP); + __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } + __ Or(out_hi, out_hi, AT); + } + } else { + DCHECK_EQ(type, Primitive::kPrimInt); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, b, a); + if (is_min) { + __ Seleqz(TMP, a, AT); + __ Selnez(AT, b, AT); + } else { + __ Selnez(TMP, a, AT); + __ Seleqz(AT, b, AT); + } + __ Or(out, TMP, AT); + } + } + } else { + if (type == Primitive::kPrimLong) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, a_hi, b_hi); + __ Bne(a_hi, b_hi, &compare_done); + + __ Sltu(TMP, a_lo, b_lo); + + __ Bind(&compare_done); + + if (is_min) { + if (out_lo != a_lo) { + __ Movn(out_hi, a_hi, TMP); + __ Movn(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movz(out_hi, b_hi, TMP); + __ Movz(out_lo, b_lo, TMP); + } + } else { + if (out_lo != a_lo) { + __ Movz(out_hi, a_hi, TMP); + __ Movz(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movn(out_hi, b_hi, TMP); + __ Movn(out_lo, b_lo, TMP); + } + } + } + } else { + DCHECK_EQ(type, Primitive::kPrimInt); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = 
locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, a, b); + if (is_min) { + if (out != a) { + __ Movn(out, a, AT); + } + if (out != b) { + __ Movz(out, b, AT); + } + } else { + if (out != a) { + __ Movz(out, a, AT); + } + if (out != b) { + __ Movn(out, b, AT); + } + } + } + } + } +} + +// int java.lang.Math.min(int, int) +void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), + /* is_min */ true, + Primitive::kPrimInt, + IsR6(), + GetAssembler()); +} + +// long java.lang.Math.min(long, long) +void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), + /* is_min */ true, + Primitive::kPrimLong, + IsR6(), + GetAssembler()); +} + +// int java.lang.Math.max(int, int) +void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), + /* is_min */ false, + Primitive::kPrimInt, + IsR6(), + GetAssembler()); +} + +// long java.lang.Math.max(long, long) +void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), + /* is_min */ false, + Primitive::kPrimLong, + IsR6(), + GetAssembler()); +} + +// double java.lang.Math.sqrt(double) +void IntrinsicLocationsBuilderMIPS::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathSqrt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + MipsAssembler* assembler = GetAssembler(); + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + __ SqrtD(out, in); +} + // byte libcore.io.Memory.peekByte(long address) void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekByte(HInvoke* invoke) { CreateIntToIntLocations(arena_, invoke); @@ -1151,19 +1709,6 @@ void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(IntegerBitCount) UNIMPLEMENTED_INTRINSIC(LongBitCount) -UNIMPLEMENTED_INTRINSIC(MathAbsDouble) -UNIMPLEMENTED_INTRINSIC(MathAbsFloat) -UNIMPLEMENTED_INTRINSIC(MathAbsInt) -UNIMPLEMENTED_INTRINSIC(MathAbsLong) -UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble) -UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat) -UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble) -UNIMPLEMENTED_INTRINSIC(MathMaxFloatFloat) -UNIMPLEMENTED_INTRINSIC(MathMinIntInt) -UNIMPLEMENTED_INTRINSIC(MathMinLongLong) -UNIMPLEMENTED_INTRINSIC(MathMaxIntInt) -UNIMPLEMENTED_INTRINSIC(MathMaxLongLong) -UNIMPLEMENTED_INTRINSIC(MathSqrt) UNIMPLEMENTED_INTRINSIC(MathCeil) UNIMPLEMENTED_INTRINSIC(MathFloor) UNIMPLEMENTED_INTRINSIC(MathRint) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index cf3a3657de..bd4f5329da 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -87,7 +87,8 @@ static void 
MoveArguments(HInvoke* invoke, CodeGeneratorMIPS64* codegen) { // restored! class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit IntrinsicSlowPathMIPS64(HInvoke* invoke) : invoke_(invoke) { } + explicit IntrinsicSlowPathMIPS64(HInvoke* invoke) + : SlowPathCodeMIPS64(invoke), invoke_(invoke) { } void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { CodeGeneratorMIPS64* codegen = down_cast<CodeGeneratorMIPS64*>(codegen_in); @@ -580,25 +581,71 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { static void GenMinMaxFP(LocationSummary* locations, bool is_min, - bool is_double, + Primitive::Type type, Mips64Assembler* assembler) { - FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - if (is_double) { + Mips64Label noNaNs; + Mips64Label done; + FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == Primitive::kPrimDouble) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + if (is_min) { - __ MinD(out, lhs, rhs); + __ MinD(out, a, b); } else { - __ MaxD(out, lhs, rhs); + __ MaxD(out, a, b); } } else { + DCHECK_EQ(type, Primitive::kPrimFloat); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. 
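Within the NaN branch the MIPS64 code identifies which operand is the NaN with a single self-comparison: a == a fails only when a itself is the NaN, so the select picks the other input accordingly. A tiny reference of that trick is shown below, assuming the preceding unordered compare already established that at least one input is a NaN.

#include <cassert>
#include <cmath>

// Picks out a NaN operand via self-comparison: a == a holds iff a is not NaN.
// Mirrors "CmpEq ftmp, a, a ; Sel ftmp, a, b" in the branch above.
double SelectTheNaN(double a, double b) {
  return (a == a) ? b : a;
}

int main() {
  const double nan = std::nan("");
  assert(std::isnan(SelectTheNaN(nan, 1.0)));  // a is the NaN
  assert(std::isnan(SelectTheNaN(1.0, nan)));  // b is the NaN
  assert(std::isnan(SelectTheNaN(nan, nan)));  // both NaN: still a NaN
  return 0;
}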
+ __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + if (is_min) { - __ MinS(out, lhs, rhs); + __ MinS(out, a, b); } else { - __ MaxS(out, lhs, rhs); + __ MaxS(out, a, b); } } + + __ Bind(&done); } static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -616,7 +663,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, Primitive::kPrimDouble, GetAssembler()); } // float java.lang.Math.min(float, float) @@ -625,7 +672,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, Primitive::kPrimFloat, GetAssembler()); } // double java.lang.Math.max(double, double) @@ -634,7 +681,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, Primitive::kPrimDouble, GetAssembler()); } // float java.lang.Math.max(float, float) @@ -643,7 +690,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, Primitive::kPrimFloat, GetAssembler()); } static void GenMinMax(LocationSummary* locations, @@ -653,49 +700,55 @@ static void GenMinMax(LocationSummary* locations, GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always - // change the target (output) register. If the condition is true the - // output register gets the contents of the "rs" register; otherwise, - // the output register is set to zero. One consequence of this is - // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 - // needs to use a pair of SELEQZ/SELNEZ instructions. After - // executing this pair of instructions one of the output registers - // from the pair will necessarily contain zero. Then the code ORs the - // output registers from the SELEQZ/SELNEZ instructions to get the - // final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. 
The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (out == lhs) { - __ Slt(AT, rhs, lhs); - if (is_min) { - __ Seleqz(out, lhs, AT); - __ Selnez(AT, rhs, AT); - } else { - __ Selnez(out, lhs, AT); - __ Seleqz(AT, rhs, AT); + if (lhs == rhs) { + if (out != lhs) { + __ Move(out, lhs); } } else { - __ Slt(AT, lhs, rhs); - if (is_min) { - __ Seleqz(out, rhs, AT); - __ Selnez(AT, lhs, AT); + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always + // change the target (output) register. If the condition is true the + // output register gets the contents of the "rs" register; otherwise, + // the output register is set to zero. One consequence of this is + // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 + // needs to use a pair of SELEQZ/SELNEZ instructions. After + // executing this pair of instructions one of the output registers + // from the pair will necessarily contain zero. Then the code ORs the + // output registers from the SELEQZ/SELNEZ instructions to get the + // final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. 
+ if (out == lhs) { + __ Slt(AT, rhs, lhs); + if (is_min) { + __ Seleqz(out, lhs, AT); + __ Selnez(AT, rhs, AT); + } else { + __ Selnez(out, lhs, AT); + __ Seleqz(AT, rhs, AT); + } } else { - __ Selnez(out, rhs, AT); - __ Seleqz(AT, lhs, AT); + __ Slt(AT, lhs, rhs); + if (is_min) { + __ Seleqz(out, rhs, AT); + __ Selnez(AT, lhs, AT); + } else { + __ Selnez(out, rhs, AT); + __ Seleqz(AT, lhs, AT); + } } + __ Or(out, out, AT); } - __ Or(out, out, AT); } static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h index e70afd29f0..c1f9ae6425 100644 --- a/compiler/optimizing/intrinsics_utils.h +++ b/compiler/optimizing/intrinsics_utils.h @@ -39,7 +39,7 @@ namespace art { template <typename TDexCallingConvention> class IntrinsicSlowPath : public SlowPathCode { public: - explicit IntrinsicSlowPath(HInvoke* invoke) : invoke_(invoke) { } + explicit IntrinsicSlowPath(HInvoke* invoke) : SlowPathCode(invoke), invoke_(invoke) { } Location MoveArguments(CodeGenerator* codegen) { TDexCallingConvention calling_convention_visitor; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index f36dc6e2fd..f9acb089ee 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1178,19 +1178,19 @@ HConstant* HUnaryOperation::TryStaticEvaluation() const { } HConstant* HBinaryOperation::TryStaticEvaluation() const { - if (GetLeft()->IsIntConstant()) { - if (GetRight()->IsIntConstant()) { - return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsIntConstant()); - } else if (GetRight()->IsLongConstant()) { - return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsLongConstant()); - } + if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) { + return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsIntConstant()); } else if (GetLeft()->IsLongConstant()) { if (GetRight()->IsIntConstant()) { + // The binop(long, int) case is only valid for shifts and rotations. + DCHECK(IsShl() || IsShr() || IsUShr() || IsRor()) << DebugName(); return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsIntConstant()); } else if (GetRight()->IsLongConstant()) { return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsLongConstant()); } } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) { + // The binop(null, null) case is only valid for equal and not-equal conditions. + DCHECK(IsEqual() || IsNotEqual()) << DebugName(); return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant()); } else if (kEnableFloatingPointStaticEvaluation) { if (GetLeft()->IsFloatConstant() && GetRight()->IsFloatConstant()) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 399afabea6..c4764ccbb4 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1247,6 +1247,16 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ +/* + * Instructions, shared across several (not all) architectures. 
+ */ +#if !defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_ENABLE_CODEGEN_arm64) +#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) +#else +#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \ + M(MultiplyAccumulate, Instruction) +#endif + #ifndef ART_ENABLE_CODEGEN_arm #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) #else @@ -1258,9 +1268,9 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) #else #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ + M(Arm64BitwiseNegatedRight, Instruction) \ M(Arm64DataProcWithShifterOp, Instruction) \ - M(Arm64IntermediateAddress, Instruction) \ - M(Arm64MultiplyAccumulate, Instruction) + M(Arm64IntermediateAddress, Instruction) #endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) @@ -1281,6 +1291,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \ FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ @@ -2821,20 +2832,15 @@ class HBinaryOperation : public HExpression<2> { // Apply this operation to `x` and `y`. virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, HNullConstant* y ATTRIBUTE_UNUSED) const { - VLOG(compiler) << DebugName() << " is not defined for the (null, null) case."; - return nullptr; + LOG(FATAL) << DebugName() << " is not defined for the (null, null) case."; + UNREACHABLE(); } virtual HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const = 0; virtual HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const = 0; - virtual HConstant* Evaluate(HIntConstant* x ATTRIBUTE_UNUSED, - HLongConstant* y ATTRIBUTE_UNUSED) const { - VLOG(compiler) << DebugName() << " is not defined for the (int, long) case."; - return nullptr; - } virtual HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED, HIntConstant* y ATTRIBUTE_UNUSED) const { - VLOG(compiler) << DebugName() << " is not defined for the (long, int) case."; - return nullptr; + LOG(FATAL) << DebugName() << " is not defined for the (long, int) case."; + UNREACHABLE(); } virtual HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const = 0; virtual HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const = 0; @@ -4305,8 +4311,6 @@ class HShl : public HBinaryOperation { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc()); } - // There is no `Evaluate(HIntConstant* x, HLongConstant* y)`, as this - // case is handled as `x << static_cast<int>(y)`. HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc()); @@ -4351,8 +4355,6 @@ class HShr : public HBinaryOperation { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc()); } - // There is no `Evaluate(HIntConstant* x, HLongConstant* y)`, as this - // case is handled as `x >> static_cast<int>(y)`. 
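Although the mixed (int, long) Evaluate overload is removed, the (long, int) overloads kept for the shift and rotate nodes, such as the HShr overload that follows, still mask the shift distance with kMaxIntShiftValue (31) or kMaxLongShiftValue (63). A minimal standalone sketch of that masking (an assumed helper for illustration, not ART code):

#include <cstdint>

// Java-style shifts use only the low bits of the distance: 5 bits for int, 6 bits for long.
int64_t ComputeShr64(int64_t value, int32_t distance) {
  const int32_t kMaxLongShiftValue = 63;  // Same constant name as in nodes.h.
  return value >> (distance & kMaxLongShiftValue);
}

// Example: ComputeShr64(8, 66) == 2, matching Java's (8L >> 66).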
HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc()); @@ -4398,8 +4400,6 @@ class HUShr : public HBinaryOperation { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc()); } - // There is no `Evaluate(HIntConstant* x, HLongConstant* y)`, as this - // case is handled as `x >>> static_cast<int>(y)`. HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc()); @@ -4435,21 +4435,12 @@ class HAnd : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template <typename T, typename U> - auto Compute(T x, U y) const -> decltype(x & y) { return x & y; } + template <typename T> T Compute(T x, T y) const { return x & y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HIntConstant* x, HLongConstant* y) const OVERRIDE { - return GetBlock()->GetGraph()->GetLongConstant( - Compute(x->GetValue(), y->GetValue()), GetDexPc()); - } - HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { - return GetBlock()->GetGraph()->GetLongConstant( - Compute(x->GetValue(), y->GetValue()), GetDexPc()); - } HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -4481,21 +4472,12 @@ class HOr : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template <typename T, typename U> - auto Compute(T x, U y) const -> decltype(x | y) { return x | y; } + template <typename T> T Compute(T x, T y) const { return x | y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HIntConstant* x, HLongConstant* y) const OVERRIDE { - return GetBlock()->GetGraph()->GetLongConstant( - Compute(x->GetValue(), y->GetValue()), GetDexPc()); - } - HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { - return GetBlock()->GetGraph()->GetLongConstant( - Compute(x->GetValue(), y->GetValue()), GetDexPc()); - } HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -4527,21 +4509,12 @@ class HXor : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template <typename T, typename U> - auto Compute(T x, U y) const -> decltype(x ^ y) { return x ^ y; } + template <typename T> T Compute(T x, T y) const { return x ^ y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HIntConstant* x, HLongConstant* y) const OVERRIDE { - return GetBlock()->GetGraph()->GetLongConstant( - Compute(x->GetValue(), y->GetValue()), GetDexPc()); - } - HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { - return GetBlock()->GetGraph()->GetLongConstant( - Compute(x->GetValue(), y->GetValue()), GetDexPc()); - } HConstant* Evaluate(HLongConstant* x, 
HLongConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -6060,6 +6033,9 @@ class HParallelMove : public HTemplateInstruction<0> { } // namespace art +#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) +#include "nodes_shared.h" +#endif #ifdef ART_ENABLE_CODEGEN_arm #include "nodes_arm.h" #endif diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 445cdab191..75a71e78b8 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -118,38 +118,64 @@ class HArm64IntermediateAddress : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress); }; -class HArm64MultiplyAccumulate : public HExpression<3> { +class HArm64BitwiseNegatedRight : public HBinaryOperation { public: - HArm64MultiplyAccumulate(Primitive::Type type, - InstructionKind op, - HInstruction* accumulator, - HInstruction* mul_left, - HInstruction* mul_right, - uint32_t dex_pc = kNoDexPc) - : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { - SetRawInputAt(kInputAccumulatorIndex, accumulator); - SetRawInputAt(kInputMulLeftIndex, mul_left); - SetRawInputAt(kInputMulRightIndex, mul_right); + HArm64BitwiseNegatedRight(Primitive::Type result_type, + InstructionKind op, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc = kNoDexPc) + : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc), + op_kind_(op) { + DCHECK(op == HInstruction::kAnd || op == HInstruction::kOr || op == HInstruction::kXor) << op; } - static constexpr int kInputAccumulatorIndex = 0; - static constexpr int kInputMulLeftIndex = 1; - static constexpr int kInputMulRightIndex = 2; + template <typename T, typename U> + auto Compute(T x, U y) const -> decltype(x & ~y) { + static_assert(std::is_same<decltype(x & ~y), decltype(x | ~y)>::value && + std::is_same<decltype(x & ~y), decltype(x ^ ~y)>::value, + "Inconsistent negated bitwise types"); + switch (op_kind_) { + case HInstruction::kAnd: + return x & ~y; + case HInstruction::kOr: + return x | ~y; + case HInstruction::kXor: + return x ^ ~y; + default: + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); + } + } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(HInstruction* other) const OVERRIDE { - return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_; + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + Compute(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + LOG(FATAL) << DebugName() << " is not defined for float values"; + UNREACHABLE(); + } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + LOG(FATAL) << DebugName() << " is not defined for double values"; + UNREACHABLE(); } InstructionKind GetOpKind() const { return op_kind_; } - DECLARE_INSTRUCTION(Arm64MultiplyAccumulate); + DECLARE_INSTRUCTION(Arm64BitwiseNegatedRight); private: - // Indicates if this is a MADD or MSUB. - InstructionKind op_kind_; + // Specifies the bitwise operation, which will be then negated. 
+ const InstructionKind op_kind_; - DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate); + DISALLOW_COPY_AND_ASSIGN(HArm64BitwiseNegatedRight); }; } // namespace art diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h new file mode 100644 index 0000000000..b04b622838 --- /dev/null +++ b/compiler/optimizing/nodes_shared.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_NODES_SHARED_H_ +#define ART_COMPILER_OPTIMIZING_NODES_SHARED_H_ + +namespace art { + +class HMultiplyAccumulate : public HExpression<3> { + public: + HMultiplyAccumulate(Primitive::Type type, + InstructionKind op, + HInstruction* accumulator, + HInstruction* mul_left, + HInstruction* mul_right, + uint32_t dex_pc = kNoDexPc) + : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { + SetRawInputAt(kInputAccumulatorIndex, accumulator); + SetRawInputAt(kInputMulLeftIndex, mul_left); + SetRawInputAt(kInputMulRightIndex, mul_right); + } + + static constexpr int kInputAccumulatorIndex = 0; + static constexpr int kInputMulLeftIndex = 1; + static constexpr int kInputMulRightIndex = 2; + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + return op_kind_ == other->AsMultiplyAccumulate()->op_kind_; + } + + InstructionKind GetOpKind() const { return op_kind_; } + + DECLARE_INSTRUCTION(MultiplyAccumulate); + + private: + // Indicates if this is a MADD or MSUB. 
+ const InstructionKind op_kind_; + + DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index b1891c979e..5a9f2583fd 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -60,6 +60,7 @@ #include "induction_var_analysis.h" #include "inliner.h" #include "instruction_simplifier.h" +#include "instruction_simplifier_arm.h" #include "intrinsics.h" #include "jit/debugger_interface.h" #include "jit/jit_code_cache.h" @@ -438,7 +439,10 @@ static void RunArchOptimizations(InstructionSet instruction_set, case kThumb2: case kArm: { arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats); + arm::InstructionSimplifierArm* simplifier = + new (arena) arm::InstructionSimplifierArm(graph, stats); HOptimization* arm_optimizations[] = { + simplifier, fixups }; RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 4784de1380..54cbdf8b66 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -63,8 +63,7 @@ void StackMapStream::EndStackMapEntry() { void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) { if (kind != DexRegisterLocation::Kind::kNone) { // Ensure we only use non-compressed location kind at this stage. - DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) - << DexRegisterLocation::PrettyDescriptor(kind); + DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) << kind; DexRegisterLocation location(kind, value); // Look for Dex register `location` in the location catalog (using the @@ -257,6 +256,7 @@ void StackMapStream::FillIn(MemoryRegion region) { // Ensure we reached the end of the Dex registers location_catalog. DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size()); + ArenaBitVector empty_bitmask(allocator_, 0, /* expandable */ false); uintptr_t next_dex_register_map_offset = 0; uintptr_t next_inline_info_offset = 0; for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) { @@ -268,6 +268,9 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map.SetRegisterMask(stack_map_encoding_, entry.register_mask); if (entry.sp_mask != nullptr) { stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask); + } else { + // The MemoryRegion does not have to be zeroed, so make sure we clear the bits. + stack_map.SetStackMask(stack_map_encoding_, empty_bitmask); } if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) { @@ -344,6 +347,11 @@ void StackMapStream::FillIn(MemoryRegion region) { } } } + + // Verify all written data in debug build. + if (kIsDebugBuild) { + CheckCodeInfo(region); + } } void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map, @@ -423,4 +431,90 @@ bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEn return true; } +// Helper for CheckCodeInfo - check that register map has the expected content. 
+void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info, + const DexRegisterMap& dex_register_map, + size_t num_dex_registers, + BitVector* live_dex_registers_mask, + size_t dex_register_locations_index) const { + StackMapEncoding encoding = code_info.ExtractEncoding(); + for (size_t reg = 0; reg < num_dex_registers; reg++) { + // Find the location we tried to encode. + DexRegisterLocation expected = DexRegisterLocation::None(); + if (live_dex_registers_mask->IsBitSet(reg)) { + size_t catalog_index = dex_register_locations_[dex_register_locations_index++]; + expected = location_catalog_entries_[catalog_index]; + } + // Compare to the seen location. + if (expected.GetKind() == DexRegisterLocation::Kind::kNone) { + DCHECK(!dex_register_map.IsValid() || !dex_register_map.IsDexRegisterLive(reg)); + } else { + DCHECK(dex_register_map.IsDexRegisterLive(reg)); + DexRegisterLocation seen = dex_register_map.GetDexRegisterLocation( + reg, num_dex_registers, code_info, encoding); + DCHECK_EQ(expected.GetKind(), seen.GetKind()); + DCHECK_EQ(expected.GetValue(), seen.GetValue()); + } + } + if (num_dex_registers == 0) { + DCHECK(!dex_register_map.IsValid()); + } +} + +// Check that all StackMapStream inputs are correctly encoded by trying to read them back. +void StackMapStream::CheckCodeInfo(MemoryRegion region) const { + CodeInfo code_info(region); + StackMapEncoding encoding = code_info.ExtractEncoding(); + DCHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size()); + for (size_t s = 0; s < stack_maps_.size(); ++s) { + const StackMap stack_map = code_info.GetStackMapAt(s, encoding); + StackMapEntry entry = stack_maps_[s]; + + // Check main stack map fields. + DCHECK_EQ(stack_map.GetNativePcOffset(encoding), entry.native_pc_offset); + DCHECK_EQ(stack_map.GetDexPc(encoding), entry.dex_pc); + DCHECK_EQ(stack_map.GetRegisterMask(encoding), entry.register_mask); + MemoryRegion stack_mask = stack_map.GetStackMask(encoding); + if (entry.sp_mask != nullptr) { + DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits()); + for (size_t b = 0; b < stack_mask.size_in_bits(); b++) { + DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b)); + } + } else { + for (size_t b = 0; b < stack_mask.size_in_bits(); b++) { + DCHECK_EQ(stack_mask.LoadBit(b), 0u); + } + } + + CheckDexRegisterMap(code_info, + code_info.GetDexRegisterMapOf( + stack_map, encoding, entry.num_dex_registers), + entry.num_dex_registers, + entry.live_dex_registers_mask, + entry.dex_register_locations_start_index); + + // Check inline info. 
+ DCHECK_EQ(stack_map.HasInlineInfo(encoding), (entry.inlining_depth != 0)); + if (entry.inlining_depth != 0) { + InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); + DCHECK_EQ(inline_info.GetDepth(), entry.inlining_depth); + for (size_t d = 0; d < entry.inlining_depth; ++d) { + size_t inline_info_index = entry.inline_infos_start_index + d; + DCHECK_LT(inline_info_index, inline_infos_.size()); + InlineInfoEntry inline_entry = inline_infos_[inline_info_index]; + DCHECK_EQ(inline_info.GetDexPcAtDepth(d), inline_entry.dex_pc); + DCHECK_EQ(inline_info.GetMethodIndexAtDepth(d), inline_entry.method_index); + DCHECK_EQ(inline_info.GetInvokeTypeAtDepth(d), inline_entry.invoke_type); + + CheckDexRegisterMap(code_info, + code_info.GetDexRegisterMapAtDepth( + d, inline_info, encoding, inline_entry.num_dex_registers), + inline_entry.num_dex_registers, + inline_entry.live_dex_registers_mask, + inline_entry.dex_register_locations_start_index); + } + } + } +} + } // namespace art diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index fc27a2b446..016a911424 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -167,6 +167,13 @@ class StackMapStream : public ValueObject { const BitVector& live_dex_registers_mask, uint32_t start_index_in_dex_register_locations) const; + void CheckDexRegisterMap(const CodeInfo& code_info, + const DexRegisterMap& dex_register_map, + size_t num_dex_registers, + BitVector* live_dex_registers_mask, + size_t dex_register_locations_index) const; + void CheckCodeInfo(MemoryRegion region) const; + ArenaAllocator* allocator_; ArenaVector<StackMapEntry> stack_maps_; diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index f96376d9fe..a894565425 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -545,6 +545,9 @@ class ArmAssembler : public Assembler { virtual void movw(Register rd, uint16_t imm16, Condition cond = AL) = 0; virtual void movt(Register rd, uint16_t imm16, Condition cond = AL) = 0; virtual void rbit(Register rd, Register rm, Condition cond = AL) = 0; + virtual void rev(Register rd, Register rm, Condition cond = AL) = 0; + virtual void rev16(Register rd, Register rm, Condition cond = AL) = 0; + virtual void revsh(Register rd, Register rm, Condition cond = AL) = 0; // Multiply instructions. 
virtual void mul(Register rd, Register rn, Register rm, Condition cond = AL) = 0; diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index ebca25bbf9..0a227b21cd 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -750,6 +750,35 @@ void Arm32Assembler::movt(Register rd, uint16_t imm16, Condition cond) { } +void Arm32Assembler::EmitMiscellaneous(Condition cond, uint8_t op1, + uint8_t op2, uint32_t a_part, + uint32_t rest) { + int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | + B26 | B25 | B23 | + (op1 << 20) | + (a_part << 16) | + (op2 << 5) | + B4 | + rest; + Emit(encoding); +} + + +void Arm32Assembler::EmitReverseBytes(Register rd, Register rm, Condition cond, + uint8_t op1, uint8_t op2) { + CHECK_NE(rd, kNoRegister); + CHECK_NE(rm, kNoRegister); + CHECK_NE(cond, kNoCondition); + CHECK_NE(rd, PC); + CHECK_NE(rm, PC); + + int32_t encoding = (static_cast<int32_t>(rd) << kRdShift) | + (0b1111 << 8) | + static_cast<int32_t>(rm); + EmitMiscellaneous(cond, op1, op2, 0b1111, encoding); +} + + void Arm32Assembler::rbit(Register rd, Register rm, Condition cond) { CHECK_NE(rd, kNoRegister); CHECK_NE(rm, kNoRegister); @@ -764,6 +793,21 @@ void Arm32Assembler::rbit(Register rd, Register rm, Condition cond) { } +void Arm32Assembler::rev(Register rd, Register rm, Condition cond) { + EmitReverseBytes(rd, rm, cond, 0b011, 0b001); +} + + +void Arm32Assembler::rev16(Register rd, Register rm, Condition cond) { + EmitReverseBytes(rd, rm, cond, 0b011, 0b101); +} + + +void Arm32Assembler::revsh(Register rd, Register rm, Condition cond) { + EmitReverseBytes(rd, rm, cond, 0b111, 0b101); +} + + void Arm32Assembler::EmitMulOp(Condition cond, int32_t opcode, Register rd, Register rn, Register rm, Register rs) { diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index bf332feb62..e3e05caf92 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -91,6 +91,9 @@ class Arm32Assembler FINAL : public ArmAssembler { void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE; void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE; void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE; + void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE; + void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE; + void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE; // Multiply instructions. 
void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE; @@ -388,6 +391,11 @@ class Arm32Assembler FINAL : public ArmAssembler { void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond); + void EmitMiscellaneous(Condition cond, uint8_t op1, uint8_t op2, + uint32_t a_part, uint32_t rest); + void EmitReverseBytes(Register rd, Register rm, Condition cond, + uint8_t op1, uint8_t op2); + void EmitBranch(Condition cond, Label* label, bool link); static int32_t EncodeBranchOffset(int offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc index 43805966a9..e570e22fca 100644 --- a/compiler/utils/arm/assembler_arm32_test.cc +++ b/compiler/utils/arm/assembler_arm32_test.cc @@ -887,4 +887,16 @@ TEST_F(AssemblerArm32Test, rbit) { T3Helper(&arm::Arm32Assembler::rbit, true, "rbit{cond} {reg1}, {reg2}", "rbit"); } +TEST_F(AssemblerArm32Test, rev) { + T3Helper(&arm::Arm32Assembler::rev, true, "rev{cond} {reg1}, {reg2}", "rev"); +} + +TEST_F(AssemblerArm32Test, rev16) { + T3Helper(&arm::Arm32Assembler::rev16, true, "rev16{cond} {reg1}, {reg2}", "rev16"); +} + +TEST_F(AssemblerArm32Test, revsh) { + T3Helper(&arm::Arm32Assembler::revsh, true, "revsh{cond} {reg1}, {reg2}", "revsh"); +} + } // namespace art diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 52023a67ee..15298b390b 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -2569,20 +2569,36 @@ void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x } +void Thumb2Assembler::Emit32Miscellaneous(uint8_t op1, + uint8_t op2, + uint32_t rest_encoding) { + int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B23 | + op1 << 20 | + 0xf << 12 | + B7 | + op2 << 4 | + rest_encoding; + Emit32(encoding); +} + + +void Thumb2Assembler::Emit16Miscellaneous(uint32_t rest_encoding) { + int16_t encoding = B15 | B13 | B12 | + rest_encoding; + Emit16(encoding); +} + void Thumb2Assembler::clz(Register rd, Register rm, Condition cond) { CHECK_NE(rd, kNoRegister); CHECK_NE(rm, kNoRegister); CheckCondition(cond); CHECK_NE(rd, PC); CHECK_NE(rm, PC); - int32_t encoding = B31 | B30 | B29 | B28 | B27 | - B25 | B23 | B21 | B20 | + int32_t encoding = static_cast<uint32_t>(rm) << 16 | - 0xf << 12 | static_cast<uint32_t>(rd) << 8 | - B7 | static_cast<uint32_t>(rm); - Emit32(encoding); + Emit32Miscellaneous(0b11, 0b00, encoding); } @@ -2630,14 +2646,55 @@ void Thumb2Assembler::rbit(Register rd, Register rm, Condition cond) { CHECK_NE(rm, PC); CHECK_NE(rd, SP); CHECK_NE(rm, SP); - int32_t encoding = B31 | B30 | B29 | B28 | B27 | - B25 | B23 | B20 | + int32_t encoding = static_cast<uint32_t>(rm) << 16 | - 0xf << 12 | static_cast<uint32_t>(rd) << 8 | - B7 | B5 | static_cast<uint32_t>(rm); - Emit32(encoding); + + Emit32Miscellaneous(0b01, 0b10, encoding); +} + + +void Thumb2Assembler::EmitReverseBytes(Register rd, Register rm, + uint32_t op) { + CHECK_NE(rd, kNoRegister); + CHECK_NE(rm, kNoRegister); + CHECK_NE(rd, PC); + CHECK_NE(rm, PC); + CHECK_NE(rd, SP); + CHECK_NE(rm, SP); + + if (!IsHighRegister(rd) && !IsHighRegister(rm) && !force_32bit_) { + uint16_t t1_op = B11 | B9 | (op << 6); + int16_t encoding = t1_op | + static_cast<uint16_t>(rm) << 3 | + static_cast<uint16_t>(rd); + Emit16Miscellaneous(encoding); + } else { + int32_t encoding = + static_cast<uint32_t>(rm) << 16 | + static_cast<uint32_t>(rd) << 8 | + 
static_cast<uint32_t>(rm); + Emit32Miscellaneous(0b01, op, encoding); + } +} + + +void Thumb2Assembler::rev(Register rd, Register rm, Condition cond) { + CheckCondition(cond); + EmitReverseBytes(rd, rm, 0b00); +} + + +void Thumb2Assembler::rev16(Register rd, Register rm, Condition cond) { + CheckCondition(cond); + EmitReverseBytes(rd, rm, 0b01); +} + + +void Thumb2Assembler::revsh(Register rd, Register rm, Condition cond) { + CheckCondition(cond); + EmitReverseBytes(rd, rm, 0b11); } diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index bf07b2dbf8..6b61acafac 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -117,6 +117,9 @@ class Thumb2Assembler FINAL : public ArmAssembler { void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE; void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE; void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE; + void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE; + void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE; + void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE; // Multiply instructions. void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE; @@ -644,6 +647,17 @@ class Thumb2Assembler FINAL : public ArmAssembler { Register rd, const ShifterOperand& so); + // Emit a single 32 bit miscellaneous instruction. + void Emit32Miscellaneous(uint8_t op1, + uint8_t op2, + uint32_t rest_encoding); + + // Emit reverse byte instructions: rev, rev16, revsh. + void EmitReverseBytes(Register rd, Register rm, uint32_t op); + + // Emit a single 16 bit miscellaneous instruction. + void Emit16Miscellaneous(uint32_t rest_encoding); + // Must the instruction be 32 bits or can it possibly be encoded // in 16 bits? 
bool Is32BitDataProcessing(Condition cond, diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 7b32b0fd26..650b08900b 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -1331,4 +1331,28 @@ TEST_F(AssemblerThumb2Test, rbit) { DriverStr(expected, "rbit"); } +TEST_F(AssemblerThumb2Test, rev) { + __ rev(arm::R1, arm::R0); + + const char* expected = "rev r1, r0\n"; + + DriverStr(expected, "rev"); +} + +TEST_F(AssemblerThumb2Test, rev16) { + __ rev16(arm::R1, arm::R0); + + const char* expected = "rev16 r1, r0\n"; + + DriverStr(expected, "rev16"); +} + +TEST_F(AssemblerThumb2Test, revsh) { + __ revsh(arm::R1, arm::R0); + + const char* expected = "revsh r1, r0\n"; + + DriverStr(expected, "revsh"); +} + } // namespace art diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 6fd65ee9a4..7c41813457 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -537,12 +537,20 @@ void MipsAssembler::Bgtz(Register rt, uint16_t imm16) { EmitI(0x7, rt, static_cast<Register>(0), imm16); } +void MipsAssembler::Bc1f(uint16_t imm16) { + Bc1f(0, imm16); +} + void MipsAssembler::Bc1f(int cc, uint16_t imm16) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>(cc << 2), imm16); } +void MipsAssembler::Bc1t(uint16_t imm16) { + Bc1t(0, imm16); +} + void MipsAssembler::Bc1t(int cc, uint16_t imm16) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; @@ -843,6 +851,22 @@ void MipsAssembler::DivD(FRegister fd, FRegister fs, FRegister ft) { EmitFR(0x11, 0x11, ft, fs, fd, 0x3); } +void MipsAssembler::SqrtS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x4); +} + +void MipsAssembler::SqrtD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x4); +} + +void MipsAssembler::AbsS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x5); +} + +void MipsAssembler::AbsD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x5); +} + void MipsAssembler::MovS(FRegister fd, FRegister fs) { EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x6); } @@ -859,84 +883,140 @@ void MipsAssembler::NegD(FRegister fd, FRegister fs) { EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7); } +void MipsAssembler::CunS(FRegister fs, FRegister ft) { + CunS(0, fs, ft); +} + void MipsAssembler::CunS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31); } +void MipsAssembler::CeqS(FRegister fs, FRegister ft) { + CeqS(0, fs, ft); +} + void MipsAssembler::CeqS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32); } +void MipsAssembler::CueqS(FRegister fs, FRegister ft) { + CueqS(0, fs, ft); +} + void MipsAssembler::CueqS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33); } +void MipsAssembler::ColtS(FRegister fs, FRegister ft) { + ColtS(0, fs, ft); +} + void MipsAssembler::ColtS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34); } +void MipsAssembler::CultS(FRegister fs, FRegister ft) 
{ + CultS(0, fs, ft); +} + void MipsAssembler::CultS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35); } +void MipsAssembler::ColeS(FRegister fs, FRegister ft) { + ColeS(0, fs, ft); +} + void MipsAssembler::ColeS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x36); } +void MipsAssembler::CuleS(FRegister fs, FRegister ft) { + CuleS(0, fs, ft); +} + void MipsAssembler::CuleS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37); } +void MipsAssembler::CunD(FRegister fs, FRegister ft) { + CunD(0, fs, ft); +} + void MipsAssembler::CunD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31); } +void MipsAssembler::CeqD(FRegister fs, FRegister ft) { + CeqD(0, fs, ft); +} + void MipsAssembler::CeqD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32); } +void MipsAssembler::CueqD(FRegister fs, FRegister ft) { + CueqD(0, fs, ft); +} + void MipsAssembler::CueqD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33); } +void MipsAssembler::ColtD(FRegister fs, FRegister ft) { + ColtD(0, fs, ft); +} + void MipsAssembler::ColtD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34); } +void MipsAssembler::CultD(FRegister fs, FRegister ft) { + CultD(0, fs, ft); +} + void MipsAssembler::CultD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35); } +void MipsAssembler::ColeD(FRegister fs, FRegister ft) { + ColeD(0, fs, ft); +} + void MipsAssembler::ColeD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36); } +void MipsAssembler::CuleD(FRegister fs, FRegister ft) { + CuleD(0, fs, ft); +} + void MipsAssembler::CuleD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; @@ -1055,6 +1135,70 @@ void MipsAssembler::Movt(Register rd, Register rs, int cc) { EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01); } +void MipsAssembler::MovfS(FRegister fd, FRegister fs, int cc) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x10, static_cast<FRegister>(cc << 2), fs, fd, 0x11); +} + +void MipsAssembler::MovfD(FRegister fd, FRegister fs, int cc) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x11, static_cast<FRegister>(cc << 2), fs, fd, 0x11); +} + +void MipsAssembler::MovtS(FRegister fd, FRegister fs, int cc) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x10, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11); +} + +void MipsAssembler::MovtD(FRegister fd, FRegister fs, int cc) { + CHECK(!IsR6()); + CHECK(IsUint<3>(cc)) << cc; + EmitFR(0x11, 0x11, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11); +} + +void MipsAssembler::SelS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x10, ft, fs, fd, 0x10); +} + +void MipsAssembler::SelD(FRegister fd, FRegister 
fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x11, ft, fs, fd, 0x10); +} + +void MipsAssembler::ClassS(FRegister fd, FRegister fs) { + CHECK(IsR6()); + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x1b); +} + +void MipsAssembler::ClassD(FRegister fd, FRegister fs) { + CHECK(IsR6()); + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x1b); +} + +void MipsAssembler::MinS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x10, ft, fs, fd, 0x1c); +} + +void MipsAssembler::MinD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x11, ft, fs, fd, 0x1c); +} + +void MipsAssembler::MaxS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x10, ft, fs, fd, 0x1e); +} + +void MipsAssembler::MaxD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + EmitFR(0x11, 0x11, ft, fs, fd, 0x1e); +} + void MipsAssembler::TruncLS(FRegister fd, FRegister fs) { EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09); } @@ -1095,6 +1239,14 @@ void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) { EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21); } +void MipsAssembler::FloorWS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0xf); +} + +void MipsAssembler::FloorWD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0xf); +} + void MipsAssembler::Mfc1(Register rt, FRegister fs) { EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); } @@ -2062,11 +2214,19 @@ void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label) { } } +void MipsAssembler::Bc1f(MipsLabel* label) { + Bc1f(0, label); +} + void MipsAssembler::Bc1f(int cc, MipsLabel* label) { CHECK(IsUint<3>(cc)) << cc; Bcond(label, kCondF, static_cast<Register>(cc), ZERO); } +void MipsAssembler::Bc1t(MipsLabel* label) { + Bc1t(0, label); +} + void MipsAssembler::Bc1t(int cc, MipsLabel* label) { CHECK(IsUint<3>(cc)) << cc; Bcond(label, kCondT, static_cast<Register>(cc), ZERO); diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 2262af49b3..a7179fd1dc 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -51,6 +51,20 @@ enum StoreOperandType { kStoreDoubleword }; +// Used to test the values returned by ClassS/ClassD. 
+enum FPClassMaskType { + kSignalingNaN = 0x001, + kQuietNaN = 0x002, + kNegativeInfinity = 0x004, + kNegativeNormal = 0x008, + kNegativeSubnormal = 0x010, + kNegativeZero = 0x020, + kPositiveInfinity = 0x040, + kPositiveNormal = 0x080, + kPositiveSubnormal = 0x100, + kPositiveZero = 0x200, +}; + class MipsLabel : public Label { public: MipsLabel() : prev_branch_id_plus_one_(0) {} @@ -191,7 +205,9 @@ class MipsAssembler FINAL : public Assembler { void Bgez(Register rt, uint16_t imm16); void Blez(Register rt, uint16_t imm16); void Bgtz(Register rt, uint16_t imm16); + void Bc1f(uint16_t imm16); // R2 void Bc1f(int cc, uint16_t imm16); // R2 + void Bc1t(uint16_t imm16); // R2 void Bc1t(int cc, uint16_t imm16); // R2 void J(uint32_t addr26); void Jal(uint32_t addr26); @@ -227,24 +243,42 @@ class MipsAssembler FINAL : public Assembler { void SubD(FRegister fd, FRegister fs, FRegister ft); void MulD(FRegister fd, FRegister fs, FRegister ft); void DivD(FRegister fd, FRegister fs, FRegister ft); + void SqrtS(FRegister fd, FRegister fs); + void SqrtD(FRegister fd, FRegister fs); + void AbsS(FRegister fd, FRegister fs); + void AbsD(FRegister fd, FRegister fs); void MovS(FRegister fd, FRegister fs); void MovD(FRegister fd, FRegister fs); void NegS(FRegister fd, FRegister fs); void NegD(FRegister fd, FRegister fs); + void CunS(FRegister fs, FRegister ft); // R2 void CunS(int cc, FRegister fs, FRegister ft); // R2 + void CeqS(FRegister fs, FRegister ft); // R2 void CeqS(int cc, FRegister fs, FRegister ft); // R2 + void CueqS(FRegister fs, FRegister ft); // R2 void CueqS(int cc, FRegister fs, FRegister ft); // R2 + void ColtS(FRegister fs, FRegister ft); // R2 void ColtS(int cc, FRegister fs, FRegister ft); // R2 + void CultS(FRegister fs, FRegister ft); // R2 void CultS(int cc, FRegister fs, FRegister ft); // R2 + void ColeS(FRegister fs, FRegister ft); // R2 void ColeS(int cc, FRegister fs, FRegister ft); // R2 + void CuleS(FRegister fs, FRegister ft); // R2 void CuleS(int cc, FRegister fs, FRegister ft); // R2 + void CunD(FRegister fs, FRegister ft); // R2 void CunD(int cc, FRegister fs, FRegister ft); // R2 + void CeqD(FRegister fs, FRegister ft); // R2 void CeqD(int cc, FRegister fs, FRegister ft); // R2 + void CueqD(FRegister fs, FRegister ft); // R2 void CueqD(int cc, FRegister fs, FRegister ft); // R2 + void ColtD(FRegister fs, FRegister ft); // R2 void ColtD(int cc, FRegister fs, FRegister ft); // R2 + void CultD(FRegister fs, FRegister ft); // R2 void CultD(int cc, FRegister fs, FRegister ft); // R2 + void ColeD(FRegister fs, FRegister ft); // R2 void ColeD(int cc, FRegister fs, FRegister ft); // R2 + void CuleD(FRegister fs, FRegister ft); // R2 void CuleD(int cc, FRegister fs, FRegister ft); // R2 void CmpUnS(FRegister fd, FRegister fs, FRegister ft); // R6 void CmpEqS(FRegister fd, FRegister fs, FRegister ft); // R6 @@ -266,8 +300,20 @@ class MipsAssembler FINAL : public Assembler { void CmpOrD(FRegister fd, FRegister fs, FRegister ft); // R6 void CmpUneD(FRegister fd, FRegister fs, FRegister ft); // R6 void CmpNeD(FRegister fd, FRegister fs, FRegister ft); // R6 - void Movf(Register rd, Register rs, int cc); // R2 - void Movt(Register rd, Register rs, int cc); // R2 + void Movf(Register rd, Register rs, int cc = 0); // R2 + void Movt(Register rd, Register rs, int cc = 0); // R2 + void MovfS(FRegister fd, FRegister fs, int cc = 0); // R2 + void MovfD(FRegister fd, FRegister fs, int cc = 0); // R2 + void MovtS(FRegister fd, FRegister fs, int cc = 0); // R2 + void MovtD(FRegister fd, 
FRegister fs, int cc = 0); // R2 + void SelS(FRegister fd, FRegister fs, FRegister ft); // R6 + void SelD(FRegister fd, FRegister fs, FRegister ft); // R6 + void ClassS(FRegister fd, FRegister fs); // R6 + void ClassD(FRegister fd, FRegister fs); // R6 + void MinS(FRegister fd, FRegister fs, FRegister ft); // R6 + void MinD(FRegister fd, FRegister fs, FRegister ft); // R6 + void MaxS(FRegister fd, FRegister fs, FRegister ft); // R6 + void MaxD(FRegister fd, FRegister fs, FRegister ft); // R6 void TruncLS(FRegister fd, FRegister fs); // R2+, FR=1 void TruncLD(FRegister fd, FRegister fs); // R2+, FR=1 @@ -279,6 +325,8 @@ class MipsAssembler FINAL : public Assembler { void Cvtds(FRegister fd, FRegister fs); void Cvtsl(FRegister fd, FRegister fs); // R2+, FR=1 void Cvtdl(FRegister fd, FRegister fs); // R2+, FR=1 + void FloorWS(FRegister fd, FRegister fs); + void FloorWD(FRegister fd, FRegister fs); void Mfc1(Register rt, FRegister fs); void Mtc1(Register rt, FRegister fs); @@ -322,7 +370,9 @@ class MipsAssembler FINAL : public Assembler { void Bge(Register rs, Register rt, MipsLabel* label); void Bltu(Register rs, Register rt, MipsLabel* label); void Bgeu(Register rs, Register rt, MipsLabel* label); + void Bc1f(MipsLabel* label); // R2 void Bc1f(int cc, MipsLabel* label); // R2 + void Bc1t(MipsLabel* label); // R2 void Bc1t(int cc, MipsLabel* label); // R2 void Bc1eqz(FRegister ft, MipsLabel* label); // R6 void Bc1nez(FRegister ft, MipsLabel* label); // R6 |
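The ClassS/ClassD instructions declared above deposit a classification bitmask in the destination register, and the new FPClassMaskType values are the bits callers are expected to test. A hypothetical helper (not part of the patch) illustrating such a test, with the two constants copied from the enum:

#include <cstdint>

// kSignalingNaN (0x001) and kQuietNaN (0x002) mirror the FPClassMaskType values
// added to assembler_mips.h; any NaN input sets exactly one of these two bits.
bool IsClassMaskNaN(uint32_t fp_class_mask) {
  const uint32_t kSignalingNaN = 0x001;
  const uint32_t kQuietNaN     = 0x002;
  return (fp_class_mask & (kSignalingNaN | kQuietNaN)) != 0;
}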