Diffstat (limited to 'compiler')
-rw-r--r--   compiler/debug/elf_debug_frame_writer.h       |   7
-rw-r--r--   compiler/debug/elf_debug_info_writer.h        |   4
-rw-r--r--   compiler/debug/elf_debug_line_writer.h        |   4
-rw-r--r--   compiler/debug/elf_debug_loc_writer.h         |   5
-rw-r--r--   compiler/debug/elf_debug_writer.cc            |  40
-rw-r--r--   compiler/debug/elf_debug_writer.h             |   7
-rw-r--r--   compiler/debug/elf_gnu_debugdata_writer.h     |  10
-rw-r--r--   compiler/debug/elf_symtab_writer.h            |   5
-rw-r--r--   compiler/linker/elf_builder.h                 | 224
-rw-r--r--   compiler/optimizing/code_generator_x86.cc     |  67
-rw-r--r--   compiler/optimizing/code_generator_x86.h      |   6
-rw-r--r--   compiler/optimizing/code_generator_x86_64.cc  |  92
-rw-r--r--   compiler/optimizing/code_generator_x86_64.h   |   5
-rw-r--r--   compiler/optimizing/nodes_vector.h            |  10
-rw-r--r--   compiler/optimizing/optimizing_compiler.cc    |  18
-rw-r--r--   compiler/optimizing/scheduler.h               |   5
-rw-r--r--   compiler/optimizing/scheduler_arm64.h         |  14
17 files changed, 299 insertions, 224 deletions
diff --git a/compiler/debug/elf_debug_frame_writer.h b/compiler/debug/elf_debug_frame_writer.h index d0c98a7b79..27b70c8caa 100644 --- a/compiler/debug/elf_debug_frame_writer.h +++ b/compiler/debug/elf_debug_frame_writer.h @@ -207,13 +207,12 @@ void WriteCFISection(linker::ElfBuilder<ElfTypes>* builder, } // Write .eh_frame/.debug_frame section. - auto* cfi_section = (format == dwarf::DW_DEBUG_FRAME_FORMAT - ? builder->GetDebugFrame() - : builder->GetEhFrame()); + const bool is_debug_frame = format == dwarf::DW_DEBUG_FRAME_FORMAT; + auto* cfi_section = (is_debug_frame ? builder->GetDebugFrame() : builder->GetEhFrame()); { cfi_section->Start(); const bool is64bit = Is64BitInstructionSet(builder->GetIsa()); - const Elf_Addr cfi_address = cfi_section->GetAddress(); + const Elf_Addr cfi_address = (is_debug_frame ? 0 : cfi_section->GetAddress()); const Elf_Addr cie_address = cfi_address; Elf_Addr buffer_address = cfi_address; std::vector<uint8_t> buffer; // Small temporary buffer. diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h index 81a0a69bfa..107ed488cd 100644 --- a/compiler/debug/elf_debug_info_writer.h +++ b/compiler/debug/elf_debug_info_writer.h @@ -298,7 +298,7 @@ class ElfCompilationUnitWriter { CHECK_EQ(info_.Depth(), 0); std::vector<uint8_t> buffer; buffer.reserve(info_.data()->size() + KB); - const size_t offset = owner_->builder_->GetDebugInfo()->GetSize(); + const size_t offset = owner_->builder_->GetDebugInfo()->GetPosition(); // All compilation units share single table which is at the start of .debug_abbrev. const size_t debug_abbrev_offset = 0; WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_); @@ -463,7 +463,7 @@ class ElfCompilationUnitWriter { CHECK_EQ(info_.Depth(), 0); std::vector<uint8_t> buffer; buffer.reserve(info_.data()->size() + KB); - const size_t offset = owner_->builder_->GetDebugInfo()->GetSize(); + const size_t offset = owner_->builder_->GetDebugInfo()->GetPosition(); // All compilation units share single table which is at the start of .debug_abbrev. const size_t debug_abbrev_offset = 0; WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_); diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h index c7224fc94a..d7fd52448c 100644 --- a/compiler/debug/elf_debug_line_writer.h +++ b/compiler/debug/elf_debug_line_writer.h @@ -60,7 +60,7 @@ class ElfDebugLineWriter { ? 
builder_->GetText()->GetAddress() : 0; - compilation_unit.debug_line_offset = builder_->GetDebugLine()->GetSize(); + compilation_unit.debug_line_offset = builder_->GetDebugLine()->GetPosition(); std::vector<dwarf::FileEntry> files; std::unordered_map<std::string, size_t> files_map; @@ -268,7 +268,7 @@ class ElfDebugLineWriter { } std::vector<uint8_t> buffer; buffer.reserve(opcodes.data()->size() + KB); - size_t offset = builder_->GetDebugLine()->GetSize(); + size_t offset = builder_->GetDebugLine()->GetPosition(); WriteDebugLineTable(directories, files, opcodes, offset, &buffer, &debug_line_patches_); builder_->GetDebugLine()->WriteFully(buffer.data(), buffer.size()); return buffer.size(); diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h index bb856b29f4..1d609af4e6 100644 --- a/compiler/debug/elf_debug_loc_writer.h +++ b/compiler/debug/elf_debug_loc_writer.h @@ -251,7 +251,10 @@ static void WriteDebugLocEntry(const MethodDebugInfo* method_info, // kInStackLargeOffset and kConstantLargeValue are hidden by GetKind(). // kInRegisterHigh and kInFpuRegisterHigh should be handled by // the special cases above and they should not occur alone. - LOG(ERROR) << "Unexpected register location kind: " << kind; + LOG(WARNING) << "Unexpected register location: " << kind + << " (This can indicate either a bug in the dexer when generating" + << " local variable information, or a bug in ART compiler." + << " Please file a bug at go/art-bug)"; break; } if (is64bitValue) { diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc index 33c46d7e1f..a6267292bf 100644 --- a/compiler/debug/elf_debug_writer.cc +++ b/compiler/debug/elf_debug_writer.cc @@ -108,29 +108,32 @@ void WriteDebugInfo(linker::ElfBuilder<ElfTypes>* builder, std::vector<uint8_t> MakeMiniDebugInfo( InstructionSet isa, const InstructionSetFeatures* features, - size_t rodata_size, + uint64_t text_address, size_t text_size, const ArrayRef<const MethodDebugInfo>& method_infos) { if (Is64BitInstructionSet(isa)) { return MakeMiniDebugInfoInternal<ElfTypes64>(isa, features, - rodata_size, + text_address, text_size, method_infos); } else { return MakeMiniDebugInfoInternal<ElfTypes32>(isa, features, - rodata_size, + text_address, text_size, method_infos); } } template <typename ElfTypes> -static std::vector<uint8_t> WriteDebugElfFileForMethodsInternal( +static std::vector<uint8_t> MakeElfFileForJITInternal( InstructionSet isa, const InstructionSetFeatures* features, - const ArrayRef<const MethodDebugInfo>& method_infos) { + bool mini_debug_info, + const MethodDebugInfo& mi) { + CHECK_EQ(mi.is_code_address_text_relative, false); + ArrayRef<const MethodDebugInfo> method_infos(&mi, 1); std::vector<uint8_t> buffer; buffer.reserve(KB); linker::VectorOutputStream out("Debug ELF file", &buffer); @@ -138,23 +141,34 @@ static std::vector<uint8_t> WriteDebugElfFileForMethodsInternal( new linker::ElfBuilder<ElfTypes>(isa, features, &out)); // No program headers since the ELF file is not linked and has no allocated sections. 
builder->Start(false /* write_program_headers */); - WriteDebugInfo(builder.get(), - method_infos, - dwarf::DW_DEBUG_FRAME_FORMAT, - false /* write_oat_patches */); + if (mini_debug_info) { + std::vector<uint8_t> mdi = MakeMiniDebugInfo(isa, + features, + mi.code_address, + mi.code_size, + method_infos); + builder->WriteSection(".gnu_debugdata", &mdi); + } else { + builder->GetText()->AllocateVirtualMemory(mi.code_address, mi.code_size); + WriteDebugInfo(builder.get(), + method_infos, + dwarf::DW_DEBUG_FRAME_FORMAT, + false /* write_oat_patches */); + } builder->End(); CHECK(builder->Good()); return buffer; } -std::vector<uint8_t> WriteDebugElfFileForMethods( +std::vector<uint8_t> MakeElfFileForJIT( InstructionSet isa, const InstructionSetFeatures* features, - const ArrayRef<const MethodDebugInfo>& method_infos) { + bool mini_debug_info, + const MethodDebugInfo& method_info) { if (Is64BitInstructionSet(isa)) { - return WriteDebugElfFileForMethodsInternal<ElfTypes64>(isa, features, method_infos); + return MakeElfFileForJITInternal<ElfTypes64>(isa, features, mini_debug_info, method_info); } else { - return WriteDebugElfFileForMethodsInternal<ElfTypes32>(isa, features, method_infos); + return MakeElfFileForJITInternal<ElfTypes32>(isa, features, mini_debug_info, method_info); } } diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h index d24ca9b203..a47bf076b9 100644 --- a/compiler/debug/elf_debug_writer.h +++ b/compiler/debug/elf_debug_writer.h @@ -43,14 +43,15 @@ void WriteDebugInfo( std::vector<uint8_t> MakeMiniDebugInfo( InstructionSet isa, const InstructionSetFeatures* features, - size_t rodata_section_size, + uint64_t text_section_address, size_t text_section_size, const ArrayRef<const MethodDebugInfo>& method_infos); -std::vector<uint8_t> WriteDebugElfFileForMethods( +std::vector<uint8_t> MakeElfFileForJIT( InstructionSet isa, const InstructionSetFeatures* features, - const ArrayRef<const MethodDebugInfo>& method_infos); + bool mini_debug_info, + const MethodDebugInfo& method_info); std::vector<uint8_t> WriteDebugElfFileForClasses( InstructionSet isa, diff --git a/compiler/debug/elf_gnu_debugdata_writer.h b/compiler/debug/elf_gnu_debugdata_writer.h index 1cdf6b0ad1..78b8e2780c 100644 --- a/compiler/debug/elf_gnu_debugdata_writer.h +++ b/compiler/debug/elf_gnu_debugdata_writer.h @@ -80,7 +80,7 @@ template <typename ElfTypes> static std::vector<uint8_t> MakeMiniDebugInfoInternal( InstructionSet isa, const InstructionSetFeatures* features, - size_t rodata_section_size, + typename ElfTypes::Addr text_section_address, size_t text_section_size, const ArrayRef<const MethodDebugInfo>& method_infos) { std::vector<uint8_t> buffer; @@ -88,11 +88,9 @@ static std::vector<uint8_t> MakeMiniDebugInfoInternal( linker::VectorOutputStream out("Mini-debug-info ELF file", &buffer); std::unique_ptr<linker::ElfBuilder<ElfTypes>> builder( new linker::ElfBuilder<ElfTypes>(isa, features, &out)); - builder->Start(); - // Mirror .rodata and .text as NOBITS sections. - // It is needed to detected relocations after compression. - builder->GetRoData()->WriteNoBitsSection(rodata_section_size); - builder->GetText()->WriteNoBitsSection(text_section_size); + builder->Start(false /* write_program_headers */); + // Mirror .text as NOBITS section since the added symbols will reference it. 
+ builder->GetText()->AllocateVirtualMemory(text_section_address, text_section_size); WriteDebugSymbols(builder.get(), method_infos, false /* with_signature */); WriteCFISection(builder.get(), method_infos, diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h index 0907e102a0..57e010f232 100644 --- a/compiler/debug/elf_symtab_writer.h +++ b/compiler/debug/elf_symtab_writer.h @@ -79,8 +79,9 @@ static void WriteDebugSymbols(linker::ElfBuilder<ElfTypes>* builder, last_name_offset = name_offset; } - const auto* text = info.is_code_address_text_relative ? builder->GetText() : nullptr; - uint64_t address = info.code_address + (text != nullptr ? text->GetAddress() : 0); + const auto* text = builder->GetText(); + uint64_t address = info.code_address; + address += info.is_code_address_text_relative ? text->GetAddress() : 0; // Add in code delta, e.g., thumb bit 0 for Thumb2 code. address += CompiledMethod::CodeDelta(info.isa); symtab->Add(name_offset, text, address, info.code_size, STB_GLOBAL, STT_FUNC); diff --git a/compiler/linker/elf_builder.h b/compiler/linker/elf_builder.h index b30b55e9b4..aa3cd98595 100644 --- a/compiler/linker/elf_builder.h +++ b/compiler/linker/elf_builder.h @@ -108,8 +108,6 @@ class ElfBuilder FINAL { section_index_(0), name_(name), link_(link), - started_(false), - finished_(false), phdr_flags_(PF_R), phdr_type_(0) { DCHECK_GE(align, 1u); @@ -120,90 +118,62 @@ class ElfBuilder FINAL { header_.sh_entsize = entsize; } - // Start writing of this section. - void Start() { - CHECK(!started_); - CHECK(!finished_); - started_ = true; - auto& sections = owner_->sections_; - // Check that the previous section is complete. - CHECK(sections.empty() || sections.back()->finished_); - // The first ELF section index is 1. Index 0 is reserved for NULL. - section_index_ = sections.size() + 1; - // Page-align if we switch between allocated and non-allocated sections, - // or if we change the type of allocation (e.g. executable vs non-executable). - if (!sections.empty()) { - if (header_.sh_flags != sections.back()->header_.sh_flags) { - header_.sh_addralign = kPageSize; - } - } - // Align file position. - if (header_.sh_type != SHT_NOBITS) { - header_.sh_offset = owner_->AlignFileOffset(header_.sh_addralign); - } else { - header_.sh_offset = 0; - } - // Align virtual memory address. - if ((header_.sh_flags & SHF_ALLOC) != 0) { - header_.sh_addr = owner_->AlignVirtualAddress(header_.sh_addralign); - } else { - header_.sh_addr = 0; - } - // Push this section on the list of written sections. - sections.push_back(this); + // Allocate chunk of virtual memory for this section from the owning ElfBuilder. + // This must be done at the start for all SHF_ALLOC sections (i.e. mmaped by linker). + // It is fine to allocate section but never call Start/End() (e.g. the .bss section). + void AllocateVirtualMemory(Elf_Word size) { + AllocateVirtualMemory(owner_->virtual_address_, size); } - // Finish writing of this section. - void End() { - CHECK(started_); - CHECK(!finished_); - finished_ = true; - if (header_.sh_type == SHT_NOBITS) { - CHECK_GT(header_.sh_size, 0u); - } else { - // Use the current file position to determine section size. 
- off_t file_offset = owner_->stream_.Seek(0, kSeekCurrent); - CHECK_GE(file_offset, (off_t)header_.sh_offset); - header_.sh_size = file_offset - header_.sh_offset; - } - if ((header_.sh_flags & SHF_ALLOC) != 0) { - owner_->virtual_address_ += header_.sh_size; - } + void AllocateVirtualMemory(Elf_Addr addr, Elf_Word size) { + CHECK_NE(header_.sh_flags & SHF_ALLOC, 0u); + Elf_Word align = AddSection(); + CHECK_EQ(header_.sh_addr, 0u); + header_.sh_addr = RoundUp(addr, align); + CHECK(header_.sh_size == 0u || header_.sh_size == size); + header_.sh_size = size; + CHECK_LE(owner_->virtual_address_, header_.sh_addr); + owner_->virtual_address_ = header_.sh_addr + header_.sh_size; } - // Get the location of this section in virtual memory. - Elf_Addr GetAddress() const { - CHECK(started_); - return header_.sh_addr; + // Start writing file data of this section. + void Start() { + CHECK(owner_->current_section_ == nullptr); + Elf_Word align = AddSection(); + CHECK_EQ(header_.sh_offset, 0u); + header_.sh_offset = owner_->AlignFileOffset(align); + owner_->current_section_ = this; } - // Returns the size of the content of this section. - Elf_Word GetSize() const { - if (finished_) { - return header_.sh_size; - } else { - CHECK(started_); - CHECK_NE(header_.sh_type, (Elf_Word)SHT_NOBITS); - return owner_->stream_.Seek(0, kSeekCurrent) - header_.sh_offset; - } + // Finish writing file data of this section. + void End() { + CHECK(owner_->current_section_ == this); + Elf_Word position = GetPosition(); + CHECK(header_.sh_size == 0u || header_.sh_size == position); + header_.sh_size = position; + owner_->current_section_ = nullptr; + } + + // Get the number of bytes written so far. + // Only valid while writing the section. + Elf_Word GetPosition() const { + CHECK(owner_->current_section_ == this); + off_t file_offset = owner_->stream_.Seek(0, kSeekCurrent); + DCHECK_GE(file_offset, (off_t)header_.sh_offset); + return file_offset - header_.sh_offset; } - // Write this section as "NOBITS" section. (used for the .bss section) - // This means that the ELF file does not contain the initial data for this section - // and it will be zero-initialized when the ELF file is loaded in the running program. - void WriteNoBitsSection(Elf_Word size) { + // Get the location of this section in virtual memory. + Elf_Addr GetAddress() const { DCHECK_NE(header_.sh_flags & SHF_ALLOC, 0u); - header_.sh_type = SHT_NOBITS; - Start(); - header_.sh_size = size; - End(); + DCHECK_NE(header_.sh_addr, 0u); + return header_.sh_addr; } // This function always succeeds to simplify code. // Use builder's Good() to check the actual status. bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE { - CHECK(started_); - CHECK(!finished_); + CHECK(owner_->current_section_ == this); return owner_->stream_.WriteFully(buffer, byte_count); } @@ -221,19 +191,32 @@ class ElfBuilder FINAL { } Elf_Word GetSectionIndex() const { - DCHECK(started_); DCHECK_NE(section_index_, 0u); return section_index_; } private: + // Add this section to the list of generated ELF sections (if not there already). + // It also ensures the alignment is sufficient to generate valid program headers, + // since that depends on the previous section. It returns the required alignment. + Elf_Word AddSection() { + if (section_index_ == 0) { + std::vector<Section*>& sections = owner_->sections_; + Elf_Word last = sections.empty() ? PF_R : sections.back()->phdr_flags_; + if (phdr_flags_ != last) { + header_.sh_addralign = kPageSize; // Page-align if R/W/X flags changed. 
+ } + sections.push_back(this); + section_index_ = sections.size(); // First ELF section has index 1. + } + return owner_->write_program_headers_ ? header_.sh_addralign : 1; + } + ElfBuilder<ElfTypes>* owner_; Elf_Shdr header_; Elf_Word section_index_; const std::string name_; const Section* const link_; - bool started_; - bool finished_; Elf_Word phdr_flags_; Elf_Word phdr_type_; @@ -370,7 +353,7 @@ class ElfBuilder FINAL { Elf_Word section_index; if (section != nullptr) { DCHECK_LE(section->GetAddress(), addr); - DCHECK_LE(addr, section->GetAddress() + section->GetSize()); + DCHECK_LE(addr, section->GetAddress() + section->header_.sh_size); section_index = section->GetSectionIndex(); } else { section_index = static_cast<Elf_Word>(SHN_ABS); @@ -479,6 +462,10 @@ class ElfBuilder FINAL { digest_start_(-1) { } + Elf_Word GetSize() { + return 16 + kBuildIdLen; + } + void Write() { // The size fields are 32-bit on both 32-bit and 64-bit systems, confirmed // with the 64-bit linker and libbfd code. The size of name and desc must @@ -490,6 +477,7 @@ class ElfBuilder FINAL { digest_start_ = this->Seek(0, kSeekCurrent); static_assert(kBuildIdLen % 4 == 0, "expecting a mutliple of 4 for build ID length"); this->WriteFully(std::string(kBuildIdLen, '\0').c_str(), kBuildIdLen); // desc. + DCHECK_EQ(this->GetPosition(), GetSize()); } off_t GetDigestStart() { @@ -530,6 +518,7 @@ class ElfBuilder FINAL { abiflags_(this, ".MIPS.abiflags", SHT_MIPS_ABIFLAGS, SHF_ALLOC, nullptr, 0, kPageSize, 0, isa, features), build_id_(this, ".note.gnu.build-id", SHT_NOTE, SHF_ALLOC, nullptr, 0, 4, 0), + current_section_(nullptr), started_(false), write_program_headers_(false), loaded_size_(0u), @@ -545,6 +534,7 @@ class ElfBuilder FINAL { ~ElfBuilder() {} InstructionSet GetIsa() { return isa_; } + BuildIdSection* GetBuildId() { return &build_id_; } Section* GetRoData() { return &rodata_; } Section* GetText() { return &text_; } Section* GetBss() { return &bss_; } @@ -622,6 +612,9 @@ class ElfBuilder FINAL { if (section->link_ != nullptr) { section->header_.sh_link = section->link_->GetSectionIndex(); } + if (section->header_.sh_offset == 0) { + section->header_.sh_type = SHT_NOBITS; + } } shstrtab_.End(); @@ -680,65 +673,57 @@ class ElfBuilder FINAL { soname = soname.substr(directory_separator_pos + 1); } - // Calculate addresses of .text, .bss and .dynstr. - DCHECK_EQ(rodata_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize)); - DCHECK_EQ(text_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize)); - DCHECK_EQ(bss_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize)); - DCHECK_EQ(dynstr_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize)); - Elf_Word rodata_address = rodata_.GetAddress(); - Elf_Word text_address = RoundUp(rodata_address + rodata_size, kPageSize); - Elf_Word bss_address = RoundUp(text_address + text_size, kPageSize); - Elf_Word abiflags_address = RoundUp(bss_address + bss_size, kPageSize); - Elf_Word abiflags_size = 0; + // Allocate all pre-dynamic sections. + rodata_.AllocateVirtualMemory(rodata_size); + text_.AllocateVirtualMemory(text_size); + if (bss_size != 0) { + bss_.AllocateVirtualMemory(bss_size); + } if (isa_ == InstructionSet::kMips || isa_ == InstructionSet::kMips64) { - abiflags_size = abiflags_.GetSize(); + abiflags_.AllocateVirtualMemory(abiflags_.GetSize()); } - Elf_Word dynstr_address = RoundUp(abiflags_address + abiflags_size, kPageSize); // Cache .dynstr, .dynsym and .hash data. dynstr_.Add(""); // dynstr should start with empty string. 
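The hand-computed RoundUp chain removed above is replaced by a running cursor maintained inside AllocateVirtualMemory. A minimal standalone model of that allocation scheme (illustrative only; not code from this patch, and it assumes power-of-two alignments):

    #include <cstdint>

    // Illustrative model of the running virtual-address cursor: each SHF_ALLOC section
    // is placed at the cursor rounded up to its alignment, and the cursor then advances
    // past the end of the section.
    uint64_t RoundUp(uint64_t value, uint64_t alignment) {
      return (value + alignment - 1) & ~(alignment - 1);  // Power-of-two alignment only.
    }

    uint64_t AllocateAddress(uint64_t* cursor, uint64_t size, uint64_t alignment) {
      uint64_t address = RoundUp(*cursor, alignment);
      *cursor = address + size;  // The next section starts after this one.
      return address;
    }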
- Elf_Word rodata_index = rodata_.GetSectionIndex(); Elf_Word oatdata = dynstr_.Add("oatdata"); - dynsym_.Add(oatdata, rodata_index, rodata_address, rodata_size, STB_GLOBAL, STT_OBJECT); + dynsym_.Add(oatdata, &rodata_, rodata_.GetAddress(), rodata_size, STB_GLOBAL, STT_OBJECT); if (text_size != 0u) { - Elf_Word text_index = rodata_index + 1u; Elf_Word oatexec = dynstr_.Add("oatexec"); - dynsym_.Add(oatexec, text_index, text_address, text_size, STB_GLOBAL, STT_OBJECT); + dynsym_.Add(oatexec, &text_, text_.GetAddress(), text_size, STB_GLOBAL, STT_OBJECT); Elf_Word oatlastword = dynstr_.Add("oatlastword"); - Elf_Word oatlastword_address = text_address + text_size - 4; - dynsym_.Add(oatlastword, text_index, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT); + Elf_Word oatlastword_address = text_.GetAddress() + text_size - 4; + dynsym_.Add(oatlastword, &text_, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT); } else if (rodata_size != 0) { // rodata_ can be size 0 for dwarf_test. Elf_Word oatlastword = dynstr_.Add("oatlastword"); - Elf_Word oatlastword_address = rodata_address + rodata_size - 4; - dynsym_.Add(oatlastword, rodata_index, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT); + Elf_Word oatlastword_address = rodata_.GetAddress() + rodata_size - 4; + dynsym_.Add(oatlastword, &rodata_, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT); } DCHECK_LE(bss_roots_offset, bss_size); if (bss_size != 0u) { - Elf_Word bss_index = rodata_index + 1u + (text_size != 0 ? 1u : 0u); Elf_Word oatbss = dynstr_.Add("oatbss"); - dynsym_.Add(oatbss, bss_index, bss_address, bss_roots_offset, STB_GLOBAL, STT_OBJECT); + dynsym_.Add(oatbss, &bss_, bss_.GetAddress(), bss_roots_offset, STB_GLOBAL, STT_OBJECT); DCHECK_LE(bss_methods_offset, bss_roots_offset); DCHECK_LE(bss_roots_offset, bss_size); // Add a symbol marking the start of the methods part of the .bss, if not empty. if (bss_methods_offset != bss_roots_offset) { - Elf_Word bss_methods_address = bss_address + bss_methods_offset; + Elf_Word bss_methods_address = bss_.GetAddress() + bss_methods_offset; Elf_Word bss_methods_size = bss_roots_offset - bss_methods_offset; Elf_Word oatbssroots = dynstr_.Add("oatbssmethods"); dynsym_.Add( - oatbssroots, bss_index, bss_methods_address, bss_methods_size, STB_GLOBAL, STT_OBJECT); + oatbssroots, &bss_, bss_methods_address, bss_methods_size, STB_GLOBAL, STT_OBJECT); } // Add a symbol marking the start of the GC roots part of the .bss, if not empty. if (bss_roots_offset != bss_size) { - Elf_Word bss_roots_address = bss_address + bss_roots_offset; + Elf_Word bss_roots_address = bss_.GetAddress() + bss_roots_offset; Elf_Word bss_roots_size = bss_size - bss_roots_offset; Elf_Word oatbssroots = dynstr_.Add("oatbssroots"); dynsym_.Add( - oatbssroots, bss_index, bss_roots_address, bss_roots_size, STB_GLOBAL, STT_OBJECT); + oatbssroots, &bss_, bss_roots_address, bss_roots_size, STB_GLOBAL, STT_OBJECT); } Elf_Word oatbsslastword = dynstr_.Add("oatbsslastword"); - Elf_Word bsslastword_address = bss_address + bss_size - 4; - dynsym_.Add(oatbsslastword, bss_index, bsslastword_address, 4, STB_GLOBAL, STT_OBJECT); + Elf_Word bsslastword_address = bss_.GetAddress() + bss_size - 4; + dynsym_.Add(oatbsslastword, &bss_, bsslastword_address, 4, STB_GLOBAL, STT_OBJECT); } Elf_Word soname_offset = dynstr_.Add(soname); @@ -759,28 +744,24 @@ class ElfBuilder FINAL { hash.push_back(0); // Last symbol terminates the chain. hash_.Add(hash.data(), hash.size() * sizeof(hash[0])); - // Calculate addresses of .dynsym, .hash and .dynamic. 
- DCHECK_EQ(dynstr_.header_.sh_flags, dynsym_.header_.sh_flags); - DCHECK_EQ(dynsym_.header_.sh_flags, hash_.header_.sh_flags); - Elf_Word dynsym_address = - RoundUp(dynstr_address + dynstr_.GetCacheSize(), dynsym_.header_.sh_addralign); - Elf_Word hash_address = - RoundUp(dynsym_address + dynsym_.GetCacheSize(), hash_.header_.sh_addralign); - DCHECK_EQ(dynamic_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize)); - Elf_Word dynamic_address = RoundUp(hash_address + dynsym_.GetCacheSize(), kPageSize); + // Allocate all remaining sections. + dynstr_.AllocateVirtualMemory(dynstr_.GetCacheSize()); + dynsym_.AllocateVirtualMemory(dynsym_.GetCacheSize()); + hash_.AllocateVirtualMemory(hash_.GetCacheSize()); Elf_Dyn dyns[] = { - { DT_HASH, { hash_address } }, - { DT_STRTAB, { dynstr_address } }, - { DT_SYMTAB, { dynsym_address } }, + { DT_HASH, { hash_.GetAddress() } }, + { DT_STRTAB, { dynstr_.GetAddress() } }, + { DT_SYMTAB, { dynsym_.GetAddress() } }, { DT_SYMENT, { sizeof(Elf_Sym) } }, { DT_STRSZ, { dynstr_.GetCacheSize() } }, { DT_SONAME, { soname_offset } }, { DT_NULL, { 0 } }, }; dynamic_.Add(&dyns, sizeof(dyns)); + dynamic_.AllocateVirtualMemory(dynamic_.GetCacheSize()); - loaded_size_ = RoundUp(dynamic_address + dynamic_.GetCacheSize(), kPageSize); + loaded_size_ = RoundUp(virtual_address_, kPageSize); } void WriteDynamicSection() { @@ -788,8 +769,6 @@ class ElfBuilder FINAL { dynsym_.WriteCachedSection(); hash_.WriteCachedSection(); dynamic_.WriteCachedSection(); - - CHECK_EQ(loaded_size_, RoundUp(dynamic_.GetAddress() + dynamic_.GetSize(), kPageSize)); } Elf_Word GetLoadedSize() { @@ -828,10 +807,6 @@ class ElfBuilder FINAL { return stream_.Seek(RoundUp(stream_.Seek(0, kSeekCurrent), alignment), kSeekSet); } - Elf_Addr AlignVirtualAddress(size_t alignment) { - return virtual_address_ = RoundUp(virtual_address_, alignment); - } - private: static Elf_Ehdr MakeElfHeader(InstructionSet isa, const InstructionSetFeatures* features) { Elf_Ehdr elf_header = Elf_Ehdr(); @@ -902,7 +877,6 @@ class ElfBuilder FINAL { elf_header.e_ehsize = sizeof(Elf_Ehdr); elf_header.e_phentsize = sizeof(Elf_Phdr); elf_header.e_shentsize = sizeof(Elf_Shdr); - elf_header.e_phoff = sizeof(Elf_Ehdr); return elf_header; } @@ -933,6 +907,7 @@ class ElfBuilder FINAL { for (auto* section : sections_) { const Elf_Shdr& shdr = section->header_; if ((shdr.sh_flags & SHF_ALLOC) != 0 && shdr.sh_size != 0) { + DCHECK(shdr.sh_addr != 0u) << "Allocate virtual memory for the section"; // PT_LOAD tells the linker to mmap part of the file. // The linker can only mmap page-aligned sections. // Single PT_LOAD may contain several ELF sections. @@ -1010,6 +985,7 @@ class ElfBuilder FINAL { // List of used section in the order in which they were written. std::vector<Section*> sections_; + Section* current_section_; // The section which is currently being written. 
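Putting the reworked Section interface together, the intended call order for an SHF_ALLOC section is roughly the following. This is a sketch only: text stands for builder->GetText(), and text_address, text_size and code are placeholders, but the methods are the ones introduced above.

    // Sketch of the new Section write lifecycle (illustrative; error handling elided).
    text->AllocateVirtualMemory(text_address, text_size);  // Reserve the address range up front.
    text->Start();                                         // Open the section for file data.
    text->WriteFully(code.data(), code.size());
    Elf_Word written = text->GetPosition();                // Bytes written so far; valid only between Start() and End().
    text->End();                                           // Checks the written size against any size fixed by AllocateVirtualMemory.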
bool started_; bool write_program_headers_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 2e8170ecc4..42ee9db167 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -5732,24 +5732,18 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const { return codegen_->GetAssembler(); } -void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) { +void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) { ScratchRegisterScope ensure_scratch( this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, src + stack_offset)); - __ movl(Address(ESP, dst + stack_offset), temp_reg); -} -void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, src + stack_offset)); - __ movl(Address(ESP, dst + stack_offset), temp_reg); - __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize)); - __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg); + // Now that temp register is available (possibly spilled), move blocks of memory. + for (int i = 0; i < number_of_words; i++) { + __ movl(temp_reg, Address(ESP, src + stack_offset)); + __ movl(Address(ESP, dst + stack_offset), temp_reg); + stack_offset += kX86WordSize; + } } void ParallelMoveResolverX86::EmitMove(size_t index) { @@ -5800,7 +5794,7 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); } else { DCHECK(destination.IsStackSlot()); - MoveMemoryToMemory32(destination.GetStackIndex(), source.GetStackIndex()); + MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1); } } else if (source.IsDoubleStackSlot()) { if (destination.IsRegisterPair()) { @@ -5811,11 +5805,15 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; - MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex()); + MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2); } } else if (source.IsSIMDStackSlot()) { - DCHECK(destination.IsFpuRegister()); - __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); + if (destination.IsFpuRegister()) { + __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); + } else { + DCHECK(destination.IsSIMDStackSlot()); + MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4); + } } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); if (constant->IsIntConstant() || constant->IsNullConstant()) { @@ -5915,7 +5913,16 @@ void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) { __ movd(reg, temp_reg); } -void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { +void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) { + size_t extra_slot = 4 * kX86WordSize; + __ subl(ESP, Immediate(extra_slot)); 
+ __ movups(Address(ESP, 0), XmmRegister(reg)); + ExchangeMemory(0, mem + extra_slot, 4); + __ movups(XmmRegister(reg), Address(ESP, 0)); + __ addl(ESP, Immediate(extra_slot)); +} + +void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) { ScratchRegisterScope ensure_scratch1( this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); @@ -5925,10 +5932,15 @@ void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0; stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0; - __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); - __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset)); - __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister())); - __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister())); + + // Now that temp registers are available (possibly spilled), exchange blocks of memory. + for (int i = 0; i < number_of_words; i++) { + __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); + __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset)); + __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister())); + __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister())); + stack_offset += kX86WordSize; + } } void ParallelMoveResolverX86::EmitSwap(size_t index) { @@ -5947,7 +5959,7 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { } else if (source.IsStackSlot() && destination.IsRegister()) { Exchange(destination.AsRegister<Register>(), source.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsStackSlot()) { - Exchange(destination.GetStackIndex(), source.GetStackIndex()); + ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1); } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { // Use XOR Swap algorithm to avoid a temporary. DCHECK_NE(source.reg(), destination.reg()); @@ -5983,8 +5995,13 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { // Move the high double to the low double. 
__ psrldq(reg, Immediate(8)); } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) { - Exchange(destination.GetStackIndex(), source.GetStackIndex()); - Exchange(destination.GetHighStackIndex(kX86WordSize), source.GetHighStackIndex(kX86WordSize)); + ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2); + } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) { + ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4); + } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) { + Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); + } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) { + Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); } else { LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination; } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 176e4dfda0..40b7e3c54f 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -139,10 +139,10 @@ class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap { private: void Exchange(Register reg, int mem); - void Exchange(int mem1, int mem2); void Exchange32(XmmRegister reg, int mem); - void MoveMemoryToMemory32(int dst, int src); - void MoveMemoryToMemory64(int dst, int src); + void Exchange128(XmmRegister reg, int mem); + void ExchangeMemory(int mem1, int mem2, int number_of_words); + void MoveMemoryToMemory(int dst, int src, int number_of_words); CodeGeneratorX86* const codegen_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index e25688c9a3..02fbf234c1 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -5220,9 +5220,17 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } } else if (source.IsSIMDStackSlot()) { - DCHECK(destination.IsFpuRegister()); - __ movups(destination.AsFpuRegister<XmmRegister>(), - Address(CpuRegister(RSP), source.GetStackIndex())); + if (destination.IsFpuRegister()) { + __ movups(destination.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), source.GetStackIndex())); + } else { + DCHECK(destination.IsSIMDStackSlot()); + size_t high = kX86_64WordSize; + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high)); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP)); + } } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); if (constant->IsIntConstant() || constant->IsNullConstant()) { @@ -5290,19 +5298,6 @@ void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) { __ movl(reg, CpuRegister(TMP)); } -void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) { - ScratchRegisterScope ensure_scratch( - this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); - - int stack_offset = ensure_scratch.IsSpilled() ? 
kX86_64WordSize : 0; - __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); - __ movl(CpuRegister(ensure_scratch.GetRegister()), - Address(CpuRegister(RSP), mem2 + stack_offset)); - __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); - __ movl(Address(CpuRegister(RSP), mem1 + stack_offset), - CpuRegister(ensure_scratch.GetRegister())); -} - void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) { __ movq(CpuRegister(TMP), reg1); __ movq(reg1, reg2); @@ -5315,19 +5310,6 @@ void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { __ movq(reg, CpuRegister(TMP)); } -void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) { - ScratchRegisterScope ensure_scratch( - this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); - - int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; - __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); - __ movq(CpuRegister(ensure_scratch.GetRegister()), - Address(CpuRegister(RSP), mem2 + stack_offset)); - __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); - __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), - CpuRegister(ensure_scratch.GetRegister())); -} - void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); __ movss(Address(CpuRegister(RSP), mem), reg); @@ -5340,6 +5322,48 @@ void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { __ movd(reg, CpuRegister(TMP)); } +void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) { + size_t extra_slot = 2 * kX86_64WordSize; + __ subq(CpuRegister(RSP), Immediate(extra_slot)); + __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg)); + ExchangeMemory64(0, mem + extra_slot, 2); + __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0)); + __ addq(CpuRegister(RSP), Immediate(extra_slot)); +} + +void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) { + ScratchRegisterScope ensure_scratch( + this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; + __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); + __ movl(CpuRegister(ensure_scratch.GetRegister()), + Address(CpuRegister(RSP), mem2 + stack_offset)); + __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); + __ movl(Address(CpuRegister(RSP), mem1 + stack_offset), + CpuRegister(ensure_scratch.GetRegister())); +} + +void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) { + ScratchRegisterScope ensure_scratch( + this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; + + // Now that temp registers are available (possibly spilled), exchange blocks of memory. 
+ for (int i = 0; i < num_of_qwords; i++) { + __ movq(CpuRegister(TMP), + Address(CpuRegister(RSP), mem1 + stack_offset)); + __ movq(CpuRegister(ensure_scratch.GetRegister()), + Address(CpuRegister(RSP), mem2 + stack_offset)); + __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), + CpuRegister(TMP)); + __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), + CpuRegister(ensure_scratch.GetRegister())); + stack_offset += kX86_64WordSize; + } +} + void ParallelMoveResolverX86_64::EmitSwap(size_t index) { MoveOperands* move = moves_[index]; Location source = move->GetSource(); @@ -5352,13 +5376,13 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { } else if (source.IsStackSlot() && destination.IsRegister()) { Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsStackSlot()) { - Exchange32(destination.GetStackIndex(), source.GetStackIndex()); + ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex()); } else if (source.IsRegister() && destination.IsDoubleStackSlot()) { Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); } else if (source.IsDoubleStackSlot() && destination.IsRegister()) { Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex()); } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { - Exchange64(destination.GetStackIndex(), source.GetStackIndex()); + ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1); } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>()); __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>()); @@ -5371,6 +5395,12 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) { Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); + } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) { + ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2); + } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) { + Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); + } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) { + Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); } else { LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination; } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 00c5c27470..e86123ef01 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -139,11 +139,12 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap { private: void Exchange32(CpuRegister reg, int mem); void Exchange32(XmmRegister reg, int mem); - void Exchange32(int mem1, int mem2); void Exchange64(CpuRegister reg1, CpuRegister reg2); void Exchange64(CpuRegister reg, int mem); void Exchange64(XmmRegister reg, int mem); - void Exchange64(int mem1, int mem2); + void Exchange128(XmmRegister reg, int mem); + void ExchangeMemory32(int mem1, int mem2); + void ExchangeMemory64(int mem1, int mem2, int num_of_qwords); CodeGeneratorX86_64* const codegen_; diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 096349fd73..87dff8403b 100644 
--- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -109,6 +109,16 @@ class HVecOperation : public HVariableInputSizeInstruction { // Assumes vector nodes cannot be moved by default. Each concrete implementation // that can be moved should override this method and return true. + // + // Note: similar approach is used for instruction scheduling (if it is turned on for the target): + // by default HScheduler::IsSchedulable returns false for a particular HVecOperation. + // HScheduler${ARCH}::IsSchedulable can be overridden to return true for an instruction (see + // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also + // look at/update HScheduler${ARCH}::IsSchedulingBarrier for this instruction. + // + // Note: For newly introduced vector instructions HScheduler${ARCH}::IsSchedulingBarrier must be + // altered to return true if the instruction might reside outside the SIMD loop body since SIMD + // registers are not kept alive across vector loop boundaries (yet). bool CanBeMoved() const OVERRIDE { return false; } // Tests if all data of a vector node (vector length and packed type) is equal. diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 73c72fc57a..24b1a123ee 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -1224,7 +1224,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, } const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); - if (compiler_options.GetGenerateDebugInfo()) { + if (compiler_options.GenerateAnyDebugInfo()) { const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); debug::MethodDebugInfo info = {}; @@ -1244,10 +1244,13 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = nullptr; info.cfi = jni_compiled_method.GetCfi(); - std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForMethods( + // If both flags are passed, generate full debug info. + const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); + std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( GetCompilerDriver()->GetInstructionSet(), GetCompilerDriver()->GetInstructionSetFeatures(), - ArrayRef<const debug::MethodDebugInfo>(&info, 1)); + mini_debug_info, + info); CreateJITCodeEntryForAddress(code_address, std::move(elf_file)); } @@ -1352,7 +1355,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, } const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); - if (compiler_options.GetGenerateDebugInfo()) { + if (compiler_options.GenerateAnyDebugInfo()) { const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); debug::MethodDebugInfo info = {}; @@ -1372,10 +1375,13 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = stack_map_size == 0 ? nullptr : stack_map_data; info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); - std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForMethods( + // If both flags are passed, generate full debug info. 
+ const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); + std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( GetCompilerDriver()->GetInstructionSet(), GetCompilerDriver()->GetInstructionSetFeatures(), - ArrayRef<const debug::MethodDebugInfo>(&info, 1)); + mini_debug_info, + info); CreateJITCodeEntryForAddress(code_address, std::move(elf_file)); } diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index bb7c353bc2..dfa077f7de 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -462,6 +462,11 @@ class HScheduler { // containing basic block from being scheduled. // This method is used to restrict scheduling to instructions that we know are // safe to handle. + // + // For newly introduced instructions by default HScheduler::IsSchedulable returns false. + // HScheduler${ARCH}::IsSchedulable can be overridden to return true for an instruction (see + // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also + // look at/update HScheduler${ARCH}::IsSchedulingBarrier for this instruction. virtual bool IsSchedulable(const HInstruction* instruction) const; bool IsSchedulable(const HBasicBlock* block) const; diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index 32f161f26a..f71cb5b784 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -151,6 +151,20 @@ class HSchedulerARM64 : public HScheduler { #undef CASE_INSTRUCTION_KIND } + // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized + // loop boundaries. This is a workaround for the lack of notion of SIMD register in the compiler; + // around a call we have to save/restore all live SIMD&FP registers (only lower 64 bits of + // SIMD&FP registers are callee saved) so don't reorder such vector instructions. + // + // TODO: remove this when a proper support of SIMD registers is introduced to the compiler. + bool IsSchedulingBarrier(const HInstruction* instr) const OVERRIDE { + return HScheduler::IsSchedulingBarrier(instr) || + instr->IsVecReduce() || + instr->IsVecExtractScalar() || + instr->IsVecSetScalars() || + instr->IsVecReplicateScalar(); + } + private: SchedulingLatencyVisitorARM64 arm64_latency_visitor_; DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64); |
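To make the scheduling contract described in the new comments concrete, here is a hedged sketch of what whitelisting one more vector instruction on a hypothetical backend would look like. The class name HSchedulerSomeArch and the choice of HVecAdd are illustrative only and not part of this change.

    // Illustrative only: a backend that has verified HVecAdd is safe to reorder would
    // whitelist it in IsSchedulable() and revisit IsSchedulingBarrier() at the same time.
    bool HSchedulerSomeArch::IsSchedulable(const HInstruction* instruction) const {
      if (instruction->IsVecAdd()) {
        return true;  // In this illustration, HVecAdd stays inside the SIMD loop body.
      }
      return HScheduler::IsSchedulable(instruction);
    }

    bool HSchedulerSomeArch::IsSchedulingBarrier(const HInstruction* instr) const {
      // HVecAdd is assumed not to be live across the vector loop boundary, so it is not
      // added here; instructions whose values do cross the boundary must be.
      return HScheduler::IsSchedulingBarrier(instr);
    }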