Diffstat (limited to 'compiler')
49 files changed, 1241 insertions, 199 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp
index d0b519240e..c798d9782a 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -54,6 +54,7 @@ art_cc_defaults {
         "optimizing/code_generator_utils.cc",
         "optimizing/code_sinking.cc",
         "optimizing/constant_folding.cc",
+        "optimizing/constructor_fence_redundancy_elimination.cc",
         "optimizing/dead_code_elimination.cc",
         "optimizing/escape.cc",
         "optimizing/graph_checker.cc",
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 97127f58ed..5ef6cbff78 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -124,8 +124,10 @@ class LinkerPatch {
     kCall,
     kCallRelative,            // NOTE: Actual patching is instruction_set-dependent.
     kTypeRelative,            // NOTE: Actual patching is instruction_set-dependent.
+    kTypeClassTable,          // NOTE: Actual patching is instruction_set-dependent.
     kTypeBssEntry,            // NOTE: Actual patching is instruction_set-dependent.
     kStringRelative,          // NOTE: Actual patching is instruction_set-dependent.
+    kStringInternTable,       // NOTE: Actual patching is instruction_set-dependent.
     kStringBssEntry,          // NOTE: Actual patching is instruction_set-dependent.
     kBakerReadBarrierBranch,  // NOTE: Actual patching is instruction_set-dependent.
   };
@@ -176,6 +178,16 @@ class LinkerPatch {
     return patch;
   }

+  static LinkerPatch TypeClassTablePatch(size_t literal_offset,
+                                         const DexFile* target_dex_file,
+                                         uint32_t pc_insn_offset,
+                                         uint32_t target_type_idx) {
+    LinkerPatch patch(literal_offset, Type::kTypeClassTable, target_dex_file);
+    patch.type_idx_ = target_type_idx;
+    patch.pc_insn_offset_ = pc_insn_offset;
+    return patch;
+  }
+
   static LinkerPatch TypeBssEntryPatch(size_t literal_offset,
                                        const DexFile* target_dex_file,
                                        uint32_t pc_insn_offset,
@@ -196,6 +208,16 @@ class LinkerPatch {
     return patch;
   }

+  static LinkerPatch StringInternTablePatch(size_t literal_offset,
+                                            const DexFile* target_dex_file,
+                                            uint32_t pc_insn_offset,
+                                            uint32_t target_string_idx) {
+    LinkerPatch patch(literal_offset, Type::kStringInternTable, target_dex_file);
+    patch.string_idx_ = target_string_idx;
+    patch.pc_insn_offset_ = pc_insn_offset;
+    return patch;
+  }
+
   static LinkerPatch StringBssEntryPatch(size_t literal_offset,
                                          const DexFile* target_dex_file,
                                          uint32_t pc_insn_offset,
@@ -232,8 +254,10 @@ class LinkerPatch {
       case Type::kMethodBssEntry:
       case Type::kCallRelative:
       case Type::kTypeRelative:
+      case Type::kTypeClassTable:
       case Type::kTypeBssEntry:
       case Type::kStringRelative:
+      case Type::kStringInternTable:
       case Type::kStringBssEntry:
       case Type::kBakerReadBarrierBranch:
         return true;
@@ -252,24 +276,28 @@ class LinkerPatch {

   const DexFile* TargetTypeDexFile() const {
     DCHECK(patch_type_ == Type::kTypeRelative ||
+           patch_type_ == Type::kTypeClassTable ||
            patch_type_ == Type::kTypeBssEntry);
     return target_dex_file_;
   }

   dex::TypeIndex TargetTypeIndex() const {
     DCHECK(patch_type_ == Type::kTypeRelative ||
+           patch_type_ == Type::kTypeClassTable ||
            patch_type_ == Type::kTypeBssEntry);
     return dex::TypeIndex(type_idx_);
   }

   const DexFile* TargetStringDexFile() const {
     DCHECK(patch_type_ == Type::kStringRelative ||
+           patch_type_ == Type::kStringInternTable ||
            patch_type_ == Type::kStringBssEntry);
     return target_dex_file_;
   }

   dex::StringIndex TargetStringIndex() const {
     DCHECK(patch_type_ == Type::kStringRelative ||
+           patch_type_ == Type::kStringInternTable ||
            patch_type_ == Type::kStringBssEntry);
     return dex::StringIndex(string_idx_);
   }

@@ -278,8 +306,10 @@ class LinkerPatch {
     DCHECK(patch_type_ == Type::kMethodRelative ||
            patch_type_ == Type::kMethodBssEntry ||
            patch_type_ == Type::kTypeRelative ||
+           patch_type_ == Type::kTypeClassTable ||
            patch_type_ == Type::kTypeBssEntry ||
            patch_type_ == Type::kStringRelative ||
+           patch_type_ == Type::kStringInternTable ||
            patch_type_ == Type::kStringBssEntry);
     return pc_insn_offset_;
   }
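[Editor's note] The two new patch kinds above carry exactly the same payload (dex file, index, and the offset of the associated ADRP-style instruction) as the existing PC-relative kinds; only the type tag tells the linker whether to resolve against a boot-image object, an intern/class table slot, or a .bss entry. A minimal standalone sketch of that factory pattern, with a hypothetical, heavily simplified stand-in for LinkerPatch (illustration only, not ART's real class):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    struct SimplePatch {
      enum class Type { kStringRelative, kStringInternTable, kStringBssEntry };
      // Mirrors LinkerPatch::StringInternTablePatch(): same payload as a
      // kStringRelative patch, different type tag.
      static SimplePatch StringInternTablePatch(size_t literal_offset,
                                                uint32_t pc_insn_offset,
                                                uint32_t string_idx) {
        return SimplePatch{Type::kStringInternTable, literal_offset, pc_insn_offset, string_idx};
      }
      Type type;
      size_t literal_offset;    // Offset of the instruction to patch.
      uint32_t pc_insn_offset;  // Offset of the anchoring PC-relative instruction.
      uint32_t index;           // String index in the dex file.
    };

    int main() {
      SimplePatch p = SimplePatch::StringInternTablePatch(/*literal_offset=*/8u,
                                                          /*pc_insn_offset=*/4u,
                                                          /*string_idx=*/42u);
      assert(p.type == SimplePatch::Type::kStringInternTable);
      return 0;
    }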
diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc
index c7e9f4fc07..23511e55fc 100644
--- a/compiler/dex/quick_compiler_callbacks.cc
+++ b/compiler/dex/quick_compiler_callbacks.cc
@@ -34,17 +34,21 @@ void QuickCompilerCallbacks::ClassRejected(ClassReference ref) {
   }
 }

-bool QuickCompilerCallbacks::CanAssumeVerified(ClassReference ref) {
+ClassStatus QuickCompilerCallbacks::GetPreviousClassState(ClassReference ref) {
   // If we don't have class unloading enabled in the compiler, we will never see classes that were
   // previously verified. Return kStatusNotReady to avoid overhead from the lookup in the compiler
   // driver.
   if (!does_class_unloading_) {
-    return false;
+    return ClassStatus::kStatusNotReady;
   }
   DCHECK(compiler_driver_ != nullptr);
   // In the case of the quicken filter: avoiding verification of quickened instructions, which the
   // verifier doesn't currently support.
   // In the case of the verify filter, avoiding verifying twice.
-  return compiler_driver_->CanAssumeVerified(ref);
+  ClassStatus status;
+  if (!compiler_driver_->GetCompiledClass(ref, &status)) {
+    return ClassStatus::kStatusNotReady;
+  }
+  return status;
 }

 }  // namespace art
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
index 578aff45e5..45456f2a1c 100644
--- a/compiler/dex/quick_compiler_callbacks.h
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -54,7 +54,7 @@ class QuickCompilerCallbacks FINAL : public CompilerCallbacks {
       verification_results_ = verification_results;
     }

-    bool CanAssumeVerified(ClassReference ref) OVERRIDE;
+    ClassStatus GetPreviousClassState(ClassReference ref) OVERRIDE;

     void SetDoesClassUnloading(bool does_class_unloading, CompilerDriver* compiler_driver)
         OVERRIDE {
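[Editor's note] The refactoring replaces a yes/no callback with one that reports the last known class status, so callers can make finer-grained decisions while the old boolean question remains a one-line comparison. A standalone sketch of that layering, using an illustrative status enum rather than ART's real ClassStatus values:

    #include <cassert>

    // Illustrative status values; ART's real enum has more states.
    enum class ClassStatus { kStatusNotReady = 0, kStatusResolved = 3, kStatusVerified = 7 };

    // Stand-in for the new callback: returns whatever status a previous
    // compilation pass recorded for the class.
    ClassStatus GetPreviousClassState() {
      return ClassStatus::kStatusVerified;
    }

    // The old CanAssumeVerified() query is recoverable as a comparison on the
    // richer status, which is what made the boolean API redundant.
    bool CanAssumeVerified() {
      return GetPreviousClassState() >= ClassStatus::kStatusVerified;
    }

    int main() {
      assert(CanAssumeVerified());
      return 0;
    }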
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index ee36a92c17..18b54eefba 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -3053,10 +3053,4 @@ void CompilerDriver::SetDexFilesForOatFile(const std::vector<const DexFile*>& de
   }
 }

-bool CompilerDriver::CanAssumeVerified(ClassReference ref) const {
-  mirror::Class::Status existing = mirror::Class::kStatusNotReady;
-  compiled_classes_.Get(DexFileReference(ref.first, ref.second), &existing);
-  return existing >= mirror::Class::kStatusVerified;
-}
-
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 11808c1be4..d08d9d7940 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -379,8 +379,6 @@ class CompilerDriver {
     return profile_compilation_info_;
   }

-  bool CanAssumeVerified(ClassReference ref) const;
-
   // Is `boot_image_filename` the name of a core image (small boot
   // image used for ART testing only)?
   static bool IsCoreImageFilename(const std::string& boot_image_filename) {
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 392d57c0f2..278358b250 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -369,8 +369,6 @@ TEST_F(CompilerDriverVerifyTest, VerifyCompilation) {

 // Test that a class of status kStatusRetryVerificationAtRuntime is indeed recorded that way in the
 // driver.
-// Test that checks that classes can be assumed as verified if unloading mode is enabled and
-// the class status is at least verified.
 TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatusCheckVerified) {
   Thread* const self = Thread::Current();
   jobject class_loader;
@@ -401,12 +399,6 @@ TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatusCheckVerified) {
     mirror::Class::Status status = {};
     ASSERT_TRUE(compiler_driver_->GetCompiledClass(ref, &status));
     EXPECT_EQ(status, expected_status);
-
-    // Check that we can assume verified if we are a status that is at least verified.
-    if (status >= mirror::Class::kStatusVerified) {
-      // Check that the class can be assumed as verified in the compiler driver.
-      EXPECT_TRUE(callbacks_->CanAssumeVerified(ref)) << status;
-    }
   }
 }
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 252fdd67e1..7b623dd979 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -46,7 +46,7 @@ TEST_F(ImageTest, TestImageLayout) {
   // Make sure that the new stuff in the clinit in ImageLayoutB is in the last image and not in the
   // first two images.
   ASSERT_EQ(image_sizes.size(), image_sizes_extra.size());
-  // Sizes of the images should be the same. These sizes are for the whole image unrounded.
+  // Sizes of the object sections should be the same for all but the last image.
   for (size_t i = 0; i < image_sizes.size() - 1; ++i) {
     EXPECT_EQ(image_sizes[i], image_sizes_extra[i]);
   }
diff --git a/compiler/image_test.h b/compiler/image_test.h
index daa4b11967..f1adeddb69 100644
--- a/compiler/image_test.h
+++ b/compiler/image_test.h
@@ -133,7 +133,7 @@ inline std::vector<size_t> CompilationHelper::GetImageObjectSectionSizes() {
     ImageHeader image_header;
     CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
     CHECK(image_header.IsValid());
-    ret.push_back(image_header.GetImageSize());
+    ret.push_back(image_header.GetObjectsSection().Size());
   }
   return ret;
 }
@@ -398,7 +398,7 @@ inline void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
     ImageHeader image_header;
     ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
     ASSERT_TRUE(image_header.IsValid());
-    const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
+    const auto& bitmap_section = image_header.GetImageBitmapSection();
     ASSERT_GE(bitmap_section.Offset(), sizeof(image_header));
     ASSERT_NE(0U, bitmap_section.Size());
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 6ee9cc6056..4ffe238cc7 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -73,6 +73,7 @@
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
+#include "well_known_classes.h"

 using ::art::mirror::Class;
 using ::art::mirror::DexCache;
@@ -298,8 +299,7 @@ bool ImageWriter::Write(int image_fd,

   // Write out the image bitmap at the page aligned start of the image end, also uncompressed for
   // convenience.
-  const ImageSection& bitmap_section = image_header->GetImageSection(
-      ImageHeader::kSectionImageBitmap);
+  const ImageSection& bitmap_section = image_header->GetImageBitmapSection();
   // Align up since data size may be unaligned if the image is compressed.
   size_t bitmap_position_in_file = RoundUp(sizeof(ImageHeader) + data_size, kPageSize);
   if (!is_compressed) {
@@ -690,7 +690,7 @@ bool ImageWriter::AllocMemory() {
   for (ImageInfo& image_info : image_infos_) {
     ImageSection unused_sections[ImageHeader::kSectionCount];
     const size_t length = RoundUp(
-        image_info.CreateImageSections(unused_sections), kPageSize);
+        image_info.CreateImageSections(unused_sections, compile_app_image_), kPageSize);

     std::string error_msg;
     image_info.image_.reset(MemMap::MapAnonymous("image writer image",
@@ -1835,7 +1835,8 @@ void ImageWriter::CalculateNewObjectOffsets() {
     image_info.image_begin_ = global_image_begin_ + image_offset;
     image_info.image_offset_ = image_offset;
     ImageSection unused_sections[ImageHeader::kSectionCount];
-    image_info.image_size_ = RoundUp(image_info.CreateImageSections(unused_sections), kPageSize);
+    image_info.image_size_ =
+        RoundUp(image_info.CreateImageSections(unused_sections, compile_app_image_), kPageSize);
     // There should be no gaps until the next image.
     image_offset += image_info.image_size_;
   }
@@ -1866,7 +1867,8 @@ void ImageWriter::CalculateNewObjectOffsets() {
   }
 }

-size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) const {
+size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections,
+                                                   bool app_image) const {
   DCHECK(out_sections != nullptr);

   // Do not round up any sections here that are represented by the bins since it will break
@@ -1905,8 +1907,13 @@ size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) c
   ImageSection* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays];
   *dex_cache_arrays_section = ImageSection(bin_slot_offsets_[kBinDexCacheArray],
                                            bin_slot_sizes_[kBinDexCacheArray]);
-  // Round up to the alignment the string table expects. See HashSet::WriteToMemory.
-  size_t cur_pos = RoundUp(dex_cache_arrays_section->End(), sizeof(uint64_t));
+  // For boot image, round up to the page boundary to separate the interned strings and
+  // class table from the modifiable data. We shall mprotect() these pages read-only when
+  // we load the boot image. This is more than sufficient for the string table alignment,
+  // namely sizeof(uint64_t). See HashSet::WriteToMemory.
+  static_assert(IsAligned<sizeof(uint64_t)>(kPageSize), "String table alignment check.");
+  size_t cur_pos =
+      RoundUp(dex_cache_arrays_section->End(), app_image ? sizeof(uint64_t) : kPageSize);
   // Calculate the size of the interned strings.
   ImageSection* interned_strings_section = &out_sections[ImageHeader::kSectionInternedStrings];
   *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
@@ -1929,7 +1936,7 @@ void ImageWriter::CreateHeader(size_t oat_index) {

   // Create the image sections.
   ImageSection sections[ImageHeader::kSectionCount];
-  const size_t image_end = image_info.CreateImageSections(sections);
+  const size_t image_end = image_info.CreateImageSections(sections, compile_app_image_);

   // Finally bitmap section.
   const size_t bitmap_bytes = image_info.image_bitmap_->Size();
@@ -2114,8 +2121,7 @@ void ImageWriter::CopyAndFixupNativeData(size_t oat_index) {
   // Write the intern table into the image.
   if (image_info.intern_table_bytes_ > 0) {
-    const ImageSection& intern_table_section = image_header->GetImageSection(
-        ImageHeader::kSectionInternedStrings);
+    const ImageSection& intern_table_section = image_header->GetInternedStringsSection();
     InternTable* const intern_table = image_info.intern_table_.get();
     uint8_t* const intern_table_memory_ptr =
         image_info.image_->Begin() + intern_table_section.Offset();
@@ -2134,8 +2140,7 @@ void ImageWriter::CopyAndFixupNativeData(size_t oat_index) {
   // Write the class table(s) into the image. class_table_bytes_ may be 0 if there are multiple
   // class loaders. Writing multiple class tables into the image is currently unsupported.
   if (image_info.class_table_bytes_ > 0u) {
-    const ImageSection& class_table_section = image_header->GetImageSection(
-        ImageHeader::kSectionClassTable);
+    const ImageSection& class_table_section = image_header->GetClassTableSection();
     uint8_t* const class_table_memory_ptr =
         image_info.image_->Begin() + class_table_section.Offset();
     ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 866e2042f7..2fc394e862 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -258,7 +258,7 @@ class ImageWriter FINAL {

     // Create the image sections into the out sections variable, returns the size of the image
     // excluding the bitmap.
-    size_t CreateImageSections(ImageSection* out_sections) const;
+    size_t CreateImageSections(ImageSection* out_sections, bool app_image) const;

     std::unique_ptr<MemMap> image_;  // Memory mapped for generating the image.
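[Editor's note] The CreateImageSections() change above is the layout half of this commit: for a boot image the intern-table/class-table start is rounded up to a page boundary so the loader can mprotect() those pages read-only, while an app image keeps the cheaper 8-byte alignment that HashSet::WriteToMemory needs. A standalone sketch of that alignment choice, with RoundUp/IsAligned reimplemented here for illustration (ART's versions live in base/bit_utils.h):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    constexpr size_t kPageSize = 4096;

    // Simplified power-of-two helpers, stand-ins for ART's RoundUp/IsAligned.
    constexpr size_t RoundUp(size_t x, size_t n) { return (x + n - 1) & ~(n - 1); }
    constexpr bool IsAligned(size_t x, size_t n) { return (x & (n - 1)) == 0; }

    int main() {
      // Page alignment subsumes the uint64_t alignment the hash tables expect,
      // which is what the static_assert in the diff checks.
      static_assert(kPageSize % sizeof(uint64_t) == 0, "String table alignment check.");
      size_t dex_cache_arrays_end = 0x12345;  // Hypothetical end of the previous section.
      bool app_image = false;                 // Boot image compilation.
      size_t cur_pos = RoundUp(dex_cache_arrays_end, app_image ? sizeof(uint64_t) : kPageSize);
      assert(IsAligned(cur_pos, kPageSize));  // Tables start on their own page(s),
      assert(cur_pos >= dex_cache_arrays_end);  // so they can be mprotect()ed separately.
      return 0;
    }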
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index db829f3233..4960f4d856 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -61,8 +61,10 @@ inline bool IsAdrpPatch(const LinkerPatch& patch) {
     case LinkerPatch::Type::kMethodRelative:
     case LinkerPatch::Type::kMethodBssEntry:
     case LinkerPatch::Type::kTypeRelative:
+    case LinkerPatch::Type::kTypeClassTable:
     case LinkerPatch::Type::kTypeBssEntry:
     case LinkerPatch::Type::kStringRelative:
+    case LinkerPatch::Type::kStringInternTable:
     case LinkerPatch::Type::kStringBssEntry:
       return patch.LiteralOffset() == patch.PcInsnOffset();
   }
@@ -265,7 +267,9 @@ void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
     } else {
       // LDR/STR 32-bit or 64-bit with imm12 == 0 (unset).
       DCHECK(patch.GetType() == LinkerPatch::Type::kMethodBssEntry ||
+             patch.GetType() == LinkerPatch::Type::kTypeClassTable ||
              patch.GetType() == LinkerPatch::Type::kTypeBssEntry ||
+             patch.GetType() == LinkerPatch::Type::kStringInternTable ||
              patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType();
       DCHECK_EQ(insn & 0xbfbffc00, 0xb9000000) << std::hex << insn;
     }
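[Editor's note] The DCHECK_EQ above verifies that the instruction being patched is an A64 LDR/STR (immediate, unsigned offset) whose imm12 placeholder is still zero. A standalone sketch of that encoding check, assuming the standard A64 encodings (the constants are copied from the diff, the helper name is ours):

    #include <cassert>
    #include <cstdint>

    // The mask 0xbfbffc00 ignores bit 30 (32- vs. 64-bit size), bit 22 (LDR vs.
    // STR) and the Rn/Rt register fields (bits 9:0); everything that remains,
    // including the imm12 offset field (bits 21:10), must match 0xb9000000,
    // i.e. the offset placeholder is still 0 and ready to be patched.
    bool IsLdrStrImmWithZeroOffset(uint32_t insn) {
      return (insn & 0xbfbffc00u) == 0xb9000000u;
    }

    int main() {
      assert(IsLdrStrImmWithZeroOffset(0xb9400261u));   // ldr w1, [x19]      (imm12 == 0)
      assert(IsLdrStrImmWithZeroOffset(0xf9400042u));   // ldr x2, [x2]       (64-bit, imm12 == 0)
      assert(!IsLdrStrImmWithZeroOffset(0xb9400421u));  // ldr w1, [x1, #4]   (imm12 != 0)
      return 0;
    }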
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 6fbb2bd441..cc8c6dfac0 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -28,6 +28,7 @@
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
+#include "class_table-inl.h"
 #include "compiled_method.h"
 #include "debug/method_debug_info.h"
 #include "dex/verification_results.h"
@@ -335,6 +336,7 @@ OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings, ProfileCo
     bss_method_entries_(),
     bss_type_entries_(),
     bss_string_entries_(),
+    map_boot_image_tables_to_bss_(false),
     oat_data_offset_(0u),
     oat_header_(nullptr),
     size_vdex_header_(0),
@@ -771,6 +773,9 @@ class OatWriter::InitBssLayoutMethodVisitor : public DexMethodVisitor {
         } else if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) {
           StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex());
           writer_->bss_string_entries_.Overwrite(ref, /* placeholder */ 0u);
+        } else if (patch.GetType() == LinkerPatch::Type::kStringInternTable ||
+                   patch.GetType() == LinkerPatch::Type::kTypeClassTable) {
+          writer_->map_boot_image_tables_to_bss_ = true;
         }
       }
     } else {
@@ -1398,6 +1403,14 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
                                                                  target_offset);
             break;
           }
+          case LinkerPatch::Type::kStringInternTable: {
+            uint32_t target_offset = GetInternTableEntryOffset(patch);
+            writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                 patch,
+                                                                 offset_ + literal_offset,
+                                                                 target_offset);
+            break;
+          }
           case LinkerPatch::Type::kStringBssEntry: {
             StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex());
             uint32_t target_offset =
                 writer_->bss_start_ + writer_->bss_string_entries_.Get(ref);
             writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
                                                                  patch,
                                                                  offset_ + literal_offset,
                                                                  target_offset);
             break;
           }
@@ -1416,6 +1429,14 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
                                                                  target_offset);
             break;
           }
+          case LinkerPatch::Type::kTypeClassTable: {
+            uint32_t target_offset = GetClassTableEntryOffset(patch);
+            writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                 patch,
+                                                                 offset_ + literal_offset,
+                                                                 target_offset);
+            break;
+          }
           case LinkerPatch::Type::kTypeBssEntry: {
             TypeReference ref(patch.TargetTypeDexFile(), patch.TargetTypeIndex());
             uint32_t target_offset = writer_->bss_start_ + writer_->bss_type_entries_.Get(ref);
@@ -1535,7 +1556,6 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
   }

   mirror::String* GetTargetString(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) {
-    ScopedObjectAccessUnchecked soa(Thread::Current());
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
     mirror::String* string = linker->LookupString(*patch.TargetStringDexFile(),
                                                   patch.TargetStringIndex(),
@@ -1603,6 +1623,42 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
     data[2] = (address >> 16) & 0xffu;
     data[3] = (address >> 24) & 0xffu;
   }
+
+  // Calculate the offset of the InternTable slot (GcRoot<String>) when mmapped to the .bss.
+  uint32_t GetInternTableEntryOffset(const LinkerPatch& patch)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(!writer_->HasBootImage());
+    const uint8_t* string_root = writer_->LookupBootImageInternTableSlot(
+        *patch.TargetStringDexFile(), patch.TargetStringIndex());
+    DCHECK(string_root != nullptr);
+    return GetBootImageTableEntryOffset(string_root);
+  }
+
+  // Calculate the offset of the ClassTable::TableSlot when mmapped to the .bss.
+  uint32_t GetClassTableEntryOffset(const LinkerPatch& patch)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(!writer_->HasBootImage());
+    const uint8_t* table_slot =
+        writer_->LookupBootImageClassTableSlot(*patch.TargetTypeDexFile(), patch.TargetTypeIndex());
+    DCHECK(table_slot != nullptr);
+    return GetBootImageTableEntryOffset(table_slot);
+  }
+
+  uint32_t GetBootImageTableEntryOffset(const uint8_t* raw_root) {
+    uint32_t base_offset = writer_->bss_start_;
+    for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) {
+      const uint8_t* const_tables_begin =
+          space->Begin() + space->GetImageHeader().GetBootImageConstantTablesOffset();
+      size_t offset = static_cast<size_t>(raw_root - const_tables_begin);
+      if (offset < space->GetImageHeader().GetBootImageConstantTablesSize()) {
+        DCHECK_LE(base_offset + offset, writer_->bss_start_ + writer_->bss_methods_offset_);
+        return base_offset + offset;
+      }
+      base_offset += space->GetImageHeader().GetBootImageConstantTablesSize();
+    }
+    LOG(FATAL) << "Didn't find boot image string in boot image intern tables!";
+    UNREACHABLE();
+  }
 };

 class OatWriter::WriteMapMethodVisitor : public OatDexMethodVisitor {
@@ -1942,19 +1998,22 @@ void OatWriter::InitBssLayout(InstructionSet instruction_set) {
   DCHECK_EQ(bss_size_, 0u);
   if (HasBootImage()) {
+    DCHECK(!map_boot_image_tables_to_bss_);
     DCHECK(bss_string_entries_.empty());
-    if (bss_method_entries_.empty() && bss_type_entries_.empty()) {
-      // Nothing to put to the .bss section.
-      return;
-    }
+  }
+  if (!map_boot_image_tables_to_bss_ &&
+      bss_method_entries_.empty() &&
+      bss_type_entries_.empty() &&
+      bss_string_entries_.empty()) {
+    // Nothing to put to the .bss section.
+    return;
   }

-  // Allocate space for app dex cache arrays in the .bss section.
+  // Allocate space for boot image tables in the .bss section.
   PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set);
-  if (!HasBootImage()) {
-    for (const DexFile* dex_file : *dex_files_) {
-      DexCacheArraysLayout layout(pointer_size, dex_file);
-      bss_size_ += layout.Size();
+  if (map_boot_image_tables_to_bss_) {
+    for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) {
+      bss_size_ += space->GetImageHeader().GetBootImageConstantTablesSize();
     }
   }
@@ -3505,4 +3564,40 @@ bool OatWriter::OatClass::Write(OatWriter* oat_writer, OutputStream* out) const
   return true;
 }

+const uint8_t* OatWriter::LookupBootImageInternTableSlot(const DexFile& dex_file,
+                                                         dex::StringIndex string_idx)
+    NO_THREAD_SAFETY_ANALYSIS {  // Single-threaded OatWriter can avoid locking.
+  uint32_t utf16_length;
+  const char* utf8_data = dex_file.StringDataAndUtf16LengthByIdx(string_idx, &utf16_length);
+  DCHECK_EQ(utf16_length, CountModifiedUtf8Chars(utf8_data));
+  InternTable::Utf8String string(utf16_length,
+                                 utf8_data,
+                                 ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length));
+  const InternTable* intern_table = Runtime::Current()->GetClassLinker()->intern_table_;
+  for (const InternTable::Table::UnorderedSet& table : intern_table->strong_interns_.tables_) {
+    auto it = table.Find(string);
+    if (it != table.end()) {
+      return reinterpret_cast<const uint8_t*>(std::addressof(*it));
+    }
+  }
+  LOG(FATAL) << "Did not find boot image string " << utf8_data;
+  UNREACHABLE();
+}
+
+const uint8_t* OatWriter::LookupBootImageClassTableSlot(const DexFile& dex_file,
+                                                        dex::TypeIndex type_idx)
+    NO_THREAD_SAFETY_ANALYSIS {  // Single-threaded OatWriter can avoid locking.
+  const char* descriptor = dex_file.StringByTypeIdx(type_idx);
+  ClassTable::DescriptorHashPair pair(descriptor, ComputeModifiedUtf8Hash(descriptor));
+  ClassTable* table = Runtime::Current()->GetClassLinker()->boot_class_table_.get();
+  for (const ClassTable::ClassSet& class_set : table->classes_) {
+    auto it = class_set.Find(pair);
+    if (it != class_set.end()) {
+      return reinterpret_cast<const uint8_t*>(std::addressof(*it));
+    }
+  }
+  LOG(FATAL) << "Did not find boot image class " << descriptor;
+  UNREACHABLE();
+}
+
 }  // namespace art
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 8db00f76d0..7f2045f8da 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -333,6 +333,12 @@ class OatWriter {

   bool MayHaveCompiledMethods() const;

+  // Find the address of the GcRoot<String> in the InternTable for a boot image string.
+  const uint8_t* LookupBootImageInternTableSlot(const DexFile& dex_file,
+                                                dex::StringIndex string_idx);
+  // Find the address of the ClassTable::TableSlot for a boot image class.
+  const uint8_t* LookupBootImageClassTableSlot(const DexFile& dex_file, dex::TypeIndex type_idx);
+
   enum class WriteState {
     kAddingDexFileSources,
     kPrepareLayout,
@@ -407,6 +413,10 @@ class OatWriter {
   // is the target offset for patching, starting at `bss_start_ + bss_roots_offset_`.
   SafeMap<StringReference, size_t, StringReferenceValueComparator> bss_string_entries_;

+  // Whether boot image tables should be mapped to the .bss. This is needed for compiled
+  // code that reads from these tables with PC-relative instructions.
+  bool map_boot_image_tables_to_bss_;
+
   // Offset of the oat data from the start of the mmapped region of the elf file.
   size_t oat_data_offset_;
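[Editor's note] GetBootImageTableEntryOffset() above relies on the const tables of all boot image spaces being mapped back-to-back into the .bss: a slot's target offset is bss_start plus the table sizes of all earlier spaces plus the slot's offset within its own space's tables. A standalone sketch of that accumulation, with the image spaces reduced to (begin, size) pairs and pointer math done via uintptr_t (the real code subtracts raw pointers):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct ConstTables { const uint8_t* begin; size_t size; };

    uint32_t BootImageTableEntryOffset(uint32_t bss_start,
                                       const std::vector<ConstTables>& spaces,
                                       const uint8_t* raw_root) {
      uint32_t base_offset = bss_start;
      for (const ConstTables& tables : spaces) {
        // Out-of-range roots wrap around to a huge value and fail the bound check.
        size_t offset = static_cast<size_t>(reinterpret_cast<uintptr_t>(raw_root) -
                                            reinterpret_cast<uintptr_t>(tables.begin));
        if (offset < tables.size) {
          return base_offset + static_cast<uint32_t>(offset);
        }
        base_offset += static_cast<uint32_t>(tables.size);  // Tables are laid out back-to-back.
      }
      return UINT32_MAX;  // Not found; the real code LOG(FATAL)s here.
    }

    int main() {
      static uint8_t space0[256];
      static uint8_t space1[128];
      std::vector<ConstTables> spaces = {{space0, sizeof(space0)}, {space1, sizeof(space1)}};
      // A slot 16 bytes into the second space lands after all of the first space's tables.
      assert(BootImageTableEntryOffset(0x1000u, spaces, space1 + 16) == 0x1000u + 256u + 16u);
      return 0;
    }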
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 2f96cfa382..a170734ff2 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -596,6 +596,7 @@ class BCEVisitor : public HGraphVisitor {

   // Helper method to assign a new range to an instruction in the given basic block.
   void AssignRange(HBasicBlock* basic_block, HInstruction* instruction, ValueRange* range) {
+    DCHECK(!range->IsMonotonicValueRange() || instruction->IsLoopHeaderPhi());
     GetValueRangeMap(basic_block)->Overwrite(instruction->GetId(), range);
   }

@@ -1143,9 +1144,9 @@ class BCEVisitor : public HGraphVisitor {
                                              ValueBound(nullptr, 1 - right_const),
                                              ValueBound(nullptr, right_const - 1));

-      ValueRange* left_range = LookupValueRange(left, left->GetBlock());
+      ValueRange* left_range = LookupValueRange(left, instruction->GetBlock());
       if (left_range != nullptr) {
-        right_range = left_range->Narrow(right_range);
+        right_range = right_range->Narrow(left_range);
       }
       AssignRange(instruction->GetBlock(), instruction, right_range);
       return;
@@ -1172,9 +1173,9 @@ class BCEVisitor : public HGraphVisitor {
           GetGraph()->GetArena(),
           lower,
           upper);

-      ValueRange* left_range = LookupValueRange(left, left->GetBlock());
+      ValueRange* left_range = LookupValueRange(left, instruction->GetBlock());
       if (left_range != nullptr) {
-        right_range = left_range->Narrow(right_range);
+        right_range = right_range->Narrow(left_range);
       }
       AssignRange(instruction->GetBlock(), instruction, right_range);
       return;
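[Editor's note] The swap from left_range->Narrow(right_range) to right_range->Narrow(left_range) matters because Narrow() is dispatched on the receiver and a monotonic (loop-phi) range narrows differently from a plain interval, which the new DCHECK in AssignRange() now enforces. A standalone sketch of plain interval narrowing only, assuming simple constant bounds (ART's ValueRange also handles symbolic bounds and monotonic loop ranges, which is exactly why receiver and argument are not interchangeable there):

    #include <algorithm>
    #include <cassert>

    // Minimal constant-bounds stand-in for ART's ValueRange.
    struct Range {
      int lower;
      int upper;
      // Intersect with `other`, keeping the tighter bound on each side.
      Range Narrow(const Range& other) const {
        return Range{std::max(lower, other.lower), std::min(upper, other.upper)};
      }
    };

    int main() {
      Range right{-4, 4};  // e.g. a remainder-derived range for the instruction
      Range left{0, 100};  // the known range of the left operand
      Range narrowed = right.Narrow(left);
      assert(narrowed.lower == 0 && narrowed.upper == 4);
      return 0;
    }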
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 3be774a421..19e5d067a1 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -21,6 +21,7 @@
 #include "art_method.h"
 #include "base/bit_utils.h"
 #include "base/bit_utils_iterator.h"
+#include "class_table.h"
 #include "code_generator_utils.h"
 #include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -435,11 +436,11 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
       // The string entry page address was preserved in temp_ thanks to kSaveEverything.
     } else {
       // For non-Baker read barrier, we need to re-calculate the address of the string entry page.
-      adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
+      adrp_label_ = arm64_codegen->NewStringBssEntryPatch(dex_file, string_index);
       arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_);
     }
     vixl::aarch64::Label* strp_label =
-        arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_);
+        arm64_codegen->NewStringBssEntryPatch(dex_file, string_index, adrp_label_);
     {
       SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
       __ Bind(strp_label);
@@ -1463,6 +1464,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -4675,6 +4677,13 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
   NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
 }

+vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
+    const DexFile& dex_file,
+    dex::StringIndex string_index,
+    vixl::aarch64::Label* adrp_label) {
+  return NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
+}
+
 vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
   baker_read_barrier_patches_.emplace_back(custom_data);
   return &baker_read_barrier_patches_.back().label;
@@ -4764,6 +4773,7 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
       pc_relative_type_patches_.size() +
       type_bss_entry_patches_.size() +
       pc_relative_string_patches_.size() +
+      string_bss_entry_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
   if (GetCompilerOptions().IsBootImage()) {
@@ -4775,14 +4785,17 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
                                                                   linker_patches);
   } else {
     DCHECK(pc_relative_method_patches_.empty());
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+    EmitPcRelativeLinkerPatches<LinkerPatch::TypeClassTablePatch>(pc_relative_type_patches_,
                                                                   linker_patches);
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_,
+                                                                     linker_patches);
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
                                                                 linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
+  EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
+                                                                linker_patches);
   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
     linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
                                                                        info.custom_data));
@@ -4850,6 +4863,7 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
     case HLoadClass::LoadKind::kReferrersClass:
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadClass::LoadKind::kBootImageClassTable:
     case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
@@ -4961,6 +4975,25 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
       break;
     }
+    case HLoadClass::LoadKind::kBootImageClassTable: {
+      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+      // Add ADRP with its PC-relative type patch.
+      const DexFile& dex_file = cls->GetDexFile();
+      dex::TypeIndex type_index = cls->GetTypeIndex();
+      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
+      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
+      // Add LDR with its PC-relative type patch.
+      vixl::aarch64::Label* ldr_label =
+          codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
+      codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
+      // Extract the reference from the slot data, i.e. clear the hash bits.
+      int32_t masked_hash = ClassTable::TableSlot::MaskHash(
+          ComputeModifiedUtf8Hash(dex_file.StringByTypeIdx(type_index)));
+      if (masked_hash != 0) {
+        __ Sub(out.W(), out.W(), Operand(masked_hash));
+      }
+      break;
+    }
     case HLoadClass::LoadKind::kBssEntry: {
       // Add ADRP with its PC-relative Class .bss entry patch.
       const DexFile& dex_file = cls->GetDexFile();
@@ -5043,6 +5076,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadString::LoadKind::kBootImageInternTable:
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
@@ -5090,24 +5124,37 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD

   switch (load->GetLoadKind()) {
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       // Add ADRP with its PC-relative String patch.
       const DexFile& dex_file = load->GetDexFile();
       const dex::StringIndex string_index = load->GetStringIndex();
-      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
       // Add ADD with its PC-relative String patch.
       vixl::aarch64::Label* add_label =
           codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
-      return;  // No dex cache slow path.
+      return;
     }
     case HLoadString::LoadKind::kBootImageAddress: {
       uint32_t address = dchecked_integral_cast<uint32_t>(
           reinterpret_cast<uintptr_t>(load->GetString().Get()));
       DCHECK_NE(address, 0u);
       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
-      return;  // No dex cache slow path.
+      return;
+    }
+    case HLoadString::LoadKind::kBootImageInternTable: {
+      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+      // Add ADRP with its PC-relative String patch.
+      const DexFile& dex_file = load->GetDexFile();
+      const dex::StringIndex string_index = load->GetStringIndex();
+      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
+      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
+      // Add LDR with its PC-relative String patch.
+      vixl::aarch64::Label* ldr_label =
+          codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
+      codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
+      return;
     }
     case HLoadString::LoadKind::kBssEntry: {
       // Add ADRP with its PC-relative String .bss entry patch.
@@ -5115,11 +5162,11 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
       const dex::StringIndex string_index = load->GetStringIndex();
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0));
-      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
+      vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
-      // Add LDR with its PC-relative String patch.
+      // Add LDR with its .bss entry String patch.
       vixl::aarch64::Label* ldr_label =
-          codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
+          codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
       // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
       GenerateGcRootFieldLoad(load,
                               out_loc,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index c3392097a2..69c511907e 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -599,6 +599,14 @@ class CodeGeneratorARM64 : public CodeGenerator {
                                              dex::StringIndex string_index,
                                              vixl::aarch64::Label* adrp_label = nullptr);

+  // Add a new .bss entry string patch for an instruction and return the label
+  // to be bound before the instruction. The instruction will be either the
+  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
+  // to the associated ADRP patch label).
+  vixl::aarch64::Label* NewStringBssEntryPatch(const DexFile& dex_file,
+                                               dex::StringIndex string_index,
+                                               vixl::aarch64::Label* adrp_label = nullptr);
+
   // Add a new baker read barrier patch and return the label to be bound
   // before the CBNZ instruction.
   vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
@@ -825,8 +833,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative String patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
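[Editor's note] The kBootImageClassTable load above reads the raw ClassTable::TableSlot word, which packs the (aligned) class reference together with a few low hash bits; since the reference's low bits are zero, a single SUB of the masked hash recovers the reference, and the SUB is skipped entirely when the masked hash is zero. A standalone sketch of that packing trick, with illustrative constants (the real mask lives in ART's class_table.h):

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kHashBits = 3;                     // illustrative
    constexpr uint32_t kHashMask = (1u << kHashBits) - 1u;

    constexpr uint32_t MaskHash(uint32_t hash) { return hash & kHashMask; }

    constexpr uint32_t MakeSlot(uint32_t ref, uint32_t hash) {
      // The reference is aligned, so its low bits are zero and OR equals ADD.
      return ref | MaskHash(hash);
    }

    int main() {
      uint32_t ref = 0x12345678u & ~kHashMask;  // aligned reference
      uint32_t hash = 0xdeadbeefu;
      uint32_t slot = MakeSlot(ref, hash);
      // Subtracting the masked hash recovers the reference, exactly what the
      // generated SUB does after the LDR. The compiler knows the descriptor at
      // compile time, so it can compute MaskHash(hash) statically.
      assert(slot - MaskHash(hash) == ref);
      return 0;
    }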
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index d78756e964..8b9495d564 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -21,6 +21,7 @@
 #include "art_method.h"
 #include "base/bit_utils.h"
 #include "base/bit_utils_iterator.h"
+#include "class_table.h"
 #include "code_generator_utils.h"
 #include "common_arm.h"
 #include "compiled_method.h"
@@ -598,7 +599,7 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
           down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
       vixl32::Register temp = temps.Acquire();
       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
-          arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+          arm_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index);
       arm_codegen->EmitMovwMovtPlaceholder(labels, temp);
       __ Str(r0, MemOperand(temp));
     }
@@ -2380,6 +2381,7 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -7121,6 +7123,7 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
     case HLoadClass::LoadKind::kReferrersClass:
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadClass::LoadKind::kBootImageClassTable:
     case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
@@ -7233,6 +7236,20 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
       break;
     }
+    case HLoadClass::LoadKind::kBootImageClassTable: {
+      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+      CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+      codegen_->EmitMovwMovtPlaceholder(labels, out);
+      __ Ldr(out, MemOperand(out, /* offset */ 0));
+      // Extract the reference from the slot data, i.e. clear the hash bits.
+      int32_t masked_hash = ClassTable::TableSlot::MaskHash(
+          ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
+      if (masked_hash != 0) {
+        __ Sub(out, out, Operand(masked_hash));
+      }
+      break;
+    }
     case HLoadClass::LoadKind::kBssEntry: {
       vixl32::Register temp = (!kUseReadBarrier || kUseBakerReadBarrier)
           ? RegisterFrom(locations->GetTemp(0))
@@ -7315,6 +7332,7 @@ HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadString::LoadKind::kBootImageInternTable:
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
@@ -7372,14 +7390,22 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       codegen_->EmitMovwMovtPlaceholder(labels, out);
-      return;  // No dex cache slow path.
+      return;
     }
     case HLoadString::LoadKind::kBootImageAddress: {
       uint32_t address = dchecked_integral_cast<uint32_t>(
           reinterpret_cast<uintptr_t>(load->GetString().Get()));
       DCHECK_NE(address, 0u);
       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
-      return;  // No dex cache slow path.
+      return;
+    }
+    case HLoadString::LoadKind::kBootImageInternTable: {
+      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+      CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+      codegen_->EmitMovwMovtPlaceholder(labels, out);
+      __ Ldr(out, MemOperand(out, /* offset */ 0));
+      return;
     }
     case HLoadString::LoadKind::kBssEntry: {
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
@@ -7387,7 +7413,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
           ? RegisterFrom(locations->GetTemp(0))
           : out;
       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+          codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
       codegen_->EmitMovwMovtPlaceholder(labels, temp);
       GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption);
       LoadStringSlowPathARMVIXL* slow_path =
@@ -9119,6 +9145,11 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeSt
   return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
 }

+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch(
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &string_bss_entry_patches_);
+}
+
 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
     const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
   patches->emplace_back(dex_file, offset_or_index);
@@ -9187,6 +9218,7 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
   if (GetCompilerOptions().IsBootImage()) {
@@ -9198,14 +9230,17 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa
                                                                   linker_patches);
   } else {
     DCHECK(pc_relative_method_patches_.empty());
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+    EmitPcRelativeLinkerPatches<LinkerPatch::TypeClassTablePatch>(pc_relative_type_patches_,
                                                                   linker_patches);
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_,
+                                                                     linker_patches);
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
                                                                 linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
+  EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
+                                                                linker_patches);
   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
     linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
                                                                        info.custom_data));
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 5feb33b1e1..e78bc15614 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -579,6 +579,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
                                                 dex::StringIndex string_index);
+  PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file,
+                                              dex::StringIndex string_index);

   // Add a new baker read barrier patch and return the label to be bound
   // before the BNE instruction.
@@ -803,8 +805,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative String patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
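[Editor's note] On the 32-bit targets the PC-relative address is materialized by a MOVW/MOVT pair (hi16/lo16 halves on MIPS) emitted with placeholder immediates that the linker later fills in; the new kBootImageInternTable/kBootImageClassTable kinds then dereference that address with one extra LDR. A standalone sketch of just the immediate splitting the placeholder pair carries (on ARM the pair typically encodes a PC-relative offset rather than an absolute address, so treat this as the arithmetic only):

    #include <cassert>
    #include <cstdint>

    struct MovwMovt { uint16_t movw_imm; uint16_t movt_imm; };

    // What the linker computes when it patches the placeholder pair.
    MovwMovt Split(uint32_t value) {
      return MovwMovt{static_cast<uint16_t>(value & 0xffffu),
                      static_cast<uint16_t>(value >> 16)};
    }

    // What the emitted instructions reassemble at run time: MOVW writes the
    // low half of the register, MOVT the high half.
    uint32_t Materialize(const MovwMovt& pair) {
      return static_cast<uint32_t>(pair.movw_imm) |
             (static_cast<uint32_t>(pair.movt_imm) << 16);
    }

    int main() {
      uint32_t table_slot_offset = 0x7f3c1a40u;  // hypothetical value to patch in
      assert(Materialize(Split(table_slot_offset)) == table_slot_offset);
      return 0;
    }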
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 51f5b969d5..f0ef0071b6 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -20,6 +20,7 @@
 #include "arch/mips/entrypoints_direct_mips.h"
 #include "arch/mips/instruction_set_features_mips.h"
 #include "art_method.h"
+#include "class_table.h"
 #include "code_generator_utils.h"
 #include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -360,7 +361,7 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
       // The string entry address was preserved in `entry_address` thanks to kSaveEverything.
       DCHECK(bss_info_high_);
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
-          mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, bss_info_high_);
+          mips_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index, bss_info_high_);
       __ Sw(calling_convention.GetRegisterAt(0),
             entry_address,
             /* placeholder */ 0x5678,
@@ -380,9 +381,9 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
       const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
       Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
       CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
-          mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+          mips_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index);
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
-          mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, info_high);
+          mips_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index, info_high);
       mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base);
       __ Sw(out, TMP, /* placeholder */ 0x5678, &info_low->label);
     }
@@ -1101,6 +1102,7 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       clobbered_ra_(false) {
@@ -1651,7 +1653,8 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch
       method_bss_entry_patches_.size() +
       pc_relative_type_patches_.size() +
       type_bss_entry_patches_.size() +
-      pc_relative_string_patches_.size();
+      pc_relative_string_patches_.size() +
+      string_bss_entry_patches_.size();
   linker_patches->reserve(size);
   if (GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
@@ -1662,14 +1665,17 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch
                                                                   linker_patches);
   } else {
     DCHECK(pc_relative_method_patches_.empty());
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+    EmitPcRelativeLinkerPatches<LinkerPatch::TypeClassTablePatch>(pc_relative_type_patches_,
                                                                   linker_patches);
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_,
+                                                                     linker_patches);
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
                                                                 linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
+  EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
+                                                                linker_patches);
   DCHECK_EQ(size, linker_patches->size());
 }
@@ -1712,6 +1718,13 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPa
   return NewPcRelativePatch(dex_file, string_index.index_, info_high, &pc_relative_string_patches_);
 }

+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewStringBssEntryPatch(
+    const DexFile& dex_file,
+    dex::StringIndex string_index,
+    const PcRelativePatchInfo* info_high) {
+  return NewPcRelativePatch(dex_file, string_index.index_, info_high, &string_bss_entry_patches_);
+}
+
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch(
     const DexFile& dex_file,
     uint32_t offset_or_index,
@@ -7365,6 +7378,7 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
   bool fallback_load = has_irreducible_loops && !is_r6;
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadString::LoadKind::kBootImageInternTable:
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
@@ -7401,6 +7415,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
       fallback_load = false;
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadClass::LoadKind::kBootImageClassTable:
     case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
@@ -7631,6 +7646,7 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
     // We need an extra register for PC-relative literals on R2.
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
     case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kBootImageClassTable:
     case HLoadClass::LoadKind::kBssEntry:
       if (isR6) {
         break;
       }
@@ -7729,6 +7745,24 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
                          codegen_->DeduplicateBootImageAddressLiteral(address));
       break;
     }
+    case HLoadClass::LoadKind::kBootImageClassTable: {
+      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+      CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+      CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
+                                                     out,
+                                                     base_or_current_method_reg);
+      __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label);
+      // Extract the reference from the slot data, i.e. clear the hash bits.
+      int32_t masked_hash = ClassTable::TableSlot::MaskHash(
+          ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
+      if (masked_hash != 0) {
+        __ Addiu(out, out, -masked_hash);
+      }
+      break;
+    }
     case HLoadClass::LoadKind::kBssEntry: {
       bss_info_high = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
@@ -7817,6 +7851,7 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
     // We need an extra register for PC-relative literals on R2.
     case HLoadString::LoadKind::kBootImageAddress:
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadString::LoadKind::kBootImageInternTable:
     case HLoadString::LoadKind::kBssEntry:
       if (isR6) {
         break;
       }
@@ -7863,6 +7898,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
     // We need an extra register for PC-relative literals on R2.
     case HLoadString::LoadKind::kBootImageAddress:
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadString::LoadKind::kBootImageInternTable:
     case HLoadString::LoadKind::kBssEntry:
       base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
       break;
@@ -7882,7 +7918,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
                                                      out,
                                                      base_or_current_method_reg);
       __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label);
-      return;  // No dex cache slow path.
+      return;
     }
     case HLoadString::LoadKind::kBootImageAddress: {
       uint32_t address = dchecked_integral_cast<uint32_t>(
@@ -7891,14 +7927,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
       __ LoadLiteral(out,
                      base_or_current_method_reg,
                      codegen_->DeduplicateBootImageAddressLiteral(address));
-      return;  // No dex cache slow path.
+      return;
     }
-    case HLoadString::LoadKind::kBssEntry: {
+    case HLoadString::LoadKind::kBootImageInternTable: {
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
+                                                     out,
+                                                     base_or_current_method_reg);
+      __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label);
+      return;
+    }
+    case HLoadString::LoadKind::kBssEntry: {
+      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+      CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
+          codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
+      CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
+          codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
       constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
       Register temp = non_baker_read_barrier ? out : locations->GetTemp(0).AsRegister<Register>();
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c0e1ec0fa2..f15f8c672a 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -633,6 +633,9 @@ class CodeGeneratorMIPS : public CodeGenerator {
   PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
                                                 dex::StringIndex string_index,
                                                 const PcRelativePatchInfo* info_high = nullptr);
+  PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file,
+                                              dex::StringIndex string_index,
+                                              const PcRelativePatchInfo* info_high = nullptr);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);

   void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high,
@@ -699,8 +702,10 @@ class CodeGeneratorMIPS : public CodeGenerator {
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative String patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
   // Patches for string root accesses in JIT compiled code.
   ArenaDeque<JitPatchInfo> jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 25fb1d05dc..201b1b065f 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -18,6 +18,7 @@

 #include "arch/mips64/asm_support_mips64.h"
 #include "art_method.h"
+#include "class_table.h"
 #include "code_generator_utils.h"
 #include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -318,9 +319,9 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 {
       // The string entry address was preserved in `entry_address` thanks to kSaveEverything.
       DCHECK(bss_info_high_);
       CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
-          mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(),
-                                                   string_index,
-                                                   bss_info_high_);
+          mips64_codegen->NewStringBssEntryPatch(load->GetDexFile(),
+                                                 string_index,
+                                                 bss_info_high_);
       __ Bind(&info_low->label);
       __ StoreToOffset(kStoreWord,
                        calling_convention.GetRegisterAt(0),
@@ -339,9 +340,9 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 {
       // For non-Baker read barriers we need to re-calculate the address of
       // the string entry.
       CodeGeneratorMIPS64::PcRelativePatchInfo* info_high =
-          mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+          mips64_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index);
       CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
-          mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, info_high);
+          mips64_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index, info_high);
       mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, info_low);
       __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
     }
@@ -1049,6 +1050,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -1560,7 +1562,8 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
       method_bss_entry_patches_.size() +
       pc_relative_type_patches_.size() +
       type_bss_entry_patches_.size() +
-      pc_relative_string_patches_.size();
+      pc_relative_string_patches_.size() +
+      string_bss_entry_patches_.size();
   linker_patches->reserve(size);
   if (GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
@@ -1571,14 +1574,17 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
                                                                   linker_patches);
   } else {
     DCHECK(pc_relative_method_patches_.empty());
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+    EmitPcRelativeLinkerPatches<LinkerPatch::TypeClassTablePatch>(pc_relative_type_patches_,
                                                                   linker_patches);
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_,
+                                                                     linker_patches);
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
                                                                 linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
+  EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
+                                                                linker_patches);
   DCHECK_EQ(size, linker_patches->size());
 }
@@ -1621,6 +1627,13 @@ CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStri
   return NewPcRelativePatch(dex_file, string_index.index_, info_high, &pc_relative_string_patches_);
 }

+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewStringBssEntryPatch(
+    const DexFile& dex_file,
+    dex::StringIndex string_index,
+    const PcRelativePatchInfo* info_high) {
+  return NewPcRelativePatch(dex_file, string_index.index_, info_high, &string_bss_entry_patches_);
+}
+
 CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativePatch(
     const DexFile& dex_file,
     uint32_t offset_or_index,
@@ -5729,6 +5742,7 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
   bool fallback_load = false;
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadString::LoadKind::kBootImageInternTable:
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
@@ -5755,6 +5769,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
     case HLoadClass::LoadKind::kReferrersClass:
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadClass::LoadKind::kBootImageClassTable:
     case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
@@ -6004,6 +6019,22 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
                      codegen_->DeduplicateBootImageAddressLiteral(address));
       break;
     }
+    case HLoadClass::LoadKind::kBootImageClassTable: {
+      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+      CodeGeneratorMIPS64::PcRelativePatchInfo* info_high =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+      CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
+      __ Lwu(out, AT, /* placeholder */ 0x5678);
+      // Extract the reference from the slot data, i.e. clear the hash bits.
+      int32_t masked_hash = ClassTable::TableSlot::MaskHash(
+          ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
+      if (masked_hash != 0) {
+        __ Daddiu(out, out, -masked_hash);
+      }
+      break;
+    }
     case HLoadClass::LoadKind::kBssEntry: {
       bss_info_high = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
       CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
@@ -6117,7 +6148,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
       __ Daddiu(out, AT, /* placeholder */ 0x5678);
-      return;  // No dex cache slow path.
+      return;
     }
     case HLoadString::LoadKind::kBootImageAddress: {
       uint32_t address = dchecked_integral_cast<uint32_t>(
@@ -6126,14 +6157,24 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
       __ LoadLiteral(out,
                      kLoadUnsignedWord,
                      codegen_->DeduplicateBootImageAddressLiteral(address));
-      return;  // No dex cache slow path.
+ return; } - case HLoadString::LoadKind::kBssEntry: { + case HLoadString::LoadKind::kBootImageInternTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + __ Lwu(out, AT, /* placeholder */ 0x5678); + return; + } + case HLoadString::LoadKind::kBssEntry: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = + codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = + codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex(), info_high); constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; GpuRegister temp = non_baker_read_barrier ? out diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 72d474308e..3035621972 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -605,6 +605,9 @@ class CodeGeneratorMIPS64 : public CodeGenerator { PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); Literal* DeduplicateBootImageAddressLiteral(uint64_t address); void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, @@ -666,8 +669,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative String patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Patches for string root accesses in JIT compiled code. StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 57e7dc6eed..37190f8363 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -49,7 +49,9 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi case Primitive::kPrimDouble: locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) : Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); + locations->SetOut(is_zero ?
Location::RequiresFpuRegister() + : Location::SameAsFirstInput()); + break; default: LOG(FATAL) << "Unsupported SIMD type"; diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index efab0db118..edd0209f10 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -44,7 +44,8 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru case Primitive::kPrimDouble: locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) : Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); + locations->SetOut(is_zero ? Location::RequiresFpuRegister() + : Location::SameAsFirstInput()); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -66,7 +67,7 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar case Primitive::kPrimBoolean: case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false); __ punpcklbw(dst, dst); __ punpcklwd(dst, dst); __ pshufd(dst, dst, Immediate(0)); @@ -74,28 +75,28 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false); __ punpcklwd(dst, dst); __ pshufd(dst, dst, Immediate(0)); break; case Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false); __ pshufd(dst, dst, Immediate(0)); break; case Primitive::kPrimLong: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ true); __ punpcklqdq(dst, dst); break; case Primitive::kPrimFloat: - DCHECK(locations->InAt(0).Equals(locations->Out())); DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(locations->InAt(0).Equals(locations->Out())); __ shufps(dst, dst, Immediate(0)); break; case Primitive::kPrimDouble: - DCHECK(locations->InAt(0).Equals(locations->Out())); DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(locations->InAt(0).Equals(locations->Out())); __ shufpd(dst, dst, Immediate(0)); break; default: @@ -139,11 +140,11 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in UNREACHABLE(); case Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ movd(locations->Out().AsRegister<CpuRegister>(), src); + __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ false); break; case Primitive::kPrimLong: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ movd(locations->Out().AsRegister<CpuRegister>(), src); // is 64-bit + __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ true); break; case Primitive::kPrimFloat: case Primitive::kPrimDouble: @@ -1004,12 +1005,12 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct } } -void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void 
LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); } -void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); } // Helper to set up locations for vector memory operations. diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 99b7793c81..e45ad0a9a3 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -17,6 +17,7 @@ #include "code_generator_x86.h" #include "art_method.h" +#include "class_table.h" #include "code_generator_utils.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" @@ -1035,6 +1036,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), constant_area_start_(-1), @@ -4652,7 +4654,6 @@ Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) { } void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { - DCHECK(GetCompilerOptions().IsBootImage()); HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); string_patches_.emplace_back(address, load_string->GetDexFile(), @@ -4664,9 +4665,9 @@ Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); - string_patches_.emplace_back( + string_bss_entry_patches_.emplace_back( address, load_string->GetDexFile(), load_string->GetStringIndex().index_); - return &string_patches_.back().label; + return &string_bss_entry_patches_.back().label; } // The label points to the end of the "movl" or another instruction but the literal offset @@ -4691,7 +4692,8 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - string_patches_.size(); + string_patches_.size() + + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, @@ -4701,13 +4703,17 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); } else { DCHECK(boot_image_method_patches_.empty()); - DCHECK(boot_image_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::TypeClassTablePatch>(boot_image_type_patches_, + linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(string_patches_, + linker_patches); } 
EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_, + linker_patches); DCHECK_EQ(size, linker_patches->size()); } @@ -6034,6 +6040,7 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageClassTable: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -6071,6 +6078,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || + load_kind == HLoadClass::LoadKind::kBootImageClassTable || load_kind == HLoadClass::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6147,6 +6155,19 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE __ movl(out, Immediate(address)); break; } + case HLoadClass::LoadKind::kBootImageClassTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + Register method_address = locations->InAt(0).AsRegister<Register>(); + __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); + codegen_->RecordBootTypePatch(cls); + // Extract the reference from the slot data, i.e. clear the hash bits. + int32_t masked_hash = ClassTable::TableSlot::MaskHash( + ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); + if (masked_hash != 0) { + __ subl(out, Immediate(masked_hash)); + } + break; + } case HLoadClass::LoadKind::kBssEntry: { Register method_address = locations->InAt(0).AsRegister<Register>(); Address address(method_address, CodeGeneratorX86::kDummy32BitOffset); @@ -6219,6 +6240,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -6237,6 +6259,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || + load_kind == HLoadString::LoadKind::kBootImageInternTable || load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6282,14 +6305,21 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); codegen_->RecordBootStringPatch(load); - return; // No dex cache slow path. + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(load->GetString().Get())); DCHECK_NE(address, 0u); __ movl(out, Immediate(address)); - return; // No dex cache slow path. 
+ return; + } + case HLoadString::LoadKind::kBootImageInternTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + Register method_address = locations->InAt(0).AsRegister<Register>(); + __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); + codegen_->RecordBootStringPatch(load); + return; } case HLoadString::LoadKind::kBssEntry: { Register method_address = locations->InAt(0).AsRegister<Register>(); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f48753b614..b32d57a774 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -640,8 +640,10 @@ class CodeGeneratorX86 : public CodeGenerator { ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_; // Type patch locations for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_; - // String patch locations; type depends on configuration (app .bss or boot image). + // String patch locations; type depends on configuration (intern table or boot image PIC). ArenaDeque<X86PcRelativePatchInfo> string_patches_; + // String patch locations for kBssEntry. + ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_; // Patches for string root accesses in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 8283887a96..8c4374d71e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -17,6 +17,7 @@ #include "code_generator_x86_64.h" #include "art_method.h" +#include "class_table.h" #include "code_generator_utils.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" @@ -1089,15 +1090,15 @@ Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) { } void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { - DCHECK(GetCompilerOptions().IsBootImage()); string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); __ Bind(&string_patches_.back().label); } Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); - string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); - return &string_patches_.back().label; + string_bss_entry_patches_.emplace_back( + load_string->GetDexFile(), load_string->GetStringIndex().index_); + return &string_bss_entry_patches_.back().label; } // The label points to the end of the "movl" or another instruction but the literal offset @@ -1122,7 +1123,8 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - string_patches_.size(); + string_patches_.size() + + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, @@ -1132,13 +1134,17 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); } else { DCHECK(boot_image_method_patches_.empty()); - DCHECK(boot_image_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); + 
EmitPcRelativeLinkerPatches<LinkerPatch::TypeClassTablePatch>(boot_image_type_patches_, + linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_, + linker_patches); DCHECK_EQ(size, linker_patches->size()); } @@ -1230,6 +1236,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { @@ -5451,6 +5458,7 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageClassTable: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5559,6 +5567,18 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. break; } + case HLoadClass::LoadKind::kBootImageClassTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + codegen_->RecordBootTypePatch(cls); + // Extract the reference from the slot data, i.e. clear the hash bits. + int32_t masked_hash = ClassTable::TableSlot::MaskHash( + ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); + if (masked_hash != 0) { + __ subl(out, Immediate(masked_hash)); + } + break; + } case HLoadClass::LoadKind::kBssEntry: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false); @@ -5621,6 +5641,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5678,14 +5699,20 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA DCHECK(codegen_->GetCompilerOptions().IsBootImage()); __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); codegen_->RecordBootStringPatch(load); - return; // No dex cache slow path. + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(load->GetString().Get())); DCHECK_NE(address, 0u); __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. - return; // No dex cache slow path. 
+ return; + } + case HLoadString::LoadKind::kBootImageInternTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + codegen_->RecordBootStringPatch(load); + return; } case HLoadString::LoadKind::kBssEntry: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 33c64290d4..f5fa86bf23 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -611,8 +611,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; // Type patch locations for kBssEntry. ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; - // String patch locations; type depends on configuration (app .bss or boot image). + // String patch locations; type depends on configuration (intern table or boot image PIC). ArenaDeque<PatchInfo<Label>> string_patches_; + // String patch locations for kBssEntry. + ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; // Patches for string literals in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index 6c3a9fd6b5..b558eb17a7 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -64,6 +64,11 @@ static bool IsInterestingInstruction(HInstruction* instruction) { // A fence with "0" inputs is dead and should've been removed in a prior pass. DCHECK_NE(0u, ctor_fence->InputCount()); + // TODO: this should be simplified to 'return true' since it's + // potentially pessimizing any code sinking for inlined constructors with final fields. + // TODO: double check that if the final field assignments are not moved, + // then the fence is not moved either. + return ctor_fence->GetAssociatedAllocation() != nullptr; } diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc new file mode 100644 index 0000000000..ff7ce60905 --- /dev/null +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc @@ -0,0 +1,261 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "constructor_fence_redundancy_elimination.h" + +#include "base/arena_allocator.h" + +namespace art { + +static constexpr bool kCfreLogFenceInputCount = false; + +// TODO: refactor this code by reusing escape analysis. 
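In outline, the visitor below scans each basic block once, collecting consecutive constructor fences and merging them whenever one of their targets may be published. A standalone sketch of that per-block loop, with illustrative stand-in types rather than real ART APIs:

    #include <vector>

    struct Instr {
      bool is_fence;          // an HConstructorFence
      bool publishes_target;  // a store/invoke/alias of a candidate target
    };

    void RunCfreOnBlock(const std::vector<Instr*>& block) {
      std::vector<Instr*> candidates;  // the current contiguous fence set
      auto merge_all = [&candidates]() {
        // Merge every candidate into the last fence (HConstructorFence::Merge
        // in the real pass); earlier fences are removed from the graph.
        candidates.clear();
      };
      for (Instr* insn : block) {
        if (insn->is_fence) {
          candidates.push_back(insn);
        } else if (insn->publishes_target) {
          merge_all();  // an "interesting publish" cuts off the fence set
        }
      }
      merge_all();  // block end: all targets are treated as published
    }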
+class CFREVisitor : public HGraphVisitor { + public: + CFREVisitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), + scoped_allocator_(graph->GetArena()->GetArenaPool()), + candidate_fences_(scoped_allocator_.Adapter(kArenaAllocCFRE)), + candidate_fence_targets_(scoped_allocator_.Adapter(kArenaAllocCFRE)), + stats_(stats) {} + + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + // Visit all instructions in block. + HGraphVisitor::VisitBasicBlock(block); + + // If there were any unmerged fences left, merge them together; + // the objects are considered 'published' at the end of the block. + MergeCandidateFences(); + } + + void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE { + candidate_fences_.push_back(constructor_fence); + + for (size_t input_idx = 0; input_idx < constructor_fence->InputCount(); ++input_idx) { + candidate_fence_targets_.Insert(constructor_fence->InputAt(input_idx)); + } + } + + void VisitBoundType(HBoundType* bound_type) OVERRIDE { + VisitAlias(bound_type); + } + + void VisitNullCheck(HNullCheck* null_check) OVERRIDE { + VisitAlias(null_check); + } + + void VisitSelect(HSelect* select) OVERRIDE { + VisitAlias(select); + } + + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { + HInstruction* value = instruction->InputAt(1); + VisitSetLocation(instruction, value); + } + + void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { + HInstruction* value = instruction->InputAt(1); + VisitSetLocation(instruction, value); + } + + void VisitArraySet(HArraySet* instruction) OVERRIDE { + HInstruction* value = instruction->InputAt(2); + VisitSetLocation(instruction, value); + } + + void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE { + // Pessimize: Merge all fences. + MergeCandidateFences(); + } + + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokePolymorphic(HInvokePolymorphic* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitClinitCheck(HClinitCheck* clinit) OVERRIDE { + HandleInvoke(clinit); + } + + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) OVERRIDE { + // Conservatively treat it as an invocation. + HandleInvoke(instruction); + } + + void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) OVERRIDE { + // Conservatively treat it as an invocation. + HandleInvoke(instruction); + } + + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) OVERRIDE { + // Conservatively treat it as an invocation. + HandleInvoke(instruction); + } + + void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) OVERRIDE { + // Conservatively treat it as an invocation. + HandleInvoke(instruction); + } + + private: + void HandleInvoke(HInstruction* invoke) { + // An object is considered "published" if it escapes into an invoke as any of the parameters. + if (HasInterestingPublishTargetAsInput(invoke)) { + MergeCandidateFences(); + } + } + + // Called by any instruction visitor that may create an alias.
+ // These instructions may create an alias: + // - BoundType + // - NullCheck + // - Select + // + // These also create an alias, but are not handled by this function: + // - Phi: propagates values across blocks, but we always merge at the end of a block. + // - Invoke: this is handled by HandleInvoke. + void VisitAlias(HInstruction* aliasing_inst) { + // An object is considered "published" if it becomes aliased by other instructions. + if (HasInterestingPublishTargetAsInput(aliasing_inst)) { + // Note that constructing a "NullCheck" for new-instance, new-array, + // or a 'this' (receiver) reference is impossible. + // + // If for some reason we actually encounter such a NullCheck(FenceTarget), + // we LOG(WARNING). + if (UNLIKELY(aliasing_inst->IsNullCheck())) { + LOG(kIsDebugBuild ? FATAL : WARNING) + << "Unexpected instruction: NullCheck; should not be legal in graph"; + // We then do a best-effort to handle this case. + } + MergeCandidateFences(); + } + } + + void VisitSetLocation(HInstruction* inst ATTRIBUTE_UNUSED, HInstruction* store_input) { + // An object is considered "published" if it's stored onto the heap. + // Sidenote: A later "LSE" pass can still remove the fence if it proves the + // object doesn't actually escape. + if (IsInterestingPublishTarget(store_input)) { + // Merge all constructor fences that we've seen since + // the last interesting store (or since the beginning). + MergeCandidateFences(); + } + } + + bool HasInterestingPublishTargetAsInput(HInstruction* inst) { + for (size_t input_count = 0; input_count < inst->InputCount(); ++input_count) { + if (IsInterestingPublishTarget(inst->InputAt(input_count))) { + return true; + } + } + + return false; + } + + // Merges all the existing fences we've seen so far into the last-most fence. + // + // This resets the list of candidate fences and their targets back to {}. + void MergeCandidateFences() { + if (candidate_fences_.empty()) { + // Nothing to do, need 1+ fences to merge. + return; + } + + // The merge target is always the "last" candidate fence. + HConstructorFence* merge_target = candidate_fences_[candidate_fences_.size() - 1]; + + for (HConstructorFence* fence : candidate_fences_) { + MaybeMerge(merge_target, fence); + } + + if (kCfreLogFenceInputCount) { + LOG(INFO) << "CFRE-MergeCandidateFences: Post-merge fence input count " + << merge_target->InputCount(); + } + + // Each merge acts as a cut-off point. The optimization is reset completely. + // In theory, we could push the fence as far as its publish, but in practice + // there is no benefit to this extra complexity unless we also reordered + // the stores to come later. + candidate_fences_.clear(); + candidate_fence_targets_.Clear(); + } + + // A publishing 'store' is only interesting if the value being stored + // is one of the fence `targets` in `candidate_fences`. + bool IsInterestingPublishTarget(HInstruction* store_input) const { + return candidate_fence_targets_.Find(store_input) != candidate_fence_targets_.end(); + } + + void MaybeMerge(HConstructorFence* target, HConstructorFence* src) { + if (target == src) { + return; // Don't merge a fence into itself. + // This is mostly for stats purposes; we don't want to count merge(x,x) + // as removing a fence because it's a no-op. + } + + target->Merge(src); + + MaybeRecordStat(stats_, MethodCompilationStat::kConstructorFenceRemovedCFRE); + } + + // Phase-local heap memory allocator for CFRE optimizer. Storage obtained + // through this allocator is immediately released when the CFRE optimizer is done.
+ ArenaAllocator scoped_allocator_; + + // Set of constructor fences that we've seen in the current block. + // Each constructor fence acts as a guard for one or more `targets`. + // No stores to any of the `targets` occur between any of these fences. + // + // Fences are in succession order (e.g. fence[i] succeeds fence[i-1] + // within the same basic block). + ArenaVector<HConstructorFence*> candidate_fences_; + + // Stores a set of the fence targets, to allow faster lookup of whether + // a detected publish is a target of one of the candidate fences. + ArenaHashSet<HInstruction*> candidate_fence_targets_; + + // Used to record stats about the optimization. + OptimizingCompilerStats* const stats_; + + DISALLOW_COPY_AND_ASSIGN(CFREVisitor); +}; + +void ConstructorFenceRedundancyElimination::Run() { + CFREVisitor cfre_visitor(graph_, stats_); + + // Arbitrarily visit in reverse post-order. + // The exact block visit order does not matter, as the algorithm + // only operates on a single block at a time. + cfre_visitor.VisitReversePostOrder(); +} + +} // namespace art diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.h b/compiler/optimizing/constructor_fence_redundancy_elimination.h new file mode 100644 index 0000000000..d89210cd1c --- /dev/null +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ +#define ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ + +#include "optimization.h" + +namespace art { + +/* + * Constructor Fence Redundancy Elimination (CFRE). + * + * A local optimization pass that merges redundant constructor fences + * together within the same basic block. + * + * Abbreviations: + * - CF: Constructor Fence + * - CFS: Constructor Fence Set + * - CFTargets: The unique set of the inputs of all the instructions in CFS. + * + * Given any CFS = { CF(x), CF(y), CF(z), ... }, define CFTargets = { x, y, z, ... }. + * - For the fences in CFS to be redundant, no Publish(R) may occur for any R in CFTargets between any two CFs in CFS. + * - Such a Publish(R) is called an "interesting publish". + * + * A Publish(R) is any instruction at which the reference to "R" + * may escape to another thread (e.g. an invoke, a store, or a return). + * + * Starting at the beginning of the block: + * - Find the largest contiguous CFS. + * - If we see an interesting publish, merge all instructions in CFS into a single CF(CFTargets). + * - Repeat until the block is fully visited. + * - At the end of the block, merge all instructions in CFS into a single CF(CFTargets).
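+ * + * For example, given a block { CF(x); CF(y); a.field = x; CF(z) }, the store publishes x, + * so CF(x) and CF(y) are first merged into a single CF(x, y) at CF(y)'s position (still + * before the store); CF(z) then starts a new candidate set, which is flushed at the end + * of the block.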
+ */ +class ConstructorFenceRedundancyElimination : public HOptimization { + public: + ConstructorFenceRedundancyElimination(HGraph* graph, + OptimizingCompilerStats* stats) + : HOptimization(graph, kPassName, stats) {} + + void Run() OVERRIDE; + + static constexpr const char* kPassName = "constructor_fence_redundancy_elimination"; + + private: + DISALLOW_COPY_AND_ASSIGN(ConstructorFenceRedundancyElimination); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 089340e715..191d3d128c 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -670,6 +670,15 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(static_cast<int32_t>(value))); } + } else if (instruction->IsSub()) { + // Incorporate suitable constants in the chased value. + if (IsInt64AndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) { + return SubValue(Value(static_cast<int32_t>(value)), + GetFetch(instruction->InputAt(1), trip, in_body, !is_min)); + } else if (IsInt64AndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) { + return SubValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), + Value(static_cast<int32_t>(value))); + } } else if (instruction->IsArrayLength()) { // Exploit length properties when chasing constants or chase into a new array declaration. if (chase_hint_ == nullptr) { diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index 2b82b336d7..9437014407 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -723,6 +723,29 @@ TEST_F(InductionVarRangeTest, ArrayLengthAndHints) { ExpectEqual(Value(x_, 1, 0), GetMax(CreateFetch(array_length), nullptr)); } +TEST_F(InductionVarRangeTest, AddOrSubAndConstant) { + HInstruction* add = new (&allocator_) + HAdd(Primitive::kPrimInt, x_, graph_->GetIntConstant(-1)); + HInstruction* alt = new (&allocator_) + HAdd(Primitive::kPrimInt, graph_->GetIntConstant(-1), x_); + HInstruction* sub = new (&allocator_) + HSub(Primitive::kPrimInt, x_, graph_->GetIntConstant(1)); + HInstruction* rev = new (&allocator_) + HSub(Primitive::kPrimInt, graph_->GetIntConstant(1), x_); + entry_block_->AddInstruction(add); + entry_block_->AddInstruction(alt); + entry_block_->AddInstruction(sub); + entry_block_->AddInstruction(rev); + ExpectEqual(Value(x_, 1, -1), GetMin(CreateFetch(add), nullptr)); + ExpectEqual(Value(x_, 1, -1), GetMax(CreateFetch(add), nullptr)); + ExpectEqual(Value(x_, 1, -1), GetMin(CreateFetch(alt), nullptr)); + ExpectEqual(Value(x_, 1, -1), GetMax(CreateFetch(alt), nullptr)); + ExpectEqual(Value(x_, 1, -1), GetMin(CreateFetch(sub), nullptr)); + ExpectEqual(Value(x_, 1, -1), GetMax(CreateFetch(sub), nullptr)); + ExpectEqual(Value(x_, -1, 1), GetMin(CreateFetch(rev), nullptr)); + ExpectEqual(Value(x_, -1, 1), GetMax(CreateFetch(rev), nullptr)); +} + // // Tests on public methods. 
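The new IsSub handling in GetFetch follows plain interval arithmetic: subtracting the chased value from a constant flips which bound is requested, hence the !is_min on the first branch. A small sketch of the two rules, using bare interval pairs instead of ART's Value representation:

    #include <cstdint>

    struct Interval { int32_t min; int32_t max; };

    // Bounds of (c - x): the direction flips, min uses x.max and vice versa.
    Interval SubConstantLeft(int32_t c, Interval x) {
      return Interval{c - x.max, c - x.min};
    }

    // Bounds of (x - c): same direction, both bounds shift down by c.
    Interval SubConstantRight(Interval x, int32_t c) {
      return Interval{x.min - c, x.max - c};
    }

This matches the test above: for sub = x - 1 both bounds are x - 1, while for rev = 1 - x they become 1 - x, i.e. coefficient -1 and offset 1.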
// diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index ca3b191cb0..6532ec123d 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -25,6 +25,7 @@ #include "quicken_info.h" #include "scoped_thread_state_change-inl.h" #include "sharpening.h" +#include "well_known_classes.h" namespace art { diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index f8f4eb2ae3..baa045390b 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -34,6 +34,9 @@ static constexpr bool kEnableVectorization = true; // All current SIMD targets want 16-byte alignment. static constexpr size_t kAlignedBase = 16; +// No loop unrolling factor (just one copy of the loop-body). +static constexpr uint32_t kNoUnrollingFactor = 1; + // Remove the instruction from the graph. A bit more elaborate than the usual // instruction removal, since there may be a cycle in the use structure. static void RemoveFromCycle(HInstruction* instruction) { @@ -331,8 +334,9 @@ static bool CheckInductionSetFullyRemoved(ArenaSet<HInstruction*>* iset) { HLoopOptimization::HLoopOptimization(HGraph* graph, CompilerDriver* compiler_driver, - HInductionVarAnalysis* induction_analysis) - : HOptimization(graph, kLoopOptimizationPassName), + HInductionVarAnalysis* induction_analysis, + OptimizingCompilerStats* stats) + : HOptimization(graph, kLoopOptimizationPassName, stats), compiler_driver_(compiler_driver), induction_range_(induction_analysis), loop_allocator_(nullptr), @@ -625,6 +629,7 @@ bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { Vectorize(node, body, exit, trip_count); graph_->SetHasSIMD(true); // flag SIMD usage + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized); return true; } return false; @@ -789,7 +794,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_index_, ptc, graph_->GetIntConstant(1), - /*unroll*/ 1); + kNoUnrollingFactor); } // Generate vector loop, possibly further unrolled: @@ -816,7 +821,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_index_, stc, graph_->GetIntConstant(1), - /*unroll*/ 1); + kNoUnrollingFactor); } // Link reductions to their final uses. @@ -1724,6 +1729,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, vector_length_, is_unsigned, is_rounded)); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); } else { GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type); } @@ -1758,23 +1764,37 @@ void HLoopOptimization::SetPeelingCandidate(const ArrayReference* candidate, vector_peeling_candidate_ = candidate; } +static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8; +static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50; + uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) { - // Current heuristic: unroll by 2 on ARM64/X86 for large known trip - // counts and small loop bodies. - // TODO: refine with operation count, remaining iterations, etc. - // Artem had some really cool ideas for this already. 
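The rewritten ARM64 heuristic in the next hunk replaces the flat "unroll by 2" rule: it refuses to unroll when there are too few iterations or the body is too large, and otherwise takes the largest power of two that respects both a body-size budget and an iteration budget. A standalone restatement of that arithmetic (the helper name is a stand-in for ART's bit utility):

    #include <algorithm>
    #include <cstdint>

    // Stand-in for ART's TruncToPowerOfTwo: round down to a power of two.
    static uint32_t TruncToPowerOfTwoSketch(uint32_t x) {
      uint32_t p = 1;
      while (p * 2 <= x) p *= 2;  // assumes x >= 1
      return p;
    }

    uint32_t Arm64SimdUnrollFactor(int64_t trip_count,
                                   uint32_t vector_length,
                                   uint32_t instruction_count) {
      constexpr uint32_t kMaxUnroll = 8;     // ARM64_SIMD_MAXIMUM_UNROLL_FACTOR
      constexpr uint32_t kMaxBodySize = 50;  // ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE
      if (trip_count < 2 * vector_length) return 1;     // not enough iterations
      if (instruction_count >= kMaxBodySize) return 1;  // body too large
      uint32_t uf1 = kMaxBodySize / instruction_count;  // body-size budget
      uint32_t uf2 = static_cast<uint32_t>(trip_count / vector_length);
      return TruncToPowerOfTwoSketch(std::min({uf1, uf2, kMaxUnroll}));
    }

For instance, trip_count = 100, vector_length = 4 and instruction_count = 12 give uf1 = 4 and uf2 = 25, so the loop is unrolled 4x.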
switch (compiler_driver_->GetInstructionSet()) { - case kArm64: - case kX86: - case kX86_64: { - size_t num_instructions = block->GetInstructions().CountSize(); - if (num_instructions <= 10 && trip_count >= 4 * vector_length_) { - return 2; + case kArm64: { + // Don't unroll with insufficient iterations. + // TODO: Unroll loops with unknown trip count. + DCHECK_NE(vector_length_, 0u); + if (trip_count < 2 * vector_length_) { + return kNoUnrollingFactor; } - return 1; + // Don't unroll for large loop body size. + uint32_t instruction_count = block->GetInstructions().CountSize(); + if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) { + return kNoUnrollingFactor; + } + // Find a beneficial unroll factor with the following restrictions: + // - At least one iteration of the transformed loop should be executed. + // - The loop body shouldn't be "too big" (heuristic). + uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count; + uint32_t uf2 = trip_count / vector_length_; + uint32_t unroll_factor = + TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR})); + DCHECK_GE(unroll_factor, 1u); + return unroll_factor; } + case kX86: + case kX86_64: default: - return 1; + return kNoUnrollingFactor; } } diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index ba9126c5f6..f34751815b 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -34,7 +34,8 @@ class HLoopOptimization : public HOptimization { public: HLoopOptimization(HGraph* graph, CompilerDriver* compiler_driver, - HInductionVarAnalysis* induction_analysis); + HInductionVarAnalysis* induction_analysis, + OptimizingCompilerStats* stats); void Run() OVERRIDE; diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index b5b03d8f26..1c5603d00f 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -31,7 +31,7 @@ class LoopOptimizationTest : public CommonCompilerTest { allocator_(&pool_), graph_(CreateGraph(&allocator_)), iva_(new (&allocator_) HInductionVarAnalysis(graph_)), - loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_)) { + loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_, nullptr)) { BuildGraph(); } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 1510eafa40..9cff6b005b 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1289,18 +1289,59 @@ size_t HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { return remove_count; } -HInstruction* HConstructorFence::GetAssociatedAllocation() { +void HConstructorFence::Merge(HConstructorFence* other) { + // Do not delete yourself from the graph. + DCHECK(this != other); + // Don't try to merge with an instruction not associated with a block. + DCHECK(other->GetBlock() != nullptr); + // A constructor fence's return type is "kPrimVoid" + // and therefore it cannot have any environment uses. + DCHECK(!other->HasEnvironmentUses()); + + auto has_input = [](HInstruction* haystack, HInstruction* needle) { + // Check if `haystack` has `needle` as any of its inputs. + for (size_t input_count = 0; input_count < haystack->InputCount(); ++input_count) { + if (haystack->InputAt(input_count) == needle) { + return true; + } + } + return false; + }; + + // Add each input of `other` to `this`, unless it is already an input.
+ for (size_t input_count = 0; input_count < other->InputCount(); ++input_count) { + HInstruction* other_input = other->InputAt(input_count); + if (!has_input(this, other_input)) { + AddInput(other_input); + } + } + + other->GetBlock()->RemoveInstruction(other); +} + +HInstruction* HConstructorFence::GetAssociatedAllocation(bool ignore_inputs) { HInstruction* new_instance_inst = GetPrevious(); // Check if the immediately preceding instruction is a new-instance/new-array. // Otherwise this fence is for protecting final fields. if (new_instance_inst != nullptr && (new_instance_inst->IsNewInstance() || new_instance_inst->IsNewArray())) { - // TODO: Need to update this code to handle multiple inputs. - DCHECK_EQ(InputCount(), 1u); - return new_instance_inst; - } else { - return nullptr; + if (ignore_inputs) { + // If inputs are ignored, simply check if the predecessor is + // *any* HNewInstance/HNewArray. + // + // Inputs are normally only ignored for prepare_for_register_allocation, + // at which point *any* prior HNewInstance/Array can be considered + // associated. + return new_instance_inst; + } else { + // Normal case: There must be exactly 1 input and the previous instruction + // must be that input. + if (InputCount() == 1u && InputAt(0) == new_instance_inst) { + return new_instance_inst; + } + } } + return nullptr; } #define DEFINE_ACCEPT(name, super) \ @@ -2736,6 +2777,7 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const { } switch (GetLoadKind()) { case LoadKind::kBootImageAddress: + case LoadKind::kBootImageClassTable: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetClass().Get() == other_load_class->GetClass().Get(); @@ -2769,6 +2811,8 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { return os << "BootImageLinkTimePcRelative"; case HLoadClass::LoadKind::kBootImageAddress: return os << "BootImageAddress"; + case HLoadClass::LoadKind::kBootImageClassTable: + return os << "BootImageClassTable"; case HLoadClass::LoadKind::kBssEntry: return os << "BssEntry"; case HLoadClass::LoadKind::kJitTableAddress: @@ -2791,6 +2835,7 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { } switch (GetLoadKind()) { case LoadKind::kBootImageAddress: + case LoadKind::kBootImageInternTable: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetString().Get() == other_load_string->GetString().Get(); @@ -2821,6 +2866,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { return os << "BootImageLinkTimePcRelative"; case HLoadString::LoadKind::kBootImageAddress: return os << "BootImageAddress"; + case HLoadString::LoadKind::kBootImageInternTable: + return os << "BootImageInternTable"; case HLoadString::LoadKind::kBssEntry: return os << "BssEntry"; case HLoadString::LoadKind::kJitTableAddress: diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 869fdd4182..a6d0da1c96 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -5676,6 +5676,10 @@ class HLoadClass FINAL : public HInstruction { // Used for boot image classes referenced by apps in AOT- and JIT-compiled code. kBootImageAddress, + // Use a PC-relative load from a boot image ClassTable mmapped into the .bss + // of the oat file. + kBootImageClassTable, + // Load from an entry in the .bss section using a PC-relative load. // Used for classes outside boot image when .bss is accessible with a PC-relative load. 
kBssEntry, @@ -5821,6 +5825,7 @@ class HLoadClass FINAL : public HInstruction { static bool HasTypeReference(LoadKind load_kind) { return load_kind == LoadKind::kReferrersClass || load_kind == LoadKind::kBootImageLinkTimePcRelative || + load_kind == LoadKind::kBootImageClassTable || load_kind == LoadKind::kBssEntry || load_kind == LoadKind::kRuntimeCall; } @@ -5854,6 +5859,7 @@ inline void HLoadClass::AddSpecialInput(HInstruction* special_input) { // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || GetLoadKind() == LoadKind::kBootImageAddress || + GetLoadKind() == LoadKind::kBootImageClassTable || GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); DCHECK(special_input_.GetInstruction() == nullptr); special_input_ = HUserRecord<HInstruction*>(special_input); @@ -5872,6 +5878,10 @@ class HLoadString FINAL : public HInstruction { // Used for boot image strings referenced by apps in AOT- and JIT-compiled code. kBootImageAddress, + // Use a PC-relative load from a boot image InternTable mmapped into the .bss + // of the oat file. + kBootImageInternTable, + // Load from an entry in the .bss section using a PC-relative load. // Used for strings outside boot image when .bss is accessible with a PC-relative load. kBssEntry, @@ -5931,6 +5941,7 @@ class HLoadString FINAL : public HInstruction { LoadKind load_kind = GetLoadKind(); if (load_kind == LoadKind::kBootImageLinkTimePcRelative || load_kind == LoadKind::kBootImageAddress || + load_kind == LoadKind::kBootImageInternTable || load_kind == LoadKind::kJitTableAddress) { return false; } @@ -5991,8 +6002,9 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) { // The special input is used for PC-relative loads on some architectures, // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kBssEntry || - GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind(); + GetLoadKind() == LoadKind::kBootImageAddress || + GetLoadKind() == LoadKind::kBootImageInternTable || + GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); // HLoadString::GetInputRecords() returns an empty array at this point, // so use the GetInputRecords() from the base class to set the input record. DCHECK(special_input_.GetInstruction() == nullptr); @@ -6637,13 +6649,24 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction { // Returns how many HConstructorFence instructions were removed from graph. static size_t RemoveConstructorFences(HInstruction* instruction); + // Combine all inputs of `this` and `other`, and remove + // `other` from the graph. + // + // Inputs are unique after the merge. + // + // Requirement: `this` must not be the same as `other`. + void Merge(HConstructorFence* other); + // Check if this constructor fence is protecting // an HNewInstance or HNewArray that is also the immediate // predecessor of `this`. + // + // If `ignore_inputs` is true, then the immediate predecessor doesn't need + // to be one of the inputs of `this`. + // + // Returns the associated HNewArray or HNewInstance, // or null otherwise.
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index e98c97cf9a..399cd98983 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -53,6 +53,7 @@
 #include "compiled_method.h"
 #include "compiler.h"
 #include "constant_folding.h"
+#include "constructor_fence_redundancy_elimination.h"
 #include "dead_code_elimination.h"
 #include "debug/elf_debug_writer.h"
 #include "debug/method_debug_info.h"
@@ -509,11 +510,13 @@ static HOptimization* BuildOptimization(
   } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
     return new (arena) SideEffectsAnalysis(graph);
   } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
-    return new (arena) HLoopOptimization(graph, driver, most_recent_induction);
+    return new (arena) HLoopOptimization(graph, driver, most_recent_induction, stats);
   } else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) {
     return new (arena) CHAGuardOptimization(graph);
   } else if (opt_name == CodeSinking::kCodeSinkingPassName) {
     return new (arena) CodeSinking(graph, stats);
+  } else if (opt_name == ConstructorFenceRedundancyElimination::kPassName) {
+    return new (arena) ConstructorFenceRedundancyElimination(graph, stats);
 #ifdef ART_ENABLE_CODEGEN_arm
   } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) {
     return new (arena) arm::InstructionSimplifierArm(graph, stats);
@@ -770,7 +773,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
   LICM* licm = new (arena) LICM(graph, *side_effects1, stats);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
-  HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
+  HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction, stats);
   LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph);
   LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa, stats);
   HSharpening* sharpening = new (arena) HSharpening(
@@ -784,6 +787,8 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
   CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
   CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
+  ConstructorFenceRedundancyElimination* cfre =
+      new (arena) ConstructorFenceRedundancyElimination(graph, stats);
 
   HOptimization* optimizations1[] = {
     intrinsics,
@@ -821,6 +826,8 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
     // can satisfy. For example, the code generator does not expect to see a
     // HTypeConversion from a type to the same type.
     simplify4,
+    cfre,  // Eliminate constructor fences after code sinking to avoid
+           // complicated sinking logic to split a fence with many inputs.
   };
 
   RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
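// Rough shape of the pass being wired in above, assuming the usual
// HOptimization constructor; constructor_fence_redundancy_elimination.h is
// not shown in this diff, so the pass-name string and body are assumptions:
//
//   class ConstructorFenceRedundancyElimination : public HOptimization {
//    public:
//     ConstructorFenceRedundancyElimination(HGraph* graph,
//                                           OptimizingCompilerStats* stats)
//         : HOptimization(graph, kPassName, stats) {}
//
//     void Run() OVERRIDE;  // merges redundant fences within a block
//
//     static constexpr const char* kPassName = "constructor_fence_redundancy_elimination";
//   };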
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index d6da73cc1c..07f9635aba 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -63,6 +63,8 @@ enum MethodCompilationStat {
   kBooleanSimplified,
   kIntrinsicRecognized,
   kLoopInvariantMoved,
+  kLoopVectorized,
+  kLoopVectorizedIdiom,
   kSelectGenerated,
   kRemovedInstanceOf,
   kInlinedInvokeVirtualOrInterface,
@@ -91,6 +93,7 @@ enum MethodCompilationStat {
   kConstructorFenceGeneratedFinal,
   kConstructorFenceRemovedLSE,
   kConstructorFenceRemovedPFRA,
+  kConstructorFenceRemovedCFRE,
   kLastStat
 };
@@ -183,6 +186,8 @@ class OptimizingCompilerStats {
       case kBooleanSimplified : name = "BooleanSimplified"; break;
       case kIntrinsicRecognized : name = "IntrinsicRecognized"; break;
       case kLoopInvariantMoved : name = "LoopInvariantMoved"; break;
+      case kLoopVectorized : name = "LoopVectorized"; break;
+      case kLoopVectorizedIdiom : name = "LoopVectorizedIdiom"; break;
       case kSelectGenerated : name = "SelectGenerated"; break;
       case kRemovedInstanceOf: name = "RemovedInstanceOf"; break;
       case kInlinedInvokeVirtualOrInterface: name = "InlinedInvokeVirtualOrInterface"; break;
@@ -211,6 +216,7 @@ class OptimizingCompilerStats {
       case kConstructorFenceGeneratedFinal: name = "ConstructorFenceGeneratedFinal"; break;
       case kConstructorFenceRemovedLSE: name = "ConstructorFenceRemovedLSE"; break;
       case kConstructorFenceRemovedPFRA: name = "ConstructorFenceRemovedPFRA"; break;
+      case kConstructorFenceRemovedCFRE: name = "ConstructorFenceRemovedCFRE"; break;
       case kLastStat:
         LOG(FATAL) << "invalid stat "
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index 21b645279e..e569b78c9d 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -75,6 +75,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
     switch (load_kind) {
       case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
       case HLoadClass::LoadKind::kBootImageAddress:
+      case HLoadClass::LoadKind::kBootImageClassTable:
       case HLoadClass::LoadKind::kBssEntry:
         // Add a base register for PC-relative literals on R2.
         InitializePCRelativeBasePointer();
@@ -88,8 +89,9 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
   void VisitLoadString(HLoadString* load_string) OVERRIDE {
     HLoadString::LoadKind load_kind = load_string->GetLoadKind();
     switch (load_kind) {
-      case HLoadString::LoadKind::kBootImageAddress:
       case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadString::LoadKind::kBootImageAddress:
+      case HLoadString::LoadKind::kBootImageInternTable:
       case HLoadString::LoadKind::kBssEntry:
         // Add a base register for PC-relative literals on R2.
         InitializePCRelativeBasePointer();
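// Illustrative only: how a pass typically bumps the new CFRE counter when it
// deletes a fence. `stats_` is the OptimizingCompilerStats* held by an
// HOptimization pass and may be null:
//
//   if (stats_ != nullptr) {
//     stats_->RecordStat(MethodCompilationStat::kConstructorFenceRemovedCFRE);
//   }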
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 2743df9dcf..9877e10474 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -83,6 +83,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
     HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
     if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == HLoadClass::LoadKind::kBootImageClassTable ||
         load_kind == HLoadClass::LoadKind::kBssEntry) {
       HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_class);
       load_class->AddSpecialInput(method_address);
@@ -92,6 +93,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
   void VisitLoadString(HLoadString* load_string) OVERRIDE {
     HLoadString::LoadKind load_kind = load_string->GetLoadKind();
     if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == HLoadString::LoadKind::kBootImageInternTable ||
         load_kind == HLoadString::LoadKind::kBssEntry) {
       HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_string);
       load_string->AddSpecialInput(method_address);
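// Background for the x86 fixup above, as a sketch: 32-bit x86, unlike x86-64,
// has no RIP-relative addressing, so HX86ComputeBaseMethodAddress has to
// materialize a known in-method address that the new PC-relative table loads
// can offset from, roughly:
//
//   call next      // pushes the address of `next`
// next:
//   popl %reg      // %reg now holds a known PC; loads use %reg + offset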
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 9536d149f6..e46c9a7081 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -205,11 +205,15 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
         // TODO(ngeoffray): Generate HDeoptimize instead.
         desired_load_kind = HLoadClass::LoadKind::kRuntimeCall;
       }
-    } else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) {
-      // AOT app compilation. Check if the class is in the boot image.
-      desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+    } else if (is_in_boot_image) {
+      // AOT app compilation, boot image class.
+      if (codegen->GetCompilerOptions().GetCompilePic()) {
+        desired_load_kind = HLoadClass::LoadKind::kBootImageClassTable;
+      } else {
+        desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+      }
     } else {
-      // Not JIT and either the klass is not in boot image or we are compiling in PIC mode.
+      // Not JIT and the klass is not in boot image.
       desired_load_kind = HLoadClass::LoadKind::kBssEntry;
     }
   }
@@ -278,10 +282,12 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
     } else {
       // AOT app compilation. Try to lookup the string without allocating if not found.
       string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
-      if (string != nullptr &&
-          runtime->GetHeap()->ObjectIsInBootImageSpace(string) &&
-          !codegen_->GetCompilerOptions().GetCompilePic()) {
-        desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+      if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
+        if (codegen_->GetCompilerOptions().GetCompilePic()) {
+          desired_load_kind = HLoadString::LoadKind::kBootImageInternTable;
+        } else {
+          desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+        }
       } else {
         desired_load_kind = HLoadString::LoadKind::kBssEntry;
       }
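// The two sharpening hunks above implement the same AOT app-compilation
// decision for classes and strings; condensed (load kinds listed as
// HLoadClass / HLoadString respectively):
//
//   reference in boot image, PIC      -> kBootImageClassTable / kBootImageInternTable
//   reference in boot image, non-PIC  -> kBootImageAddress
//   reference not in boot image       -> kBssEntry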
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index ef53d7237c..5ab558b256 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -159,7 +159,9 @@ class AssemblerTest : public testing::Test {
       for (auto reg2 : reg2_registers) {
         for (int64_t imm : imms) {
           ImmType new_imm = CreateImmediate(imm);
-          (assembler_.get()->*f)(*reg1, *reg2, new_imm * multiplier + bias);
+          if (f != nullptr) {
+            (assembler_.get()->*f)(*reg1, *reg2, new_imm * multiplier + bias);
+          }
           std::string base = fmt;
 
           std::string reg1_string = (this->*GetName1)(*reg1);
@@ -213,7 +215,9 @@ class AssemblerTest : public testing::Test {
         for (auto reg3 : reg3_registers) {
           for (int64_t imm : imms) {
             ImmType new_imm = CreateImmediate(imm);
-            (assembler_.get()->*f)(*reg1, *reg2, *reg3, new_imm + bias);
+            if (f != nullptr) {
+              (assembler_.get()->*f)(*reg1, *reg2, *reg3, new_imm + bias);
+            }
             std::string base = fmt;
 
             std::string reg1_string = (this->*GetName1)(*reg1);
@@ -272,7 +276,9 @@ class AssemblerTest : public testing::Test {
       for (auto reg2 : reg2_registers) {
         for (int64_t imm : imms) {
           ImmType new_imm = CreateImmediate(imm);
-          (assembler_.get()->*f)(new_imm, *reg1, *reg2);
+          if (f != nullptr) {
+            (assembler_.get()->*f)(new_imm, *reg1, *reg2);
+          }
           std::string base = fmt;
 
           std::string reg1_string = (this->*GetName1)(*reg1);
@@ -320,7 +326,9 @@ class AssemblerTest : public testing::Test {
     for (auto reg : registers) {
       for (int64_t imm : imms) {
         ImmType new_imm = CreateImmediate(imm);
-        (assembler_.get()->*f)(*reg, new_imm + bias);
+        if (f != nullptr) {
+          (assembler_.get()->*f)(*reg, new_imm + bias);
+        }
         std::string base = fmt;
 
         std::string reg_string = (this->*GetName)(*reg);
@@ -522,7 +530,9 @@ class AssemblerTest : public testing::Test {
     for (int64_t imm : imms) {
       Imm new_imm = CreateImmediate(imm);
-      (assembler_.get()->*f)(new_imm);
+      if (f != nullptr) {
+        (assembler_.get()->*f)(new_imm);
+      }
       std::string base = fmt;
 
       size_t imm_index = base.find(IMM_TOKEN);
@@ -835,7 +845,9 @@ class AssemblerTest : public testing::Test {
                              const std::string& fmt) {
     std::string str;
     for (auto reg : registers) {
-      (assembler_.get()->*f)(*reg);
+      if (f != nullptr) {
+        (assembler_.get()->*f)(*reg);
+      }
       std::string base = fmt;
 
       std::string reg_string = (this->*GetName)(*reg);
@@ -866,7 +878,9 @@ class AssemblerTest : public testing::Test {
     std::string str;
     for (auto reg1 : reg1_registers) {
       for (auto reg2 : reg2_registers) {
-        (assembler_.get()->*f)(*reg1, *reg2);
+        if (f != nullptr) {
+          (assembler_.get()->*f)(*reg1, *reg2);
+        }
         std::string base = fmt;
 
         std::string reg1_string = (this->*GetName1)(*reg1);
@@ -905,7 +919,9 @@ class AssemblerTest : public testing::Test {
     for (auto reg1 : reg1_registers) {
       for (auto reg2 : reg2_registers) {
         if (reg1 == reg2) continue;
-        (assembler_.get()->*f)(*reg1, *reg2);
+        if (f != nullptr) {
+          (assembler_.get()->*f)(*reg1, *reg2);
+        }
         std::string base = fmt;
 
         std::string reg1_string = (this->*GetName1)(*reg1);
@@ -944,7 +960,9 @@ class AssemblerTest : public testing::Test {
     for (auto reg1 : reg1_registers) {
       for (auto reg2 : reg2_registers) {
         for (auto reg3 : reg3_registers) {
-          (assembler_.get()->*f)(*reg1, *reg2, *reg3);
+          if (f != nullptr) {
+            (assembler_.get()->*f)(*reg1, *reg2, *reg3);
+          }
           std::string base = fmt;
 
           std::string reg1_string = (this->*GetName1)(*reg1);
@@ -993,7 +1011,9 @@ class AssemblerTest : public testing::Test {
       for (auto reg2 : reg2_registers) {
         for (int64_t imm : imms) {
           Imm new_imm = CreateImmediate(imm);
-          (assembler_.get()->*f)(*reg1, *reg2, new_imm);
+          if (f != nullptr) {
+            (assembler_.get()->*f)(*reg1, *reg2, new_imm);
+          }
           std::string base = fmt;
 
           std::string reg1_string = (this->*GetName1)(*reg1);
@@ -1094,7 +1114,9 @@ class AssemblerTest : public testing::Test {
     for (auto reg : registers) {
      for (int64_t imm : imms) {
         Imm new_imm = CreateImmediate(imm);
-        (assembler_.get()->*f)(*reg, new_imm);
+        if (f != nullptr) {
+          (assembler_.get()->*f)(*reg, new_imm);
+        }
         std::string base = fmt;
 
         std::string reg_string = GetRegName<kRegView>(*reg);
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 4c8fb68d62..dd6dcd1896 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -86,7 +86,7 @@ static inline int InstrCountForLoadReplicatedConst32(int64_t value) {
   int32_t y = High32Bits(value);
 
   if (x == y) {
-    return (IsUint<16>(x) || IsInt<16>(x) || ((x & 0xFFFF) == 0 && IsInt<16>(value >> 16))) ? 2 : 3;
+    return (IsUint<16>(x) || IsInt<16>(x) || ((x & 0xFFFF) == 0)) ? 2 : 3;
   }
 
   return INT_MAX;
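// Why the `f != nullptr` guards added above matter: passing a null
// member-function pointer now drives only the format-string expansion of a
// repeat driver, with no instructions assembled. Hypothetical use:
//
//   std::string expansions = RepeatR(/*f*/ nullptr, "%{reg}");
//   // `expansions` lists one expanded "%{reg}" per line; nothing was emitted.
//
// This is what enables the driver self-tests added to the x86/x86-64 test
// files below.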
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index d2122db3fa..48e41173ff 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -33,6 +33,10 @@ TEST(AssemblerX86, CreateBuffer) {
   ASSERT_EQ(static_cast<size_t>(5), buffer.Size());
 }
 
+//
+// Test fixture.
+//
+
 class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, x86::Register,
                                               x86::XmmRegister, x86::Immediate> {
  public:
@@ -105,6 +109,56 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, x86::Register,
   std::vector<x86::XmmRegister*> fp_registers_;
 };
 
+//
+// Test repeat drivers used in the tests.
+//
+
+TEST_F(AssemblerX86Test, RepeatRR) {
+  EXPECT_EQ("%eax %eax\n%eax %ebx\n%eax %ecx\n%eax %edx\n%eax %ebp\n%eax %esp\n%eax %esi\n"
+            "%eax %edi\n%ebx %eax\n%ebx %ebx\n%ebx %ecx\n%ebx %edx\n%ebx %ebp\n%ebx %esp\n"
+            "%ebx %esi\n%ebx %edi\n%ecx %eax\n%ecx %ebx\n%ecx %ecx\n%ecx %edx\n%ecx %ebp\n"
+            "%ecx %esp\n%ecx %esi\n%ecx %edi\n%edx %eax\n%edx %ebx\n%edx %ecx\n%edx %edx\n"
+            "%edx %ebp\n%edx %esp\n%edx %esi\n%edx %edi\n%ebp %eax\n%ebp %ebx\n%ebp %ecx\n"
+            "%ebp %edx\n%ebp %ebp\n%ebp %esp\n%ebp %esi\n%ebp %edi\n%esp %eax\n%esp %ebx\n"
+            "%esp %ecx\n%esp %edx\n%esp %ebp\n%esp %esp\n%esp %esi\n%esp %edi\n%esi %eax\n"
+            "%esi %ebx\n%esi %ecx\n%esi %edx\n%esi %ebp\n%esi %esp\n%esi %esi\n%esi %edi\n"
+            "%edi %eax\n%edi %ebx\n%edi %ecx\n%edi %edx\n%edi %ebp\n%edi %esp\n%edi %esi\n"
+            "%edi %edi\n",
+            RepeatRR(/*f*/ nullptr, "%{reg1} %{reg2}"));
+}
+
+TEST_F(AssemblerX86Test, RepeatRI) {
+  EXPECT_EQ("%eax %0\n%eax %-1\n%eax %18\n%ebx %0\n%ebx %-1\n%ebx %18\n%ecx %0\n%ecx %-1\n"
+            "%ecx %18\n%edx %0\n%edx %-1\n%edx %18\n%ebp %0\n%ebp %-1\n%ebp %18\n%esp %0\n"
+            "%esp %-1\n%esp %18\n%esi %0\n%esi %-1\n%esi %18\n%edi %0\n%edi %-1\n%edi %18\n",
+            RepeatRI(/*f*/ nullptr, /*imm_bytes*/ 1U, "%{reg} %{imm}"));
+}
+
+TEST_F(AssemblerX86Test, RepeatFF) {
+  EXPECT_EQ("%XMM0 %XMM0\n%XMM0 %XMM1\n%XMM0 %XMM2\n%XMM0 %XMM3\n%XMM0 %XMM4\n%XMM0 %XMM5\n"
+            "%XMM0 %XMM6\n%XMM0 %XMM7\n%XMM1 %XMM0\n%XMM1 %XMM1\n%XMM1 %XMM2\n%XMM1 %XMM3\n"
+            "%XMM1 %XMM4\n%XMM1 %XMM5\n%XMM1 %XMM6\n%XMM1 %XMM7\n%XMM2 %XMM0\n%XMM2 %XMM1\n"
+            "%XMM2 %XMM2\n%XMM2 %XMM3\n%XMM2 %XMM4\n%XMM2 %XMM5\n%XMM2 %XMM6\n%XMM2 %XMM7\n"
+            "%XMM3 %XMM0\n%XMM3 %XMM1\n%XMM3 %XMM2\n%XMM3 %XMM3\n%XMM3 %XMM4\n%XMM3 %XMM5\n"
+            "%XMM3 %XMM6\n%XMM3 %XMM7\n%XMM4 %XMM0\n%XMM4 %XMM1\n%XMM4 %XMM2\n%XMM4 %XMM3\n"
+            "%XMM4 %XMM4\n%XMM4 %XMM5\n%XMM4 %XMM6\n%XMM4 %XMM7\n%XMM5 %XMM0\n%XMM5 %XMM1\n"
+            "%XMM5 %XMM2\n%XMM5 %XMM3\n%XMM5 %XMM4\n%XMM5 %XMM5\n%XMM5 %XMM6\n%XMM5 %XMM7\n"
+            "%XMM6 %XMM0\n%XMM6 %XMM1\n%XMM6 %XMM2\n%XMM6 %XMM3\n%XMM6 %XMM4\n%XMM6 %XMM5\n"
+            "%XMM6 %XMM6\n%XMM6 %XMM7\n%XMM7 %XMM0\n%XMM7 %XMM1\n%XMM7 %XMM2\n%XMM7 %XMM3\n"
+            "%XMM7 %XMM4\n%XMM7 %XMM5\n%XMM7 %XMM6\n%XMM7 %XMM7\n",
+            RepeatFF(/*f*/ nullptr, "%{reg1} %{reg2}"));
+}
+
+TEST_F(AssemblerX86Test, RepeatFFI) {
+  EXPECT_NE(RepeatFFI(/*f*/ nullptr, /*imm_bytes*/ 1U, "%{reg1} %{reg2} %{imm}")
+                .find("%XMM0 %XMM0 %0\n%XMM0 %XMM0 %-1\n%XMM0 %XMM0 %18\n"
+                      "%XMM0 %XMM1 %0\n%XMM0 %XMM1 %-1\n%XMM0 %XMM1 %18\n"),
+            std::string::npos);
+}
+
+//
+// Actual x86 instruction assembler tests.
+//
 
 TEST_F(AssemblerX86Test, Movl) {
   GetAssembler()->movl(x86::EAX, x86::EBX);
@@ -838,10 +892,6 @@ TEST_F(AssemblerX86Test, psrldq) {
   DriverStr("psrldq $0x10, %xmm0\n", "psrldqi");
 }
 
-/////////////////
-// Near labels //
-/////////////////
-
 TEST_F(AssemblerX86Test, Jecxz) {
   x86::NearLabel target;
   GetAssembler()->jecxz(&target);
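// For contrast with the nullptr-driven driver self-tests above: an ordinary
// assembler test passes a real member function, so the expanded text is also
// assembled and compared against the host toolchain output. Illustrative
// example (not part of this diff):
//
//   TEST_F(AssemblerX86Test, Xorl) {
//     DriverStr(RepeatRR(&x86::X86Assembler::xorl, "xorl %{reg2}, %{reg1}"), "xorl");
//   }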
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 85afee0746..29de92514c 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -126,6 +126,10 @@ struct X86_64CpuRegisterCompare {
   }
 };
 
+//
+// Test fixture.
+//
+
 class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64::CpuRegister,
                                                  x86_64::XmmRegister, x86_64::Immediate> {
  public:
@@ -273,12 +277,130 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64
   std::vector<x86_64::XmmRegister*> fp_registers_;
 };
 
+//
+// Test repeat drivers used in the tests.
+//
+
+TEST_F(AssemblerX86_64Test, RepeatI4) {
+  EXPECT_EQ("%0\n%-1\n%18\n%4660\n%-4660\n%305419896\n%-305419896\n",
+            RepeatI(/*f*/ nullptr, /*imm_bytes*/ 4U, "%{imm}"));
+}
+
+TEST_F(AssemblerX86_64Test, RepeatI8) {
+  EXPECT_EQ("%0\n%-1\n%18\n%4660\n%-4660\n%305419896\n%-305419896\n"
+            "%20015998343868\n%-20015998343868\n%1311768467463790320\n"
+            "%-1311768467463790320\n",
+            RepeatI(/*f*/ nullptr, /*imm_bytes*/ 8U, "%{imm}"));
+}
+
+TEST_F(AssemblerX86_64Test, Repeatr) {
+  EXPECT_EQ("%eax\n%ebx\n%ecx\n%edx\n%ebp\n%esp\n%esi\n%edi\n"
+            "%r8d\n%r9d\n%r10d\n%r11d\n%r12d\n%r13d\n%r14d\n%r15d\n",
+            Repeatr(/*f*/ nullptr, "%{reg}"));
+}
+
+TEST_F(AssemblerX86_64Test, Repeatri) {
+  EXPECT_NE(Repeatri(/*f*/ nullptr, /*imm_bytes*/ 1U, "%{reg} %{imm}")
+                .find("%eax %0\n%eax %-1\n%eax %18\n%ebx %0\n%ebx %-1\n%ebx %18\n"
+                      "%ecx %0\n%ecx %-1\n%ecx %18\n%edx %0\n%edx %-1\n%edx %18\n"),
+            std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, Repeatrr) {
+  EXPECT_NE(Repeatrr(/*f*/ nullptr, "%{reg1} %{reg2}")
+                .find("%eax %eax\n%eax %ebx\n%eax %ecx\n%eax %edx\n"
+                      "%eax %ebp\n%eax %esp\n%eax %esi\n%eax %edi\n"),
+            std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, Repeatrb) {
+  EXPECT_NE(Repeatrb(/*f*/ nullptr, "%{reg1} %{reg2}")
+                .find("%eax %al\n%eax %bl\n%eax %cl\n%eax %dl\n%eax %bpl\n"
+                      "%eax %spl\n%eax %sil\n%eax %dil\n%eax %r8b\n%eax %r9b\n"),
+            std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, RepeatrF) {
+  EXPECT_NE(RepeatrF(/*f*/ nullptr, "%{reg1} %{reg2}")
+                .find("%eax %xmm0\n%eax %xmm1\n%eax %xmm2\n%eax %xmm3\n"
+                      "%eax %xmm4\n%eax %xmm5\n%eax %xmm6\n%eax %xmm7\n"
+                      "%eax %xmm8\n%eax %xmm9\n%eax %xmm10\n%eax %xmm11\n"
+                      "%eax %xmm12\n%eax %xmm13\n%eax %xmm14\n%eax %xmm15\n"
+                      "%ebx %xmm0\n%ebx %xmm1\n%ebx %xmm2\n%ebx %xmm3\n%ebx %xmm4\n"),
+            std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, RepeatR) {
+  EXPECT_EQ("%rax\n%rbx\n%rcx\n%rdx\n%rbp\n%rsp\n%rsi\n%rdi\n"
+            "%r8\n%r9\n%r10\n%r11\n%r12\n%r13\n%r14\n%r15\n",
+            RepeatR(/*f*/ nullptr, "%{reg}"));
+}
+
+TEST_F(AssemblerX86_64Test, RepeatRI) {
+  EXPECT_EQ("%rax %0\n%rax %-1\n%rax %18\n%rbx %0\n%rbx %-1\n%rbx %18\n"
+            "%rcx %0\n%rcx %-1\n%rcx %18\n%rdx %0\n%rdx %-1\n%rdx %18\n"
+            "%rbp %0\n%rbp %-1\n%rbp %18\n%rsp %0\n%rsp %-1\n%rsp %18\n"
+            "%rsi %0\n%rsi %-1\n%rsi %18\n%rdi %0\n%rdi %-1\n%rdi %18\n"
+            "%r8 %0\n%r8 %-1\n%r8 %18\n%r9 %0\n%r9 %-1\n%r9 %18\n"
+            "%r10 %0\n%r10 %-1\n%r10 %18\n%r11 %0\n%r11 %-1\n%r11 %18\n"
+            "%r12 %0\n%r12 %-1\n%r12 %18\n%r13 %0\n%r13 %-1\n%r13 %18\n"
+            "%r14 %0\n%r14 %-1\n%r14 %18\n%r15 %0\n%r15 %-1\n%r15 %18\n",
+            RepeatRI(/*f*/ nullptr, /*imm_bytes*/ 1U, "%{reg} %{imm}"));
+}
+
+TEST_F(AssemblerX86_64Test, RepeatRr) {
+  EXPECT_NE(RepeatRr(/*f*/ nullptr, "%{reg1} %{reg2}")
+                .find("%rax %eax\n%rax %ebx\n%rax %ecx\n%rax %edx\n%rax %ebp\n"
+                      "%rax %esp\n%rax %esi\n%rax %edi\n%rax %r8d\n%rax %r9d\n"
+                      "%rax %r10d\n%rax %r11d\n%rax %r12d\n%rax %r13d\n%rax %r14d\n"
+                      "%rax %r15d\n%rbx %eax\n%rbx %ebx\n%rbx %ecx\n%rbx %edx\n"),
+            std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, RepeatRR) {
+  EXPECT_NE(RepeatRR(/*f*/ nullptr, "%{reg1} %{reg2}")
+                .find("%rax %rax\n%rax %rbx\n%rax %rcx\n%rax %rdx\n%rax %rbp\n"
+                      "%rax %rsp\n%rax %rsi\n%rax %rdi\n%rax %r8\n%rax %r9\n"
+                      "%rax %r10\n%rax %r11\n%rax %r12\n%rax %r13\n%rax %r14\n"
+                      "%rax %r15\n%rbx %rax\n%rbx %rbx\n%rbx %rcx\n%rbx %rdx\n"),
+            std::string::npos);
+}
+TEST_F(AssemblerX86_64Test, RepeatRF) {
+  EXPECT_NE(RepeatRF(/*f*/ nullptr, "%{reg1} %{reg2}")
+                .find("%rax %xmm0\n%rax %xmm1\n%rax %xmm2\n%rax %xmm3\n%rax %xmm4\n"
+                      "%rax %xmm5\n%rax %xmm6\n%rax %xmm7\n%rax %xmm8\n%rax %xmm9\n"
+                      "%rax %xmm10\n%rax %xmm11\n%rax %xmm12\n%rax %xmm13\n%rax %xmm14\n"
+                      "%rax %xmm15\n%rbx %xmm0\n%rbx %xmm1\n%rbx %xmm2\n%rbx %xmm3\n"),
+            std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, RepeatFF) {
+  EXPECT_NE(RepeatFF(/*f*/ nullptr, "%{reg1} %{reg2}")
+                .find("%xmm0 %xmm0\n%xmm0 %xmm1\n%xmm0 %xmm2\n%xmm0 %xmm3\n%xmm0 %xmm4\n"
+                      "%xmm0 %xmm5\n%xmm0 %xmm6\n%xmm0 %xmm7\n%xmm0 %xmm8\n%xmm0 %xmm9\n"
+                      "%xmm0 %xmm10\n%xmm0 %xmm11\n%xmm0 %xmm12\n%xmm0 %xmm13\n%xmm0 %xmm14\n"
+                      "%xmm0 %xmm15\n%xmm1 %xmm0\n%xmm1 %xmm1\n%xmm1 %xmm2\n%xmm1 %xmm3\n"),
+            std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, RepeatFFI) {
+  EXPECT_NE(RepeatFFI(/*f*/ nullptr, /*imm_bytes*/ 1U, "%{reg1} %{reg2} %{imm}")
+                .find("%xmm0 %xmm0 %0\n%xmm0 %xmm0 %-1\n%xmm0 %xmm0 %18\n"
+                      "%xmm0 %xmm1 %0\n%xmm0 %xmm1 %-1\n%xmm0 %xmm1 %18\n"
+                      "%xmm0 %xmm2 %0\n%xmm0 %xmm2 %-1\n%xmm0 %xmm2 %18\n"
+                      "%xmm0 %xmm3 %0\n%xmm0 %xmm3 %-1\n%xmm0 %xmm3 %18\n"),
+            std::string::npos);
+}
+
+//
+// Actual x86-64 instruction assembler tests.
+//
 
 TEST_F(AssemblerX86_64Test, Toolchain) {
   EXPECT_TRUE(CheckTools());
 }
 
-
 TEST_F(AssemblerX86_64Test, PushqRegs) {
   DriverStr(RepeatR(&x86_64::X86_64Assembler::pushq, "pushq %{reg}"), "pushq");
 }
@@ -978,10 +1100,6 @@ TEST_F(AssemblerX86_64Test, Movsxd) {
   DriverStr(RepeatRr(&x86_64::X86_64Assembler::movsxd, "movsxd %{reg2}, %{reg1}"), "movsxd");
 }
 
-///////////////////
-// FP Operations //
-///////////////////
-
 TEST_F(AssemblerX86_64Test, Movaps) {
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::movaps, "movaps %{reg2}, %{reg1}"), "movaps");
 }
@@ -1176,17 +1294,14 @@ TEST_F(AssemblerX86_64Test, Cvtsi2sd) {
   DriverStr(RepeatFr(&x86_64::X86_64Assembler::cvtsi2sd, "cvtsi2sd %{reg2}, %{reg1}"), "cvtsi2sd");
 }
 
-
 TEST_F(AssemblerX86_64Test, Cvtss2si) {
   DriverStr(RepeatrF(&x86_64::X86_64Assembler::cvtss2si, "cvtss2si %{reg2}, %{reg1}"), "cvtss2si");
 }
 
-
 TEST_F(AssemblerX86_64Test, Cvtss2sd) {
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtss2sd, "cvtss2sd %{reg2}, %{reg1}"), "cvtss2sd");
 }
 
-
 TEST_F(AssemblerX86_64Test, Cvtsd2si) {
   DriverStr(RepeatrF(&x86_64::X86_64Assembler::cvtsd2si, "cvtsd2si %{reg2}, %{reg1}"), "cvtsd2si");
 }
@@ -1586,8 +1701,6 @@ TEST_F(AssemblerX86_64Test, UcomisdAddress) {
   DriverStr(expected, "ucomisd_address");
 }
 
-// X87
-
 std::string x87_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
                    x86_64::X86_64Assembler* assembler) {
   std::ostringstream str;
@@ -1629,10 +1742,6 @@ TEST_F(AssemblerX86_64Test, FPUIntegerStore) {
   DriverStr(expected, "FPUIntegerStore");
 }
 
-////////////////
-// CALL / JMP //
-////////////////
-
 TEST_F(AssemblerX86_64Test, Call) {
   DriverStr(RepeatR(&x86_64::X86_64Assembler::call, "call *%{reg}"), "call");
 }
@@ -1668,10 +1777,6 @@ TEST_F(AssemblerX86_64Test, RetAndLeave) {
   DriverFn(&ret_and_leave_fn, "retleave");
 }
 
-//////////
-// MISC //
-//////////
-
 TEST_F(AssemblerX86_64Test, Bswapl) {
   DriverStr(Repeatr(&x86_64::X86_64Assembler::bswapl, "bswap %{reg}"), "bswapl");
 }
@@ -1824,11 +1929,6 @@ TEST_F(AssemblerX86_64Test, CmovqAddress) {
   DriverStr(expected, "cmovq_address");
 }
 
-
-/////////////////
-// Near labels //
-/////////////////
-
 TEST_F(AssemblerX86_64Test, Jrcxz) {
   x86_64::NearLabel target;
   GetAssembler()->jrcxz(&target);
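// Note on the jecxz/jrcxz tests retained above: both instructions encode only
// an 8-bit branch displacement, which is why these tests use x86::NearLabel /
// x86_64::NearLabel rather than an ordinary Label.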