Diffstat (limited to 'compiler')
100 files changed, 6399 insertions, 2207 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index d060dd49de..c798d9782a 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -315,6 +315,7 @@ art_cc_library { srcs: ["common_compiler_test.cc"], shared_libs: [ "libartd-compiler", + "libartd-disassembler", "libart-runtime-gtest", "libbase", ], diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h index 5347e7fef3..866a4d57a7 100644 --- a/compiler/cfi_test.h +++ b/compiler/cfi_test.h @@ -26,7 +26,7 @@ #include "debug/dwarf/dwarf_constants.h" #include "debug/dwarf/dwarf_test.h" #include "debug/dwarf/headers.h" -#include "disassembler/disassembler.h" +#include "disassembler.h" #include "gtest/gtest.h" #include "thread.h" diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index a9a718f43c..0d38620b1a 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -95,7 +95,7 @@ void CommonCompilerTest::MakeExecutable(ArtMethod* method) { const void* method_code = CompiledMethod::CodePointer(code_ptr, compiled_method->GetInstructionSet()); LOG(INFO) << "MakeExecutable " << method->PrettyMethod() << " code=" << method_code; - class_linker_->SetEntryPointsToCompiledCode(method, method_code); + method->SetEntryPointFromQuickCompiledCode(method_code); } else { // No code? You must mean to go into the interpreter. // Or the generic JNI... diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index 97127f58ed..c67c523eb3 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -126,6 +126,7 @@ class LinkerPatch { kTypeRelative, // NOTE: Actual patching is instruction_set-dependent. kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent. kStringRelative, // NOTE: Actual patching is instruction_set-dependent. + kStringInternTable, // NOTE: Actual patching is instruction_set-dependent. kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent. kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent. 
}; @@ -196,6 +197,16 @@ class LinkerPatch { return patch; } + static LinkerPatch StringInternTablePatch(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t target_string_idx) { + LinkerPatch patch(literal_offset, Type::kStringInternTable, target_dex_file); + patch.string_idx_ = target_string_idx; + patch.pc_insn_offset_ = pc_insn_offset; + return patch; + } + static LinkerPatch StringBssEntryPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, @@ -234,6 +245,7 @@ class LinkerPatch { case Type::kTypeRelative: case Type::kTypeBssEntry: case Type::kStringRelative: + case Type::kStringInternTable: case Type::kStringBssEntry: case Type::kBakerReadBarrierBranch: return true; @@ -264,12 +276,14 @@ class LinkerPatch { const DexFile* TargetStringDexFile() const { DCHECK(patch_type_ == Type::kStringRelative || + patch_type_ == Type::kStringInternTable || patch_type_ == Type::kStringBssEntry); return target_dex_file_; } dex::StringIndex TargetStringIndex() const { DCHECK(patch_type_ == Type::kStringRelative || + patch_type_ == Type::kStringInternTable || patch_type_ == Type::kStringBssEntry); return dex::StringIndex(string_idx_); } @@ -280,6 +294,7 @@ class LinkerPatch { patch_type_ == Type::kTypeRelative || patch_type_ == Type::kTypeBssEntry || patch_type_ == Type::kStringRelative || + patch_type_ == Type::kStringInternTable || patch_type_ == Type::kStringBssEntry); return pc_insn_offset_; } diff --git a/compiler/dex/dex_to_dex_decompiler_test.cc b/compiler/dex/dex_to_dex_decompiler_test.cc index 1ef3ba7c00..e36d416e9f 100644 --- a/compiler/dex/dex_to_dex_decompiler_test.cc +++ b/compiler/dex/dex_to_dex_decompiler_test.cc @@ -17,12 +17,12 @@ #include "dex_to_dex_decompiler.h" #include "class_linker.h" -#include "compiler/common_compiler_test.h" -#include "compiler/compiled_method.h" -#include "compiler/driver/compiler_driver.h" -#include "compiler/driver/compiler_options.h" +#include "common_compiler_test.h" +#include "compiled_method.h" #include "compiler_callbacks.h" #include "dex_file.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "handle_scope-inl.h" #include "mirror/class_loader.h" #include "runtime.h" diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc index 872f7ea15d..23511e55fc 100644 --- a/compiler/dex/quick_compiler_callbacks.cc +++ b/compiler/dex/quick_compiler_callbacks.cc @@ -16,6 +16,7 @@ #include "quick_compiler_callbacks.h" +#include "driver/compiler_driver.h" #include "verification_results.h" #include "verifier/method_verifier-inl.h" @@ -33,4 +34,21 @@ void QuickCompilerCallbacks::ClassRejected(ClassReference ref) { } } +ClassStatus QuickCompilerCallbacks::GetPreviousClassState(ClassReference ref) { + // If we don't have class unloading enabled in the compiler, we will never see classes that were + // previously verified. Return kStatusNotReady to avoid overhead from the lookup in the compiler driver. + if (!does_class_unloading_) { + return ClassStatus::kStatusNotReady; + } + DCHECK(compiler_driver_ != nullptr); + // In the case of the quicken filter: avoiding verification of quickened instructions, which the + // verifier doesn't currently support. + // In the case of the verify filter, avoiding verifying twice. 
+ ClassStatus status; + if (!compiler_driver_->GetCompiledClass(ref, &status)) { + return ClassStatus::kStatusNotReady; + } + return status; +} + } // namespace art diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h index a3a6c0972c..45456f2a1c 100644 --- a/compiler/dex/quick_compiler_callbacks.h +++ b/compiler/dex/quick_compiler_callbacks.h @@ -22,6 +22,7 @@ namespace art { +class CompilerDriver; class VerificationResults; class QuickCompilerCallbacks FINAL : public CompilerCallbacks { @@ -53,8 +54,19 @@ class QuickCompilerCallbacks FINAL : public CompilerCallbacks { verification_results_ = verification_results; } + ClassStatus GetPreviousClassState(ClassReference ref) OVERRIDE; + + void SetDoesClassUnloading(bool does_class_unloading, CompilerDriver* compiler_driver) + OVERRIDE { + does_class_unloading_ = does_class_unloading; + compiler_driver_ = compiler_driver; + DCHECK(!does_class_unloading || compiler_driver_ != nullptr); + } + private: VerificationResults* verification_results_ = nullptr; + bool does_class_unloading_ = false; + CompilerDriver* compiler_driver_ = nullptr; std::unique_ptr<verifier::VerifierDeps> verifier_deps_; }; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index bd530ac6a6..18b54eefba 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -2102,16 +2102,29 @@ class VerifyClassVisitor : public CompilationVisitor { ClassReference ref(manager_->GetDexFile(), class_def_index); manager_->GetCompiler()->RecordClassStatus(ref, klass->GetStatus()); - // It is *very* problematic if there are verification errors in the boot classpath. For example, - // we rely on things working OK without verification when the decryption dialog is brought up. - // So abort in a debug build if we find this violated. + // It is *very* problematic if there are verification errors in the boot classpath. + // For example, we rely on things working OK without verification when the decryption dialog + // is brought up. So abort in a debug build if we find this violated. if (kIsDebugBuild) { - // TODO(narayan): Remove this special case for signature polymorphic - // invokes once verifier support is fully implemented. - if (manager_->GetCompiler()->GetCompilerOptions().IsBootImage() && - !android::base::StartsWith(descriptor, "Ljava/lang/invoke/")) { - DCHECK(klass->IsVerified()) << "Boot classpath class " << klass->PrettyClass() - << " failed to fully verify: state= " << klass->GetStatus(); + if (manager_->GetCompiler()->GetCompilerOptions().IsBootImage()) { + if (!klass->IsVerified()) { + // Re-run verification to get all failure messages if it soft-failed. + if (!klass->IsErroneous()) { + gLogVerbosity.verifier = true; + // Note: We can't call ClassLinker::VerifyClass, as it will elide the second + // verification. 
+ Runtime* runtime = Runtime::Current(); + std::string v_error; + verifier::MethodVerifier::VerifyClass(soa.Self(), + klass.Get(), + runtime->GetCompilerCallbacks(), + runtime->IsAotCompiler(), + verifier::HardFailLogMode::kLogInternalFatal, + &v_error); + } + LOG(FATAL) << "Boot classpath class " << klass->PrettyClass() + << " failed to fully verify: state= " << klass->GetStatus(); + } } if (klass->IsVerified()) { DCHECK_EQ(failure_kind, verifier::FailureKind::kNoFailure); @@ -2879,9 +2892,9 @@ void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref, bool CompilerDriver::GetCompiledClass(ClassReference ref, mirror::Class::Status* status) const { DCHECK(status != nullptr); // The table doesn't know if something wasn't inserted. For this case it will return - // kStatusNotReady. To handle this, just assume anything not verified is not compiled. + // kStatusNotReady. To handle this, just assume anything we didn't try to verify is not compiled. if (!compiled_classes_.Get(DexFileReference(ref.first, ref.second), status) || - *status < mirror::Class::kStatusVerified) { + *status < mirror::Class::kStatusRetryVerificationAtRuntime) { return false; } return true; diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index d9886a2fba..d08d9d7940 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -22,6 +22,8 @@ #include <unordered_set> #include <vector> +#include "android-base/strings.h" + #include "arch/instruction_set.h" #include "base/array_ref.h" #include "base/bit_utils.h" @@ -377,6 +379,14 @@ class CompilerDriver { return profile_compilation_info_; } + // Is `boot_image_filename` the name of a core image (small boot + // image used for ART testing only)? + static bool IsCoreImageFilename(const std::string& boot_image_filename) { + // TODO: This is under-approximating... + return android::base::EndsWith(boot_image_filename, "core.art") + || android::base::EndsWith(boot_image_filename, "core-optimizing.art"); + } + private: void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files, diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index fee6afb91f..278358b250 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -23,6 +23,7 @@ #include "art_method-inl.h" #include "class_linker-inl.h" #include "common_compiler_test.h" +#include "compiler_callbacks.h" #include "dex_file.h" #include "dex_file_types.h" #include "gc/heap.h" @@ -366,6 +367,41 @@ TEST_F(CompilerDriverVerifyTest, VerifyCompilation) { CheckVerifiedClass(class_loader, "LSecond;"); } +// Test that a class of status kStatusRetryVerificationAtRuntime is indeed recorded that way in the +// driver. +TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatusCheckVerified) { + Thread* const self = Thread::Current(); + jobject class_loader; + std::vector<const DexFile*> dex_files; + const DexFile* dex_file = nullptr; + { + ScopedObjectAccess soa(self); + class_loader = LoadDex("ProfileTestMultiDex"); + ASSERT_NE(class_loader, nullptr); + dex_files = GetDexFiles(class_loader); + ASSERT_GT(dex_files.size(), 0u); + dex_file = dex_files.front(); + } + compiler_driver_->SetDexFilesForOatFile(dex_files); + callbacks_->SetDoesClassUnloading(true, compiler_driver_.get()); + ClassReference ref(dex_file, 0u); + // Test that the status is read from the compiler driver as expected. 
+ for (size_t i = mirror::Class::kStatusRetryVerificationAtRuntime; + i < mirror::Class::kStatusMax; + ++i) { + const mirror::Class::Status expected_status = static_cast<mirror::Class::Status>(i); + // Skip unsupported status that are not supposed to be ever recorded. + if (expected_status == mirror::Class::kStatusVerifyingAtRuntime || + expected_status == mirror::Class::kStatusInitializing) { + continue; + } + compiler_driver_->RecordClassStatus(ref, expected_status); + mirror::Class::Status status = {}; + ASSERT_TRUE(compiler_driver_->GetCompiledClass(ref, &status)); + EXPECT_EQ(status, expected_status); + } +} + // TODO: need check-cast test (when stub complete & we can throw/catch } // namespace art diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index 3cacc2cad7..538845de19 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -18,6 +18,8 @@ #include <fstream> +#include "runtime.h" + namespace art { CompilerOptions::CompilerOptions() @@ -30,6 +32,7 @@ CompilerOptions::CompilerOptions() inline_max_code_units_(kUnsetInlineMaxCodeUnits), no_inline_from_(nullptr), boot_image_(false), + core_image_(false), app_image_(false), top_k_profile_threshold_(kDefaultTopKProfileThreshold), debuggable_(false), @@ -55,6 +58,19 @@ CompilerOptions::~CompilerOptions() { // because we don't want to include the PassManagerOptions definition from the header file. } +bool CompilerOptions::EmitRunTimeChecksInDebugMode() const { + // Run-time checks (e.g. Marking Register checks) are only emitted + // in debug mode, and + // - when running on device; or + // - when running on host, but only + // - when compiling the core image (which is used only for testing); or + // - when JIT compiling (only relevant for non-native methods). + // This is to prevent these checks from being emitted into pre-opted + // boot image or apps, as these are compiled with dex2oatd. + return kIsDebugBuild && + (kIsTargetBuild || IsCoreImage() || Runtime::Current()->UseJitCompilation()); +} + void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) { ParseUintOption(option, "--huge-method-max", &huge_method_threshold_, Usage); } diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index b99263db0e..a9372c4844 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -161,6 +161,9 @@ class CompilerOptions FINAL { return generate_mini_debug_info_; } + // Should run-time checks be emitted in debug mode? + bool EmitRunTimeChecksInDebugMode() const; + bool GetGenerateBuildId() const { return generate_build_id_; } @@ -177,14 +180,27 @@ class CompilerOptions FINAL { return implicit_suspend_checks_; } + // Are we compiling a boot image? bool IsBootImage() const { return boot_image_; } + // Are we compiling a core image (small boot image only used for ART testing)? + bool IsCoreImage() const { + // Ensure that `core_image_` => `boot_image_`. + DCHECK(!core_image_ || boot_image_); + return core_image_; + } + + // Are we compiling an app image? bool IsAppImage() const { return app_image_; } + void DisableAppImage() { + app_image_ = false; + } + // Should the code be compiled as position independent? 
bool GetCompilePic() const { return compile_pic_; @@ -266,6 +282,7 @@ class CompilerOptions FINAL { const std::vector<const DexFile*>* no_inline_from_; bool boot_image_; + bool core_image_; bool app_image_; // When using a profile file only the top K% of the profiled samples will be compiled. double top_k_profile_threshold_; diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 252fdd67e1..7b623dd979 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -46,7 +46,7 @@ TEST_F(ImageTest, TestImageLayout) { // Make sure that the new stuff in the clinit in ImageLayoutB is in the last image and not in the // first two images. ASSERT_EQ(image_sizes.size(), image_sizes.size()); - // Sizes of the images should be the same. These sizes are for the whole image unrounded. + // Sizes of the object sections should be the same for all but the last image. for (size_t i = 0; i < image_sizes.size() - 1; ++i) { EXPECT_EQ(image_sizes[i], image_sizes_extra[i]); } diff --git a/compiler/image_test.h b/compiler/image_test.h index daa4b11967..f1adeddb69 100644 --- a/compiler/image_test.h +++ b/compiler/image_test.h @@ -133,7 +133,7 @@ inline std::vector<size_t> CompilationHelper::GetImageObjectSectionSizes() { ImageHeader image_header; CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true); CHECK(image_header.IsValid()); - ret.push_back(image_header.GetImageSize()); + ret.push_back(image_header.GetObjectsSection().Size()); } return ret; } @@ -398,7 +398,7 @@ inline void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { ImageHeader image_header; ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true); ASSERT_TRUE(image_header.IsValid()); - const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap); + const auto& bitmap_section = image_header.GetImageBitmapSection(); ASSERT_GE(bitmap_section.Offset(), sizeof(image_header)); ASSERT_NE(0U, bitmap_section.Size()); diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 9e4971ce75..fa9f64c9a6 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -298,8 +298,7 @@ bool ImageWriter::Write(int image_fd, // Write out the image bitmap at the page aligned start of the image end, also uncompressed for // convenience. - const ImageSection& bitmap_section = image_header->GetImageSection( - ImageHeader::kSectionImageBitmap); + const ImageSection& bitmap_section = image_header->GetImageBitmapSection(); // Align up since data size may be unaligned if the image is compressed. 
size_t bitmap_position_in_file = RoundUp(sizeof(ImageHeader) + data_size, kPageSize); if (!is_compressed) { @@ -690,7 +689,7 @@ bool ImageWriter::AllocMemory() { for (ImageInfo& image_info : image_infos_) { ImageSection unused_sections[ImageHeader::kSectionCount]; const size_t length = RoundUp( - image_info.CreateImageSections(unused_sections), kPageSize); + image_info.CreateImageSections(unused_sections, compile_app_image_), kPageSize); std::string error_msg; image_info.image_.reset(MemMap::MapAnonymous("image writer image", @@ -1686,6 +1685,10 @@ void ImageWriter::CalculateNewObjectOffsets() { runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs); image_methods_[ImageHeader::kSaveEverythingMethod] = runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverything); + image_methods_[ImageHeader::kSaveEverythingMethodForClinit] = + runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverythingForClinit); + image_methods_[ImageHeader::kSaveEverythingMethodForSuspendCheck] = + runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverythingForSuspendCheck); // Visit image methods first to have the main runtime methods in the first image. for (auto* m : image_methods_) { CHECK(m != nullptr); @@ -1831,7 +1834,8 @@ void ImageWriter::CalculateNewObjectOffsets() { image_info.image_begin_ = global_image_begin_ + image_offset; image_info.image_offset_ = image_offset; ImageSection unused_sections[ImageHeader::kSectionCount]; - image_info.image_size_ = RoundUp(image_info.CreateImageSections(unused_sections), kPageSize); + image_info.image_size_ = + RoundUp(image_info.CreateImageSections(unused_sections, compile_app_image_), kPageSize); // There should be no gaps until the next image. image_offset += image_info.image_size_; } @@ -1862,7 +1866,8 @@ void ImageWriter::CalculateNewObjectOffsets() { } } -size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) const { +size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections, + bool app_image) const { DCHECK(out_sections != nullptr); // Do not round up any sections here that are represented by the bins since it will break @@ -1901,8 +1906,13 @@ size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) c ImageSection* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays]; *dex_cache_arrays_section = ImageSection(bin_slot_offsets_[kBinDexCacheArray], bin_slot_sizes_[kBinDexCacheArray]); - // Round up to the alignment the string table expects. See HashSet::WriteToMemory. - size_t cur_pos = RoundUp(dex_cache_arrays_section->End(), sizeof(uint64_t)); + // For boot image, round up to the page boundary to separate the interned strings and + // class table from the modifiable data. We shall mprotect() these pages read-only when + // we load the boot image. This is more than sufficient for the string table alignment, + // namely sizeof(uint64_t). See HashSet::WriteToMemory. + static_assert(IsAligned<sizeof(uint64_t)>(kPageSize), "String table alignment check."); + size_t cur_pos = + RoundUp(dex_cache_arrays_section->End(), app_image ? sizeof(uint64_t) : kPageSize); // Calculate the size of the interned strings. ImageSection* interned_strings_section = &out_sections[ImageHeader::kSectionInternedStrings]; *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_); @@ -1925,7 +1935,7 @@ void ImageWriter::CreateHeader(size_t oat_index) { // Create the image sections. 
ImageSection sections[ImageHeader::kSectionCount]; - const size_t image_end = image_info.CreateImageSections(sections); + const size_t image_end = image_info.CreateImageSections(sections, compile_app_image_); // Finally bitmap section. const size_t bitmap_bytes = image_info.image_bitmap_->Size(); @@ -2110,8 +2120,7 @@ void ImageWriter::CopyAndFixupNativeData(size_t oat_index) { // Write the intern table into the image. if (image_info.intern_table_bytes_ > 0) { - const ImageSection& intern_table_section = image_header->GetImageSection( - ImageHeader::kSectionInternedStrings); + const ImageSection& intern_table_section = image_header->GetInternedStringsSection(); InternTable* const intern_table = image_info.intern_table_.get(); uint8_t* const intern_table_memory_ptr = image_info.image_->Begin() + intern_table_section.Offset(); @@ -2130,8 +2139,7 @@ void ImageWriter::CopyAndFixupNativeData(size_t oat_index) { // Write the class table(s) into the image. class_table_bytes_ may be 0 if there are multiple // class loaders. Writing multiple class tables into the image is currently unsupported. if (image_info.class_table_bytes_ > 0u) { - const ImageSection& class_table_section = image_header->GetImageSection( - ImageHeader::kSectionClassTable); + const ImageSection& class_table_section = image_header->GetClassTableSection(); uint8_t* const class_table_memory_ptr = image_info.image_->Begin() + class_table_section.Offset(); ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_); @@ -2589,10 +2597,6 @@ void ImageWriter::CopyAndFixupMethod(ArtMethod* orig, CopyReference(copy->GetDeclaringClassAddressWithoutBarrier(), orig->GetDeclaringClassUnchecked()); - mirror::MethodDexCacheType* orig_resolved_methods = - orig->GetDexCacheResolvedMethods(target_ptr_size_); - copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods), target_ptr_size_); - // OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to // oat_begin_ diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 866e2042f7..2fc394e862 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -258,7 +258,7 @@ class ImageWriter FINAL { // Create the image sections into the out sections variable, returns the size of the image // excluding the bitmap. - size_t CreateImageSections(ImageSection* out_sections) const; + size_t CreateImageSections(ImageSection* out_sections, bool app_image) const; std::unique_ptr<MemMap> image_; // Memory mapped for generating the image. 
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index b65b93f05f..e7e4647866 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -219,7 +219,9 @@ static CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, // Assembler that holds generated instructions std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm = GetMacroAssembler<kPointerSize>(&arena, instruction_set, instruction_set_features); - jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GenerateAnyDebugInfo()); + const CompilerOptions& compiler_options = driver->GetCompilerOptions(); + jni_asm->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo()); + jni_asm->SetEmitRunTimeChecksInDebugMode(compiler_options.EmitRunTimeChecksInDebugMode()); // Offsets into data structures // TODO: if cross compiling these offsets are for the host not the target diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc index 18ff1c9bb6..4ca5afe177 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.cc +++ b/compiler/linker/arm/relative_patcher_arm_base.cc @@ -28,7 +28,7 @@ namespace linker { class ArmBaseRelativePatcher::ThunkData { public: ThunkData(std::vector<uint8_t> code, uint32_t max_next_offset) - : code_(code), + : code_(std::move(code)), offsets_(), max_next_offset_(max_next_offset), pending_offset_(0u) { diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index db829f3233..0ebabc15aa 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -63,6 +63,7 @@ inline bool IsAdrpPatch(const LinkerPatch& patch) { case LinkerPatch::Type::kTypeRelative: case LinkerPatch::Type::kTypeBssEntry: case LinkerPatch::Type::kStringRelative: + case LinkerPatch::Type::kStringInternTable: case LinkerPatch::Type::kStringBssEntry: return patch.LiteralOffset() == patch.PcInsnOffset(); } @@ -266,6 +267,7 @@ void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, // LDR/STR 32-bit or 64-bit with imm12 == 0 (unset). DCHECK(patch.GetType() == LinkerPatch::Type::kMethodBssEntry || patch.GetType() == LinkerPatch::Type::kTypeBssEntry || + patch.GetType() == LinkerPatch::Type::kStringInternTable || patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType(); DCHECK_EQ(insn & 0xbfbffc00, 0xb9000000) << std::hex << insn; } diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc index 3bec30f1e8..6c974c308f 100644 --- a/compiler/linker/mips/relative_patcher_mips.cc +++ b/compiler/linker/mips/relative_patcher_mips.cc @@ -61,10 +61,6 @@ void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, // lui reg, offset_high DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00); DCHECK_EQ((*code)[literal_offset + 3], 0x3C); - // addu reg, reg, reg2 - DCHECK_EQ((*code)[literal_offset + 4], 0x21); - DCHECK_EQ(((*code)[literal_offset + 5] & 0x07), 0x00); - DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x00); } } else { // instr reg(s), offset_low diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index d7e3a28777..a33081e033 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -62,6 +62,9 @@ namespace art { namespace { // anonymous namespace +// If we write dex layout info in the oat file. 
+static constexpr bool kWriteDexLayoutInfo = true; + typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader; const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) { @@ -288,10 +291,14 @@ class OatWriter::OatDexFile { uint32_t class_offsets_offset_; uint32_t lookup_table_offset_; uint32_t method_bss_mapping_offset_; + uint32_t dex_sections_layout_offset_; // Data to write to a separate section. dchecked_vector<uint32_t> class_offsets_; + // Dex section layout info to serialize. + DexLayoutSections dex_sections_layout_; + private: DISALLOW_COPY_AND_ASSIGN(OatDexFile); }; @@ -328,6 +335,7 @@ OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings, ProfileCo bss_method_entries_(), bss_type_entries_(), bss_string_entries_(), + map_boot_image_tables_to_bss_(false), oat_data_offset_(0u), oat_header_(nullptr), size_vdex_header_(0), @@ -362,6 +370,9 @@ OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings, ProfileCo size_oat_dex_file_offset_(0), size_oat_dex_file_class_offsets_offset_(0), size_oat_dex_file_lookup_table_offset_(0), + size_oat_dex_file_dex_layout_sections_offset_(0), + size_oat_dex_file_dex_layout_sections_(0), + size_oat_dex_file_dex_layout_sections_alignment_(0), size_oat_dex_file_method_bss_mapping_offset_(0), size_oat_lookup_table_alignment_(0), size_oat_lookup_table_(0), @@ -571,11 +582,16 @@ bool OatWriter::WriteAndOpenDexFiles( } } - // Write TypeLookupTables into OAT. + // Write type lookup tables into the oat file. if (!WriteTypeLookupTables(&checksum_updating_rodata, dex_files)) { return false; } + // Write dex layout sections into the oat file. + if (!WriteDexLayoutSections(&checksum_updating_rodata, dex_files)) { + return false; + } + *opened_dex_files_map = std::move(dex_files_map); *opened_dex_files = std::move(dex_files); write_state_ = WriteState::kPrepareLayout; @@ -756,6 +772,8 @@ class OatWriter::InitBssLayoutMethodVisitor : public DexMethodVisitor { } else if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) { StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex()); writer_->bss_string_entries_.Overwrite(ref, /* placeholder */ 0u); + } else if (patch.GetType() == LinkerPatch::Type::kStringInternTable) { + writer_->map_boot_image_tables_to_bss_ = true; } } } else { @@ -1383,6 +1401,14 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { target_offset); break; } + case LinkerPatch::Type::kStringInternTable: { + uint32_t target_offset = GetInternTableEntryOffset(patch); + writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_, + patch, + offset_ + literal_offset, + target_offset); + break; + } case LinkerPatch::Type::kStringBssEntry: { StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex()); uint32_t target_offset = @@ -1520,7 +1546,6 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } mirror::String* GetTargetString(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) { - ScopedObjectAccessUnchecked soa(Thread::Current()); ClassLinker* linker = Runtime::Current()->GetClassLinker(); mirror::String* string = linker->LookupString(*patch.TargetStringDexFile(), patch.TargetStringIndex(), @@ -1588,6 +1613,28 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { data[2] = (address >> 16) & 0xffu; data[3] = (address >> 24) & 0xffu; } + + // Calculate the offset of the InternTable slot (GcRoot<String>) when mmapped to the .bss. 
+ uint32_t GetInternTableEntryOffset(const LinkerPatch& patch) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(!writer_->HasBootImage()); + const uint8_t* string_root = writer_->LookupBootImageInternTableSlot( + *patch.TargetStringDexFile(), patch.TargetStringIndex()); + DCHECK(string_root != nullptr); + uint32_t base_offset = writer_->bss_start_; + for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) { + const uint8_t* const_tables_begin = + space->Begin() + space->GetImageHeader().GetBootImageConstantTablesOffset(); + size_t offset = static_cast<size_t>(string_root - const_tables_begin); + if (offset < space->GetImageHeader().GetBootImageConstantTablesSize()) { + DCHECK_LE(base_offset + offset, writer_->bss_start_ + writer_->bss_methods_offset_); + return base_offset + offset; + } + base_offset += space->GetImageHeader().GetBootImageConstantTablesSize(); + } + LOG(FATAL) << "Didn't find boot image string in boot image intern tables!"; + UNREACHABLE(); + } }; class OatWriter::WriteMapMethodVisitor : public OatDexMethodVisitor { @@ -1927,19 +1974,22 @@ void OatWriter::InitBssLayout(InstructionSet instruction_set) { DCHECK_EQ(bss_size_, 0u); if (HasBootImage()) { + DCHECK(!map_boot_image_tables_to_bss_); DCHECK(bss_string_entries_.empty()); - if (bss_method_entries_.empty() && bss_type_entries_.empty()) { - // Nothing to put to the .bss section. - return; - } + } + if (!map_boot_image_tables_to_bss_ && + bss_method_entries_.empty() && + bss_type_entries_.empty() && + bss_string_entries_.empty()) { + // Nothing to put to the .bss section. + return; } - // Allocate space for app dex cache arrays in the .bss section. + // Allocate space for boot image tables in the .bss section. PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set); - if (!HasBootImage()) { - for (const DexFile* dex_file : *dex_files_) { - DexCacheArraysLayout layout(pointer_size, dex_file); - bss_size_ += layout.Size(); + if (map_boot_image_tables_to_bss_) { + for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) { + bss_size_ += space->GetImageHeader().GetBootImageConstantTablesSize(); } } @@ -2320,6 +2370,9 @@ bool OatWriter::WriteCode(OutputStream* out) { DO_STAT(size_oat_dex_file_offset_); DO_STAT(size_oat_dex_file_class_offsets_offset_); DO_STAT(size_oat_dex_file_lookup_table_offset_); + DO_STAT(size_oat_dex_file_dex_layout_sections_offset_); + DO_STAT(size_oat_dex_file_dex_layout_sections_); + DO_STAT(size_oat_dex_file_dex_layout_sections_alignment_); DO_STAT(size_oat_dex_file_method_bss_mapping_offset_); DO_STAT(size_oat_lookup_table_alignment_); DO_STAT(size_oat_lookup_table_); @@ -2775,7 +2828,12 @@ bool OatWriter::LayoutAndWriteDexFile(OutputStream* out, OatDexFile* oat_dex_fil &error_msg); } else if (oat_dex_file->source_.IsRawFile()) { File* raw_file = oat_dex_file->source_.GetRawFile(); - dex_file = DexFile::OpenDex(raw_file->Fd(), location, /* verify_checksum */ true, &error_msg); + int dup_fd = dup(raw_file->Fd()); + if (dup_fd < 0) { + PLOG(ERROR) << "Failed to dup dex file descriptor (" << raw_file->Fd() << ") at " << location; + return false; + } + dex_file = DexFile::OpenDex(dup_fd, location, /* verify_checksum */ true, &error_msg); } else { // The source data is a vdex file. 
CHECK(oat_dex_file->source_.IsRawData()) @@ -2808,6 +2866,7 @@ bool OatWriter::LayoutAndWriteDexFile(OutputStream* out, OatDexFile* oat_dex_fil if (!WriteDexFile(out, oat_dex_file, mem_map->Begin(), /* update_input_vdex */ false)) { return false; } + oat_dex_file->dex_sections_layout_ = dex_layout.GetSections(); // Set the checksum of the new oat dex file to be the original file's checksum. oat_dex_file->dex_file_location_checksum_ = dex_file->GetLocationChecksum(); return true; @@ -3153,6 +3212,70 @@ bool OatWriter::WriteTypeLookupTables( return true; } +bool OatWriter::WriteDexLayoutSections( + OutputStream* oat_rodata, + const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files) { + TimingLogger::ScopedTiming split(__FUNCTION__, timings_); + + if (!kWriteDexLayoutInfo) { + return true; + } + + uint32_t expected_offset = oat_data_offset_ + oat_size_; + off_t actual_offset = oat_rodata->Seek(expected_offset, kSeekSet); + if (static_cast<uint32_t>(actual_offset) != expected_offset) { + PLOG(ERROR) << "Failed to seek to dex layout section offset. Actual: " << actual_offset + << " Expected: " << expected_offset << " File: " << oat_rodata->GetLocation(); + return false; + } + + DCHECK_EQ(opened_dex_files.size(), oat_dex_files_.size()); + size_t rodata_offset = oat_size_; + for (size_t i = 0, size = opened_dex_files.size(); i != size; ++i) { + OatDexFile* oat_dex_file = &oat_dex_files_[i]; + DCHECK_EQ(oat_dex_file->dex_sections_layout_offset_, 0u); + + // Write dex layout section alignment bytes. + const size_t padding_size = + RoundUp(rodata_offset, alignof(DexLayoutSections)) - rodata_offset; + if (padding_size != 0u) { + std::vector<uint8_t> buffer(padding_size, 0u); + if (!oat_rodata->WriteFully(buffer.data(), padding_size)) { + PLOG(ERROR) << "Failed to write dex layout section alignment padding." + << " File: " << oat_dex_file->GetLocation() + << " Output: " << oat_rodata->GetLocation(); + return false; + } + size_oat_dex_file_dex_layout_sections_alignment_ += padding_size; + rodata_offset += padding_size; + } + + DCHECK_ALIGNED(rodata_offset, alignof(DexLayoutSections)); + DCHECK_EQ(oat_data_offset_ + rodata_offset, + static_cast<size_t>(oat_rodata->Seek(0u, kSeekCurrent))); + DCHECK(oat_dex_file != nullptr); + if (!oat_rodata->WriteFully(&oat_dex_file->dex_sections_layout_, + sizeof(oat_dex_file->dex_sections_layout_))) { + PLOG(ERROR) << "Failed to write dex layout sections." + << " File: " << oat_dex_file->GetLocation() + << " Output: " << oat_rodata->GetLocation(); + return false; + } + oat_dex_file->dex_sections_layout_offset_ = rodata_offset; + size_oat_dex_file_dex_layout_sections_ += sizeof(oat_dex_file->dex_sections_layout_); + rodata_offset += sizeof(oat_dex_file->dex_sections_layout_); + } + oat_size_ = rodata_offset; + + if (!oat_rodata->Flush()) { + PLOG(ERROR) << "Failed to flush stream after writing dex layout sections." 
+ << " File: " << oat_rodata->GetLocation(); + return false; + } + + return true; +} + bool OatWriter::WriteChecksumsAndVdexHeader(OutputStream* vdex_out) { if (!kIsVdexEnabled) { return true; @@ -3252,6 +3375,7 @@ OatWriter::OatDexFile::OatDexFile(const char* dex_file_location, class_offsets_offset_(0u), lookup_table_offset_(0u), method_bss_mapping_offset_(0u), + dex_sections_layout_offset_(0u), class_offsets_() { } @@ -3262,7 +3386,8 @@ size_t OatWriter::OatDexFile::SizeOf() const { + sizeof(dex_file_offset_) + sizeof(class_offsets_offset_) + sizeof(lookup_table_offset_) - + sizeof(method_bss_mapping_offset_); + + sizeof(method_bss_mapping_offset_) + + sizeof(dex_sections_layout_offset_); } bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const { @@ -3305,6 +3430,12 @@ bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) cons } oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_); + if (!out->WriteFully(&dex_sections_layout_offset_, sizeof(dex_sections_layout_offset_))) { + PLOG(ERROR) << "Failed to write dex section layout info to " << out->GetLocation(); + return false; + } + oat_writer->size_oat_dex_file_dex_layout_sections_offset_ += sizeof(dex_sections_layout_offset_); + if (!out->WriteFully(&method_bss_mapping_offset_, sizeof(method_bss_mapping_offset_))) { PLOG(ERROR) << "Failed to write method bss mapping offset to " << out->GetLocation(); return false; @@ -3409,4 +3540,25 @@ bool OatWriter::OatClass::Write(OatWriter* oat_writer, OutputStream* out) const return true; } +const uint8_t* OatWriter::LookupBootImageInternTableSlot(const DexFile& dex_file, + dex::StringIndex string_idx) + NO_THREAD_SAFETY_ANALYSIS { + // Single-threaded OatWriter can avoid locking. + uint32_t utf16_length; + const char* utf8_data = dex_file.StringDataAndUtf16LengthByIdx(string_idx, &utf16_length); + DCHECK_EQ(utf16_length, CountModifiedUtf8Chars(utf8_data)); + InternTable::Utf8String string(utf16_length, + utf8_data, + ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length)); + const InternTable* intern_table = Runtime::Current()->GetClassLinker()->intern_table_; + for (const InternTable::Table::UnorderedSet& table : intern_table->strong_interns_.tables_) { + auto it = table.Find(string); + if (it != table.end()) { + return reinterpret_cast<const uint8_t*>(std::addressof(*it)); + } + } + LOG(FATAL) << "Did not find boot image string " << utf8_data; + UNREACHABLE(); +} + } // namespace art diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index 470d69edb3..780dee0bac 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -324,6 +324,8 @@ class OatWriter { bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location); bool WriteTypeLookupTables(OutputStream* oat_rodata, const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files); + bool WriteDexLayoutSections(OutputStream* oat_rodata, + const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files); bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta); bool WriteUpTo16BytesAlignment(OutputStream* out, uint32_t size, uint32_t* stat); void SetMultiOatRelativePatcherAdjustment(); @@ -331,6 +333,10 @@ class OatWriter { bool MayHaveCompiledMethods() const; + // Find the address of the GcRoot<String> in the InternTable for a boot image string. 
+ const uint8_t* LookupBootImageInternTableSlot(const DexFile& dex_file, + dex::StringIndex string_idx); + enum class WriteState { kAddingDexFileSources, kPrepareLayout, @@ -405,6 +411,10 @@ class OatWriter { // is the target offset for patching, starting at `bss_start_ + bss_roots_offset_`. SafeMap<StringReference, size_t, StringReferenceValueComparator> bss_string_entries_; + // Whether boot image tables should be mapped to the .bss. This is needed for compiled + // code that reads from these tables with PC-relative instructions. + bool map_boot_image_tables_to_bss_; + // Offset of the oat data from the start of the mmapped region of the elf file. size_t oat_data_offset_; @@ -455,6 +465,9 @@ class OatWriter { uint32_t size_oat_dex_file_offset_; uint32_t size_oat_dex_file_class_offsets_offset_; uint32_t size_oat_dex_file_lookup_table_offset_; + uint32_t size_oat_dex_file_dex_layout_sections_offset_; + uint32_t size_oat_dex_file_dex_layout_sections_; + uint32_t size_oat_dex_file_dex_layout_sections_alignment_; uint32_t size_oat_dex_file_method_bss_mapping_offset_; uint32_t size_oat_lookup_table_alignment_; uint32_t size_oat_lookup_table_; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index c166deb406..2f96cfa382 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1121,6 +1121,66 @@ class BCEVisitor : public HGraphVisitor { } } + void VisitRem(HRem* instruction) OVERRIDE { + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + + // Handle an 'i % CONST' expression used as an array index, e.g.: + // array[i % 20]; + if (right->IsIntConstant()) { + int32_t right_const = std::abs(right->AsIntConstant()->GetValue()); + if (right_const == 0) { + return; + } + // The sign of the divisor CONST doesn't affect the final value range. + // For example: + // if (i > 0) { + // array[i % 10]; // index value range [0, 9] + // array[i % -10]; // index value range [0, 9] + // } + ValueRange* right_range = new (GetGraph()->GetArena()) ValueRange( + GetGraph()->GetArena(), + ValueBound(nullptr, 1 - right_const), + ValueBound(nullptr, right_const - 1)); + + ValueRange* left_range = LookupValueRange(left, left->GetBlock()); + if (left_range != nullptr) { + right_range = left_range->Narrow(right_range); + } + AssignRange(instruction->GetBlock(), instruction, right_range); + return; + } + + // Handle the following pattern: + // i0 NullCheck + // i1 ArrayLength[i0] + // i2 DivByZeroCheck [i1] <-- right + // i3 Rem [i5, i2] <-- we are here. + // i4 BoundsCheck [i3,i1] + if (right->IsDivZeroCheck()) { + // If array_length passes the div-by-zero check, + // array_length must be > 0. + right = right->AsDivZeroCheck()->InputAt(0); + } + + // Handle an 'i % array.length' expression used as an array index, e.g.: + // array[(i+7) % array.length]; + if (right->IsArrayLength()) { + ValueBound lower = ValueBound::Min(); // ideally, lower should be '1-array_length'. 
+ ValueBound upper = ValueBound(right, -1); // array_length - 1 + ValueRange* right_range = new (GetGraph()->GetArena()) ValueRange( + GetGraph()->GetArena(), + lower, + upper); + ValueRange* left_range = LookupValueRange(left, left->GetBlock()); + if (left_range != nullptr) { + right_range = left_range->Narrow(right_range); + } + AssignRange(instruction->GetBlock(), instruction, right_range); + return; + } + } + void VisitNewArray(HNewArray* new_array) OVERRIDE { HInstruction* len = new_array->GetLength(); if (!len->IsIntConstant()) { diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 575e2fc24a..2aaf05833c 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -951,4 +951,152 @@ TEST_F(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { ASSERT_TRUE(IsRemoved(bounds_check6)); } +// int[] array = new int[10]; +// for (int i=0; i<200; i++) { +// array[i%10] = 10; // Can eliminate +// array[i%1] = 10; // Can eliminate +// array[i%200] = 10; // Cannot eliminate +// array[i%-10] = 10; // Can eliminate +// array[i%array.length] = 10; // Can eliminate +// array[param_i%10] = 10; // Can't eliminate, when param_i < 0 +// } +TEST_F(BoundsCheckEliminationTest, ModArrayBoundsElimination) { + HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(entry); + graph_->SetEntryBlock(entry); + HInstruction* param_i = new (&allocator_) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); + entry->AddInstruction(param_i); + + HInstruction* constant_0 = graph_->GetIntConstant(0); + HInstruction* constant_1 = graph_->GetIntConstant(1); + HInstruction* constant_10 = graph_->GetIntConstant(10); + HInstruction* constant_200 = graph_->GetIntConstant(200); + HInstruction* constant_minus_10 = graph_->GetIntConstant(-10); + + HBasicBlock* block = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(block); + entry->AddSuccessor(block); + // We pass a bogus constant for the class to avoid mocking one. 
+ HInstruction* new_array = new (&allocator_) HNewArray(constant_10, constant_10, 0); + block->AddInstruction(new_array); + block->AddInstruction(new (&allocator_) HGoto()); + + HBasicBlock* loop_header = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_); + + graph_->AddBlock(loop_header); + graph_->AddBlock(loop_body); + graph_->AddBlock(exit); + block->AddSuccessor(loop_header); + loop_header->AddSuccessor(exit); // true successor + loop_header->AddSuccessor(loop_body); // false successor + loop_body->AddSuccessor(loop_header); + + HPhi* phi = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt); + HInstruction* cmp = new (&allocator_) HGreaterThanOrEqual(phi, constant_200); + HInstruction* if_inst = new (&allocator_) HIf(cmp); + loop_header->AddPhi(phi); + loop_header->AddInstruction(cmp); + loop_header->AddInstruction(if_inst); + phi->AddInput(constant_0); + + ////////////////////////////////////////////////////////////////////////////////// + // LOOP BODY: + // array[i % 10] = 10; + HRem* i_mod_10 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_10, 0); + HBoundsCheck* bounds_check_i_mod_10 = new (&allocator_) HBoundsCheck(i_mod_10, constant_10, 0); + HInstruction* array_set = new (&allocator_) HArraySet( + new_array, bounds_check_i_mod_10, constant_10, Primitive::kPrimInt, 0); + loop_body->AddInstruction(i_mod_10); + loop_body->AddInstruction(bounds_check_i_mod_10); + loop_body->AddInstruction(array_set); + + // array[i % 1] = 10; + HRem* i_mod_1 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_1, 0); + HBoundsCheck* bounds_check_i_mod_1 = new (&allocator_) HBoundsCheck(i_mod_1, constant_10, 0); + array_set = new (&allocator_) HArraySet( + new_array, bounds_check_i_mod_1, constant_10, Primitive::kPrimInt, 0); + loop_body->AddInstruction(i_mod_1); + loop_body->AddInstruction(bounds_check_i_mod_1); + loop_body->AddInstruction(array_set); + + // array[i % 200] = 10; + HRem* i_mod_200 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_1, 0); + HBoundsCheck* bounds_check_i_mod_200 = new (&allocator_) HBoundsCheck(i_mod_200, constant_10, 0); + array_set = new (&allocator_) HArraySet( + new_array, bounds_check_i_mod_200, constant_10, Primitive::kPrimInt, 0); + loop_body->AddInstruction(i_mod_200); + loop_body->AddInstruction(bounds_check_i_mod_200); + loop_body->AddInstruction(array_set); + + // array[i % -10] = 10; + HRem* i_mod_minus_10 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_minus_10, 0); + HBoundsCheck* bounds_check_i_mod_minus_10 = new (&allocator_) HBoundsCheck( + i_mod_minus_10, constant_10, 0); + array_set = new (&allocator_) HArraySet( + new_array, bounds_check_i_mod_minus_10, constant_10, Primitive::kPrimInt, 0); + loop_body->AddInstruction(i_mod_minus_10); + loop_body->AddInstruction(bounds_check_i_mod_minus_10); + loop_body->AddInstruction(array_set); + + // array[i%array.length] = 10; + HNullCheck* null_check = new (&allocator_) HNullCheck(new_array, 0); + HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0); + HRem* i_mod_array_length = new (&allocator_) HRem(Primitive::kPrimInt, phi, array_length, 0); + HBoundsCheck* bounds_check_i_mod_array_len = new (&allocator_) HBoundsCheck( + i_mod_array_length, array_length, 0); + array_set = new (&allocator_) HArraySet( + null_check, bounds_check_i_mod_array_len, constant_10, Primitive::kPrimInt, 0); + loop_body->AddInstruction(null_check); 
+ loop_body->AddInstruction(array_length); + loop_body->AddInstruction(i_mod_array_length); + loop_body->AddInstruction(bounds_check_i_mod_array_len); + loop_body->AddInstruction(array_set); + + // array[param_i % 10] = 10; + HRem* param_i_mod_10 = new (&allocator_) HRem(Primitive::kPrimInt, param_i, constant_10, 0); + HBoundsCheck* bounds_check_param_i_mod_10 = new (&allocator_) HBoundsCheck( + param_i_mod_10, constant_10, 0); + array_set = new (&allocator_) HArraySet( + new_array, bounds_check_param_i_mod_10, constant_10, Primitive::kPrimInt, 0); + loop_body->AddInstruction(param_i_mod_10); + loop_body->AddInstruction(bounds_check_param_i_mod_10); + loop_body->AddInstruction(array_set); + + // array[param_i%array.length] = 10; + null_check = new (&allocator_) HNullCheck(new_array, 0); + array_length = new (&allocator_) HArrayLength(null_check, 0); + HRem* param_i_mod_array_length = new (&allocator_) HRem( + Primitive::kPrimInt, param_i, array_length, 0); + HBoundsCheck* bounds_check_param_i_mod_array_len = new (&allocator_) HBoundsCheck( + param_i_mod_array_length, array_length, 0); + array_set = new (&allocator_) HArraySet( + null_check, bounds_check_param_i_mod_array_len, constant_10, Primitive::kPrimInt, 0); + loop_body->AddInstruction(null_check); + loop_body->AddInstruction(array_length); + loop_body->AddInstruction(param_i_mod_array_length); + loop_body->AddInstruction(bounds_check_param_i_mod_array_len); + loop_body->AddInstruction(array_set); + + // i++; + HInstruction* add = new (&allocator_) HAdd(Primitive::kPrimInt, phi, constant_1); + loop_body->AddInstruction(add); + loop_body->AddInstruction(new (&allocator_) HGoto()); + phi->AddInput(add); + ////////////////////////////////////////////////////////////////////////////////// + + exit->AddInstruction(new (&allocator_) HExit()); + + RunBCE(); + + ASSERT_TRUE(IsRemoved(bounds_check_i_mod_10)); + ASSERT_TRUE(IsRemoved(bounds_check_i_mod_1)); + ASSERT_TRUE(IsRemoved(bounds_check_i_mod_200)); + ASSERT_TRUE(IsRemoved(bounds_check_i_mod_minus_10)); + ASSERT_TRUE(IsRemoved(bounds_check_i_mod_array_len)); + ASSERT_FALSE(IsRemoved(bounds_check_param_i_mod_10)); +} + } // namespace art diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 4999950600..1b628688ec 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -435,11 +435,11 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { // The string entry page address was preserved in temp_ thanks to kSaveEverything. } else { // For non-Baker read barrier, we need to re-calculate the address of the string entry page. 
- adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index); + adrp_label_ = arm64_codegen->NewStringBssEntryPatch(dex_file, string_index); arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_); } vixl::aarch64::Label* strp_label = - arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_); + arm64_codegen->NewStringBssEntryPatch(dex_file, string_index, adrp_label_); { SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler()); __ Bind(strp_label); @@ -1463,6 +1463,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -1595,6 +1596,8 @@ void CodeGeneratorARM64::GenerateFrameEntry() { __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag())); } } + + MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void CodeGeneratorARM64::GenerateFrameExit() { @@ -3587,6 +3590,7 @@ void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* s } if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } if (!codegen_->GoesToNextBlock(block, successor)) { __ B(codegen_->GetLabelOf(successor)); @@ -4391,6 +4395,7 @@ void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { @@ -4459,6 +4464,8 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } + + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -4626,6 +4633,7 @@ void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { codegen_->GenerateInvokePolymorphicCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch( @@ -4668,6 +4676,13 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch( NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_); } +vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch( + const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label) { + return NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_); +} + vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) { baker_read_barrier_patches_.emplace_back(custom_data); return &baker_read_barrier_patches_.back().label; @@ -4757,6 +4772,7 @@ void 
CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc pc_relative_type_patches_.size() + type_bss_entry_patches_.size() + pc_relative_string_patches_.size() + + string_bss_entry_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { @@ -4769,13 +4785,15 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc } else { DCHECK(pc_relative_method_patches_.empty()); DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, - linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_, + linker_patches); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(), info.custom_data)); @@ -4801,27 +4819,37 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); return; } - // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there - // are no pools emitted. - EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); - LocationSummary* locations = invoke->GetLocations(); - codegen_->GenerateStaticOrDirectCall( - invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); + { + // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there + // are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); + LocationSummary* locations = invoke->GetLocations(); + codegen_->GenerateStaticOrDirectCall( + invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); + } + + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { if (TryGenerateIntrinsicCode(invoke, codegen_)) { + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); return; } - // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there - // are no pools emitted. - EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); - codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - DCHECK(!codegen_->IsLeafMethod()); + { + // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there + // are no pools emitted. 
+ EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); + codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); + DCHECK(!codegen_->IsLeafMethod()); + } + + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( @@ -4895,6 +4923,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); return; } DCHECK(!cls->NeedsAccessCheck()); @@ -4995,6 +5024,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA } else { __ Bind(slow_path->GetExitLabel()); } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } } @@ -5024,6 +5054,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5071,24 +5102,37 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); // Add ADRP with its PC-relative String patch. const DexFile& dex_file = load->GetDexFile(); const dex::StringIndex string_index = load->GetStringIndex(); - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative String patch. vixl::aarch64::Label* add_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); - return; // No dex cache slow path. + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(load->GetString().Get())); DCHECK_NE(address, 0u); __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); - return; // No dex cache slow path. + return; + } + case HLoadString::LoadKind::kBootImageInternTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + // Add ADRP with its PC-relative String patch. + const DexFile& dex_file = load->GetDexFile(); + const dex::StringIndex string_index = load->GetStringIndex(); + vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); + // Add LDR with its PC-relative String patch. + vixl::aarch64::Label* ldr_label = + codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); + return; } case HLoadString::LoadKind::kBssEntry: { // Add ADRP with its PC-relative String .bss entry patch. 
@@ -5096,11 +5140,11 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD const dex::StringIndex string_index = load->GetStringIndex(); DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0)); - vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); + vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, temp); - // Add LDR with its PC-relative String patch. + // Add LDR with its .bss entry String patch. vixl::aarch64::Label* ldr_label = - codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label); // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ GenerateGcRootFieldLoad(load, out_loc, @@ -5113,6 +5157,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD codegen_->AddSlowPath(slow_path); __ Cbz(out.X(), slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); return; } case HLoadString::LoadKind::kJitTableAddress: { @@ -5137,6 +5182,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { @@ -5164,6 +5210,7 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins } else { CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::VisitMul(HMul* mul) { @@ -5260,6 +5307,7 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { @@ -5296,6 +5344,7 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::VisitNot(HNot* instruction) { @@ -5644,6 +5693,7 @@ void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction return; } GenerateSuspendCheck(instruction, nullptr); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::VisitThrow(HThrow* instruction) { @@ -6021,6 +6071,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. 
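For context on the kBootImageInternTable case handled above: a position-independent app cannot embed the absolute address of a boot-image string, so it now loads the reference out of the boot image's intern table (ADRP+LDR) instead of routing it through a .bss slot. A rough standalone decision model of how the load kinds relate, assuming the usual configurations; the enum names mirror HLoadString::LoadKind, but the predicates and helper are illustrative only, not the compiler's actual sharpening logic:

enum class StringLoadKind {
  kBootImageLinkTimePcRelative,  // ADRP+ADD pair, patched at link time
  kBootImageAddress,             // absolute address literal
  kBootImageInternTable,         // ADRP+LDR through the boot image intern table
  kBssEntry,                     // ADRP+LDR from .bss, slow path on first use
  kJitTableAddress,              // JIT roots table
};

StringLoadKind ChooseStringLoadKind(bool compiling_boot_image,
                                    bool string_in_boot_image,
                                    bool pic_codegen,
                                    bool jit_compilation) {
  if (compiling_boot_image) {
    return StringLoadKind::kBootImageLinkTimePcRelative;
  }
  if (string_in_boot_image) {
    // A direct pointer is only usable when absolute addresses may be embedded;
    // position-independent app code goes through the intern table instead.
    return pic_codegen ? StringLoadKind::kBootImageInternTable
                       : StringLoadKind::kBootImageAddress;
  }
  return jit_compilation ? StringLoadKind::kJitTableAddress
                         : StringLoadKind::kBssEntry;
}

int main() {
  // A position-independent app referencing a boot-image string takes the new
  // intern table path.
  return ChooseStringLoadKind(false, true, true, false) ==
                 StringLoadKind::kBootImageInternTable
             ? 0
             : 1;
}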
} + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -6074,22 +6125,25 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins obj.GetCode()); vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); - EmissionCheckScope guard(GetVIXLAssembler(), - (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); - vixl::aarch64::Label return_address; - __ adr(lr, &return_address); - __ Bind(cbnz_label); - __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); - __ ldr(ref_reg, MemOperand(base.X(), offset)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); + { + EmissionCheckScope guard(GetVIXLAssembler(), + (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + __ Bind(cbnz_label); + __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Field LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + __ ldr(ref_reg, MemOperand(base.X(), offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); } - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - __ Bind(&return_address); + MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); return; } @@ -6158,19 +6212,22 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); __ Add(temp.X(), obj.X(), Operand(data_offset)); - EmissionCheckScope guard(GetVIXLAssembler(), - (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); - vixl::aarch64::Label return_address; - __ adr(lr, &return_address); - __ Bind(cbnz_label); - __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. - static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Array LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); - DCHECK(!needs_null_check); // The thunk cannot handle the null check. - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - __ Bind(&return_address); + { + EmissionCheckScope guard(GetVIXLAssembler(), + (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + __ Bind(cbnz_label); + __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? 
-8 : -4), + "Array LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); + } + MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); return; } @@ -6247,6 +6304,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* GenerateRawReferenceLoad( instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); __ Bind(slow_path->GetExitLabel()); + MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, @@ -6303,6 +6361,7 @@ void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* // Fast path: the GC is not marking: nothing to do (the field is // up-to-date, and we don't need to load the reference). __ Bind(slow_path->GetExitLabel()); + MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, @@ -6381,6 +6440,19 @@ void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); } +void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { + // The following condition is a compile-time one, so it does not have a run-time cost. + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) { + // The following condition is a run-time one; it is executed after the + // previous compile-time test, to avoid penalizing non-debug builds. + if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW(); + GetAssembler()->GenerateMarkingRegisterCheck(temp, code); + } + } +} + void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction, Location out, Location ref, diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 584eead81b..69c511907e 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -599,6 +599,14 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::StringIndex string_index, vixl::aarch64::Label* adrp_label = nullptr); + // Add a new .bss entry string patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewStringBssEntryPatch(const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label = nullptr); + // Add a new baker read barrier patch and return the label to be bound // before the CBNZ instruction. vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data); @@ -687,6 +695,22 @@ class CodeGeneratorARM64 : public CodeGenerator { bool needs_null_check, bool use_load_acquire); + // Emit code checking the status of the Marking Register, and + // aborting the program if MR does not match the value stored in the + // art::Thread object. 
Code is only emitted in debug mode and if + // CompilerOptions::EmitRunTimeChecksInDebugMode returns true. + // + // Argument `code` is used to identify the different occurrences of + // MaybeGenerateMarkingRegisterCheck in the code generator, and is + // passed to the BRK instruction. + // + // If `temp_loc` is a valid location, it is expected to be a + // register and will be used as a temporary to generate code; + // otherwise, a temporary will be fetched from the core register + // scratch pool. + virtual void MaybeGenerateMarkingRegisterCheck(int code, + Location temp_loc = Location::NoLocation()); + // Generate a read barrier for a heap reference within `instruction` // using a slow path. // @@ -809,8 +833,10 @@ class CodeGeneratorARM64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative String patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 430cdde1f7..8288141954 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -94,6 +94,9 @@ constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; // The reserved entrypoint register for link-time generated thunks. const vixl32::Register kBakerCcEntrypointRegister = r4; +// Using a base helps identify when we hit Marking Register check breakpoints. +constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10; + #ifdef __ #error "ARM Codegen VIXL macro-assembler macro already defined." #endif @@ -595,7 +598,7 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL { down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + arm_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index); arm_codegen->EmitMovwMovtPlaceholder(labels, temp); __ Str(r0, MemOperand(temp)); } @@ -1872,15 +1875,26 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( case kCondBE: case kCondA: case kCondAE: { + const uint32_t value_low = Low32Bits(value); + Operand operand_low(value_low); + __ Cmp(left_high, High32Bits(value)); + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we must ensure that the operands corresponding to the least significant + // halves of the inputs fit into a 16-bit CMP encoding. + if (!left_low.IsLow() || !IsUint<8>(value_low)) { + operand_low = Operand(temps.Acquire()); + __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low); + } + // We use the scope because of the IT block that follows. 
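The condition above is a Thumb encodability question: for the CMP to legally stay inside the IT block on ARMv8, it must use the 16-bit T1 encoding, which requires a low register and an 8-bit unsigned immediate. A standalone restatement of that predicate (the function name and register-number convention are illustrative only):

#include <cassert>
#include <cstdint>

// True when CMP <Rn>, #imm can use the 16-bit T1 encoding: Rn in r0-r7 and an
// 8-bit unsigned immediate.
bool FitsNarrowCmpImmediate(int reg_number, uint32_t imm) {
  const bool low_register = reg_number >= 0 && reg_number < 8;
  const bool fits_8_bits = imm <= 0xffu;
  return low_register && fits_8_bits;
}

int main() {
  assert(FitsNarrowCmpImmediate(3, 0x7f));    // stays inside the IT block
  assert(!FitsNarrowCmpImmediate(3, 0x100));  // immediate too wide: move it to a temp first
  assert(!FitsNarrowCmpImmediate(9, 0x7f));   // high register: move the value to a temp first
  return 0;
}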
ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 2 * vixl32::k16BitT32InstructionSizeInBytes, CodeBufferCheckScope::kExactSize); __ it(eq); - __ cmp(eq, left_low, Low32Bits(value)); + __ cmp(eq, left_low, operand_low); ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite)); break; } @@ -2022,46 +2036,7 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* return ret; } -static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) { - if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { - const LocationSummary* const locations = condition->GetLocations(); - - if (locations->InAt(1).IsConstant()) { - IfCondition c = condition->GetCondition(); - IfCondition opposite = condition->GetOppositeCondition(); - const int64_t value = - AdjustConstantForCondition(Int64ConstantFrom(locations->InAt(1)), &c, &opposite); - - if (c < kCondLT || c > kCondGE) { - // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, - // we check that the least significant half of the first input to be compared - // is in a low register (the other half is read outside an IT block), and - // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used; 0 is always handled, no matter what registers are - // used by the first input. - if (value != 0 && - (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value)))) { - return false; - } - // TODO(VIXL): The rest of the checks are there to keep the backend in sync with - // the previous one, but are not strictly necessary. - } else if (c == kCondLE || c == kCondGT) { - if (value < std::numeric_limits<int64_t>::max() && - !assembler->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) { - return false; - } - } else if (!assembler->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) { - return false; - } - } - } - - return true; -} - static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) { - DCHECK(CanGenerateTest(cond, codegen->GetAssembler())); - const vixl32::Register out = OutputRegister(cond); const auto condition = GenerateTest(cond, false, codegen); @@ -2144,91 +2119,6 @@ static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { } } -static void GenerateLongComparesAndJumps(HCondition* cond, - vixl32::Label* true_label, - vixl32::Label* false_label, - CodeGeneratorARMVIXL* codegen, - bool is_far_target = true) { - LocationSummary* locations = cond->GetLocations(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - IfCondition if_cond = cond->GetCondition(); - - vixl32::Register left_high = HighRegisterFrom(left); - vixl32::Register left_low = LowRegisterFrom(left); - IfCondition true_high_cond = if_cond; - IfCondition false_high_cond = cond->GetOppositeCondition(); - vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part - - // Set the conditions for the test, remembering that == needs to be - // decided using the low words. - switch (if_cond) { - case kCondEQ: - case kCondNE: - // Nothing to do. 
- break; - case kCondLT: - false_high_cond = kCondGT; - break; - case kCondLE: - true_high_cond = kCondLT; - break; - case kCondGT: - false_high_cond = kCondLT; - break; - case kCondGE: - true_high_cond = kCondGT; - break; - case kCondB: - false_high_cond = kCondA; - break; - case kCondBE: - true_high_cond = kCondB; - break; - case kCondA: - false_high_cond = kCondB; - break; - case kCondAE: - true_high_cond = kCondA; - break; - } - if (right.IsConstant()) { - int64_t value = Int64ConstantFrom(right); - int32_t val_low = Low32Bits(value); - int32_t val_high = High32Bits(value); - - __ Cmp(left_high, val_high); - if (if_cond == kCondNE) { - __ B(ARMCondition(true_high_cond), true_label, is_far_target); - } else if (if_cond == kCondEQ) { - __ B(ARMCondition(false_high_cond), false_label, is_far_target); - } else { - __ B(ARMCondition(true_high_cond), true_label, is_far_target); - __ B(ARMCondition(false_high_cond), false_label, is_far_target); - } - // Must be equal high, so compare the lows. - __ Cmp(left_low, val_low); - } else { - vixl32::Register right_high = HighRegisterFrom(right); - vixl32::Register right_low = LowRegisterFrom(right); - - __ Cmp(left_high, right_high); - if (if_cond == kCondNE) { - __ B(ARMCondition(true_high_cond), true_label, is_far_target); - } else if (if_cond == kCondEQ) { - __ B(ARMCondition(false_high_cond), false_label, is_far_target); - } else { - __ B(ARMCondition(true_high_cond), true_label, is_far_target); - __ B(ARMCondition(false_high_cond), false_label, is_far_target); - } - // Must be equal high, so compare the lows. - __ Cmp(left_low, right_low); - } - // The last comparison might be unsigned. - // TODO: optimize cases where this is always true/false - __ B(final_condition, true_label, is_far_target); -} - static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); @@ -2283,38 +2173,14 @@ static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codege } } - if ((condition == kCondEQ || condition == kCondNE) && - // If `out` is a low register, then the GenerateConditionGeneric() - // function generates a shorter code sequence that is still branchless. - (!out.IsLow() || !CanGenerateTest(cond, codegen->GetAssembler()))) { + // If `out` is a low register, then the GenerateConditionGeneric() + // function generates a shorter code sequence that is still branchless. + if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) { GenerateEqualLong(cond, codegen); return; } - if (CanGenerateTest(cond, codegen->GetAssembler())) { - GenerateConditionGeneric(cond, codegen); - return; - } - - // Convert the jumps into the result. - vixl32::Label done_label; - vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); - vixl32::Label true_label, false_label; - - GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen, /* is_far_target */ false); - - // False case: result = 0. - __ Bind(&false_label); - __ Mov(out, 0); - __ B(final_label); - - // True case: result = 1. 
- __ Bind(&true_label); - __ Mov(out, 1); - - if (done_label.IsReferenced()) { - __ Bind(&done_label); - } + GenerateConditionGeneric(cond, codegen); } static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, @@ -2514,6 +2380,7 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -2690,6 +2557,8 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { __ Mov(temp, 0); GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag()); } + + MaybeGenerateMarkingRegisterCheck(/* code */ 1); } void CodeGeneratorARMVIXL::GenerateFrameExit() { @@ -2938,6 +2807,7 @@ void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* } if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 2); } if (!codegen_->GoesToNextBlock(block, successor)) { __ B(codegen_->GetLabelOf(successor)); @@ -2971,56 +2841,41 @@ void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, - vixl32::Label* true_target_in, - vixl32::Label* false_target_in, + vixl32::Label* true_target, + vixl32::Label* false_target, bool is_far_target) { - if (CanGenerateTest(condition, codegen_->GetAssembler())) { - vixl32::Label* non_fallthrough_target; - bool invert; - bool emit_both_branches; - - if (true_target_in == nullptr) { - // The true target is fallthrough. - DCHECK(false_target_in != nullptr); - non_fallthrough_target = false_target_in; - invert = true; - emit_both_branches = false; - } else { - non_fallthrough_target = true_target_in; - invert = false; - // Either the false target is fallthrough, or there is no fallthrough - // and both branches must be emitted. - emit_both_branches = (false_target_in != nullptr); - } - - const auto cond = GenerateTest(condition, invert, codegen_); - - __ B(cond.first, non_fallthrough_target, is_far_target); + if (true_target == false_target) { + DCHECK(true_target != nullptr); + __ B(true_target); + return; + } - if (emit_both_branches) { - // No target falls through, we need to branch. - __ B(false_target_in); - } + vixl32::Label* non_fallthrough_target; + bool invert; + bool emit_both_branches; - return; + if (true_target == nullptr) { + // The true target is fallthrough. + DCHECK(false_target != nullptr); + non_fallthrough_target = false_target; + invert = true; + emit_both_branches = false; + } else { + non_fallthrough_target = true_target; + invert = false; + // Either the false target is fallthrough, or there is no fallthrough + // and both branches must be emitted. + emit_both_branches = (false_target != nullptr); } - // Generated branching requires both targets to be explicit. If either of the - // targets is nullptr (fallthrough) use and bind `fallthrough` instead. - vixl32::Label fallthrough; - vixl32::Label* true_target = (true_target_in == nullptr) ? 
&fallthrough : true_target_in; - vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in; + const auto cond = GenerateTest(condition, invert, codegen_); - DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); - GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_, is_far_target); + __ B(cond.first, non_fallthrough_target, is_far_target); - if (false_target != &fallthrough) { + if (emit_both_branches) { + // No target falls through, we need to branch. __ B(false_target); } - - if (fallthrough.IsReferenced()) { - __ Bind(&fallthrough); - } } void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction, @@ -3215,9 +3070,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { return; } - if (!Primitive::IsFloatingPointType(type) && - (IsBooleanValueOrMaterializedCondition(condition) || - CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) { + if (!Primitive::IsFloatingPointType(type)) { bool invert = false; if (out.Equals(second)) { @@ -3655,6 +3508,7 @@ void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 3); } void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { @@ -3685,12 +3539,15 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 4); return; } LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? 
locations->GetTemp(0) : Location::NoLocation()); + + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 5); } void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) { @@ -3709,11 +3566,14 @@ void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { if (TryGenerateIntrinsicCode(invoke, codegen_)) { + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 6); return; } codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); + + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 7); } void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -3790,6 +3650,8 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* inv codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); DCHECK(!codegen_->IsLeafMethod()); } + + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 8); } void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { @@ -3798,6 +3660,7 @@ void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { codegen_->GenerateInvokePolymorphicCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 9); } void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) { @@ -5329,6 +5192,7 @@ void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 10); } void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) { @@ -5348,6 +5212,7 @@ void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) { codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11); } void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) { @@ -6965,6 +6830,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instructi return; } GenerateSuspendCheck(instruction, nullptr); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 12); } void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction, @@ -7326,6 +7192,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 13); return; } DCHECK(!cls->NeedsAccessCheck()); @@ -7405,6 +7272,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ } else { __ Bind(slow_path->GetExitLabel()); } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 14); } } @@ -7448,6 +7316,7 @@ HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ 
-7505,14 +7374,22 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); - return; // No dex cache slow path. + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(load->GetString().Get())); DCHECK_NE(address, 0u); __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); - return; // No dex cache slow path. + return; + } + case HLoadString::LoadKind::kBootImageInternTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->EmitMovwMovtPlaceholder(labels, out); + __ Ldr(out, MemOperand(out, /* offset */ 0)); + return; } case HLoadString::LoadKind::kBssEntry: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); @@ -7520,7 +7397,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE ? RegisterFrom(locations->GetTemp(0)) : out; CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, temp); GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption); LoadStringSlowPathARMVIXL* slow_path = @@ -7528,6 +7405,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE codegen_->AddSlowPath(slow_path); __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 15); return; } case HLoadString::LoadKind::kJitTableAddress: { @@ -7548,6 +7426,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 16); } static int32_t GetExceptionTlsOffset() { @@ -8146,6 +8025,7 @@ void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* i } else { CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 17); } void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) { @@ -8647,6 +8527,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18); } void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { @@ -8711,31 +8592,34 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i base.GetCode(), obj.GetCode(), narrow); vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); - vixl::EmissionCheckScope guard( - GetVIXLAssembler(), - (kPoisonHeapReferences ? 
5u : 4u) * vixl32::kMaxInstructionSizeInBytes); - vixl32::Label return_address; - EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(mr, Operand(0)); - EmitPlaceholderBne(this, bne_label); - ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); - __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - // Note: We need a specific width for the unpoisoning NEG. - if (kPoisonHeapReferences) { - if (narrow) { - // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB). - __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0)); - } else { - __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + { + vixl::EmissionCheckScope guard( + GetVIXLAssembler(), + (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(mr, Operand(0)); + EmitPlaceholderBne(this, bne_label); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); } + // Note: We need a specific width for the unpoisoning NEG. + if (kPoisonHeapReferences) { + if (narrow) { + // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB). + __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0)); + } else { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } + } + __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); } - __ Bind(&return_address); - DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), - narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET - : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); + MaybeGenerateMarkingRegisterCheck(/* code */ 19, /* temp_loc */ LocationFrom(ip)); return; } @@ -8796,23 +8680,26 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); __ Add(data_reg, obj, Operand(data_offset)); - vixl::EmissionCheckScope guard( - GetVIXLAssembler(), - (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); - vixl32::Label return_address; - EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(mr, Operand(0)); - EmitPlaceholderBne(this, bne_label); - ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); - __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); - DCHECK(!needs_null_check); // The thunk cannot handle the null check. - // Note: We need a Wide NEG for the unpoisoning. - if (kPoisonHeapReferences) { - __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + { + vixl::EmissionCheckScope guard( + GetVIXLAssembler(), + (kPoisonHeapReferences ? 
5u : 4u) * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(mr, Operand(0)); + EmitPlaceholderBne(this, bne_label); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + // Note: We need a Wide NEG for the unpoisoning. + if (kPoisonHeapReferences) { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } + __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); } - __ Bind(&return_address); - DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), - BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); + MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip)); return; } @@ -8866,6 +8753,7 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // Fast path: the GC is not marking: just load the reference. GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); __ Bind(slow_path->GetExitLabel()); + MaybeGenerateMarkingRegisterCheck(/* code */ 21); } void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, @@ -8920,6 +8808,7 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction // Fast path: the GC is not marking: nothing to do (the field is // up-to-date, and we don't need to load the reference). __ Bind(slow_path->GetExitLabel()); + MaybeGenerateMarkingRegisterCheck(/* code */ 22); } void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction, @@ -8981,6 +8870,20 @@ void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction, GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); } +void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { + // The following condition is a compile-time one, so it does not have a run-time cost. + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) { + // The following condition is a run-time one; it is executed after the + // previous compile-time test, to avoid penalizing non-debug builds. + if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temp_loc.IsValid() ? 
RegisterFrom(temp_loc) : temps.Acquire(); + GetAssembler()->GenerateMarkingRegisterCheck(temp, + kMarkingRegisterCheckBreakCodeBaseCode + code); + } + } +} + void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction, Location out, Location ref, @@ -9226,6 +9129,11 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeSt return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); } +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &string_bss_entry_patches_); +} + CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch( const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { patches->emplace_back(dex_file, offset_or_index); @@ -9294,6 +9202,7 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { @@ -9306,13 +9215,15 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa } else { DCHECK(pc_relative_method_patches_.empty()); DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, - linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_, + linker_patches); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(), info.custom_data)); diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 7ab2993161..e78bc15614 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -579,6 +579,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator { PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, dex::StringIndex string_index); + PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, + dex::StringIndex string_index); // Add a new baker read barrier patch and return the label to be bound // before the BNE instruction. @@ -661,6 +663,28 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ScaleFactor scale_factor, bool needs_null_check); + // Emit code checking the status of the Marking Register, and + // aborting the program if MR does not match the value stored in the + // art::Thread object. Code is only emitted in debug mode and if + // CompilerOptions::EmitRunTimeChecksInDebugMode returns true. 
+ // + // Argument `code` is used to identify the different occurrences of + // MaybeGenerateMarkingRegisterCheck in the code generator, and is + // used together with kMarkingRegisterCheckBreakCodeBaseCode to + // create the value passed to the BKPT instruction. Note that unlike + // in the ARM64 code generator, where `__LINE__` is passed as `code` + // argument to + // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck, we cannot + // realistically do that here, as Encoding T1 for the BKPT + // instruction only accepts 8-bit immediate values. + // + // If `temp_loc` is a valid location, it is expected to be a + // register and will be used as a temporary to generate code; + // otherwise, a temporary will be fetched from the core register + // scratch pool. + virtual void MaybeGenerateMarkingRegisterCheck(int code, + Location temp_loc = Location::NoLocation()); + // Generate a read barrier for a heap reference within `instruction` // using a slow path. // @@ -781,8 +805,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative String patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index b6eb5c1d1d..ac8f675e2d 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -267,13 +267,10 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { DCHECK(bss_info_high_); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index, bss_info_high_); - bool reordering = __ SetReorder(false); - __ Bind(&info_low->label); - __ StoreToOffset(kStoreWord, - calling_convention.GetRegisterAt(0), - entry_address, - /* placeholder */ 0x5678); - __ SetReorder(reordering); + __ Sw(calling_convention.GetRegisterAt(0), + entry_address, + /* placeholder */ 0x5678, + &info_low->label); } // Move the class to the desired location. @@ -296,10 +293,8 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index, info_high); - bool reordering = __ SetReorder(false); - mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base, info_low); - __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); + mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base); + __ Sw(out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678, &info_low->label); } __ B(GetExitLabel()); } @@ -365,14 +360,11 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { // The string entry address was preserved in `entry_address` thanks to kSaveEverything. 
DCHECK(bss_info_high_); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, bss_info_high_); - bool reordering = __ SetReorder(false); - __ Bind(&info_low->label); - __ StoreToOffset(kStoreWord, - calling_convention.GetRegisterAt(0), - entry_address, - /* placeholder */ 0x5678); - __ SetReorder(reordering); + mips_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index, bss_info_high_); + __ Sw(calling_convention.GetRegisterAt(0), + entry_address, + /* placeholder */ 0x5678, + &info_low->label); } Primitive::Type type = instruction_->GetType(); @@ -388,13 +380,11 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + mips_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, info_high); - bool reordering = __ SetReorder(false); - mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base, info_low); - __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); + mips_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index, info_high); + mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base); + __ Sw(out, TMP, /* placeholder */ 0x5678, &info_low->label); } __ B(GetExitLabel()); } @@ -1111,6 +1101,7 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), clobbered_ra_(false) { @@ -1661,7 +1652,8 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch method_bss_entry_patches_.size() + pc_relative_type_patches_.size() + type_bss_entry_patches_.size() + - pc_relative_string_patches_.size(); + pc_relative_string_patches_.size() + + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, @@ -1673,13 +1665,15 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch } else { DCHECK(pc_relative_method_patches_.empty()); DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, - linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_, + linker_patches); DCHECK_EQ(size, linker_patches->size()); } @@ -1722,6 +1716,13 @@ 
CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPa return NewPcRelativePatch(dex_file, string_index.index_, info_high, &pc_relative_string_patches_); } +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewStringBssEntryPatch( + const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(dex_file, string_index.index_, info_high, &string_bss_entry_patches_); +} + CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch( const DexFile& dex_file, uint32_t offset_or_index, @@ -1743,16 +1744,17 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, Register out, - Register base, - PcRelativePatchInfo* info_low) { + Register base) { DCHECK(!info_high->patch_info_high); DCHECK_NE(out, base); + bool reordering = __ SetReorder(false); if (GetInstructionSetFeatures().IsR6()) { DCHECK_EQ(base, ZERO); __ Bind(&info_high->label); __ Bind(&info_high->pc_rel_label); // Add the high half of a 32-bit offset to PC. __ Auipc(out, /* placeholder */ 0x1234); + __ SetReorder(reordering); } else { // If base is ZERO, emit NAL to obtain the actual base. if (base == ZERO) { @@ -1766,15 +1768,12 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo if (base == ZERO) { __ Bind(&info_high->pc_rel_label); } + __ SetReorder(reordering); // Add the high half of a 32-bit offset to PC. __ Addu(out, out, (base == ZERO) ? RA : base); } // A following instruction will add the sign-extended low half of the 32-bit // offset to `out` (e.g. lw, jialc, addiu). - if (info_low != nullptr) { - DCHECK_EQ(info_low->patch_info_high, info_high); - __ Bind(&info_low->label); - } } CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch( @@ -6573,7 +6572,8 @@ void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruc DCHECK(!label_low); __ AddUpper(base, obj, offset_high); } - __ Beqz(T9, (isR6 ? 2 : 4)); // Skip jialc / addiu+jalr+nop. + MipsLabel skip_call; + __ Beqz(T9, &skip_call, /* is_bare */ true); if (label_low != nullptr) { DCHECK(short_offset); __ Bind(label_low); @@ -6588,6 +6588,7 @@ void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruc __ Jalr(T9); __ Nop(); } + __ Bind(&skip_call); __ SetReorder(reordering); } else { // Note that we do not actually check the value of `GetIsGcMarking()` @@ -6724,27 +6725,31 @@ void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* inst __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); Register ref_reg = ref.AsRegister<Register>(); Register base = short_offset ? obj : TMP; + MipsLabel skip_call; if (short_offset) { if (isR6) { - __ Beqzc(T9, 2); // Skip jialc. + __ Beqzc(T9, &skip_call, /* is_bare */ true); __ Nop(); // In forbidden slot. __ Jialc(T9, thunk_disp); } else { - __ Beqz(T9, 3); // Skip jalr+nop. + __ Beqz(T9, &skip_call, /* is_bare */ true); __ Addiu(T9, T9, thunk_disp); // In delay slot. __ Jalr(T9); __ Nop(); // In delay slot. } + __ Bind(&skip_call); } else { if (isR6) { - __ Beqz(T9, 2); // Skip jialc. + __ Beqz(T9, &skip_call, /* is_bare */ true); __ Aui(base, obj, offset_high); // In delay slot. __ Jialc(T9, thunk_disp); + __ Bind(&skip_call); } else { __ Lui(base, offset_high); - __ Beqz(T9, 2); // Skip jalr. + __ Beqz(T9, &skip_call, /* is_bare */ true); __ Addiu(T9, T9, thunk_disp); // In delay slot. 
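The MIPS sequences here, like the ARM and ARM64 ones earlier, share one shape: a single conditional branch on the marking state guards an out-of-line call to the mark thunk, with otherwise-useful work kept in the delay slot. A condensed model of that control flow, with hypothetical names; it ignores delay slots, introspection thunks, and forwarding-address encodings:

// Minimal sketch of a Baker-style read barrier fast path.
template <typename T>
T* ReadBarrierOnLoad(T* loaded_ref, bool gc_is_marking, T* (*mark_thunk)(T*)) {
  if (!gc_is_marking) {
    return loaded_ref;             // fast path: the reference is used as loaded
  }
  return mark_thunk(loaded_ref);   // slow path: mark (or forward) the reference
}

// Trivial "mark" function standing in for the runtime thunk.
struct Obj { bool marked = false; };
static Obj* MarkObj(Obj* obj) { if (obj != nullptr) obj->marked = true; return obj; }

int main() {
  Obj o;
  Obj* ref = ReadBarrierOnLoad(&o, /* gc_is_marking */ true, MarkObj);
  return ref->marked ? 0 : 1;
}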
__ Jalr(T9); + __ Bind(&skip_call); __ Addu(base, base, obj); // In delay slot. } } @@ -6826,15 +6831,18 @@ void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* inst Register index_reg = index.IsRegisterPair() ? index.AsRegisterPairLow<Register>() : index.AsRegister<Register>(); + MipsLabel skip_call; if (GetInstructionSetFeatures().IsR6()) { - __ Beqz(T9, 2); // Skip jialc. + __ Beqz(T9, &skip_call, /* is_bare */ true); __ Lsa(TMP, index_reg, obj, scale_factor); // In delay slot. __ Jialc(T9, thunk_disp); + __ Bind(&skip_call); } else { __ Sll(TMP, index_reg, scale_factor); - __ Beqz(T9, 2); // Skip jalr. + __ Beqz(T9, &skip_call, /* is_bare */ true); __ Addiu(T9, T9, thunk_disp); // In delay slot. __ Jalr(T9); + __ Bind(&skip_call); __ Addu(TMP, TMP, obj); // In delay slot. } // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor)) @@ -7368,6 +7376,7 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( bool fallback_load = has_irreducible_loops && !is_r6; switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -7506,11 +7515,9 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( PcRelativePatchInfo* info_high = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); PcRelativePatchInfo* info_low = NewPcRelativeMethodPatch(invoke->GetTargetMethod(), info_high); - bool reordering = __ SetReorder(false); Register temp_reg = temp.AsRegister<Register>(); - EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg, info_low); - __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); + EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); + __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: @@ -7522,10 +7529,8 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( PcRelativePatchInfo* info_low = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high); Register temp_reg = temp.AsRegister<Register>(); - bool reordering = __ SetReorder(false); - EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg, info_low); - __ Lw(temp_reg, TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); + EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); + __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); break; } case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { @@ -7720,13 +7725,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); - bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, - base_or_current_method_reg, - info_low); - __ Addiu(out, out, /* placeholder */ 0x5678); - __ SetReorder(reordering); + base_or_current_method_reg); + __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label); break; } case HLoadClass::LoadKind::kBootImageAddress: { @@ -7745,11 +7747,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex(), bss_info_high); 
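The info_high/info_low pairs and the 0x1234/0x5678 placeholders used throughout the MIPS changes correspond to the two halves of a 32-bit PC-relative offset: a high half materialized by AUIPC or LUI, and a signed low half folded into the following lw/sw/addiu. A standalone sketch of that split, including the usual +0x8000 carry adjustment (the struct and function names are illustrative only):

#include <cassert>
#include <cstdint>

struct HiLo {
  uint16_t hi;  // filled in via the "high" patch (AUIPC/LUI placeholder 0x1234)
  int16_t lo;   // filled in via the "low" patch  (lw/sw/addiu placeholder 0x5678)
};

HiLo Split(uint32_t offset) {
  // +0x8000 compensates for the sign extension of the low half performed by
  // the memory or add-immediate instruction that consumes it.
  return HiLo{static_cast<uint16_t>((offset + 0x8000u) >> 16),
              static_cast<int16_t>(offset & 0xffffu)};
}

int main() {
  const uint32_t offset = 0x1234abcd;
  const HiLo s = Split(offset);
  const uint32_t rebuilt =
      (static_cast<uint32_t>(s.hi) << 16) +
      static_cast<uint32_t>(static_cast<int32_t>(s.lo));
  assert(rebuilt == offset);
  return 0;
}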
constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; Register temp = non_baker_read_barrier ? out : locations->GetTemp(0).AsRegister<Register>(); - bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, temp, base_or_current_method_reg); - __ SetReorder(reordering); GenerateGcRootFieldLoad(cls, out_loc, temp, @@ -7829,6 +7829,7 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { // We need an extra register for PC-relative literals on R2. case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: if (isR6) { break; @@ -7875,6 +7876,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ // We need an extra register for PC-relative literals on R2. case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); break; @@ -7890,14 +7892,11 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); - bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, - base_or_current_method_reg, - info_low); - __ Addiu(out, out, /* placeholder */ 0x5678); - __ SetReorder(reordering); - return; // No dex cache slow path. + base_or_current_method_reg); + __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label); + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( @@ -7906,21 +7905,31 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ __ LoadLiteral(out, base_or_current_method_reg, codegen_->DeduplicateBootImageAddressLiteral(address)); - return; // No dex cache slow path. + return; } - case HLoadString::LoadKind::kBssEntry: { + case HLoadString::LoadKind::kBootImageInternTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, + out, + base_or_current_method_reg); + __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label); + return; + } + case HLoadString::LoadKind::kBssEntry: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS::PcRelativePatchInfo* info_high = + codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); + CodeGeneratorMIPS::PcRelativePatchInfo* info_low = + codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex(), info_high); constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; Register temp = non_baker_read_barrier ? 
out : locations->GetTemp(0).AsRegister<Register>(); - bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, temp, base_or_current_method_reg); - __ SetReorder(reordering); GenerateGcRootFieldLoad(load, out_loc, temp, diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 7195b9d89d..f15f8c672a 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -633,12 +633,14 @@ class CodeGeneratorMIPS : public CodeGenerator { PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); Literal* DeduplicateBootImageAddressLiteral(uint32_t address); void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, Register out, - Register base, - PcRelativePatchInfo* info_low = nullptr); + Register base); // The JitPatchInfo is used for JIT string and class loads. struct JitPatchInfo { @@ -700,8 +702,10 @@ class CodeGeneratorMIPS : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative String patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Patches for string root accesses in JIT compiled code. ArenaDeque<JitPatchInfo> jit_string_patches_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 3e79f474b6..71c2bfff19 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -318,9 +318,9 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { // The string entry address was preserved in `entry_address` thanks to kSaveEverything. DCHECK(bss_info_high_); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), - string_index, - bss_info_high_); + mips64_codegen->NewStringBssEntryPatch(load->GetDexFile(), + string_index, + bss_info_high_); __ Bind(&info_low->label); __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), @@ -339,9 +339,9 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { // For non-Baker read barriers we need to re-calculate the address of // the string entry. 
CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + mips64_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, info_high); + mips64_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index, info_high); mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, info_low); __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678); } @@ -1049,6 +1049,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -1560,7 +1561,8 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat method_bss_entry_patches_.size() + pc_relative_type_patches_.size() + type_bss_entry_patches_.size() + - pc_relative_string_patches_.size(); + pc_relative_string_patches_.size() + + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, @@ -1572,13 +1574,15 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat } else { DCHECK(pc_relative_method_patches_.empty()); DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, - linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_, + linker_patches); DCHECK_EQ(size, linker_patches->size()); } @@ -1621,6 +1625,13 @@ CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStri return NewPcRelativePatch(dex_file, string_index.index_, info_high, &pc_relative_string_patches_); } +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewStringBssEntryPatch( + const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(dex_file, string_index.index_, info_high, &string_bss_entry_patches_); +} + CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativePatch( const DexFile& dex_file, uint32_t offset_or_index, @@ -3652,6 +3663,114 @@ void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond, } } +bool InstructionCodeGeneratorMIPS64::MaterializeIntLongCompare(IfCondition cond, + bool is64bit, + LocationSummary* input_locations, + GpuRegister dst) { + GpuRegister lhs = input_locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = input_locations->InAt(1); + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) 
{ + if (is64bit) { + rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); + } else { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1); + + switch (cond) { + case kCondEQ: + case kCondNE: + if (use_imm && IsInt<16>(-rhs_imm)) { + if (is64bit) { + __ Daddiu(dst, lhs, -rhs_imm); + } else { + __ Addiu(dst, lhs, -rhs_imm); + } + } else if (use_imm && IsUint<16>(rhs_imm)) { + __ Xori(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Xor(dst, lhs, rhs_reg); + } + return (cond == kCondEQ); + + case kCondLT: + case kCondGE: + if (use_imm && IsInt<16>(rhs_imm)) { + __ Slti(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Slt(dst, lhs, rhs_reg); + } + return (cond == kCondGE); + + case kCondLE: + case kCondGT: + if (use_imm && IsInt<16>(rhs_imm_plus_one)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + __ Slti(dst, lhs, rhs_imm_plus_one); + return (cond == kCondGT); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Slt(dst, rhs_reg, lhs); + return (cond == kCondLE); + } + + case kCondB: + case kCondAE: + if (use_imm && IsInt<16>(rhs_imm)) { + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. + __ Sltiu(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Sltu(dst, lhs, rhs_reg); + } + return (cond == kCondAE); + + case kCondBE: + case kCondA: + if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + // Note that this only works if rhs + 1 does not overflow + // to 0, hence the check above. + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. 
+ __ Sltiu(dst, lhs, rhs_imm_plus_one); + return (cond == kCondA); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Sltu(dst, rhs_reg, lhs); + return (cond == kCondBE); + } + } +} + void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition cond, bool is64bit, LocationSummary* locations, @@ -3854,6 +3973,97 @@ void InstructionCodeGeneratorMIPS64::GenerateFpCompare(IfCondition cond, } } +bool InstructionCodeGeneratorMIPS64::MaterializeFpCompare(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* input_locations, + FpuRegister dst) { + FpuRegister lhs = input_locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = input_locations->InAt(1).AsFpuRegister<FpuRegister>(); + if (type == Primitive::kPrimFloat) { + switch (cond) { + case kCondEQ: + __ CmpEqS(dst, lhs, rhs); + return false; + case kCondNE: + __ CmpEqS(dst, lhs, rhs); + return true; + case kCondLT: + if (gt_bias) { + __ CmpLtS(dst, lhs, rhs); + } else { + __ CmpUltS(dst, lhs, rhs); + } + return false; + case kCondLE: + if (gt_bias) { + __ CmpLeS(dst, lhs, rhs); + } else { + __ CmpUleS(dst, lhs, rhs); + } + return false; + case kCondGT: + if (gt_bias) { + __ CmpUltS(dst, rhs, lhs); + } else { + __ CmpLtS(dst, rhs, lhs); + } + return false; + case kCondGE: + if (gt_bias) { + __ CmpUleS(dst, rhs, lhs); + } else { + __ CmpLeS(dst, rhs, lhs); + } + return false; + default: + LOG(FATAL) << "Unexpected non-floating-point condition " << cond; + UNREACHABLE(); + } + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + switch (cond) { + case kCondEQ: + __ CmpEqD(dst, lhs, rhs); + return false; + case kCondNE: + __ CmpEqD(dst, lhs, rhs); + return true; + case kCondLT: + if (gt_bias) { + __ CmpLtD(dst, lhs, rhs); + } else { + __ CmpUltD(dst, lhs, rhs); + } + return false; + case kCondLE: + if (gt_bias) { + __ CmpLeD(dst, lhs, rhs); + } else { + __ CmpUleD(dst, lhs, rhs); + } + return false; + case kCondGT: + if (gt_bias) { + __ CmpUltD(dst, rhs, lhs); + } else { + __ CmpLtD(dst, rhs, lhs); + } + return false; + case kCondGE: + if (gt_bias) { + __ CmpUleD(dst, rhs, lhs); + } else { + __ CmpLeD(dst, rhs, lhs); + } + return false; + default: + LOG(FATAL) << "Unexpected non-floating-point condition " << cond; + UNREACHABLE(); + } + } +} + void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond, bool gt_bias, Primitive::Type type, @@ -3905,6 +4115,7 @@ void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond break; default: LOG(FATAL) << "Unexpected non-floating-point condition"; + UNREACHABLE(); } } else { DCHECK_EQ(type, Primitive::kPrimDouble); @@ -3951,6 +4162,7 @@ void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond break; default: LOG(FATAL) << "Unexpected non-floating-point condition"; + UNREACHABLE(); } } } @@ -4069,6 +4281,306 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +// This function returns true if a conditional move can be generated for HSelect. +// Otherwise it returns false and HSelect must be implemented in terms of conditonal +// branches and regular moves. +// +// If `locations_to_set` isn't nullptr, its inputs and outputs are set for HSelect. +// +// While determining feasibility of a conditional move and setting inputs/outputs +// are two distinct tasks, this function does both because they share quite a bit +// of common logic. 
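A quick standalone check of the immediate tricks used in MaterializeIntLongCompare above (plain C++ sketch for illustration, not part of this change):

#include <cassert>
#include <cstdint>

int main() {
  // Signed "lhs <= imm" is lowered as "lhs < imm + 1" (Slti against imm + 1).
  int64_t lhs = 5;
  int64_t imm = 5;
  assert((lhs <= imm) == (lhs < imm + 1));

  // Sltiu sign-extends its 16-bit immediate, so the immediate 0x8000 effectively
  // compares against 0xFFFFFFFFFFFF8000 in a 64-bit register.
  int16_t imm16 = -0x8000;  // bit pattern 0x8000
  uint64_t effective = static_cast<uint64_t>(static_cast<int64_t>(imm16));
  assert(effective == 0xFFFFFFFFFFFF8000ULL);

  // Unsigned "lhs <= imm" as "lhs < imm + 1" only works when imm + 1 does not wrap
  // to 0, hence the rhs_imm_plus_one != 0 guard in the kCondBE/kCondA case.
  uint64_t ulhs = 7;
  uint64_t uimm = 7;
  assert((ulhs <= uimm) == (ulhs < uimm + 1));
  return 0;
}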
+static bool CanMoveConditionally(HSelect* select, LocationSummary* locations_to_set) { + bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition()); + HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HCondition* condition = cond->AsCondition(); + + Primitive::Type cond_type = materialized ? Primitive::kPrimInt : condition->InputAt(0)->GetType(); + Primitive::Type dst_type = select->GetType(); + + HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); + HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); + bool is_true_value_zero_constant = + (cst_true_value != nullptr && cst_true_value->IsZeroBitPattern()); + bool is_false_value_zero_constant = + (cst_false_value != nullptr && cst_false_value->IsZeroBitPattern()); + + bool can_move_conditionally = false; + bool use_const_for_false_in = false; + bool use_const_for_true_in = false; + + if (!cond->IsConstant()) { + if (!Primitive::IsFloatingPointType(cond_type)) { + if (!Primitive::IsFloatingPointType(dst_type)) { + // Moving int/long on int/long condition. + if (is_true_value_zero_constant) { + // seleqz out_reg, false_reg, cond_reg + can_move_conditionally = true; + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // selnez out_reg, true_reg, cond_reg + can_move_conditionally = true; + use_const_for_false_in = true; + } else if (materialized) { + // Not materializing unmaterialized int conditions + // to keep the instruction count low. + // selnez AT, true_reg, cond_reg + // seleqz TMP, false_reg, cond_reg + // or out_reg, AT, TMP + can_move_conditionally = true; + } + } else { + // Moving float/double on int/long condition. + if (materialized) { + // Not materializing unmaterialized int conditions + // to keep the instruction count low. + can_move_conditionally = true; + if (is_true_value_zero_constant) { + // sltu TMP, ZERO, cond_reg + // mtc1 TMP, temp_cond_reg + // seleqz.fmt out_reg, false_reg, temp_cond_reg + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // sltu TMP, ZERO, cond_reg + // mtc1 TMP, temp_cond_reg + // selnez.fmt out_reg, true_reg, temp_cond_reg + use_const_for_false_in = true; + } else { + // sltu TMP, ZERO, cond_reg + // mtc1 TMP, temp_cond_reg + // sel.fmt temp_cond_reg, false_reg, true_reg + // mov.fmt out_reg, temp_cond_reg + } + } + } + } else { + if (!Primitive::IsFloatingPointType(dst_type)) { + // Moving int/long on float/double condition. + can_move_conditionally = true; + if (is_true_value_zero_constant) { + // mfc1 TMP, temp_cond_reg + // seleqz out_reg, false_reg, TMP + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // mfc1 TMP, temp_cond_reg + // selnez out_reg, true_reg, TMP + use_const_for_false_in = true; + } else { + // mfc1 TMP, temp_cond_reg + // selnez AT, true_reg, TMP + // seleqz TMP, false_reg, TMP + // or out_reg, AT, TMP + } + } else { + // Moving float/double on float/double condition. 
+ can_move_conditionally = true; + if (is_true_value_zero_constant) { + // seleqz.fmt out_reg, false_reg, temp_cond_reg + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // selnez.fmt out_reg, true_reg, temp_cond_reg + use_const_for_false_in = true; + } else { + // sel.fmt temp_cond_reg, false_reg, true_reg + // mov.fmt out_reg, temp_cond_reg + } + } + } + } + + if (can_move_conditionally) { + DCHECK(!use_const_for_false_in || !use_const_for_true_in); + } else { + DCHECK(!use_const_for_false_in); + DCHECK(!use_const_for_true_in); + } + + if (locations_to_set != nullptr) { + if (use_const_for_false_in) { + locations_to_set->SetInAt(0, Location::ConstantLocation(cst_false_value)); + } else { + locations_to_set->SetInAt(0, + Primitive::IsFloatingPointType(dst_type) + ? Location::RequiresFpuRegister() + : Location::RequiresRegister()); + } + if (use_const_for_true_in) { + locations_to_set->SetInAt(1, Location::ConstantLocation(cst_true_value)); + } else { + locations_to_set->SetInAt(1, + Primitive::IsFloatingPointType(dst_type) + ? Location::RequiresFpuRegister() + : Location::RequiresRegister()); + } + if (materialized) { + locations_to_set->SetInAt(2, Location::RequiresRegister()); + } + + if (can_move_conditionally) { + locations_to_set->SetOut(Primitive::IsFloatingPointType(dst_type) + ? Location::RequiresFpuRegister() + : Location::RequiresRegister()); + } else { + locations_to_set->SetOut(Location::SameAsFirstInput()); + } + } + + return can_move_conditionally; +} + + +void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) { + LocationSummary* locations = select->GetLocations(); + Location dst = locations->Out(); + Location false_src = locations->InAt(0); + Location true_src = locations->InAt(1); + HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + GpuRegister cond_reg = TMP; + FpuRegister fcond_reg = FTMP; + Primitive::Type cond_type = Primitive::kPrimInt; + bool cond_inverted = false; + Primitive::Type dst_type = select->GetType(); + + if (IsBooleanValueOrMaterializedCondition(cond)) { + cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<GpuRegister>(); + } else { + HCondition* condition = cond->AsCondition(); + LocationSummary* cond_locations = cond->GetLocations(); + IfCondition if_cond = condition->GetCondition(); + cond_type = condition->InputAt(0)->GetType(); + switch (cond_type) { + default: + cond_inverted = MaterializeIntLongCompare(if_cond, + /* is64bit */ false, + cond_locations, + cond_reg); + break; + case Primitive::kPrimLong: + cond_inverted = MaterializeIntLongCompare(if_cond, + /* is64bit */ true, + cond_locations, + cond_reg); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + cond_inverted = MaterializeFpCompare(if_cond, + condition->IsGtBias(), + cond_type, + cond_locations, + fcond_reg); + break; + } + } + + if (true_src.IsConstant()) { + DCHECK(true_src.GetConstant()->IsZeroBitPattern()); + } + if (false_src.IsConstant()) { + DCHECK(false_src.GetConstant()->IsZeroBitPattern()); + } + + switch (dst_type) { + default: + if (Primitive::IsFloatingPointType(cond_type)) { + __ Mfc1(cond_reg, fcond_reg); + } + if (true_src.IsConstant()) { + if (cond_inverted) { + __ Selnez(dst.AsRegister<GpuRegister>(), false_src.AsRegister<GpuRegister>(), cond_reg); + } else { + __ Seleqz(dst.AsRegister<GpuRegister>(), false_src.AsRegister<GpuRegister>(), cond_reg); + } + } else if (false_src.IsConstant()) { + if (cond_inverted) { + __ Seleqz(dst.AsRegister<GpuRegister>(), 
true_src.AsRegister<GpuRegister>(), cond_reg); + } else { + __ Selnez(dst.AsRegister<GpuRegister>(), true_src.AsRegister<GpuRegister>(), cond_reg); + } + } else { + DCHECK_NE(cond_reg, AT); + if (cond_inverted) { + __ Seleqz(AT, true_src.AsRegister<GpuRegister>(), cond_reg); + __ Selnez(TMP, false_src.AsRegister<GpuRegister>(), cond_reg); + } else { + __ Selnez(AT, true_src.AsRegister<GpuRegister>(), cond_reg); + __ Seleqz(TMP, false_src.AsRegister<GpuRegister>(), cond_reg); + } + __ Or(dst.AsRegister<GpuRegister>(), AT, TMP); + } + break; + case Primitive::kPrimFloat: { + if (!Primitive::IsFloatingPointType(cond_type)) { + // sel*.fmt tests bit 0 of the condition register, account for that. + __ Sltu(TMP, ZERO, cond_reg); + __ Mtc1(TMP, fcond_reg); + } + FpuRegister dst_reg = dst.AsFpuRegister<FpuRegister>(); + if (true_src.IsConstant()) { + FpuRegister src_reg = false_src.AsFpuRegister<FpuRegister>(); + if (cond_inverted) { + __ SelnezS(dst_reg, src_reg, fcond_reg); + } else { + __ SeleqzS(dst_reg, src_reg, fcond_reg); + } + } else if (false_src.IsConstant()) { + FpuRegister src_reg = true_src.AsFpuRegister<FpuRegister>(); + if (cond_inverted) { + __ SeleqzS(dst_reg, src_reg, fcond_reg); + } else { + __ SelnezS(dst_reg, src_reg, fcond_reg); + } + } else { + if (cond_inverted) { + __ SelS(fcond_reg, + true_src.AsFpuRegister<FpuRegister>(), + false_src.AsFpuRegister<FpuRegister>()); + } else { + __ SelS(fcond_reg, + false_src.AsFpuRegister<FpuRegister>(), + true_src.AsFpuRegister<FpuRegister>()); + } + __ MovS(dst_reg, fcond_reg); + } + break; + } + case Primitive::kPrimDouble: { + if (!Primitive::IsFloatingPointType(cond_type)) { + // sel*.fmt tests bit 0 of the condition register, account for that. + __ Sltu(TMP, ZERO, cond_reg); + __ Mtc1(TMP, fcond_reg); + } + FpuRegister dst_reg = dst.AsFpuRegister<FpuRegister>(); + if (true_src.IsConstant()) { + FpuRegister src_reg = false_src.AsFpuRegister<FpuRegister>(); + if (cond_inverted) { + __ SelnezD(dst_reg, src_reg, fcond_reg); + } else { + __ SeleqzD(dst_reg, src_reg, fcond_reg); + } + } else if (false_src.IsConstant()) { + FpuRegister src_reg = true_src.AsFpuRegister<FpuRegister>(); + if (cond_inverted) { + __ SeleqzD(dst_reg, src_reg, fcond_reg); + } else { + __ SelnezD(dst_reg, src_reg, fcond_reg); + } + } else { + if (cond_inverted) { + __ SelD(fcond_reg, + true_src.AsFpuRegister<FpuRegister>(), + false_src.AsFpuRegister<FpuRegister>()); + } else { + __ SelD(fcond_reg, + false_src.AsFpuRegister<FpuRegister>(), + true_src.AsFpuRegister<FpuRegister>()); + } + __ MovD(dst_reg, fcond_reg); + } + break; + } + } +} + void LocationsBuilderMIPS64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(flag, LocationSummary::kNoCall); @@ -4084,28 +4596,22 @@ void InstructionCodeGeneratorMIPS64::VisitShouldDeoptimizeFlag(HShouldDeoptimize void LocationsBuilderMIPS64::VisitSelect(HSelect* select) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); - if (Primitive::IsFloatingPointType(select->GetType())) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - } - if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { - locations->SetInAt(2, Location::RequiresRegister()); - } - locations->SetOut(Location::SameAsFirstInput()); + 
CanMoveConditionally(select, locations); } void InstructionCodeGeneratorMIPS64::VisitSelect(HSelect* select) { - LocationSummary* locations = select->GetLocations(); - Mips64Label false_target; - GenerateTestAndBranch(select, - /* condition_input_index */ 2, - /* true_target */ nullptr, - &false_target); - codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); - __ Bind(&false_target); + if (CanMoveConditionally(select, /* locations_to_set */ nullptr)) { + GenConditionalMove(select); + } else { + LocationSummary* locations = select->GetLocations(); + Mips64Label false_target; + GenerateTestAndBranch(select, + /* condition_input_index */ 2, + /* true_target */ nullptr, + &false_target); + codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); + __ Bind(&false_target); + } } void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { @@ -4490,7 +4996,8 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(HInstruction* instr DCHECK(!label_low); __ Daui(base, obj, offset_high); } - __ Beqz(T9, 2); // Skip jialc. + Mips64Label skip_call; + __ Beqz(T9, &skip_call, /* is_bare */ true); if (label_low != nullptr) { DCHECK(short_offset); __ Bind(label_low); @@ -4499,6 +5006,7 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(HInstruction* instr __ LoadFromOffset(kLoadUnsignedWord, root_reg, base, offset_low); // Single instruction // in delay slot. __ Jialc(T9, thunk_disp); + __ Bind(&skip_call); } else { // Note that we do not actually check the value of `GetIsGcMarking()` // to decide whether to mark the loaded GC root or not. Instead, we @@ -4617,18 +5125,21 @@ void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in // threads are suspended or running a checkpoint. __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); + Mips64Label skip_call; if (short_offset) { - __ Beqzc(T9, 2); // Skip jialc. + __ Beqzc(T9, &skip_call, /* is_bare */ true); __ Nop(); // In forbidden slot. __ Jialc(T9, thunk_disp); + __ Bind(&skip_call); // /* HeapReference<Object> */ ref = *(obj + offset) __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset); // Single instruction. } else { int16_t offset_low = Low16Bits(offset); int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lwu. - __ Beqz(T9, 2); // Skip jialc. + __ Beqz(T9, &skip_call, /* is_bare */ true); __ Daui(TMP, obj, offset_high); // In delay slot. __ Jialc(T9, thunk_disp); + __ Bind(&skip_call); // /* HeapReference<Object> */ ref = *(obj + offset) __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset_low); // Single instruction. } @@ -4702,11 +5213,13 @@ void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); - __ Beqz(T9, 2); // Skip jialc. + Mips64Label skip_call; + __ Beqz(T9, &skip_call, /* is_bare */ true); GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); GpuRegister index_reg = index.AsRegister<GpuRegister>(); __ Dlsa(TMP, index_reg, obj, scale_factor); // In delay slot. 
__ Jialc(T9, thunk_disp); + __ Bind(&skip_call); // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor)) DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset; __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, data_offset); // Single instruction. @@ -5227,6 +5740,7 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( bool fallback_load = false; switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5615,7 +6129,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Daddiu(out, AT, /* placeholder */ 0x5678); - return; // No dex cache slow path. + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( @@ -5624,14 +6138,24 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA __ LoadLiteral(out, kLoadUnsignedWord, codegen_->DeduplicateBootImageAddressLiteral(address)); - return; // No dex cache slow path. + return; } - case HLoadString::LoadKind::kBssEntry: { + case HLoadString::LoadKind::kBootImageInternTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + __ Lwu(out, AT, /* placeholder */ 0x5678); + return; + } + case HLoadString::LoadKind::kBssEntry: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = + codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = + codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex(), info_high); constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; GpuRegister temp = non_baker_read_barrier ? out diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index d03a9eabd4..3035621972 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -293,6 +293,13 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void GenerateIntLongCompare(IfCondition cond, bool is64bit, LocationSummary* locations); + // When the function returns `false` it means that the condition holds if `dst` is non-zero + // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero + // `dst` are exchanged. 
+ bool MaterializeIntLongCompare(IfCondition cond, + bool is64bit, + LocationSummary* input_locations, + GpuRegister dst); void GenerateIntLongCompareAndBranch(IfCondition cond, bool is64bit, LocationSummary* locations, @@ -301,6 +308,14 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { bool gt_bias, Primitive::Type type, LocationSummary* locations); + // When the function returns `false` it means that the condition holds if `dst` is non-zero + // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero + // `dst` are exchanged. + bool MaterializeFpCompare(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* input_locations, + FpuRegister dst); void GenerateFpCompareAndBranch(IfCondition cond, bool gt_bias, Primitive::Type type, @@ -320,6 +335,7 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { int32_t VecAddress(LocationSummary* locations, size_t size, /* out */ GpuRegister* adjusted_base); + void GenConditionalMove(HSelect* select); Mips64Assembler* const assembler_; CodeGeneratorMIPS64* const codegen_; @@ -589,6 +605,9 @@ class CodeGeneratorMIPS64 : public CodeGenerator { PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); Literal* DeduplicateBootImageAddressLiteral(uint64_t address); void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, @@ -650,8 +669,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative String patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Patches for string root accesses in JIT compiled code.
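The contract documented above for the Materialize*Compare helpers (returning true swaps the roles of zero and non-zero in `dst`) can be summarized by a scalar caller-side analogue; this is only a sketch with a made-up helper name, the real consumer being GenConditionalMove with its seleqz/selnez pairs:

#include <cstdint>

// Hypothetical helper, not ART code: `inverted` is the flag returned by
// MaterializeIntLongCompare/MaterializeFpCompare, `dst` holds the materialized
// condition value.
int64_t SelectOnMaterializedCondition(bool inverted, int64_t dst,
                                      int64_t true_value, int64_t false_value) {
  // inverted == false: the condition holds iff dst != 0.
  // inverted == true:  the condition holds iff dst == 0.
  bool condition_holds = inverted ? (dst == 0) : (dst != 0);
  return condition_holds ? true_value : false_value;
}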
StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index f422b9fc8b..18a55c8b09 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -15,7 +15,9 @@ */ #include "code_generator_arm64.h" + #include "mirror/array-inl.h" +#include "mirror/string.h" using namespace vixl::aarch64; // NOLINT(build/namespaces) @@ -25,12 +27,13 @@ namespace arm64 { using helpers::ARM64EncodableConstantOrRegister; using helpers::Arm64CanEncodeConstantAsImmediate; using helpers::DRegisterFrom; -using helpers::VRegisterFrom; using helpers::HeapOperand; using helpers::InputRegisterAt; using helpers::Int64ConstantFrom; -using helpers::XRegisterFrom; +using helpers::OutputRegister; +using helpers::VRegisterFrom; using helpers::WRegisterFrom; +using helpers::XRegisterFrom; #define __ GetVIXLAssembler()-> @@ -125,20 +128,51 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* } } -void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister src = VRegisterFrom(locations->InAt(0)); + switch (instruction->GetPackedType()) { + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Umov(OutputRegister(instruction), src.V4S(), 0); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Umov(OutputRegister(instruction), src.V2D(), 0); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 4u); + DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector unary operations. 
@@ -167,6 +201,46 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } } +void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister src = VRegisterFrom(locations->InAt(0)); + VRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ Addv(dst.S(), src.V4S()); + break; + case HVecReduce::kMin: + __ Sminv(dst.S(), src.V4S()); + break; + case HVecReduce::kMax: + __ Smaxv(dst.S(), src.V4S()); + break; + } + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ Addp(dst.D(), src.V2D()); + break; + default: + LOG(FATAL) << "Unsupported SIMD min/max"; + UNREACHABLE(); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); } @@ -261,6 +335,7 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) { break; default: LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); } } @@ -803,6 +878,77 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { } } +void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); + + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister dst = VRegisterFrom(locations->Out()); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + // Zero out all other elements first. + __ Movi(dst.V16B(), 0); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + return; + } + + // Set required elements. 
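As background for the new HVecReduce node handled above, the three reduction kinds fold every vector lane into a single value, which the ARM64 code lowers to Addv/Sminv/Smaxv (and Addp for the two-lane long sum). A scalar model, purely illustrative:

#include <algorithm>
#include <cstdint>
#include <numeric>
#include <vector>

// Scalar equivalents of HVecReduce::kSum / kMin / kMax over int32 lanes
// (lanes assumed non-empty; for illustration only).
int32_t ReduceSum(const std::vector<int32_t>& lanes) {
  return std::accumulate(lanes.begin(), lanes.end(), 0);
}
int32_t ReduceMin(const std::vector<int32_t>& lanes) {
  return *std::min_element(lanes.begin(), lanes.end());
}
int32_t ReduceMax(const std::vector<int32_t>& lanes) {
  return *std::max_element(lanes.begin(), lanes.end());
}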
+ switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0)); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); switch (instr->GetPackedType()) { diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 527691d9d9..7a11dff41e 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -73,19 +73,11 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScala } } -void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { +void LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } -void LocationsBuilderARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) { +void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } @@ -112,6 +104,14 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } } +void LocationsBuilderARMVIXL::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderARMVIXL::VisitVecCnv(HVecCnv* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); } @@ -621,6 +621,14 @@ void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) { } } +void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { LOG(FATAL) << "No SIMD for " << instr->GetId(); } diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index ea36e90112..c2fbf7f04b 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -88,19 +88,11 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* } } -void 
LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void LocationsBuilderMIPS::VisitVecSumReduce(HVecSumReduce* instruction) { +void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } -void InstructionCodeGeneratorMIPS::VisitVecSumReduce(HVecSumReduce* instruction) { +void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } @@ -133,6 +125,14 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } } +void LocationsBuilderMIPS::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); } @@ -818,12 +818,83 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { } } +void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt( + HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); + DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); + LocationSummary* locations = instr->GetLocations(); + VectorRegister acc = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex)); + VectorRegister left = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex)); + VectorRegister right = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex)); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvB(acc, left, right); + } else { + __ MsubvB(acc, left, right); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvH(acc, left, right); + } else { + __ MsubvH(acc, left, right); + } + break; + case 
Primitive::kPrimInt: + DCHECK_EQ(4u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvW(acc, left, right); + } else { + __ MsubvW(acc, left, right); + } + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvD(acc, left, right); + } else { + __ MsubvD(acc, left, right); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector memory operations. diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 0395db1df9..9d3a777c13 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -91,19 +91,11 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar } } -void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void LocationsBuilderMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) { +void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } -void InstructionCodeGeneratorMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) { +void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } @@ -136,6 +128,14 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } } +void LocationsBuilderMIPS64::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); } @@ -822,12 +822,83 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { } } +void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt( + HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); + DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void 
InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); + LocationSummary* locations = instr->GetLocations(); + VectorRegister acc = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex)); + VectorRegister left = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex)); + VectorRegister right = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex)); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvB(acc, left, right); + } else { + __ MsubvB(acc, left, right); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvH(acc, left, right); + } else { + __ MsubvH(acc, left, right); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvW(acc, left, right); + } else { + __ MsubvW(acc, left, right); + } + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvD(acc, left, right); + } else { + __ MsubvD(acc, left, right); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector memory operations. diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 14782d70a1..37190f8363 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -15,7 +15,9 @@ */ #include "code_generator_x86.h" + #include "mirror/array-inl.h" +#include "mirror/string.h" namespace art { namespace x86 { @@ -25,23 +27,31 @@ namespace x86 { void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); switch (instruction->GetPackedType()) { case Primitive::kPrimLong: - // Long needs extra temporary to load the register pair. - locations->AddTemp(Location::RequiresFpuRegister()); + // Long needs extra temporary to load from the register pair. + if (!is_zero) { + locations->AddTemp(Location::RequiresFpuRegister()); + } FALLTHROUGH_INTENDED; case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimFloat: case Primitive::kPrimDouble: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(is_zero ? 
Location::RequiresFpuRegister() + : Location::SameAsFirstInput()); + break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -51,46 +61,53 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = instruction->GetLocations(); - XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + __ xorps(dst, dst); + return; + } + switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<Register>()); - __ punpcklbw(reg, reg); - __ punpcklwd(reg, reg); - __ pshufd(reg, reg, Immediate(0)); + __ movd(dst, locations->InAt(0).AsRegister<Register>()); + __ punpcklbw(dst, dst); + __ punpcklwd(dst, dst); + __ pshufd(dst, dst, Immediate(0)); break; case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<Register>()); - __ punpcklwd(reg, reg); - __ pshufd(reg, reg, Immediate(0)); + __ movd(dst, locations->InAt(0).AsRegister<Register>()); + __ punpcklwd(dst, dst); + __ pshufd(dst, dst, Immediate(0)); break; case Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<Register>()); - __ pshufd(reg, reg, Immediate(0)); + __ movd(dst, locations->InAt(0).AsRegister<Register>()); + __ pshufd(dst, dst, Immediate(0)); break; case Primitive::kPrimLong: { XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); DCHECK_EQ(2u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>()); + __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>()); __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); - __ punpckldq(reg, tmp); - __ punpcklqdq(reg, reg); + __ punpckldq(dst, tmp); + __ punpcklqdq(dst, dst); break; } case Primitive::kPrimFloat: DCHECK(locations->InAt(0).Equals(locations->Out())); DCHECK_EQ(4u, instruction->GetVectorLength()); - __ shufps(reg, reg, Immediate(0)); + __ shufps(dst, dst, Immediate(0)); break; case Primitive::kPrimDouble: DCHECK(locations->InAt(0).Equals(locations->Out())); DCHECK_EQ(2u, instruction->GetVectorLength()); - __ shufpd(reg, reg, Immediate(0)); + __ shufpd(dst, dst, Immediate(0)); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -98,20 +115,65 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i } } -void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimLong: + // Long needs extra temporary to store into the register pair. 
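The "shorthand for any type of zero" in VisitVecReplicateScalar above relies on integral 0, +0.0f and +0.0 all having an all-zero bit pattern, so a single xorps produces the replicated zero vector. A standalone illustration of such a bit-pattern test (not ART's IsZeroBitPattern):

#include <cstdint>
#include <cstring>

// Returns true when the float's bit pattern is all zeroes. Note that -0.0f compares
// equal to 0.0f numerically, but its bit pattern is 0x80000000, so it would not
// qualify for the xorps shortcut.
bool HasAllZeroBits(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits == 0u;
}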
+ locations->AddTemp(Location::RequiresFpuRegister()); + FALLTHROUGH_INTENDED; + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: // TODO: up to here, and? + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + case Primitive::kPrimInt: + DCHECK_LE(4u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ movd(locations->Out().AsRegister<Register>(), src); + break; + case Primitive::kPrimLong: { + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movd(locations->Out().AsRegisterPairLow<Register>(), src); + __ pshufd(tmp, src, Immediate(1)); + __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 4u); + DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector unary operations. @@ -135,6 +197,73 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } } +void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + // Long reduction or min/max require a temporary. 
+ if (instruction->GetPackedType() == Primitive::kPrimLong || + instruction->GetKind() == HVecReduce::kMin || + instruction->GetKind() == HVecReduce::kMax) { + instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + } +} + +void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ movaps(dst, src); + __ phaddd(dst, dst); + __ phaddd(dst, dst); + break; + case HVecReduce::kMin: { + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movaps(tmp, src); + __ movaps(dst, src); + __ psrldq(tmp, Immediate(8)); + __ pminsd(dst, tmp); + __ psrldq(tmp, Immediate(4)); + __ pminsd(dst, tmp); + break; + } + case HVecReduce::kMax: { + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movaps(tmp, src); + __ movaps(dst, src); + __ psrldq(tmp, Immediate(8)); + __ pmaxsd(dst, tmp); + __ psrldq(tmp, Immediate(4)); + __ pmaxsd(dst, tmp); + break; + } + } + break; + case Primitive::kPrimLong: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ movaps(tmp, src); + __ movaps(dst, src); + __ punpckhqdq(tmp, tmp); + __ paddq(dst, tmp); + break; + case HVecReduce::kMin: + case HVecReduce::kMax: + LOG(FATAL) << "Unsupported SIMD type"; + } + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); } @@ -819,6 +948,91 @@ void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { } } +void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); + + switch (instruction->GetPackedType()) { + case Primitive::kPrimLong: + // Long needs extra temporary to load from register pairs. + if (!is_zero) { + locations->AddTemp(Location::RequiresFpuRegister()); + } + FALLTHROUGH_INTENDED; + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, is_zero ? 
Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + // Zero out all other elements first. + __ xorps(dst, dst); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + return; + } + + // Set required elements. + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: // TODO: up to here, and? + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movd(dst, locations->InAt(0).AsRegister<Register>()); + break; + case Primitive::kPrimLong: { + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ xorps(tmp, tmp); + __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>()); + __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); + __ punpckldq(dst, tmp); + break; + } + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movss(dst, locations->InAt(1).AsFpuRegister<XmmRegister>()); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { LOG(FATAL) << "No SIMD for " << instr->GetId(); } @@ -866,6 +1080,7 @@ static Address VecAddress(LocationSummary* locations, size_t size, bool is_strin case 8: scale = TIMES_8; break; default: break; } + // Incorporate the string or array offset in the address computation. uint32_t offset = is_string_char_at ? mirror::String::ValueOffset().Uint32Value() : mirror::Array::DataOffset(size).Uint32Value(); @@ -900,7 +1115,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1)); __ j(kNotZero, &not_compressed); // Zero extend 8 compressed bytes into 8 chars. 
- __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true)); + __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt())); __ pxor(tmp, tmp); __ punpcklbw(reg, tmp); __ jmp(&done); diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 246044ebb8..7051ba041f 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -15,7 +15,9 @@ */ #include "code_generator_x86_64.h" + #include "mirror/array-inl.h" +#include "mirror/string.h" namespace art { namespace x86_64 { @@ -25,6 +27,8 @@ namespace x86_64 { void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -32,13 +36,16 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimFloat: case Primitive::kPrimDouble: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(is_zero ? Location::RequiresFpuRegister() + : Location::SameAsFirstInput()); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -48,42 +55,49 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = instruction->GetLocations(); - XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + + // Shorthand for any type of zero. 
+ if (IsZeroBitPattern(instruction->InputAt(0))) { + __ xorps(dst, dst); + return; + } + switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); - __ punpcklbw(reg, reg); - __ punpcklwd(reg, reg); - __ pshufd(reg, reg, Immediate(0)); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); + __ punpcklbw(dst, dst); + __ punpcklwd(dst, dst); + __ pshufd(dst, dst, Immediate(0)); break; case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); - __ punpcklwd(reg, reg); - __ pshufd(reg, reg, Immediate(0)); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); + __ punpcklwd(dst, dst); + __ pshufd(dst, dst, Immediate(0)); break; case Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); - __ pshufd(reg, reg, Immediate(0)); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); + __ pshufd(dst, dst, Immediate(0)); break; case Primitive::kPrimLong: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit - __ punpcklqdq(reg, reg); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit + __ punpcklqdq(dst, dst); break; case Primitive::kPrimFloat: DCHECK(locations->InAt(0).Equals(locations->Out())); DCHECK_EQ(4u, instruction->GetVectorLength()); - __ shufps(reg, reg, Immediate(0)); + __ shufps(dst, dst, Immediate(0)); break; case Primitive::kPrimDouble: DCHECK(locations->InAt(0).Equals(locations->Out())); DCHECK_EQ(2u, instruction->GetVectorLength()); - __ shufpd(reg, reg, Immediate(0)); + __ shufpd(dst, dst, Immediate(0)); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -91,20 +105,57 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar } } -void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = 
locations->InAt(0).AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: // TODO: up to here, and? + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movd(locations->Out().AsRegister<CpuRegister>(), src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movd(locations->Out().AsRegister<CpuRegister>(), src); // is 64-bit + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 4u); + DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector unary operations. @@ -128,6 +179,73 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } } +void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + // Long reduction or min/max require a temporary. + if (instruction->GetPackedType() == Primitive::kPrimLong || + instruction->GetKind() == HVecReduce::kMin || + instruction->GetKind() == HVecReduce::kMax) { + instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + } +} + +void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ movaps(dst, src); + __ phaddd(dst, dst); + __ phaddd(dst, dst); + break; + case HVecReduce::kMin: { + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movaps(tmp, src); + __ movaps(dst, src); + __ psrldq(tmp, Immediate(8)); + __ pminsd(dst, tmp); + __ psrldq(tmp, Immediate(4)); + __ pminsd(dst, tmp); + break; + } + case HVecReduce::kMax: { + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movaps(tmp, src); + __ movaps(dst, src); + __ psrldq(tmp, Immediate(8)); + __ pmaxsd(dst, tmp); + __ psrldq(tmp, Immediate(4)); + __ pmaxsd(dst, tmp); + break; + } + } + break; + case Primitive::kPrimLong: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ movaps(tmp, src); + __ movaps(dst, src); + __ punpckhqdq(tmp, tmp); + __ paddq(dst, tmp); + break; + case HVecReduce::kMin: + case HVecReduce::kMax: + LOG(FATAL) << "Unsupported SIMD type"; + } + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); } @@ -812,6 +930,81 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { } } +void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + + DCHECK_EQ(1u, instruction->InputCount()); // only one 
input currently implemented + + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); + + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + // Zero out all other elements first. + __ xorps(dst, dst); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + return; + } + + // Set required elements. + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: // TODO: up to here, and? + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>()); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { LOG(FATAL) << "No SIMD for " << instr->GetId(); } @@ -859,6 +1052,7 @@ static Address VecAddress(LocationSummary* locations, size_t size, bool is_strin case 8: scale = TIMES_8; break; default: break; } + // Incorporate the string or array offset in the address computation. uint32_t offset = is_string_char_at ? mirror::String::ValueOffset().Uint32Value() : mirror::Array::DataOffset(size).Uint32Value(); @@ -893,7 +1087,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1)); __ j(kNotZero, &not_compressed); // Zero extend 8 compressed bytes into 8 chars. 
- __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true)); + __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt())); __ pxor(tmp, tmp); __ punpcklbw(reg, tmp); __ jmp(&done); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 99b7793c81..512968f01d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1035,6 +1035,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), constant_area_start_(-1), @@ -4652,7 +4653,6 @@ Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) { } void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { - DCHECK(GetCompilerOptions().IsBootImage()); HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); string_patches_.emplace_back(address, load_string->GetDexFile(), @@ -4664,9 +4664,9 @@ Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); - string_patches_.emplace_back( + string_bss_entry_patches_.emplace_back( address, load_string->GetDexFile(), load_string->GetStringIndex().index_); - return &string_patches_.back().label; + return &string_bss_entry_patches_.back().label; } // The label points to the end of the "movl" or another instruction but the literal offset @@ -4691,7 +4691,8 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - string_patches_.size(); + string_patches_.size() + + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, @@ -4702,12 +4703,15 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche } else { DCHECK(boot_image_method_patches_.empty()); DCHECK(boot_image_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_, + linker_patches); DCHECK_EQ(size, linker_patches->size()); } @@ -6219,6 +6223,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ 
-6237,6 +6242,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || + load_kind == HLoadString::LoadKind::kBootImageInternTable || load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6282,14 +6288,21 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); codegen_->RecordBootStringPatch(load); - return; // No dex cache slow path. + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(load->GetString().Get())); DCHECK_NE(address, 0u); __ movl(out, Immediate(address)); - return; // No dex cache slow path. + return; + } + case HLoadString::LoadKind::kBootImageInternTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + Register method_address = locations->InAt(0).AsRegister<Register>(); + __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); + codegen_->RecordBootStringPatch(load); + return; } case HLoadString::LoadKind::kBssEntry: { Register method_address = locations->InAt(0).AsRegister<Register>(); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f48753b614..b32d57a774 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -640,8 +640,10 @@ class CodeGeneratorX86 : public CodeGenerator { ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_; // Type patch locations for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_; - // String patch locations; type depends on configuration (app .bss or boot image). + // String patch locations; type depends on configuration (intern table or boot image PIC). ArenaDeque<X86PcRelativePatchInfo> string_patches_; + // String patch locations for kBssEntry. + ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_; // Patches for string root accesses in JIT compiled code. 
ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 8283887a96..0c3b2ad742 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1089,15 +1089,15 @@ Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) { } void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { - DCHECK(GetCompilerOptions().IsBootImage()); string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); __ Bind(&string_patches_.back().label); } Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); - string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); - return &string_patches_.back().label; + string_bss_entry_patches_.emplace_back( + load_string->GetDexFile(), load_string->GetStringIndex().index_); + return &string_bss_entry_patches_.back().label; } // The label points to the end of the "movl" or another instruction but the literal offset @@ -1122,7 +1122,8 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - string_patches_.size(); + string_patches_.size() + + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, @@ -1133,12 +1134,15 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat } else { DCHECK(boot_image_method_patches_.empty()); DCHECK(boot_image_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_, + linker_patches); DCHECK_EQ(size, linker_patches->size()); } @@ -1230,6 +1234,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { @@ -5621,6 +5626,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5678,14 +5684,20 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA DCHECK(codegen_->GetCompilerOptions().IsBootImage()); __ 
leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); codegen_->RecordBootStringPatch(load); - return; // No dex cache slow path. + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(load->GetString().Get())); DCHECK_NE(address, 0u); __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. - return; // No dex cache slow path. + return; + } + case HLoadString::LoadKind::kBootImageInternTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + codegen_->RecordBootStringPatch(load); + return; } case HLoadString::LoadKind::kBssEntry: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 33c64290d4..f5fa86bf23 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -611,8 +611,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; // Type patch locations for kBssEntry. ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; - // String patch locations; type depends on configuration (app .bss or boot image). + // String patch locations; type depends on configuration (intern table or boot image PIC). ArenaDeque<PatchInfo<Label>> string_patches_; + // String patch locations for kBssEntry. + ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; // Patches for string literals in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index cada2e679b..aa4f5da3f0 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -79,6 +79,21 @@ class CodegenTargetConfig { }; #ifdef ART_ENABLE_CODEGEN_arm +// Special ARM code generator for codegen testing in a limited code +// generation environment (i.e. with no runtime support). +// +// Note: If we want to exercise certains HIR constructions +// (e.g. reference field load in Baker read barrier configuration) in +// codegen tests in the future, we should also: +// - save the Thread Register (R9) and possibly the Marking Register +// (R8) before entering the generated function (both registers are +// callee-save in AAPCS); +// - set these registers to meaningful values before or upon entering +// the generated function (so that generated code using them is +// correct); +// - restore their original values before leaving the generated +// function. + // Provide our own codegen, that ensures the C calling conventions // are preserved. Currently, ART and C do not match as R4 is caller-save // in ART, and callee-save in C. Alternatively, we could use or write @@ -100,6 +115,50 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { blocked_core_registers_[arm::R6] = false; blocked_core_registers_[arm::R7] = false; } + + void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED, + Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE { + // When turned on, the marking register checks in + // CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expects the + // Thread Register and the Marking Register to be set to + // meaningful values. 
This is not the case in codegen testing, so + // just disable them entirely here (by doing nothing in this + // method). + } +}; +#endif + +#ifdef ART_ENABLE_CODEGEN_arm64 +// Special ARM64 code generator for codegen testing in a limited code +// generation environment (i.e. with no runtime support). +// +// Note: If we want to exercise certains HIR constructions +// (e.g. reference field load in Baker read barrier configuration) in +// codegen tests in the future, we should also: +// - save the Thread Register (X19) and possibly the Marking Register +// (X20) before entering the generated function (both registers are +// callee-save in AAPCS64); +// - set these registers to meaningful values before or upon entering +// the generated function (so that generated code using them is +// correct); +// - restore their original values before leaving the generated +// function. +class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 { + public: + TestCodeGeneratorARM64(HGraph* graph, + const Arm64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) + : arm64::CodeGeneratorARM64(graph, isa_features, compiler_options) {} + + void MaybeGenerateMarkingRegisterCheck(int codem ATTRIBUTE_UNUSED, + Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE { + // When turned on, the marking register checks in + // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the + // Thread Register and the Marking Register to be set to + // meaningful values. This is not the case in codegen testing, so + // just disable them entirely here (by doing nothing in this + // method). + } }; #endif @@ -263,7 +322,8 @@ static void RunCode(CodegenTargetConfig target_config, bool has_result, Expected expected) { CompilerOptions compiler_options; - std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options)); + std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, + compiler_options)); RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected); } @@ -280,9 +340,8 @@ CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& c CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) { std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( Arm64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph, - *features_arm64.get(), - compiler_options); + return new (graph->GetArena()) + TestCodeGeneratorARM64(graph, *features_arm64.get(), compiler_options); } #endif diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc index 0d4e1c5c97..fa3c4dfba8 100644 --- a/compiler/optimizing/emit_swap_mips_test.cc +++ b/compiler/optimizing/emit_swap_mips_test.cc @@ -91,7 +91,9 @@ class EmitSwapMipsTest : public ::testing::Test { return nullptr; } - void DriverWrapper(HParallelMove* move, std::string assembly_text, std::string test_name) { + void DriverWrapper(HParallelMove* move, + const std::string& assembly_text, + const std::string& test_name) { codegen_->GetMoveResolver()->EmitNativeCode(move); assembler_ = codegen_->GetAssembler(); assembler_->FinalizeCode(); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index a20ec3c0db..3035e4657d 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -501,6 +501,20 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { 
StartAttributeStream("field_type") << iset->GetFieldType(); } + void VisitStaticFieldGet(HStaticFieldGet* sget) OVERRIDE { + StartAttributeStream("field_name") << + sget->GetFieldInfo().GetDexFile().PrettyField(sget->GetFieldInfo().GetFieldIndex(), + /* with type */ false); + StartAttributeStream("field_type") << sget->GetFieldType(); + } + + void VisitStaticFieldSet(HStaticFieldSet* sset) OVERRIDE { + StartAttributeStream("field_name") << + sset->GetFieldInfo().GetDexFile().PrettyField(sset->GetFieldInfo().GetFieldIndex(), + /* with type */ false); + StartAttributeStream("field_type") << sset->GetFieldType(); + } + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE { StartAttributeStream("field_type") << field_access->GetFieldType(); } diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index f35aace3a9..089340e715 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -87,8 +87,10 @@ static bool IsGEZero(HInstruction* instruction) { IsGEZero(instruction->InputAt(1)); case Intrinsics::kMathAbsInt: case Intrinsics::kMathAbsLong: - // Instruction ABS(x) is >= 0. - return true; + // Instruction ABS(>=0) is >= 0. + // NOTE: ABS(minint) = minint prevents assuming + // >= 0 without looking at the argument. + return IsGEZero(instruction->InputAt(0)); default: break; } diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index ab1772bf15..0b980f596a 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -151,6 +151,16 @@ class InductionVarRange { } /** + * Checks if the given phi instruction has been classified as anything by + * induction variable analysis. Returns false for anything that cannot be + * classified statically, such as reductions or other complex cycles. + */ + bool IsClassified(HPhi* phi) const { + HLoopInformation* lp = phi->GetBlock()->GetLoopInformation(); // closest enveloping loop + return (lp != nullptr) && (induction_analysis_->LookupInfo(lp, phi) != nullptr); + } + + /** * Checks if header logic of a loop terminates. Sets trip-count tc if known. 
*/ bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 5c79511bab..f2a829fa56 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -59,6 +59,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { bool TryDeMorganNegationFactoring(HBinaryOperation* op); bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction); bool TrySubtractionChainSimplification(HBinaryOperation* instruction); + bool TryCombineVecMultiplyAccumulate(HVecMul* mul); void VisitShift(HBinaryOperation* shift); @@ -98,6 +99,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; void VisitInvoke(HInvoke* invoke) OVERRIDE; void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; + void VisitVecMul(HVecMul* instruction) OVERRIDE; bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; @@ -243,6 +245,84 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation return false; } +bool InstructionSimplifierVisitor::TryCombineVecMultiplyAccumulate(HVecMul* mul) { + Primitive::Type type = mul->GetPackedType(); + InstructionSet isa = codegen_->GetInstructionSet(); + switch (isa) { + case kArm64: + if (!(type == Primitive::kPrimByte || + type == Primitive::kPrimChar || + type == Primitive::kPrimShort || + type == Primitive::kPrimInt)) { + return false; + } + break; + case kMips: + case kMips64: + if (!(type == Primitive::kPrimByte || + type == Primitive::kPrimChar || + type == Primitive::kPrimShort || + type == Primitive::kPrimInt || + type == Primitive::kPrimLong)) { + return false; + } + break; + default: + return false; + } + + ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); + + if (mul->HasOnlyOneNonEnvironmentUse()) { + HInstruction* use = mul->GetUses().front().GetUser(); + if (use->IsVecAdd() || use->IsVecSub()) { + // Replace code looking like + // VECMUL tmp, x, y + // VECADD/SUB dst, acc, tmp + // with + // VECMULACC dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HVecBinaryOperation* binop = use->AsVecBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // This is always true since the `HVecMul` has only one use (which is checked above). + DCHECK_NE(binop_left, binop_right); + if (binop_right == mul) { + accumulator = binop_left; + } else if (use->IsVecAdd()) { + DCHECK_EQ(binop_left, mul); + accumulator = binop_right; + } + + HInstruction::InstructionKind kind = + use->IsVecAdd() ? 
HInstruction::kAdd : HInstruction::kSub; + if (accumulator != nullptr) { + HVecMultiplyAccumulate* mulacc = + new (arena) HVecMultiplyAccumulate(arena, + kind, + accumulator, + mul->GetLeft(), + mul->GetRight(), + binop->GetPackedType(), + binop->GetVectorLength()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!mul->HasUses()); + mul->GetBlock()->RemoveInstruction(mul); + return true; + } + } + } + + return false; +} + void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); HInstruction* shift_amount = instruction->GetRight(); @@ -2301,4 +2381,10 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( return true; } +void InstructionSimplifierVisitor::VisitVecMul(HVecMul* instruction) { + if (TryCombineVecMultiplyAccumulate(instruction)) { + RecordSimplification(); + } +} + } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 311be1fb49..7c9bfb11b2 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -210,12 +210,6 @@ void InstructionSimplifierArm64Visitor::VisitXor(HXor* instruction) { } } -void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) { - if (TryCombineVecMultiplyAccumulate(instruction, kArm64)) { - RecordSimplification(); - } -} - void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) { if (!instruction->IsStringCharAt() && TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) { diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 8596f6ad40..4f16fc383d 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -74,7 +74,6 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; void VisitUShr(HUShr* instruction) OVERRIDE; void VisitXor(HXor* instruction) OVERRIDE; - void VisitVecMul(HVecMul* instruction) OVERRIDE; void VisitVecLoad(HVecLoad* instruction) OVERRIDE; void VisitVecStore(HVecStore* instruction) OVERRIDE; diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index d1bc4dadeb..7a759b9118 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -281,73 +281,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access, return true; } -bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) { - Primitive::Type type = mul->GetPackedType(); - switch (isa) { - case kArm64: - if (!(type == Primitive::kPrimByte || - type == Primitive::kPrimChar || - type == Primitive::kPrimShort || - type == Primitive::kPrimInt)) { - return false; - } - break; - default: - return false; - } - - ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); - - if (mul->HasOnlyOneNonEnvironmentUse()) { - HInstruction* use = mul->GetUses().front().GetUser(); - if (use->IsVecAdd() || use->IsVecSub()) { - // Replace code looking like - // VECMUL tmp, x, y - // VECADD/SUB dst, acc, tmp - // with - // VECMULACC dst, acc, x, y - // Note that we do not want to (unconditionally) perform the merge when the - // multiplication has multiple uses and it can be merged in all of 
them. - // Multiple uses could happen on the same control-flow path, and we would - // then increase the amount of work. In the future we could try to evaluate - // whether all uses are on different control-flow paths (using dominance and - // reverse-dominance information) and only perform the merge when they are. - HInstruction* accumulator = nullptr; - HVecBinaryOperation* binop = use->AsVecBinaryOperation(); - HInstruction* binop_left = binop->GetLeft(); - HInstruction* binop_right = binop->GetRight(); - // This is always true since the `HVecMul` has only one use (which is checked above). - DCHECK_NE(binop_left, binop_right); - if (binop_right == mul) { - accumulator = binop_left; - } else if (use->IsVecAdd()) { - DCHECK_EQ(binop_left, mul); - accumulator = binop_right; - } - - HInstruction::InstructionKind kind = - use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub; - if (accumulator != nullptr) { - HVecMultiplyAccumulate* mulacc = - new (arena) HVecMultiplyAccumulate(arena, - kind, - accumulator, - mul->GetLeft(), - mul->GetRight(), - binop->GetPackedType(), - binop->GetVectorLength()); - - binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); - DCHECK(!mul->HasUses()); - mul->GetBlock()->RemoveInstruction(mul); - return true; - } - } - } - - return false; -} - bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) { if (index->IsConstant()) { // If index is constant the whole address calculation often can be done by LDR/STR themselves. diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index 371619fa2e..31e23833b1 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -58,7 +58,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access, HInstruction* index, size_t data_offset); -bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa); bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index); } // namespace art diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 5691dd0d4a..1ed1b7537e 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -470,6 +470,68 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) { GenBitCount(invoke, Primitive::kPrimInt, GetVIXLAssembler()); } +static void GenHighestOneBit(HInvoke* invoke, Primitive::Type type, MacroAssembler* masm) { + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + + UseScratchRegisterScope temps(masm); + + Register src = InputRegisterAt(invoke, 0); + Register dst = RegisterFrom(invoke->GetLocations()->Out(), type); + Register temp = (type == Primitive::kPrimLong) ? temps.AcquireX() : temps.AcquireW(); + size_t high_bit = (type == Primitive::kPrimLong) ? 63u : 31u; + size_t clz_high_bit = (type == Primitive::kPrimLong) ? 6u : 5u; + + __ Clz(temp, src); + __ Mov(dst, UINT64_C(1) << high_bit); // MOV (bitmask immediate) + __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit)); // Clear dst if src was 0. 
+ __ Lsr(dst, dst, temp); +} + +void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) { + GenHighestOneBit(invoke, Primitive::kPrimInt, GetVIXLAssembler()); +} + +void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) { + GenHighestOneBit(invoke, Primitive::kPrimLong, GetVIXLAssembler()); +} + +static void GenLowestOneBit(HInvoke* invoke, Primitive::Type type, MacroAssembler* masm) { + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + + UseScratchRegisterScope temps(masm); + + Register src = InputRegisterAt(invoke, 0); + Register dst = RegisterFrom(invoke->GetLocations()->Out(), type); + Register temp = (type == Primitive::kPrimLong) ? temps.AcquireX() : temps.AcquireW(); + + __ Neg(temp, src); + __ And(dst, temp, src); +} + +void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(invoke, Primitive::kPrimInt, GetVIXLAssembler()); +} + +void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(invoke, Primitive::kPrimLong, GetVIXLAssembler()); +} + static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, @@ -504,14 +566,6 @@ void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) { MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); } -static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - static void GenAbsInteger(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { @@ -526,7 +580,7 @@ static void GenAbsInteger(LocationSummary* locations, } void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToInt(arena_, invoke); + CreateIntToIntLocations(arena_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { @@ -534,7 +588,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToInt(arena_, invoke); + CreateIntToIntLocations(arena_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) { @@ -1579,12 +1633,13 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { } // Assertions that must hold in order to compare strings 8 bytes at a time. + // Ok to do this because strings are zero-padded to kObjectAlignment. DCHECK_ALIGNED(value_offset, 8); static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); if (const_string != nullptr && - const_string_length < (is_compressed ? kShortConstStringEqualsCutoffInBytes - : kShortConstStringEqualsCutoffInBytes / 2u)) { + const_string_length <= (is_compressed ? 
kShortConstStringEqualsCutoffInBytes + : kShortConstStringEqualsCutoffInBytes / 2u)) { // Load and compare the contents. Though we know the contents of the short const string // at compile time, materializing constants may be more code than loading from memory. int32_t offset = value_offset; @@ -1592,7 +1647,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u); temp = temp.X(); temp1 = temp1.X(); - while (remaining_bytes > 8u) { + while (remaining_bytes > sizeof(uint64_t)) { Register temp2 = XRegisterFrom(locations->GetTemp(0)); __ Ldp(temp, temp1, MemOperand(str.X(), offset)); __ Ldp(temp2, out, MemOperand(arg.X(), offset)); @@ -1628,7 +1683,6 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { temp1 = temp1.X(); Register temp2 = XRegisterFrom(locations->GetTemp(0)); // Loop to compare strings 8 bytes at a time starting at the front of the string. - // Ok to do this because strings are zero-padded to kObjectAlignment. __ Bind(&loop); __ Ldr(out, MemOperand(str.X(), temp1)); __ Ldr(temp2, MemOperand(arg.X(), temp1)); @@ -2993,10 +3047,6 @@ void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) { } UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) -UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit) -UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit) -UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit) -UNIMPLEMENTED_INTRINSIC(ARM64, LongLowestOneBit) UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 8b4044d69b..d2dc88a73b 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -331,6 +331,14 @@ static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } +static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, @@ -375,11 +383,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* in } void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + CreateLongToLongLocationsWithOverlap(arena_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { @@ -417,11 +421,7 @@ static void GenNumberOfTrailingZeros(HInvoke* invoke, } void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + 
CreateIntToIntLocations(arena_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { @@ -429,11 +429,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* i } void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + CreateLongToLongLocationsWithOverlap(arena_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -1713,6 +1709,22 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { } } +// The cut off for unrolling the loop in String.equals() intrinsic for const strings. +// The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression +// and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4 +// instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE). +// Allow up to 12 instructions (32 bytes) for the unrolled loop. +constexpr size_t kShortConstStringEqualsCutoffInBytes = 16; + +static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) { + if (candidate->IsLoadString()) { + HLoadString* load_string = candidate->AsLoadString(); + const DexFile& dex_file = load_string->GetDexFile(); + return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length); + } + return nullptr; +} + void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kNoCall, @@ -1720,12 +1732,29 @@ void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) { InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); + // Temporary registers to store lengths of strings and for calculations. // Using instruction cbz requires a low register, so explicitly set a temp to be R0. locations->AddTemp(LocationFrom(r0)); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); + // For the generic implementation and for long const strings we need an extra temporary. + // We do not need it for short const strings, up to 4 bytes, see code generation below. + uint32_t const_string_length = 0u; + const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length); + if (const_string == nullptr) { + const_string = GetConstString(invoke->InputAt(1), &const_string_length); + } + bool is_compressed = + mirror::kUseStringCompression && + const_string != nullptr && + mirror::String::DexFileStringAllASCII(const_string, const_string_length); + if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) { + locations->AddTemp(Location::RequiresRegister()); + } + + // TODO: If the String.equals() is used only for an immediately following HIf, we can + // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks. + // Then we shall need an extra temporary register instead of the output register. 
locations->SetOut(Location::RequiresRegister()); } @@ -1738,8 +1767,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { vixl32::Register out = OutputRegister(invoke); vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); - vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); - vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); vixl32::Label loop; vixl32::Label end; @@ -1771,52 +1798,109 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. __ Ldr(temp, MemOperand(str, class_offset)); - __ Ldr(temp1, MemOperand(arg, class_offset)); - __ Cmp(temp, temp1); + __ Ldr(out, MemOperand(arg, class_offset)); + __ Cmp(temp, out); __ B(ne, &return_false, /* far_target */ false); } - // Load `count` fields of this and argument strings. - __ Ldr(temp, MemOperand(str, count_offset)); - __ Ldr(temp1, MemOperand(arg, count_offset)); - // Check if `count` fields are equal, return false if they're not. - // Also compares the compression style, if differs return false. - __ Cmp(temp, temp1); - __ B(ne, &return_false, /* far_target */ false); - // Return true if both strings are empty. Even with string compression `count == 0` means empty. - static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, - "Expecting 0=compressed, 1=uncompressed"); - __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false); + // Check if one of the inputs is a const string. Do not special-case both strings + // being const, such cases should be handled by constant folding if needed. + uint32_t const_string_length = 0u; + const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length); + if (const_string == nullptr) { + const_string = GetConstString(invoke->InputAt(1), &const_string_length); + if (const_string != nullptr) { + std::swap(str, arg); // Make sure the const string is in `str`. + } + } + bool is_compressed = + mirror::kUseStringCompression && + const_string != nullptr && + mirror::String::DexFileStringAllASCII(const_string, const_string_length); + + if (const_string != nullptr) { + // Load `count` field of the argument string and check if it matches the const string. + // Also compares the compression style, if differs return false. + __ Ldr(temp, MemOperand(arg, count_offset)); + __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed))); + __ B(ne, &return_false, /* far_target */ false); + } else { + // Load `count` fields of this and argument strings. + __ Ldr(temp, MemOperand(str, count_offset)); + __ Ldr(out, MemOperand(arg, count_offset)); + // Check if `count` fields are equal, return false if they're not. + // Also compares the compression style, if differs return false. + __ Cmp(temp, out); + __ B(ne, &return_false, /* far_target */ false); + } // Assertions that must hold in order to compare strings 4 bytes at a time. + // Ok to do this because strings are zero-padded to kObjectAlignment. DCHECK_ALIGNED(value_offset, 4); static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare."); - if (mirror::kUseStringCompression) { - // For string compression, calculate the number of bytes to compare (not chars). - // This could in theory exceed INT32_MAX, so treat temp as unsigned. 
- __ Lsrs(temp, temp, 1u); // Extract length and check compression flag. - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cs); // If uncompressed, - __ add(cs, temp, temp, temp); // double the byte count. - } + if (const_string != nullptr && + const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes + : kShortConstStringEqualsCutoffInBytes / 2u)) { + // Load and compare the contents. Though we know the contents of the short const string + // at compile time, materializing constants may be more code than loading from memory. + int32_t offset = value_offset; + size_t remaining_bytes = + RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u); + while (remaining_bytes > sizeof(uint32_t)) { + vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); + UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler()); + vixl32::Register temp2 = scratch_scope.Acquire(); + __ Ldrd(temp, temp1, MemOperand(str, offset)); + __ Ldrd(temp2, out, MemOperand(arg, offset)); + __ Cmp(temp, temp2); + __ B(ne, &return_false, /* far_target */ false); + __ Cmp(temp1, out); + __ B(ne, &return_false, /* far_target */ false); + offset += 2u * sizeof(uint32_t); + remaining_bytes -= 2u * sizeof(uint32_t); + } + if (remaining_bytes != 0u) { + __ Ldr(temp, MemOperand(str, offset)); + __ Ldr(out, MemOperand(arg, offset)); + __ Cmp(temp, out); + __ B(ne, &return_false, /* far_target */ false); + } + } else { + // Return true if both strings are empty. Even with string compression `count == 0` means empty. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false); - // Store offset of string value in preparation for comparison loop. - __ Mov(temp1, value_offset); + if (mirror::kUseStringCompression) { + // For string compression, calculate the number of bytes to compare (not chars). + // This could in theory exceed INT32_MAX, so treat temp as unsigned. + __ Lsrs(temp, temp, 1u); // Extract length and check compression flag. + ExactAssemblyScope aas(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cs); // If uncompressed, + __ add(cs, temp, temp, temp); // double the byte count. + } - // Loop to compare strings 4 bytes at a time starting at the front of the string. - // Ok to do this because strings are zero-padded to kObjectAlignment. - __ Bind(&loop); - __ Ldr(out, MemOperand(str, temp1)); - __ Ldr(temp2, MemOperand(arg, temp1)); - __ Add(temp1, temp1, Operand::From(sizeof(uint32_t))); - __ Cmp(out, temp2); - __ B(ne, &return_false, /* far_target */ false); - // With string compression, we have compared 4 bytes, otherwise 2 chars. - __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2); - __ B(hi, &loop, /* far_target */ false); + vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); + UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler()); + vixl32::Register temp2 = scratch_scope.Acquire(); + + // Store offset of string value in preparation for comparison loop. + __ Mov(temp1, value_offset); + + // Loop to compare strings 4 bytes at a time starting at the front of the string.
+ __ Bind(&loop); + __ Ldr(out, MemOperand(str, temp1)); + __ Ldr(temp2, MemOperand(arg, temp1)); + __ Add(temp1, temp1, Operand::From(sizeof(uint32_t))); + __ Cmp(out, temp2); + __ B(ne, &return_false, /* far_target */ false); + // With string compression, we have compared 4 bytes, otherwise 2 chars. + __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2); + __ B(hi, &loop, /* far_target */ false); + } // Return true and exit the function. // If loop does not result in returning false, we return true. @@ -2723,11 +2807,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + CreateLongToLongLocationsWithOverlap(arena_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) { @@ -2753,11 +2833,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + CreateLongToLongLocationsWithOverlap(arena_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) { @@ -2827,6 +2903,137 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) { GenBitCount(invoke, Primitive::kPrimLong, GetAssembler()); } +static void GenHighestOneBit(HInvoke* invoke, + Primitive::Type type, + CodeGeneratorARMVIXL* codegen) { + DCHECK(Primitive::IsIntOrLongType(type)); + + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + if (type == Primitive::kPrimLong) { + LocationSummary* locations = invoke->GetLocations(); + Location in = locations->InAt(0); + Location out = locations->Out(); + + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + vixl32::Register out_reg_lo = LowRegisterFrom(out); + vixl32::Register out_reg_hi = HighRegisterFrom(out); + + __ Mov(temp, 0x80000000); // Modified immediate. + __ Clz(out_reg_lo, in_reg_lo); + __ Clz(out_reg_hi, in_reg_hi); + __ Lsr(out_reg_lo, temp, out_reg_lo); + __ Lsrs(out_reg_hi, temp, out_reg_hi); + + // Discard result for lowest 32 bits if highest 32 bits are not zero. + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. If output is in a high register, then we generate + // 4 more bytes of code to avoid a branch. + Operand mov_src(0); + if (!out_reg_lo.IsLow()) { + __ Mov(LeaveFlags, temp, 0); + mov_src = Operand(temp); + } + ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + __ it(ne); + __ mov(ne, out_reg_lo, mov_src); + } else { + vixl32::Register out = OutputRegister(invoke); + vixl32::Register in = InputRegisterAt(invoke, 0); + + __ Mov(temp, 0x80000000); // Modified immediate. 
+ __ Clz(out, in); + __ Lsr(out, temp, out); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) { + GenHighestOneBit(invoke, Primitive::kPrimInt, codegen_); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) { + CreateLongToLongLocationsWithOverlap(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) { + GenHighestOneBit(invoke, Primitive::kPrimLong, codegen_); +} + +static void GenLowestOneBit(HInvoke* invoke, + Primitive::Type type, + CodeGeneratorARMVIXL* codegen) { + DCHECK(Primitive::IsIntOrLongType(type)); + + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + if (type == Primitive::kPrimLong) { + LocationSummary* locations = invoke->GetLocations(); + Location in = locations->InAt(0); + Location out = locations->Out(); + + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + vixl32::Register out_reg_lo = LowRegisterFrom(out); + vixl32::Register out_reg_hi = HighRegisterFrom(out); + + __ Rsb(out_reg_hi, in_reg_hi, 0); + __ Rsb(out_reg_lo, in_reg_lo, 0); + __ And(out_reg_hi, out_reg_hi, in_reg_hi); + // The result of this operation is 0 iff in_reg_lo is 0 + __ Ands(out_reg_lo, out_reg_lo, in_reg_lo); + + // Discard result for highest 32 bits if lowest 32 bits are not zero. + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. If output is in a high register, then we generate + // 4 more bytes of code to avoid a branch. + Operand mov_src(0); + if (!out_reg_lo.IsLow()) { + __ Mov(LeaveFlags, temp, 0); + mov_src = Operand(temp); + } + ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + __ it(ne); + __ mov(ne, out_reg_hi, mov_src); + } else { + vixl32::Register out = OutputRegister(invoke); + vixl32::Register in = InputRegisterAt(invoke, 0); + + __ Rsb(temp, in, 0); + __ And(out, temp, in); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(invoke, Primitive::kPrimInt, codegen_); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) { + CreateLongToLongLocationsWithOverlap(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(invoke, Primitive::kPrimLong, codegen_); +} + void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kNoCall, @@ -3124,10 +3331,6 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. 
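For reference, the scalar semantics that the new GenHighestOneBit()/GenLowestOneBit() code above implements — a sketch of mine using compiler builtins, not ART code. The generated ARM code gets the zero case for free because a hardware LSR by 32 produces 0, whereas C++ needs an explicit guard.

#include <cstdint>

// Same results as Java's Integer.highestOneBit / Integer.lowestOneBit.
uint32_t HighestOneBit(uint32_t x) {
  // CLZ + LSR of 0x80000000, as in the intrinsic; __builtin_clz(0) is undefined
  // in C++, so guard the zero case explicitly here.
  return x == 0u ? 0u : 0x80000000u >> __builtin_clz(x);
}

uint32_t LowestOneBit(uint32_t x) {
  // RSB + AND in the intrinsic: x & -x isolates the least significant set bit.
  return x & (0u - x);
}

For the 64-bit variants the patch applies the same idea to each 32-bit half and then uses a predicated MOV to clear the half that cannot contain the answer.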
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit) UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 4cea6dfdfb..2669d97d82 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -22,6 +22,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" #include "mirror/array-inl.h" +#include "mirror/object_array-inl.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index d785567e0f..74be954a75 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -22,6 +22,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" #include "mirror/array-inl.h" +#include "mirror/object_array-inl.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h index 86fb8e0165..02bc254729 100644 --- a/compiler/optimizing/load_store_analysis.h +++ b/compiler/optimizing/load_store_analysis.h @@ -461,28 +461,15 @@ class HeapLocationCollector : public HGraphVisitor { has_heap_stores_ = true; } - void VisitNewInstance(HNewInstance* new_instance) OVERRIDE { - // Any references appearing in the ref_info_array_ so far cannot alias with new_instance. - CreateReferenceInfoForReferenceType(new_instance); - } - - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitParameterValue(HParameterValue* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitSelect(HSelect* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) OVERRIDE { + // Any new-instance or new-array cannot alias with references that + // pre-exist the new-instance/new-array. We append entries into + // ref_info_array_ which keeps track of the order of creation + // of reference values since we visit the blocks in reverse post order. + // + // By default, VisitXXX() (including VisitPhi()) calls VisitInstruction(), + // unless VisitXXX() is overridden. VisitInstanceFieldGet() etc. above + // also call CreateReferenceInfoForReferenceType() explicitly. CreateReferenceInfoForReferenceType(instruction); } diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 0ef7dcdb59..a249cacc93 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -256,6 +256,48 @@ static bool IsAddConst(HInstruction* instruction, return false; } +// Detect reductions of the following forms, +// under assumption phi has only *one* use: +// x = x_phi + .. +// x = x_phi - .. 
+// x = max(x_phi, ..) +// x = min(x_phi, ..) +static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { + if (reduction->IsAdd()) { + return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi; + } else if (reduction->IsSub()) { + return reduction->InputAt(0) == phi; + } else if (reduction->IsInvokeStaticOrDirect()) { + switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) { + case Intrinsics::kMathMinIntInt: + case Intrinsics::kMathMinLongLong: + case Intrinsics::kMathMinFloatFloat: + case Intrinsics::kMathMinDoubleDouble: + case Intrinsics::kMathMaxIntInt: + case Intrinsics::kMathMaxLongLong: + case Intrinsics::kMathMaxFloatFloat: + case Intrinsics::kMathMaxDoubleDouble: + return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi; + default: + return false; + } + } + return false; +} + +// Translates operation to reduction kind. +static HVecReduce::ReductionKind GetReductionKind(HInstruction* reduction) { + if (reduction->IsVecAdd() || reduction->IsVecSub()) { + return HVecReduce::kSum; + } else if (reduction->IsVecMin()) { + return HVecReduce::kMin; + } else if (reduction->IsVecMax()) { + return HVecReduce::kMax; + } + LOG(FATAL) << "Unsupported SIMD reduction"; + UNREACHABLE(); +} + // Test vector restrictions. static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) { return (restrictions & tested) != 0; @@ -280,18 +322,18 @@ static bool CheckInductionSetFullyRemoved(ArenaSet<HInstruction*>* iset) { return false; } } - return true; } // -// Class methods. +// Public methods. // HLoopOptimization::HLoopOptimization(HGraph* graph, CompilerDriver* compiler_driver, - HInductionVarAnalysis* induction_analysis) - : HOptimization(graph, kLoopOptimizationPassName), + HInductionVarAnalysis* induction_analysis, + OptimizingCompilerStats* stats) + : HOptimization(graph, kLoopOptimizationPassName, stats), compiler_driver_(compiler_driver), induction_range_(induction_analysis), loop_allocator_(nullptr), @@ -299,14 +341,15 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, top_loop_(nullptr), last_loop_(nullptr), iset_(nullptr), - induction_simplication_count_(0), + reductions_(nullptr), simplified_(false), vector_length_(0), vector_refs_(nullptr), vector_peeling_candidate_(nullptr), vector_runtime_test_a_(nullptr), vector_runtime_test_b_(nullptr), - vector_map_(nullptr) { + vector_map_(nullptr), + vector_permanent_map_(nullptr) { } void HLoopOptimization::Run() { @@ -333,6 +376,10 @@ void HLoopOptimization::Run() { last_loop_ = top_loop_ = nullptr; } +// +// Loop setup and traversal. +// + void HLoopOptimization::LocalRun() { // Build the linear order using the phase-local allocator. This step enables building // a loop hierarchy that properly reflects the outer-inner and previous-next relation. @@ -351,19 +398,27 @@ void HLoopOptimization::LocalRun() { // should use the global allocator. if (top_loop_ != nullptr) { ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ArenaSafeMap<HInstruction*, HInstruction*> reds( + std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); ArenaSafeMap<HInstruction*, HInstruction*> map( std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ArenaSafeMap<HInstruction*, HInstruction*> perm( + std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); // Attach. 
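To make the new reduction bookkeeping concrete, here is the kind of source loop (an illustration of mine, not taken from the patch) whose phi cycle HasReductionFormat() and TrySetPhiReduction() are meant to accept, and which can now be vectorized with an HVecReduce at the loop exit.

#include <cstdint>

int32_t SumArray(const int32_t* a, int32_t n) {
  int32_t sum = 0;            // initial value, recorded as the reduction's feed value
  for (int32_t i = 0; i < n; ++i) {
    sum = sum + a[i];         // x = x_phi + .. ; the phi's only use inside the loop
  }
  return sum;                 // outside use, satisfied via ReduceAndExtractIfNeeded()
}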
iset_ = &iset; + reductions_ = &reds; vector_refs_ = &refs; vector_map_ = ↦ + vector_permanent_map_ = &perm; // Traverse. TraverseLoopsInnerToOuter(top_loop_); // Detach. iset_ = nullptr; + reductions_ = nullptr; vector_refs_ = nullptr; vector_map_ = nullptr; + vector_permanent_map_ = nullptr; } } @@ -414,16 +469,12 @@ void HLoopOptimization::RemoveLoop(LoopNode* node) { } } -void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { +bool HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { + bool changed = false; for ( ; node != nullptr; node = node->next) { - // Visit inner loops first. - uint32_t current_induction_simplification_count = induction_simplication_count_; - if (node->inner != nullptr) { - TraverseLoopsInnerToOuter(node->inner); - } - // Recompute induction information of this loop if the induction - // of any inner loop has been simplified. - if (current_induction_simplification_count != induction_simplication_count_) { + // Visit inner loops first. Recompute induction information for this + // loop if the induction of any inner loop has changed. + if (TraverseLoopsInnerToOuter(node->inner)) { induction_range_.ReVisit(node->loop_info); } // Repeat simplifications in the loop-body until no more changes occur. @@ -433,12 +484,14 @@ void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { simplified_ = false; SimplifyInduction(node); SimplifyBlocks(node); + changed = simplified_ || changed; } while (simplified_); // Optimize inner loop. if (node->inner == nullptr) { - OptimizeInnerLoop(node); + changed = OptimizeInnerLoop(node) || changed; } } + return changed; } // @@ -455,20 +508,18 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) { // for (int i = 0; i < 10; i++, k++) { .... no k .... } return k; for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); - iset_->clear(); // prepare phi induction if (TrySetPhiInduction(phi, /*restrict_uses*/ true) && TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) { // Note that it's ok to have replaced uses after the loop with the last value, without // being able to remove the cycle. Environment uses (which are the reason we may not be - // able to remove the cycle) within the loop will still hold the right value. + // able to remove the cycle) within the loop will still hold the right value. We must + // have tried first, however, to replace outside uses. if (CanRemoveCycle()) { + simplified_ = true; for (HInstruction* i : *iset_) { RemoveFromCycle(i); } - - // Check that there are no records of the deleted instructions. DCHECK(CheckInductionSetFullyRemoved(iset_)); - simplified_ = true; } } } @@ -511,21 +562,20 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) { } } -void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { +bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); // Ensure loop header logic is finite. int64_t trip_count = 0; if (!induction_range_.IsFinite(node->loop_info, &trip_count)) { - return; + return false; } - // Ensure there is only a single loop-body (besides the header). 
HBasicBlock* body = nullptr; for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { if (it.Current() != header) { if (body != nullptr) { - return; + return false; } body = it.Current(); } @@ -533,27 +583,27 @@ void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { CHECK(body != nullptr); // Ensure there is only a single exit point. if (header->GetSuccessors().size() != 2) { - return; + return false; } HBasicBlock* exit = (header->GetSuccessors()[0] == body) ? header->GetSuccessors()[1] : header->GetSuccessors()[0]; // Ensure exit can only be reached by exiting loop. if (exit->GetPredecessors().size() != 1) { - return; + return false; } // Detect either an empty loop (no side effects other than plain iteration) or // a trivial loop (just iterating once). Replace subsequent index uses, if any, // with the last value and remove the loop, possibly after unrolling its body. - HInstruction* phi = header->GetFirstPhi(); - iset_->clear(); // prepare phi induction - if (TrySetSimpleLoopHeader(header)) { + HPhi* main_phi = nullptr; + if (TrySetSimpleLoopHeader(header, &main_phi)) { bool is_empty = IsEmptyBody(body); - if ((is_empty || trip_count == 1) && - TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) { + if (reductions_->empty() && // TODO: possible with some effort + (is_empty || trip_count == 1) && + TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { if (!is_empty) { // Unroll the loop-body, which sees initial value of the index. - phi->ReplaceWith(phi->InputAt(0)); + main_phi->ReplaceWith(main_phi->InputAt(0)); preheader->MergeInstructionsWith(body); } body->DisconnectAndDelete(); @@ -566,21 +616,20 @@ void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { preheader->AddDominatedBlock(exit); exit->SetDominator(preheader); RemoveLoop(node); // update hierarchy - return; + return true; } } - // Vectorize loop, if possible and valid. - if (kEnableVectorization) { - iset_->clear(); // prepare phi induction - if (TrySetSimpleLoopHeader(header) && - ShouldVectorize(node, body, trip_count) && - TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) { - Vectorize(node, body, exit, trip_count); - graph_->SetHasSIMD(true); // flag SIMD usage - return; - } + if (kEnableVectorization && + TrySetSimpleLoopHeader(header, &main_phi) && + ShouldVectorize(node, body, trip_count) && + TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { + Vectorize(node, body, exit, trip_count); + graph_->SetHasSIMD(true); // flag SIMD usage + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized); + return true; } + return false; } // @@ -621,6 +670,8 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 // aliased, as well as the property that references either point to the same // array or to two completely disjoint arrays, i.e., no partial aliasing. // Other than a few simply heuristics, no detailed subscript analysis is done. + // The scan over references also finds a suitable dynamic loop peeling candidate. + const ArrayReference* candidate = nullptr; for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) { for (auto j = i; ++j != vector_refs_->end(); ) { if (i->type == j->type && (i->lhs || j->lhs)) { @@ -656,7 +707,7 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 } // Consider dynamic loop peeling for alignment. 
- SetPeelingCandidate(trip_count); + SetPeelingCandidate(candidate, trip_count); // Success! return true; @@ -679,14 +730,15 @@ void HLoopOptimization::Vectorize(LoopNode* node, bool needs_cleanup = trip_count == 0 || (trip_count % chunk) != 0; // Adjust vector bookkeeping. - iset_->clear(); // prepare phi induction - bool is_simple_loop_header = TrySetSimpleLoopHeader(header); // fills iset_ + HPhi* main_phi = nullptr; + bool is_simple_loop_header = TrySetSimpleLoopHeader(header, &main_phi); // refills sets DCHECK(is_simple_loop_header); vector_header_ = header; vector_body_ = block; - // Generate dynamic loop peeling trip count, if needed: - // ptc = <peeling-needed-for-candidate> + // Generate dynamic loop peeling trip count, if needed, under the assumption + // that the Android runtime guarantees at least "component size" alignment: + // ptc = (ALIGN - (&a[initial] % ALIGN)) / type-size HInstruction* ptc = nullptr; if (vector_peeling_candidate_ != nullptr) { DCHECK_LT(vector_length_, trip_count) << "dynamic peeling currently requires known trip count"; @@ -769,12 +821,20 @@ void HLoopOptimization::Vectorize(LoopNode* node, /*unroll*/ 1); } + // Link reductions to their final uses. + for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { + if (i->first->IsPhi()) { + i->first->ReplaceWith(ReduceAndExtractIfNeeded(i->second)); + } + } + // Remove the original loop by disconnecting the body block // and removing all instructions from the header. block->DisconnectAndDelete(); while (!header->GetFirstInstruction()->IsGoto()) { header->RemoveInstruction(header->GetFirstInstruction()); } + // Update loop hierarchy: the old header now resides in the same outer loop // as the old preheader. Note that we don't bother putting sequential // loops back in the hierarchy at this point. @@ -807,21 +867,10 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node, vector_header_->AddInstruction(cond); vector_header_->AddInstruction(new (global_allocator_) HIf(cond)); vector_index_ = phi; + vector_permanent_map_->clear(); // preserved over unrolling for (uint32_t u = 0; u < unroll; u++) { - // Clear map, leaving loop invariants setup during unrolling. - if (u == 0) { - vector_map_->clear(); - } else { - for (auto i = vector_map_->begin(); i != vector_map_->end(); ) { - if (i->second->IsVecReplicateScalar()) { - DCHECK(node->loop_info->IsDefinedOutOfTheLoop(i->first)); - ++i; - } else { - i = vector_map_->erase(i); - } - } - } // Generate instruction map. + vector_map_->clear(); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true); DCHECK(vectorized_def); @@ -838,16 +887,23 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node, } } } + // Generate the induction. vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step); Insert(vector_body_, vector_index_); } - // Finalize phi for the loop index. + // Finalize phi inputs for the reductions (if any). + for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { + if (!i->first->IsPhi()) { + DCHECK(i->second->IsPhi()); + GenerateVecReductionPhiInputs(i->second->AsPhi(), i->first); + } + } + // Finalize phi inputs for the loop index. phi->AddInput(lo); phi->AddInput(vector_index_); vector_index_ = phi; } -// TODO: accept reductions at left-hand-side, mixed-type store idioms, etc. 
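The peeling comment above can be read as follows — a worked sketch under my own naming, not ART code: the runtime computes how many scalar iterations must run before the candidate array reference reaches the assumed alignment.

#include <cstddef>
#include <cstdint>

// ptc = (ALIGN - (&a[initial] % ALIGN)) / type-size, as quoted in the comment.
size_t PeelingTripCount(uintptr_t first_element_address, size_t type_size, size_t align) {
  return (align - (first_element_address % align)) / type_size;
}
// Example: int32_t data whose first accessed element sits at an address with
// address % 16 == 4 and ALIGN = 16 gives (16 - 4) / 4 = 3 peeled iterations.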
bool HLoopOptimization::VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code) { @@ -877,6 +933,23 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, } return false; } + // Accept a left-hand-side reduction for + // (1) supported vector type, + // (2) vectorizable right-hand-side value. + auto redit = reductions_->find(instruction); + if (redit != reductions_->end()) { + Primitive::Type type = instruction->GetType(); + if (TrySetVectorType(type, &restrictions) && + VectorizeUse(node, instruction, generate_code, type, restrictions)) { + if (generate_code) { + HInstruction* new_red = vector_map_->Get(instruction); + vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second)); + vector_permanent_map_->Overwrite(redit->second, new_red); + } + return true; + } + return false; + } // Branch back okay. if (instruction->IsGoto()) { return true; @@ -932,6 +1005,21 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } return true; } + } else if (instruction->IsPhi()) { + // Accept particular phi operations. + if (reductions_->find(instruction) != reductions_->end()) { + // Deal with vector restrictions. + if (HasVectorRestrictions(restrictions, kNoReduction)) { + return false; + } + // Accept a reduction. + if (generate_code) { + GenerateVecReductionPhi(instruction->AsPhi()); + } + return true; + } + // TODO: accept right-hand-side induction? + return false; } else if (instruction->IsTypeConversion()) { // Accept particular type conversions. HTypeConversion* conversion = instruction->AsTypeConversion(); @@ -1118,18 +1206,18 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric case kArm: case kThumb2: // Allow vectorization for all ARM devices, because Android assumes that - // ARM 32-bit always supports advanced SIMD. + // ARM 32-bit always supports advanced SIMD (64-bit SIMD). switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoReduction; return TrySetVectorLength(8); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; return TrySetVectorLength(4); case Primitive::kPrimInt: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoReduction; return TrySetVectorLength(2); default: break; @@ -1137,15 +1225,15 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric return false; case kArm64: // Allow vectorization for all ARM devices, because Android assumes that - // ARMv8 AArch64 always supports advanced SIMD. + // ARMv8 AArch64 always supports advanced SIMD (128-bit SIMD). 
switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoReduction; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoReduction; return TrySetVectorLength(8); case Primitive::kPrimInt: *restrictions |= kNoDiv; @@ -1154,24 +1242,27 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric *restrictions |= kNoDiv | kNoMul | kNoMinMax; return TrySetVectorLength(2); case Primitive::kPrimFloat: + *restrictions |= kNoReduction; return TrySetVectorLength(4); case Primitive::kPrimDouble: + *restrictions |= kNoReduction; return TrySetVectorLength(2); default: return false; } case kX86: case kX86_64: - // Allow vectorization for SSE4-enabled X86 devices only (128-bit vectors). + // Allow vectorization for SSE4.1-enabled X86 devices only (128-bit SIMD). if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) { switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd; + *restrictions |= + kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd; + *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction; return TrySetVectorLength(8); case Primitive::kPrimInt: *restrictions |= kNoDiv; @@ -1180,10 +1271,10 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax; return TrySetVectorLength(2); case Primitive::kPrimFloat: - *restrictions |= kNoMinMax; // -0.0 vs +0.0 + *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0 return TrySetVectorLength(4); case Primitive::kPrimDouble: - *restrictions |= kNoMinMax; // -0.0 vs +0.0 + *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0 return TrySetVectorLength(2); default: break; @@ -1195,23 +1286,23 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoReduction; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; return TrySetVectorLength(8); case Primitive::kPrimInt: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoReduction; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoReduction; return TrySetVectorLength(2); case Primitive::kPrimFloat: - *restrictions |= kNoMinMax; // min/max(x, NaN) + *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) return TrySetVectorLength(4); case Primitive::kPrimDouble: - *restrictions |= kNoMinMax; // min/max(x, NaN) + *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) return TrySetVectorLength(2); default: break; @@ -1223,23 +1314,23 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoReduction; return TrySetVectorLength(16); case Primitive::kPrimChar: case 
Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; return TrySetVectorLength(8); case Primitive::kPrimInt: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoReduction; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoReduction; return TrySetVectorLength(2); case Primitive::kPrimFloat: - *restrictions |= kNoMinMax; // min/max(x, NaN) + *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) return TrySetVectorLength(4); case Primitive::kPrimDouble: - *restrictions |= kNoMinMax; // min/max(x, NaN) + *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) return TrySetVectorLength(2); default: break; @@ -1272,9 +1363,16 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type) return; } // In vector code, explicit scalar expansion is needed. - HInstruction* vector = new (global_allocator_) HVecReplicateScalar( - global_allocator_, org, type, vector_length_); - vector_map_->Put(org, Insert(vector_preheader_, vector)); + HInstruction* vector = nullptr; + auto it = vector_permanent_map_->find(org); + if (it != vector_permanent_map_->end()) { + vector = it->second; // reuse during unrolling + } else { + vector = new (global_allocator_) HVecReplicateScalar( + global_allocator_, org, type, vector_length_); + vector_permanent_map_->Put(org, Insert(vector_preheader_, vector)); + } + vector_map_->Put(org, vector); } } @@ -1310,8 +1408,6 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org, global_allocator_, base, opa, type, vector_length_, is_string_char_at); } // Known dynamically enforced alignment? - // TODO: detect offset + constant differences. - // TODO: long run, static alignment analysis? if (vector_peeling_candidate_ != nullptr && vector_peeling_candidate_->base == base && vector_peeling_candidate_->offset == offset) { @@ -1331,6 +1427,78 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org, vector_map_->Put(org, vector); } +void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) { + DCHECK(reductions_->find(phi) != reductions_->end()); + DCHECK(reductions_->Get(phi->InputAt(1)) == phi); + HInstruction* vector = nullptr; + if (vector_mode_ == kSequential) { + HPhi* new_phi = new (global_allocator_) HPhi( + global_allocator_, kNoRegNumber, 0, phi->GetType()); + vector_header_->AddPhi(new_phi); + vector = new_phi; + } else { + // Link vector reduction back to prior unrolled update, or a first phi. + auto it = vector_permanent_map_->find(phi); + if (it != vector_permanent_map_->end()) { + vector = it->second; + } else { + HPhi* new_phi = new (global_allocator_) HPhi( + global_allocator_, kNoRegNumber, 0, HVecOperation::kSIMDType); + vector_header_->AddPhi(new_phi); + vector = new_phi; + } + } + vector_map_->Put(phi, vector); +} + +void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) { + HInstruction* new_phi = vector_map_->Get(phi); + HInstruction* new_init = reductions_->Get(phi); + HInstruction* new_red = vector_map_->Get(reduction); + // Link unrolled vector loop back to new phi. + for (; !new_phi->IsPhi(); new_phi = vector_permanent_map_->Get(new_phi)) { + DCHECK(new_phi->IsVecOperation()); + } + // Prepare the new initialization. + if (vector_mode_ == kVector) { + // Generate a [initial, 0, .., 0] vector. 
+ new_init = Insert( + vector_preheader_, + new (global_allocator_) HVecSetScalars( + global_allocator_, &new_init, phi->GetType(), vector_length_, 1)); + } else { + new_init = ReduceAndExtractIfNeeded(new_init); + } + // Set the phi inputs. + DCHECK(new_phi->IsPhi()); + new_phi->AsPhi()->AddInput(new_init); + new_phi->AsPhi()->AddInput(new_red); + // New feed value for next phi (safe mutation in iteration). + reductions_->find(phi)->second = new_phi; +} + +HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruction) { + if (instruction->IsPhi()) { + HInstruction* input = instruction->InputAt(1); + if (input->IsVecOperation()) { + Primitive::Type type = input->AsVecOperation()->GetPackedType(); + HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0]; + // Generate a vector reduction and scalar extract + // x = REDUCE( [x_1, .., x_n] ) + // y = x_1 + // along the exit of the defining loop. + HVecReduce::ReductionKind kind = GetReductionKind(input); + HInstruction* reduce = new (global_allocator_) HVecReduce( + global_allocator_, instruction, type, vector_length_, kind); + exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction()); + instruction = new (global_allocator_) HVecExtractScalar( + global_allocator_, reduce, type, vector_length_, 0); + exit->InsertInstructionAfter(instruction, reduce); + } + } + return instruction; +} + #define GENERATE_VEC(x, y) \ if (vector_mode_ == kVector) { \ vector = (x); \ @@ -1511,10 +1679,9 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1 // (note whether the sign bit in wider precision is shifted in has no effect // on the narrow precision computed by the idiom). - int64_t distance = 0; if ((instruction->IsShr() || instruction->IsUShr()) && - IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) { + IsInt64Value(instruction->InputAt(1), 1)) { // Test for (a + b + c) >> 1 for optional constant c. HInstruction* a = nullptr; HInstruction* b = nullptr; @@ -1559,6 +1726,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, vector_length_, is_unsigned, is_rounded)); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); } else { GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type); } @@ -1586,9 +1754,11 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) { return true; } -void HLoopOptimization::SetPeelingCandidate(int64_t trip_count ATTRIBUTE_UNUSED) { +void HLoopOptimization::SetPeelingCandidate(const ArrayReference* candidate, + int64_t trip_count ATTRIBUTE_UNUSED) { // Current heuristic: none. // TODO: implement + vector_peeling_candidate_ = candidate; } uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) { @@ -1616,13 +1786,17 @@ uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_ // bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) { + // Start with empty phi induction. + iset_->clear(); + // Special case Phis that have equivalent in a debuggable setup. Our graph checker isn't // smart enough to follow strongly connected components (and it's probably not worth // it to make it so). See b/33775412. if (graph_->IsDebuggable() && phi->HasEquivalentPhi()) { return false; } - DCHECK(iset_->empty()); + + // Lookup phi induction cycle. 
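The epilogue built by ReduceAndExtractIfNeeded() behaves like the following scalar model (a sketch of mine, not ART code): because the reduction phi was seeded with the [initial, 0, .., 0] vector, folding all lanes at the loop exit yields exactly the scalar value the original code would have computed, and that value replaces the reduction's uses after the loop.

#include <cstddef>
#include <cstdint>

// HVecReduce folds the lanes (kSum here; kMin/kMax would fold with min/max instead),
// HVecExtractScalar then reads the single resulting value.
int32_t ReduceSumLanes(const int32_t* lanes, size_t vector_length) {
  int32_t acc = 0;
  for (size_t i = 0; i < vector_length; ++i) {
    acc += lanes[i];
  }
  return acc;
}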
ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi); if (set != nullptr) { for (HInstruction* i : *set) { @@ -1634,6 +1808,7 @@ bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) { } else if (!i->IsRemovable()) { return false; } else if (i != phi && restrict_uses) { + // Deal with regular uses. for (const HUseListNode<HInstruction*>& use : i->GetUses()) { if (set->find(use.GetUser()) == set->end()) { return false; @@ -1647,17 +1822,65 @@ bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) { return false; } -// Find: phi: Phi(init, addsub) -// s: SuspendCheck -// c: Condition(phi, bound) -// i: If(c) -// TODO: Find a less pattern matching approach? -bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) { +bool HLoopOptimization::TrySetPhiReduction(HPhi* phi) { DCHECK(iset_->empty()); - HInstruction* phi = block->GetFirstPhi(); - if (phi != nullptr && - phi->GetNext() == nullptr && - TrySetPhiInduction(phi->AsPhi(), /*restrict_uses*/ false)) { + // Only unclassified phi cycles are candidates for reductions. + if (induction_range_.IsClassified(phi)) { + return false; + } + // Accept operations like x = x + .., provided that the phi and the reduction are + // used exactly once inside the loop, and by each other. + HInputsRef inputs = phi->GetInputs(); + if (inputs.size() == 2) { + HInstruction* reduction = inputs[1]; + if (HasReductionFormat(reduction, phi)) { + HLoopInformation* loop_info = phi->GetBlock()->GetLoopInformation(); + int32_t use_count = 0; + bool single_use_inside_loop = + // Reduction update only used by phi. + reduction->GetUses().HasExactlyOneElement() && + !reduction->HasEnvironmentUses() && + // Reduction update is only use of phi inside the loop. + IsOnlyUsedAfterLoop(loop_info, phi, /*collect_loop_uses*/ true, &use_count) && + iset_->size() == 1; + iset_->clear(); // leave the way you found it + if (single_use_inside_loop) { + // Link reduction back, and start recording feed value. + reductions_->Put(reduction, phi); + reductions_->Put(phi, phi->InputAt(0)); + return true; + } + } + } + return false; +} + +bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi) { + // Start with empty phi induction and reductions. + iset_->clear(); + reductions_->clear(); + + // Scan the phis to find the following (the induction structure has already + // been optimized, so we don't need to worry about trivial cases): + // (1) optional reductions in loop, + // (2) the main induction, used in loop control. + HPhi* phi = nullptr; + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + if (TrySetPhiReduction(it.Current()->AsPhi())) { + continue; + } else if (phi == nullptr) { + // Found the first candidate for main induction. 
+ phi = it.Current()->AsPhi(); + } else { + return false; + } + } + + // Then test for a typical loopheader: + // s: SuspendCheck + // c: Condition(phi, bound) + // i: If(c) + if (phi != nullptr && TrySetPhiInduction(phi, /*restrict_uses*/ false)) { HInstruction* s = block->GetFirstInstruction(); if (s != nullptr && s->IsSuspendCheck()) { HInstruction* c = s->GetNext(); @@ -1669,6 +1892,7 @@ bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) { if (i != nullptr && i->IsIf() && i->InputAt(0) == c) { iset_->insert(c); iset_->insert(s); + *main_phi = phi; return true; } } @@ -1692,6 +1916,7 @@ bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) { bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info, HInstruction* instruction) { + // Deal with regular uses. for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { if (use.GetUser()->GetBlock()->GetLoopInformation() != loop_info) { return true; @@ -1704,6 +1929,7 @@ bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info, HInstruction* instruction, bool collect_loop_uses, /*out*/ int32_t* use_count) { + // Deal with regular uses. for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { HInstruction* user = use.GetUser(); if (iset_->find(user) == iset_->end()) { // not excluded? @@ -1729,6 +1955,7 @@ bool HLoopOptimization::TryReplaceWithLastValue(HLoopInformation* loop_info, // Try to replace outside uses with the last value. if (induction_range_.CanGenerateLastValue(instruction)) { HInstruction* replacement = induction_range_.GenerateLastValue(instruction, graph_, block); + // Deal with regular uses. const HUseList<HInstruction*>& uses = instruction->GetUses(); for (auto it = uses.begin(), end = uses.end(); it != end;) { HInstruction* user = it->GetUser(); @@ -1744,6 +1971,7 @@ bool HLoopOptimization::TryReplaceWithLastValue(HLoopInformation* loop_info, induction_range_.Replace(user, instruction, replacement); // update induction } } + // Deal with environment uses. const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses(); for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) { HEnvironment* user = it->GetUser(); @@ -1759,7 +1987,6 @@ bool HLoopOptimization::TryReplaceWithLastValue(HLoopInformation* loop_info, } } } - induction_simplication_count_++; return true; } return false; diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index de4bd85fc8..f34751815b 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -34,7 +34,8 @@ class HLoopOptimization : public HOptimization { public: HLoopOptimization(HGraph* graph, CompilerDriver* compiler_driver, - HInductionVarAnalysis* induction_analysis); + HInductionVarAnalysis* induction_analysis, + OptimizingCompilerStats* stats); void Run() OVERRIDE; @@ -62,17 +63,18 @@ class HLoopOptimization : public HOptimization { * Vectorization restrictions (bit mask). 
*/ enum VectorRestrictions { - kNone = 0, // no restrictions - kNoMul = 1, // no multiplication - kNoDiv = 2, // no division - kNoShift = 4, // no shift - kNoShr = 8, // no arithmetic shift right - kNoHiBits = 16, // "wider" operations cannot bring in higher order bits - kNoSignedHAdd = 32, // no signed halving add - kNoUnroundedHAdd = 64, // no unrounded halving add - kNoAbs = 128, // no absolute value - kNoMinMax = 256, // no min/max - kNoStringCharAt = 512, // no StringCharAt + kNone = 0, // no restrictions + kNoMul = 1 << 0, // no multiplication + kNoDiv = 1 << 1, // no division + kNoShift = 1 << 2, // no shift + kNoShr = 1 << 3, // no arithmetic shift right + kNoHiBits = 1 << 4, // "wider" operations cannot bring in higher order bits + kNoSignedHAdd = 1 << 5, // no signed halving add + kNoUnroundedHAdd = 1 << 6, // no unrounded halving add + kNoAbs = 1 << 7, // no absolute value + kNoMinMax = 1 << 8, // no min/max + kNoStringCharAt = 1 << 9, // no StringCharAt + kNoReduction = 1 << 10, // no reduction }; /* @@ -104,18 +106,33 @@ class HLoopOptimization : public HOptimization { bool lhs; // def/use }; + // // Loop setup and traversal. + // + void LocalRun(); void AddLoop(HLoopInformation* loop_info); void RemoveLoop(LoopNode* node); - void TraverseLoopsInnerToOuter(LoopNode* node); + // Traverses all loops inner to outer to perform simplifications and optimizations. + // Returns true if loops nested inside current loop (node) have changed. + bool TraverseLoopsInnerToOuter(LoopNode* node); + + // // Optimization. + // + void SimplifyInduction(LoopNode* node); void SimplifyBlocks(LoopNode* node); - void OptimizeInnerLoop(LoopNode* node); + // Performs optimizations specific to inner loop (empty loop removal, + // unrolling, vectorization). Returns true if anything changed. + bool OptimizeInnerLoop(LoopNode* node); + + // // Vectorization analysis and synthesis. + // + bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count); void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count); void GenerateNewLoop(LoopNode* node, @@ -140,6 +157,9 @@ class HLoopOptimization : public HOptimization { HInstruction* opb, HInstruction* offset, Primitive::Type type); + void GenerateVecReductionPhi(HPhi* phi); + void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction); + HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction); void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, @@ -155,12 +175,20 @@ class HLoopOptimization : public HOptimization { // Vectorization heuristics. bool IsVectorizationProfitable(int64_t trip_count); - void SetPeelingCandidate(int64_t trip_count); + void SetPeelingCandidate(const ArrayReference* candidate, int64_t trip_count); uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count); + // // Helpers. + // + bool TrySetPhiInduction(HPhi* phi, bool restrict_uses); - bool TrySetSimpleLoopHeader(HBasicBlock* block); + bool TrySetPhiReduction(HPhi* phi); + + // Detects loop header with a single induction (returned in main_phi), possibly + // other phis for reductions, but no other side effects. Returns true on success. + bool TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi); + bool IsEmptyBody(HBasicBlock* block); bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info, HInstruction* instruction, @@ -200,10 +228,12 @@ class HLoopOptimization : public HOptimization { // Contents reside in phase-local heap memory. 
ArenaSet<HInstruction*>* iset_; - // Counter that tracks how many induction cycles have been simplified. Useful - // to trigger incremental updates of induction variable analysis of outer loops - // when the induction of inner loops has changed. - uint32_t induction_simplication_count_; + // Temporary bookkeeping of reduction instructions. Mapping is two-fold: + // (1) reductions in the loop-body are mapped back to their phi definition, + // (2) phi definitions are mapped to their initial value (updated during + // code generation to feed the proper values into the new chain). + // Contents reside in phase-local heap memory. + ArenaSafeMap<HInstruction*, HInstruction*>* reductions_; // Flag that tracks if any simplifications have occurred. bool simplified_; @@ -228,6 +258,10 @@ class HLoopOptimization : public HOptimization { // Contents reside in phase-local heap memory. ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_; + // Permanent mapping used during vectorization synthesis. + // Contents reside in phase-local heap memory. + ArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_; + // Temporary vectorization bookkeeping. VectorMode vector_mode_; // synthesis mode HBasicBlock* vector_preheader_; // preheader of the new loop diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index 5b9350689e..1c5603d00f 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -31,7 +31,7 @@ class LoopOptimizationTest : public CommonCompilerTest { allocator_(&pool_), graph_(CreateGraph(&allocator_)), iva_(new (&allocator_) HInductionVarAnalysis(graph_)), - loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_)) { + loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_, nullptr)) { BuildGraph(); } @@ -195,4 +195,44 @@ TEST_F(LoopOptimizationTest, LoopNestWithSequence) { EXPECT_EQ("[[[[[[[[[[][][][][][][][][][]]]]]]]]]]", LoopStructure()); } +// Check that SimplifyLoop() doesn't invalidate data flow when ordering loop headers' +// predecessors. +TEST_F(LoopOptimizationTest, SimplifyLoop) { + // Can't use AddLoop as we want special order for blocks predecessors. + HBasicBlock* header = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* body = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(header); + graph_->AddBlock(body); + + // Control flow: make a loop back edge first in the list of predecessors. + entry_block_->RemoveSuccessor(return_block_); + body->AddSuccessor(header); + entry_block_->AddSuccessor(header); + header->AddSuccessor(body); + header->AddSuccessor(return_block_); + DCHECK(header->GetSuccessors()[1] == return_block_); + + // Data flow. + header->AddInstruction(new (&allocator_) HIf(parameter_)); + body->AddInstruction(new (&allocator_) HGoto()); + + HPhi* phi = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt); + HInstruction* add = new (&allocator_) HAdd(Primitive::kPrimInt, phi, parameter_); + header->AddPhi(phi); + body->AddInstruction(add); + + phi->AddInput(add); + phi->AddInput(parameter_); + + graph_->ClearLoopInformation(); + graph_->ClearDominanceInformation(); + graph_->BuildDominatorTree(); + + // Check that after optimizations in BuildDominatorTree()/SimplifyCFG() phi inputs + // are still mapped correctly to the block predecessors. 
+ for (size_t i = 0, e = phi->InputCount(); i < e; i++) { + HInstruction* input = phi->InputAt(i); + ASSERT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i])); + } +} } // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index e34d4a2be6..217a8f29a8 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -358,6 +358,35 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { } } +// Reorder phi inputs to match reordering of the block's predecessors. +static void FixPhisAfterPredecessorsReodering(HBasicBlock* block, size_t first, size_t second) { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + HInstruction* first_instr = phi->InputAt(first); + HInstruction* second_instr = phi->InputAt(second); + phi->ReplaceInput(first_instr, second); + phi->ReplaceInput(second_instr, first); + } +} + +// Make sure that the first predecessor of a loop header is the incoming block. +void HGraph::OrderLoopHeaderPredecessors(HBasicBlock* header) { + DCHECK(header->IsLoopHeader()); + HLoopInformation* info = header->GetLoopInformation(); + if (info->IsBackEdge(*header->GetPredecessors()[0])) { + HBasicBlock* to_swap = header->GetPredecessors()[0]; + for (size_t pred = 1, e = header->GetPredecessors().size(); pred < e; ++pred) { + HBasicBlock* predecessor = header->GetPredecessors()[pred]; + if (!info->IsBackEdge(*predecessor)) { + header->predecessors_[pred] = to_swap; + header->predecessors_[0] = predecessor; + FixPhisAfterPredecessorsReodering(header, 0, pred); + break; + } + } + } +} + void HGraph::SimplifyLoop(HBasicBlock* header) { HLoopInformation* info = header->GetLoopInformation(); @@ -381,18 +410,7 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { pre_header->AddSuccessor(header); } - // Make sure the first predecessor of a loop header is the incoming block. 
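The reordering fix above hinges on one invariant: phi input i must flow from predecessor i. A minimal sketch (hypothetical types, not ART code) of what FixPhisAfterPredecessorsReodering() keeps in sync when OrderLoopHeaderPredecessors() swaps two predecessors:

#include <cstddef>
#include <utility>
#include <vector>

struct Phi { std::vector<int> inputs; };  // stand-in for HPhi inputs

void SwapPredecessorsAndPhiInputs(std::vector<int>& predecessors,
                                  std::vector<Phi>& phis,
                                  size_t first, size_t second) {
  std::swap(predecessors[first], predecessors[second]);
  for (Phi& phi : phis) {
    std::swap(phi.inputs[first], phi.inputs[second]);  // keep inputs aligned with predecessors
  }
}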
- if (info->IsBackEdge(*header->GetPredecessors()[0])) { - HBasicBlock* to_swap = header->GetPredecessors()[0]; - for (size_t pred = 1, e = header->GetPredecessors().size(); pred < e; ++pred) { - HBasicBlock* predecessor = header->GetPredecessors()[pred]; - if (!info->IsBackEdge(*predecessor)) { - header->predecessors_[pred] = to_swap; - header->predecessors_[0] = predecessor; - break; - } - } - } + OrderLoopHeaderPredecessors(header); HInstruction* first_instruction = header->GetFirstInstruction(); if (first_instruction != nullptr && first_instruction->IsSuspendCheck()) { @@ -1774,6 +1792,10 @@ bool HBasicBlock::IsSingleGoto() const { return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsGoto(); } +bool HBasicBlock::IsSingleReturn() const { + return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsReturn(); +} + bool HBasicBlock::IsSingleTryBoundary() const { return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsTryBoundary(); } @@ -2810,6 +2832,7 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { } switch (GetLoadKind()) { case LoadKind::kBootImageAddress: + case LoadKind::kBootImageInternTable: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetString().Get() == other_load_string->GetString().Get(); @@ -2840,6 +2863,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { return os << "BootImageLinkTimePcRelative"; case HLoadString::LoadKind::kBootImageAddress: return os << "BootImageAddress"; + case HLoadString::LoadKind::kBootImageInternTable: + return os << "BootImageInternTable"; case HLoadString::LoadKind::kBssEntry: return os << "BssEntry"; case HLoadString::LoadKind::kJitTableAddress: diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 3e4928bd65..6bf53f7147 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -418,6 +418,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { HBasicBlock* SplitEdge(HBasicBlock* block, HBasicBlock* successor); void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor); + void OrderLoopHeaderPredecessors(HBasicBlock* header); void SimplifyLoop(HBasicBlock* header); int32_t GetNextInstructionId() { @@ -958,6 +959,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { } bool IsSingleGoto() const; + bool IsSingleReturn() const; bool IsSingleTryBoundary() const; // Returns true if this block emits nothing but a jump. @@ -1372,7 +1374,8 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ M(VecReplicateScalar, VecUnaryOperation) \ - M(VecSumReduce, VecUnaryOperation) \ + M(VecExtractScalar, VecUnaryOperation) \ + M(VecReduce, VecUnaryOperation) \ M(VecCnv, VecUnaryOperation) \ M(VecNeg, VecUnaryOperation) \ M(VecAbs, VecUnaryOperation) \ @@ -5869,6 +5872,10 @@ class HLoadString FINAL : public HInstruction { // Used for boot image strings referenced by apps in AOT- and JIT-compiled code. kBootImageAddress, + // Use a PC-relative load from a boot image InternTable mmapped into the .bss + // of the oat file. + kBootImageInternTable, + // Load from an entry in the .bss section using a PC-relative load. // Used for strings outside boot image when .bss is accessible with a PC-relative load. 
kBssEntry, @@ -5928,6 +5935,7 @@ class HLoadString FINAL : public HInstruction { LoadKind load_kind = GetLoadKind(); if (load_kind == LoadKind::kBootImageLinkTimePcRelative || load_kind == LoadKind::kBootImageAddress || + load_kind == LoadKind::kBootImageInternTable || load_kind == LoadKind::kJitTableAddress) { return false; } @@ -5988,8 +5996,9 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) { // The special input is used for PC-relative loads on some architectures, // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kBssEntry || - GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind(); + GetLoadKind() == LoadKind::kBootImageAddress || + GetLoadKind() == LoadKind::kBootImageInternTable || + GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); // HLoadString::GetInputRecords() returns an empty array at this point, // so use the GetInputRecords() from the base class to set the input record. DCHECK(special_input_.GetInstruction() == nullptr); @@ -7039,6 +7048,17 @@ inline bool IsInt64AndGet(HInstruction* instruction, /*out*/ int64_t* value) { return false; } +// Returns true iff instruction is the given integral constant. +inline bool IsInt64Value(HInstruction* instruction, int64_t value) { + int64_t val = 0; + return IsInt64AndGet(instruction, &val) && val == value; +} + +// Returns true iff instruction is a zero bit pattern. +inline bool IsZeroBitPattern(HInstruction* instruction) { + return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern(); +} + #define INSTRUCTION_TYPE_CHECK(type, super) \ inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \ inline const H##type* HInstruction::As##type() const { \ diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 6261171a00..886d75e5c7 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -63,6 +63,10 @@ class Alignment { // GetVectorLength() x GetPackedType() operations simultaneously. class HVecOperation : public HVariableInputSizeInstruction { public: + // A SIMD operation looks like a FPU location. + // TODO: we could introduce SIMD types in HIR. + static constexpr Primitive::Type kSIMDType = Primitive::kPrimDouble; + HVecOperation(ArenaAllocator* arena, Primitive::Type packed_type, SideEffects side_effects, @@ -89,10 +93,9 @@ class HVecOperation : public HVariableInputSizeInstruction { return vector_length_ * Primitive::ComponentSize(GetPackedType()); } - // Returns the type of the vector operation: a SIMD operation looks like a FPU location. - // TODO: we could introduce SIMD types in HIR. + // Returns the type of the vector operation. Primitive::Type GetType() const OVERRIDE { - return Primitive::kPrimDouble; + return kSIMDType; } // Returns the true component type packed in a vector. @@ -220,8 +223,11 @@ class HVecMemoryOperation : public HVecOperation { DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation); }; -// Packed type consistency checker (same vector length integral types may mix freely). +// Packed type consistency checker ("same vector length" integral types may mix freely). 
inline static bool HasConsistentPackedTypes(HInstruction* input, Primitive::Type type) { + if (input->IsPhi()) { + return input->GetType() == HVecOperation::kSIMDType; // carries SIMD + } DCHECK(input->IsVecOperation()); Primitive::Type input_type = input->AsVecOperation()->GetPackedType(); switch (input_type) { @@ -265,27 +271,77 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation { DISALLOW_COPY_AND_ASSIGN(HVecReplicateScalar); }; -// Sum-reduces the given vector into a shorter vector (m < n) or scalar (m = 1), -// viz. sum-reduce[ x1, .. , xn ] = [ y1, .., ym ], where yi = sum_j x_j. -class HVecSumReduce FINAL : public HVecUnaryOperation { - HVecSumReduce(ArenaAllocator* arena, - HInstruction* input, - Primitive::Type packed_type, - size_t vector_length, - uint32_t dex_pc = kNoDexPc) +// Extracts a particular scalar from the given vector, +// viz. extract[ x1, .. , xn ] = x_i. +// +// TODO: for now only i == 1 case supported. +class HVecExtractScalar FINAL : public HVecUnaryOperation { + public: + HVecExtractScalar(ArenaAllocator* arena, + HInstruction* input, + Primitive::Type packed_type, + size_t vector_length, + size_t index, + uint32_t dex_pc = kNoDexPc) : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(input, packed_type)); + DCHECK_LT(index, vector_length); + DCHECK_EQ(index, 0u); + } + + // Yields a single component in the vector. + Primitive::Type GetType() const OVERRIDE { + return GetPackedType(); + } + + // An extract needs to stay in place, since SIMD registers are not + // kept alive across vector loop boundaries (yet). + bool CanBeMoved() const OVERRIDE { return false; } + + DECLARE_INSTRUCTION(VecExtractScalar); + + private: + DISALLOW_COPY_AND_ASSIGN(HVecExtractScalar); +}; + +// Reduces the given vector into the first element as sum/min/max, +// viz. sum-reduce[ x1, .. , xn ] = [ y, ---- ], where y = sum xi +// and the "-" denotes "don't care" (implementation dependent). +class HVecReduce FINAL : public HVecUnaryOperation { + public: + enum ReductionKind { + kSum = 1, + kMin = 2, + kMax = 3 + }; + + HVecReduce(ArenaAllocator* arena, + HInstruction* input, + Primitive::Type packed_type, + size_t vector_length, + ReductionKind kind, + uint32_t dex_pc = kNoDexPc) + : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc), + kind_(kind) { + DCHECK(HasConsistentPackedTypes(input, packed_type)); } - // TODO: probably integral promotion - Primitive::Type GetType() const OVERRIDE { return GetPackedType(); } + ReductionKind GetKind() const { return kind_; } bool CanBeMoved() const OVERRIDE { return true; } - DECLARE_INSTRUCTION(VecSumReduce); + bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + DCHECK(other->IsVecReduce()); + const HVecReduce* o = other->AsVecReduce(); + return HVecOperation::InstructionDataEquals(o) && GetKind() == o->GetKind(); + } + + DECLARE_INSTRUCTION(VecReduce); private: - DISALLOW_COPY_AND_ASSIGN(HVecSumReduce); + const ReductionKind kind_; + + DISALLOW_COPY_AND_ASSIGN(HVecReduce); }; // Converts every component in the vector, @@ -754,20 +810,23 @@ class HVecUShr FINAL : public HVecBinaryOperation { // // Assigns the given scalar elements to a vector, -// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ]. +// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ] if n == m, +// set( array(x1, .., xm) ) = [ x1, .. , xm, 0, .., 0 ] if m < n. 
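A small scalar model, assuming nothing beyond the standard library, of the HVecReduce semantics described above: the reduction result lands in lane 0 and the remaining lanes are "don't care" (simply zeroed here), with HVecExtractScalar then reading lane 0.

#include <algorithm>
#include <array>
#include <cstddef>
#include <numeric>

enum class ReductionKind { kSum, kMin, kMax };

// Scalar model of the three reduction kinds: result in element 0,
// remaining lanes unspecified (left zero-initialized in this sketch).
template <std::size_t N>
std::array<int, N> VecReduce(const std::array<int, N>& v, ReductionKind kind) {
  static_assert(N > 0, "vector length must be positive");
  std::array<int, N> out{};
  switch (kind) {
    case ReductionKind::kSum:
      out[0] = std::accumulate(v.begin(), v.end(), 0);
      break;
    case ReductionKind::kMin:
      out[0] = *std::min_element(v.begin(), v.end());
      break;
    case ReductionKind::kMax:
      out[0] = *std::max_element(v.begin(), v.end());
      break;
  }
  return out;  // an extract of lane 0 then reads out[0]
}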
class HVecSetScalars FINAL : public HVecOperation { + public: HVecSetScalars(ArenaAllocator* arena, HInstruction** scalars, // array Primitive::Type packed_type, size_t vector_length, + size_t number_of_scalars, uint32_t dex_pc = kNoDexPc) : HVecOperation(arena, packed_type, SideEffects::None(), - /* number_of_inputs */ vector_length, + number_of_scalars, vector_length, dex_pc) { - for (size_t i = 0; i < vector_length; i++) { + for (size_t i = 0; i < number_of_scalars; i++) { DCHECK(!scalars[i]->IsVecOperation()); SetRawInputAt(0, scalars[i]); } diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc index 0238ea4602..5a56a2c210 100644 --- a/compiler/optimizing/nodes_vector_test.cc +++ b/compiler/optimizing/nodes_vector_test.cc @@ -332,4 +332,32 @@ TEST_F(NodesVectorTest, VectorOperationMattersOnMultiplyAccumulate) { EXPECT_FALSE(v1->Equals(v3)); // different vector lengths } +TEST_F(NodesVectorTest, VectorKindMattersOnReduce) { + HVecOperation* v0 = new (&allocator_) + HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimInt, 4); + + HVecReduce* v1 = new (&allocator_) HVecReduce( + &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kSum); + HVecReduce* v2 = new (&allocator_) HVecReduce( + &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kMin); + HVecReduce* v3 = new (&allocator_) HVecReduce( + &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kMax); + + EXPECT_FALSE(v0->CanBeMoved()); + EXPECT_TRUE(v1->CanBeMoved()); + EXPECT_TRUE(v2->CanBeMoved()); + EXPECT_TRUE(v3->CanBeMoved()); + + EXPECT_EQ(HVecReduce::kSum, v1->GetKind()); + EXPECT_EQ(HVecReduce::kMin, v2->GetKind()); + EXPECT_EQ(HVecReduce::kMax, v3->GetKind()); + + EXPECT_TRUE(v1->Equals(v1)); + EXPECT_TRUE(v2->Equals(v2)); + EXPECT_TRUE(v3->Equals(v3)); + + EXPECT_FALSE(v1->Equals(v2)); // different kinds + EXPECT_FALSE(v1->Equals(v3)); +} + } // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 77a63acd18..fde55cb92f 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -148,27 +148,27 @@ static constexpr uint8_t expected_cfi_kMips[] = { 0x48, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40, }; -// 0x00000000: addiu r29, r29, -64 +// 0x00000000: addiu sp, sp, -64 // 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: sw r31, +60(r29) +// 0x00000004: sw ra, +60(sp) // 0x00000008: .cfi_offset: r31 at cfa-4 -// 0x00000008: sw r17, +56(r29) +// 0x00000008: sw s1, +56(sp) // 0x0000000c: .cfi_offset: r17 at cfa-8 -// 0x0000000c: sw r16, +52(r29) +// 0x0000000c: sw s0, +52(sp) // 0x00000010: .cfi_offset: r16 at cfa-12 -// 0x00000010: sdc1 f22, +40(r29) -// 0x00000014: sdc1 f20, +32(r29) +// 0x00000010: sdc1 f22, +40(sp) +// 0x00000014: sdc1 f20, +32(sp) // 0x00000018: .cfi_remember_state -// 0x00000018: lw r31, +60(r29) +// 0x00000018: lw ra, +60(sp) // 0x0000001c: .cfi_restore: r31 -// 0x0000001c: lw r17, +56(r29) +// 0x0000001c: lw s1, +56(sp) // 0x00000020: .cfi_restore: r17 -// 0x00000020: lw r16, +52(r29) +// 0x00000020: lw s0, +52(sp) // 0x00000024: .cfi_restore: r16 -// 0x00000024: ldc1 f22, +40(r29) -// 0x00000028: ldc1 f20, +32(r29) -// 0x0000002c: jr r31 -// 0x00000030: addiu r29, r29, 64 +// 0x00000024: ldc1 f22, +40(sp) +// 0x00000028: ldc1 f20, +32(sp) +// 0x0000002c: jr ra +// 0x00000030: addiu sp, sp, 64 // 0x00000034: .cfi_def_cfa_offset: 0 // 0x00000034: 
.cfi_restore_state // 0x00000034: .cfi_def_cfa_offset: 64 @@ -185,32 +185,32 @@ static constexpr uint8_t expected_cfi_kMips64[] = { 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; -// 0x00000000: daddiu r29, r29, -64 +// 0x00000000: daddiu sp, sp, -64 // 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: sd r31, +56(r29) +// 0x00000004: sd ra, +56(sp) // 0x00000008: .cfi_offset: r31 at cfa-8 -// 0x00000008: sd r17, +48(r29) +// 0x00000008: sd s1, +48(sp) // 0x0000000c: .cfi_offset: r17 at cfa-16 -// 0x0000000c: sd r16, +40(r29) +// 0x0000000c: sd s0, +40(sp) // 0x00000010: .cfi_offset: r16 at cfa-24 -// 0x00000010: sdc1 f25, +32(r29) +// 0x00000010: sdc1 f25, +32(sp) // 0x00000014: .cfi_offset: r57 at cfa-32 -// 0x00000014: sdc1 f24, +24(r29) +// 0x00000014: sdc1 f24, +24(sp) // 0x00000018: .cfi_offset: r56 at cfa-40 // 0x00000018: .cfi_remember_state -// 0x00000018: ld r31, +56(r29) +// 0x00000018: ld ra, +56(sp) // 0x0000001c: .cfi_restore: r31 -// 0x0000001c: ld r17, +48(r29) +// 0x0000001c: ld s1, +48(sp) // 0x00000020: .cfi_restore: r17 -// 0x00000020: ld r16, +40(r29) +// 0x00000020: ld s0, +40(sp) // 0x00000024: .cfi_restore: r16 -// 0x00000024: ldc1 f25, +32(r29) +// 0x00000024: ldc1 f25, +32(sp) // 0x00000028: .cfi_restore: r57 -// 0x00000028: ldc1 f24, +24(r29) +// 0x00000028: ldc1 f24, +24(sp) // 0x0000002c: .cfi_restore: r56 -// 0x0000002c: daddiu r29, r29, 64 +// 0x0000002c: daddiu sp, sp, 64 // 0x00000030: .cfi_def_cfa_offset: 0 -// 0x00000030: jic r31, 0 +// 0x00000030: jic ra, 0 // 0x00000034: .cfi_restore_state // 0x00000034: .cfi_def_cfa_offset: 64 @@ -330,7 +330,7 @@ static constexpr uint8_t expected_cfi_kThumb2_adjust[] = { static constexpr uint8_t expected_asm_kMips_adjust_head[] = { 0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF, 0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7, - 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27, + 0x08, 0x00, 0x80, 0x14, 0xFC, 0xFF, 0xBD, 0x27, 0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C, 0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F, 0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27, @@ -345,42 +345,42 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = { 0x50, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40, }; -// 0x00000000: addiu r29, r29, -64 +// 0x00000000: addiu sp, sp, -64 // 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: sw r31, +60(r29) +// 0x00000004: sw ra, +60(sp) // 0x00000008: .cfi_offset: r31 at cfa-4 -// 0x00000008: sw r17, +56(r29) +// 0x00000008: sw s1, +56(sp) // 0x0000000c: .cfi_offset: r17 at cfa-8 -// 0x0000000c: sw r16, +52(r29) +// 0x0000000c: sw s0, +52(sp) // 0x00000010: .cfi_offset: r16 at cfa-12 -// 0x00000010: sdc1 f22, +40(r29) -// 0x00000014: sdc1 f20, +32(r29) -// 0x00000018: bne r0, r4, 0x00000040 ; +36 -// 0x0000001c: addiu r29, r29, -4 +// 0x00000010: sdc1 f22, +40(sp) +// 0x00000014: sdc1 f20, +32(sp) +// 0x00000018: bnez a0, 0x0000003c ; +36 +// 0x0000001c: addiu sp, sp, -4 // 0x00000020: .cfi_def_cfa_offset: 68 -// 0x00000020: sw r31, +0(r29) -// 0x00000024: bltzal r0, 0x0000002c ; +4 -// 0x00000028: lui r1, 0x20000 -// 0x0000002c: ori r1, r1, 24 -// 0x00000030: addu r1, r1, r31 -// 0x00000034: lw r31, +0(r29) -// 0x00000038: jr r1 -// 0x0000003c: addiu r29, r29, 4 +// 0x00000020: sw ra, +0(sp) +// 
0x00000024: nal +// 0x00000028: lui at, 2 +// 0x0000002c: ori at, at, 24 +// 0x00000030: addu at, at, ra +// 0x00000034: lw ra, +0(sp) +// 0x00000038: jr at +// 0x0000003c: addiu sp, sp, 4 // 0x00000040: .cfi_def_cfa_offset: 64 // 0x00000040: nop // ... // 0x00020040: nop // 0x00020044: .cfi_remember_state -// 0x00020044: lw r31, +60(r29) +// 0x00020044: lw ra, +60(sp) // 0x00020048: .cfi_restore: r31 -// 0x00020048: lw r17, +56(r29) +// 0x00020048: lw s1, +56(sp) // 0x0002004c: .cfi_restore: r17 -// 0x0002004c: lw r16, +52(r29) +// 0x0002004c: lw s0, +52(sp) // 0x00020050: .cfi_restore: r16 -// 0x00020050: ldc1 f22, +40(r29) -// 0x00020054: ldc1 f20, +32(r29) -// 0x00020058: jr r31 -// 0x0002005c: addiu r29, r29, 64 +// 0x00020050: ldc1 f22, +40(sp) +// 0x00020054: ldc1 f20, +32(sp) +// 0x00020058: jr ra +// 0x0002005c: addiu sp, sp, 64 // 0x00020060: .cfi_def_cfa_offset: 0 // 0x00020060: .cfi_restore_state // 0x00020060: .cfi_def_cfa_offset: 64 @@ -401,37 +401,37 @@ static constexpr uint8_t expected_cfi_kMips64_adjust[] = { 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; -// 0x00000000: daddiu r29, r29, -64 +// 0x00000000: daddiu sp, sp, -64 // 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: sd r31, +56(r29) +// 0x00000004: sd ra, +56(sp) // 0x00000008: .cfi_offset: r31 at cfa-8 -// 0x00000008: sd r17, +48(r29) +// 0x00000008: sd s1, +48(sp) // 0x0000000c: .cfi_offset: r17 at cfa-16 -// 0x0000000c: sd r16, +40(r29) +// 0x0000000c: sd s0, +40(sp) // 0x00000010: .cfi_offset: r16 at cfa-24 -// 0x00000010: sdc1 f25, +32(r29) +// 0x00000010: sdc1 f25, +32(sp) // 0x00000014: .cfi_offset: r57 at cfa-32 -// 0x00000014: sdc1 f24, +24(r29) +// 0x00000014: sdc1 f24, +24(sp) // 0x00000018: .cfi_offset: r56 at cfa-40 -// 0x00000018: bnec r5, r6, 0x00000024 ; +12 -// 0x0000001c: auipc r1, 2 -// 0x00000020: jic r1, 12 ; bc 0x00020028 ; +131080 +// 0x00000018: bnec a1, a2, 0x00000024 ; +12 +// 0x0000001c: auipc at, 2 +// 0x00000020: jic at, 12 ; bc 0x00020028 ; +131080 // 0x00000024: nop // ... 
// 0x00020024: nop // 0x00020028: .cfi_remember_state -// 0x00020028: ld r31, +56(r29) +// 0x00020028: ld ra, +56(sp) // 0x0002002c: .cfi_restore: r31 -// 0x0002002c: ld r17, +48(r29) +// 0x0002002c: ld s1, +48(sp) // 0x00020030: .cfi_restore: r17 -// 0x00020030: ld r16, +40(r29) +// 0x00020030: ld s0, +40(sp) // 0x00020034: .cfi_restore: r16 -// 0x00020034: ldc1 f25, +32(r29) +// 0x00020034: ldc1 f25, +32(sp) // 0x00020038: .cfi_restore: r57 -// 0x00020038: ldc1 f24, +24(r29) +// 0x00020038: ldc1 f24, +24(sp) // 0x0002003c: .cfi_restore: r56 -// 0x0002003c: daddiu r29, r29, 64 +// 0x0002003c: daddiu sp, sp, 64 // 0x00020040: .cfi_def_cfa_offset: 0 -// 0x00020040: jic r31, 0 +// 0x00020040: jic ra, 0 // 0x00020044: .cfi_restore_state // 0x00020044: .cfi_def_cfa_offset: 64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index b45f3c6b33..399cd98983 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -22,8 +22,6 @@ #include <stdint.h> -#include "android-base/strings.h" - #ifdef ART_ENABLE_CODEGEN_arm64 #include "instruction_simplifier_arm64.h" #endif @@ -492,7 +490,7 @@ static HOptimization* BuildOptimization( } else if (opt_name == HSharpening::kSharpeningPassName) { return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles); } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) { - return new (arena) HSelectGenerator(graph, stats); + return new (arena) HSelectGenerator(graph, handles, stats); } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { return new (arena) HInductionVarAnalysis(graph); } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) { @@ -512,7 +510,7 @@ static HOptimization* BuildOptimization( } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) { return new (arena) SideEffectsAnalysis(graph); } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) { - return new (arena) HLoopOptimization(graph, driver, most_recent_induction); + return new (arena) HLoopOptimization(graph, driver, most_recent_induction, stats); } else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) { return new (arena) CHAGuardOptimization(graph); } else if (opt_name == CodeSinking::kCodeSinkingPassName) { @@ -763,7 +761,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding"); InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier( graph, codegen, driver, stats); - HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats); + HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, handles, stats); HConstantFolding* fold2 = new (arena) HConstantFolding( graph, "constant_folding$after_inlining"); HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce"); @@ -775,7 +773,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, LICM* licm = new (arena) LICM(graph, *side_effects1, stats); HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction); - HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction); + HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction, stats); LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph); 
LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa, stats); HSharpening* sharpening = new (arena) HSharpening( @@ -1134,12 +1132,7 @@ Compiler* CreateOptimizingCompiler(CompilerDriver* driver) { bool IsCompilingWithCoreImage() { const std::string& image = Runtime::Current()->GetImageLocation(); - // TODO: This is under-approximating... - if (android::base::EndsWith(image, "core.art") || - android::base::EndsWith(image, "core-optimizing.art")) { - return true; - } - return false; + return CompilerDriver::IsCoreImageFilename(image); } bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) { @@ -1233,14 +1226,14 @@ bool OptimizingCompiler::JitCompile(Thread* self, uint8_t* stack_map_data = nullptr; uint8_t* method_info_data = nullptr; uint8_t* roots_data = nullptr; - code_cache->ReserveData(self, - stack_map_size, - method_info_size, - number_of_roots, - method, - &stack_map_data, - &method_info_data, - &roots_data); + uint32_t data_size = code_cache->ReserveData(self, + stack_map_size, + method_info_size, + number_of_roots, + method, + &stack_map_data, + &method_info_data, + &roots_data); if (stack_map_data == nullptr || roots_data == nullptr) { return false; } @@ -1261,6 +1254,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, codegen->GetFpuSpillMask(), code_allocator.GetMemory().data(), code_allocator.GetSize(), + data_size, osr, roots, codegen->GetGraph()->HasShouldDeoptimizeFlag(), diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index af7ab2f1a0..07f9635aba 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -63,6 +63,8 @@ enum MethodCompilationStat { kBooleanSimplified, kIntrinsicRecognized, kLoopInvariantMoved, + kLoopVectorized, + kLoopVectorizedIdiom, kSelectGenerated, kRemovedInstanceOf, kInlinedInvokeVirtualOrInterface, @@ -184,6 +186,8 @@ class OptimizingCompilerStats { case kBooleanSimplified : name = "BooleanSimplified"; break; case kIntrinsicRecognized : name = "IntrinsicRecognized"; break; case kLoopInvariantMoved : name = "LoopInvariantMoved"; break; + case kLoopVectorized : name = "LoopVectorized"; break; + case kLoopVectorizedIdiom : name = "LoopVectorizedIdiom"; break; case kSelectGenerated : name = "SelectGenerated"; break; case kRemovedInstanceOf: name = "RemovedInstanceOf"; break; case kInlinedInvokeVirtualOrInterface: name = "InlinedInvokeVirtualOrInterface"; break; diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index 21b645279e..4cb99f9b5c 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -88,8 +88,9 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void VisitLoadString(HLoadString* load_string) OVERRIDE { HLoadString::LoadKind load_kind = load_string->GetLoadKind(); switch (load_kind) { - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: // Add a base register for PC-relative literals on R2. 
InitializePCRelativeBasePointer(); diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 2743df9dcf..c463ecdb0a 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -92,6 +92,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void VisitLoadString(HLoadString* load_string) OVERRIDE { HLoadString::LoadKind load_kind = load_string->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || + load_kind == HLoadString::LoadKind::kBootImageInternTable || load_kind == HLoadString::LoadKind::kBssEntry) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_string); load_string->AddSpecialInput(method_address); diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 561c9eafa2..93613a5542 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -754,8 +754,23 @@ void ReferenceTypePropagation::VisitPhi(HPhi* phi) { } } +void ReferenceTypePropagation::FixUpInstructionType(HInstruction* instruction, + VariableSizedHandleScope* handle_scope) { + if (instruction->IsSelect()) { + ScopedObjectAccess soa(Thread::Current()); + HandleCache handle_cache(handle_scope); + HSelect* select = instruction->AsSelect(); + ReferenceTypeInfo false_rti = select->GetFalseValue()->GetReferenceTypeInfo(); + ReferenceTypeInfo true_rti = select->GetTrueValue()->GetReferenceTypeInfo(); + select->SetReferenceTypeInfo(MergeTypes(false_rti, true_rti, &handle_cache)); + } else { + LOG(FATAL) << "Invalid instruction in FixUpInstructionType"; + } +} + ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& a, - const ReferenceTypeInfo& b) { + const ReferenceTypeInfo& b, + HandleCache* handle_cache) { if (!b.IsValid()) { return a; } @@ -780,7 +795,7 @@ ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& is_exact = false; } else if (!a_is_interface && !b_is_interface) { result_type_handle = - handle_cache_.NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle)); + handle_cache->NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle)); is_exact = false; } else { // This can happen if: @@ -790,7 +805,7 @@ ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& // void foo(Interface i, boolean cond) { // Object o = cond ? i : new Object(); // } - result_type_handle = handle_cache_.GetObjectClassHandle(); + result_type_handle = handle_cache->GetObjectClassHandle(); is_exact = false; } @@ -916,7 +931,7 @@ void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { if (inputs[i]->IsNullConstant()) { continue; } - new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo()); + new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo(), &handle_cache_); if (new_rti.IsValid() && new_rti.IsObjectClass()) { if (!new_rti.IsExact()) { break; diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index b19f473e27..c221282b9b 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -54,6 +54,12 @@ class ReferenceTypePropagation : public HOptimization { static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation"; + // Fix the reference type for an instruction whose inputs have changed. 
+ // For a select instruction, the reference types of the inputs are merged + // and the resulting reference type is set on the select instruction. + static void FixUpInstructionType(HInstruction* instruction, + VariableSizedHandleScope* handle_scope); + private: class HandleCache { public: @@ -101,7 +107,9 @@ class ReferenceTypePropagation : public HOptimization { static void UpdateArrayGet(HArrayGet* instr, HandleCache* handle_cache) REQUIRES_SHARED(Locks::mutator_lock_); - ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b) + static ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, + const ReferenceTypeInfo& b, + HandleCache* handle_cache) REQUIRES_SHARED(Locks::mutator_lock_); void ValidateTypes(); diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc index d537459113..cb2af91d87 100644 --- a/compiler/optimizing/reference_type_propagation_test.cc +++ b/compiler/optimizing/reference_type_propagation_test.cc @@ -49,7 +49,7 @@ class ReferenceTypePropagationTest : public CommonCompilerTest { // Relay method to merge type in reference type propagation. ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b) REQUIRES_SHARED(Locks::mutator_lock_) { - return propagation_->MergeTypes(a, b); + return propagation_->MergeTypes(a, b, &propagation_->handle_cache_); } // Helper method to construct an invalid type. @@ -163,4 +163,3 @@ TEST_F(ReferenceTypePropagationTest, MergeValidTypes) { } } // namespace art - diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index 5ad011d8f9..38cd51bef6 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -554,6 +554,14 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingN } void HScheduler::Schedule(HGraph* graph) { + // We run lsa here instead of in a separate pass to better control whether we + // should run the analysis or not. + LoadStoreAnalysis lsa(graph); + if (!only_optimize_loop_blocks_ || graph->HasLoops()) { + lsa.Run(); + scheduling_graph_.SetHeapLocationCollector(lsa.GetHeapLocationCollector()); + } + for (HBasicBlock* block : graph->GetReversePostOrder()) { if (IsSchedulable(block)) { Schedule(block); @@ -566,14 +574,6 @@ void HScheduler::Schedule(HBasicBlock* block) { // Build the scheduling graph. scheduling_graph_.Clear(); - - // Only perform LSA/HeapLocation analysis on the basic block that - // is going to get instruction scheduled. 
- HeapLocationCollector heap_location_collector(block->GetGraph()); - heap_location_collector.VisitBasicBlock(block); - heap_location_collector.BuildAliasingMatrix(); - scheduling_graph_.SetHeapLocationCollector(heap_location_collector); - for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); CHECK_EQ(instruction->GetBlock(), block) @@ -724,8 +724,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { instruction->IsClassTableGet() || instruction->IsCurrentMethod() || instruction->IsDivZeroCheck() || - instruction->IsInstanceFieldGet() || - instruction->IsInstanceFieldSet() || + (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) || + (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) || instruction->IsInstanceOf() || instruction->IsInvokeInterface() || instruction->IsInvokeStaticOrDirect() || @@ -741,14 +741,10 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { instruction->IsReturn() || instruction->IsReturnVoid() || instruction->IsSelect() || - instruction->IsStaticFieldGet() || - instruction->IsStaticFieldSet() || + (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) || + (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) || instruction->IsSuspendCheck() || - instruction->IsTypeConversion() || - instruction->IsUnresolvedInstanceFieldGet() || - instruction->IsUnresolvedInstanceFieldSet() || - instruction->IsUnresolvedStaticFieldGet() || - instruction->IsUnresolvedStaticFieldSet(); + instruction->IsTypeConversion(); } bool HScheduler::IsSchedulable(const HBasicBlock* block) const { diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index ea15790105..d6eb6e3c52 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -20,6 +20,7 @@ #include "code_generator_utils.h" #include "common_arm.h" #include "mirror/array-inl.h" +#include "mirror/string.h" namespace art { namespace arm { diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc index f54d3f3de2..1d9d28ab24 100644 --- a/compiler/optimizing/scheduler_arm64.cc +++ b/compiler/optimizing/scheduler_arm64.cc @@ -18,6 +18,7 @@ #include "code_generator_utils.h" #include "mirror/array-inl.h" +#include "mirror/string.h" namespace art { namespace arm64 { @@ -214,12 +215,12 @@ void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar( last_visited_latency_ = kArm64SIMDReplicateOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) { - LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +void SchedulingLatencyVisitorARM64::VisitVecExtractScalar(HVecExtractScalar* instr) { + HandleSimpleArithmeticSIMD(instr); } -void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) { - LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +void SchedulingLatencyVisitorARM64::VisitVecReduce(HVecReduce* instr) { + HandleSimpleArithmeticSIMD(instr); } void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) { @@ -282,8 +283,8 @@ void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) last_visited_latency_ = kArm64SIMDIntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) { - LOG(FATAL) << "Unsupported SIMD instruction " << 
instr->GetId(); +void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; } void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) { @@ -306,6 +307,10 @@ void SchedulingLatencyVisitorARM64::VisitVecUShr(HVecUShr* instr) { HandleSimpleArithmeticSIMD(instr); } +void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) { + HandleSimpleArithmeticSIMD(instr); +} + void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate( HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) { last_visited_latency_ = kArm64SIMDMulIntegerLatency; diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index 63d5b7d6b6..e1a80ec6fb 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -83,8 +83,8 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { M(SuspendCheck , unused) \ M(TypeConversion , unused) \ M(VecReplicateScalar , unused) \ - M(VecSetScalars , unused) \ - M(VecSumReduce , unused) \ + M(VecExtractScalar , unused) \ + M(VecReduce , unused) \ M(VecCnv , unused) \ M(VecNeg , unused) \ M(VecAbs , unused) \ @@ -103,6 +103,7 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { M(VecShl , unused) \ M(VecShr , unused) \ M(VecUShr , unused) \ + M(VecSetScalars , unused) \ M(VecMultiplyAccumulate, unused) \ M(VecLoad , unused) \ M(VecStore , unused) diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index cb7ade915f..e220d32344 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -20,9 +20,16 @@ namespace art { static constexpr size_t kMaxInstructionsInBranch = 1u; -// Returns true if `block` has only one predecessor, ends with a Goto and -// contains at most `kMaxInstructionsInBranch` other movable instruction with -// no side-effects. +HSelectGenerator::HSelectGenerator(HGraph* graph, + VariableSizedHandleScope* handles, + OptimizingCompilerStats* stats) + : HOptimization(graph, kSelectGeneratorPassName, stats), + handle_scope_(handles) { +} + +// Returns true if `block` has only one predecessor, ends with a Goto +// or a Return and contains at most `kMaxInstructionsInBranch` other +// movable instruction with no side-effects. static bool IsSimpleBlock(HBasicBlock* block) { if (block->GetPredecessors().size() != 1u) { return false; @@ -33,7 +40,10 @@ static bool IsSimpleBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); if (instruction->IsControlFlow()) { - return instruction->IsGoto() && num_instructions <= kMaxInstructionsInBranch; + if (num_instructions > kMaxInstructionsInBranch) { + return false; + } + return instruction->IsGoto() || instruction->IsReturn(); } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) { num_instructions++; } else { @@ -45,8 +55,8 @@ static bool IsSimpleBlock(HBasicBlock* block) { UNREACHABLE(); } -// Returns true if 'block1' and 'block2' are empty, merge into the same single -// successor and the successor can only be reached from them. +// Returns true if 'block1' and 'block2' are empty and merge into the +// same single successor. 
static bool BlocksMergeTogether(HBasicBlock* block1, HBasicBlock* block2) { return block1->GetSingleSuccessor() == block2->GetSingleSuccessor(); } @@ -94,48 +104,68 @@ void HSelectGenerator::Run() { // If the branches are not empty, move instructions in front of the If. // TODO(dbrazdil): This puts an instruction between If and its condition. // Implement moving of conditions to first users if possible. - if (!true_block->IsSingleGoto()) { + if (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { true_block->GetFirstInstruction()->MoveBefore(if_instruction); } - if (!false_block->IsSingleGoto()) { + if (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { false_block->GetFirstInstruction()->MoveBefore(if_instruction); } - DCHECK(true_block->IsSingleGoto()); - DCHECK(false_block->IsSingleGoto()); + DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn()); + DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn()); // Find the resulting true/false values. size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block); size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block); DCHECK_NE(predecessor_index_true, predecessor_index_false); + bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn(); HPhi* phi = GetSingleChangedPhi(merge_block, predecessor_index_true, predecessor_index_false); - if (phi == nullptr) { + + HInstruction* true_value = nullptr; + HInstruction* false_value = nullptr; + if (both_successors_return) { + true_value = true_block->GetFirstInstruction()->InputAt(0); + false_value = false_block->GetFirstInstruction()->InputAt(0); + } else if (phi != nullptr) { + true_value = phi->InputAt(predecessor_index_true); + false_value = phi->InputAt(predecessor_index_false); + } else { continue; } - HInstruction* true_value = phi->InputAt(predecessor_index_true); - HInstruction* false_value = phi->InputAt(predecessor_index_false); + DCHECK(both_successors_return || phi != nullptr); // Create the Select instruction and insert it in front of the If. HSelect* select = new (graph_->GetArena()) HSelect(if_instruction->InputAt(0), true_value, false_value, if_instruction->GetDexPc()); - if (phi->GetType() == Primitive::kPrimNot) { + if (both_successors_return) { + if (true_value->GetType() == Primitive::kPrimNot) { + DCHECK(false_value->GetType() == Primitive::kPrimNot); + ReferenceTypePropagation::FixUpInstructionType(select, handle_scope_); + } + } else if (phi->GetType() == Primitive::kPrimNot) { select->SetReferenceTypeInfo(phi->GetReferenceTypeInfo()); } block->InsertInstructionBefore(select, if_instruction); - // Remove the true branch which removes the corresponding Phi input. - // If left only with the false branch, the Phi is automatically removed. - phi->ReplaceInput(select, predecessor_index_false); + // Remove the true branch which removes the corresponding Phi + // input if needed. If left only with the false branch, the Phi is + // automatically removed. + if (both_successors_return) { + false_block->GetFirstInstruction()->ReplaceInput(select, 0); + } else { + phi->ReplaceInput(select, predecessor_index_false); + } + bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u); true_block->DisconnectAndDelete(); - DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr); // Merge remaining blocks which are now connected with Goto. 
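A source-level sketch of the new shape HSelectGenerator recognizes in the hunk above: both branches end in a single return, so the diamond collapses into a single return of a Select. Function names here are illustrative only.

// Before the pass (conceptually): two single-Return branches under one If.
int PickValue(bool cond, int a, int b) {
  if (cond) {
    return a;  // true branch: single HReturn
  } else {
    return b;  // false branch: single HReturn
  }
}

// After the pass (conceptually): return Select [FalseValue, TrueValue, Condition].
int PickValueAfter(bool cond, int a, int b) {
  return cond ? a : b;
}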
DCHECK_EQ(block->GetSingleSuccessor(), false_block); block->MergeWith(false_block); - if (only_two_predecessors) { + if (!both_successors_return && only_two_predecessors) { + DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr); DCHECK_EQ(block->GetSingleSuccessor(), merge_block); block->MergeWith(merge_block); } diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h index c6dca581cc..c060146478 100644 --- a/compiler/optimizing/select_generator.h +++ b/compiler/optimizing/select_generator.h @@ -18,7 +18,7 @@ * This optimization recognizes the common diamond selection pattern and * replaces it with an instance of the HSelect instruction. * - * Recognized pattern: + * Recognized patterns: * * If [ Condition ] * / \ @@ -26,14 +26,30 @@ * \ / * Phi [FalseValue, TrueValue] * + * and + * + * If [ Condition ] + * / \ + * false branch true branch + * return FalseValue return TrueValue + * * The pattern will be simplified if `true_branch` and `false_branch` each * contain at most one instruction without any side effects. * - * Blocks are merged into one and Select replaces the If and the Phi: + * Blocks are merged into one and Select replaces the If and the Phi. + * + * For the first pattern it simplifies to: + * * true branch * false branch * Select [FalseValue, TrueValue, Condition] * + * For the second pattern it simplifies to: + * + * true branch + * false branch + * return Select [FalseValue, TrueValue, Condition] + * * Note: In order to recognize no side-effect blocks, this optimization must be * run after the instruction simplifier has removed redundant suspend checks. */ @@ -42,19 +58,22 @@ #define ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_ #include "optimization.h" +#include "reference_type_propagation.h" namespace art { class HSelectGenerator : public HOptimization { public: - HSelectGenerator(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, kSelectGeneratorPassName, stats) {} + HSelectGenerator(HGraph* graph, + VariableSizedHandleScope* handles, + OptimizingCompilerStats* stats); void Run() OVERRIDE; static constexpr const char* kSelectGeneratorPassName = "select_generator"; private: + VariableSizedHandleScope* handle_scope_; DISALLOW_COPY_AND_ASSIGN(HSelectGenerator); }; diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 9536d149f6..1ca63f4f86 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -278,10 +278,12 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { } else { // AOT app compilation. Try to lookup the string without allocating if not found. 
string = class_linker->LookupString(dex_file, string_index, dex_cache.Get()); - if (string != nullptr && - runtime->GetHeap()->ObjectIsInBootImageSpace(string) && - !codegen_->GetCompilerOptions().GetCompilePic()) { - desired_load_kind = HLoadString::LoadKind::kBootImageAddress; + if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { + if (codegen_->GetCompilerOptions().GetCompilePic()) { + desired_load_kind = HLoadString::LoadKind::kBootImageInternTable; + } else { + desired_load_kind = HLoadString::LoadKind::kBootImageAddress; + } } else { desired_load_kind = HLoadString::LoadKind::kBssEntry; } diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index af3b4474e3..9df1b7434a 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -82,6 +82,22 @@ void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) { } } +void ArmVIXLAssembler::GenerateMarkingRegisterCheck(vixl32::Register temp, int code) { + // The Marking Register is only used in the Baker read barrier configuration. + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + vixl32::Label mr_is_ok; + + // temp = self.tls32_.is.gc_marking + ___ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value())); + // Check that mr == self.tls32_.is.gc_marking. + ___ Cmp(mr, temp); + ___ B(eq, &mr_is_ok, /* far_target */ false); + ___ Bkpt(code); + ___ Bind(&mr_is_ok); +} + void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) { // TODO(VIXL): Implement this optimization in VIXL. if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) { diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index 66b22ea87c..9c11fd3222 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -178,6 +178,7 @@ class ArmVIXLAssembler FINAL : public Assembler { // // Heap poisoning. // + // Poison a heap reference contained in `reg`. void PoisonHeapReference(vixl32::Register reg); // Unpoison a heap reference contained in `reg`. @@ -187,6 +188,15 @@ class ArmVIXLAssembler FINAL : public Assembler { // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. void MaybeUnpoisonHeapReference(vixl32::Register reg); + // Emit code checking the status of the Marking Register, and aborting + // the program if MR does not match the value stored in the art::Thread + // object. + // + // Argument `temp` is used as a temporary register to generate code. + // Argument `code` is used to identify the different occurrences of + // MaybeGenerateMarkingRegisterCheck and is passed to the BKPT instruction. + void GenerateMarkingRegisterCheck(vixl32::Register temp, int code = 0); + void StoreToOffset(StoreOperandType type, vixl32::Register reg, vixl32::Register base, diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 6ed0e9b670..d8a48a563c 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -158,6 +158,24 @@ void Arm64Assembler::MaybeUnpoisonHeapReference(Register reg) { } } +void Arm64Assembler::GenerateMarkingRegisterCheck(Register temp, int code) { + // The Marking Register is only used in the Baker read barrier configuration. + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + vixl::aarch64::Register mr = reg_x(MR); // Marking Register. 
+ vixl::aarch64::Register tr = reg_x(TR); // Thread Register. + vixl::aarch64::Label mr_is_ok; + + // temp = self.tls32_.is.gc_marking + ___ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value())); + // Check that mr == self.tls32_.is.gc_marking. + ___ Cmp(mr.W(), temp); + ___ B(eq, &mr_is_ok); + ___ Brk(code); + ___ Bind(&mr_is_ok); +} + #undef ___ } // namespace arm64 diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index 5b8a34e56d..6b28363a8f 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -98,6 +98,15 @@ class Arm64Assembler FINAL : public Assembler { // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. void MaybeUnpoisonHeapReference(vixl::aarch64::Register reg); + // Emit code checking the status of the Marking Register, and aborting + // the program if MR does not match the value stored in the art::Thread + // object. + // + // Argument `temp` is used as a temporary register to generate code. + // Argument `code` is used to identify the different occurrences of + // MaybeGenerateMarkingRegisterCheck and is passed to the BRK instruction. + void GenerateMarkingRegisterCheck(vixl::aarch64::Register temp, int code = 0); + void Bind(Label* label ATTRIBUTE_UNUSED) OVERRIDE { UNIMPLEMENTED(FATAL) << "Do not use Bind for ARM64"; } diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc index bab84bea4c..9732b765a1 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc @@ -662,7 +662,7 @@ void Arm64JNIMacroAssembler::Bind(JNIMacroLabel* label) { ___ Bind(Arm64JNIMacroLabel::Cast(label)->AsArm64()); } -void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception *exception) { +void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception* exception) { UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); temps.Exclude(reg_x(exception->scratch_.AsXRegister())); Register temp = temps.AcquireX(); diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h index 59a1a48e20..a8ca1119e5 100644 --- a/compiler/utils/jni_macro_assembler.h +++ b/compiler/utils/jni_macro_assembler.h @@ -216,8 +216,15 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> { */ virtual DebugFrameOpCodeWriterForAssembler& cfi() = 0; + void SetEmitRunTimeChecksInDebugMode(bool value) { + emit_run_time_checks_in_debug_mode_ = value; + } + protected: - explicit JNIMacroAssembler() {} + JNIMacroAssembler() {} + + // Should run-time checks be emitted in debug mode? 
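A conceptual C++ equivalent of the check emitted by the new GenerateMarkingRegisterCheck helpers, with a hypothetical Thread layout standing in for art::Thread: load the thread-local gc-marking flag, compare it with the dedicated Marking Register, and trap if they disagree.

#include <cstdint>
#include <cstdlib>

// Hypothetical stand-in for the thread-local flag read by the emitted code.
struct Thread {
  struct { int32_t is_gc_marking; } tls32_;
};

// Conceptual equivalent of the emitted sequence: compare MR against the
// thread-local flag and trap (BKPT on ARM, BRK on ARM64) on mismatch.
inline void CheckMarkingRegister(int32_t mr, const Thread* self) {
  if (mr != self->tls32_.is_gc_marking) {
    std::abort();  // stands in for the breakpoint instruction carrying `code`
  }
}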
+ bool emit_run_time_checks_in_debug_mode_ = false; }; // A "Label" class used with the JNIMacroAssembler diff --git a/compiler/utils/label.h b/compiler/utils/label.h index 85710d0811..d835c63443 100644 --- a/compiler/utils/label.h +++ b/compiler/utils/label.h @@ -31,9 +31,11 @@ namespace arm64 { } // namespace arm64 namespace mips { class MipsAssembler; + class MipsLabel; } // namespace mips namespace mips64 { class Mips64Assembler; + class Mips64Label; } // namespace mips64 namespace x86 { class X86Assembler; @@ -114,7 +116,9 @@ class Label { friend class arm64::Arm64Assembler; friend class mips::MipsAssembler; + friend class mips::MipsLabel; friend class mips64::Mips64Assembler; + friend class mips64::Mips64Label; friend class x86::X86Assembler; friend class x86::NearLabel; friend class x86_64::X86_64Assembler; diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 2cbabcfb32..b300cc597f 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -47,7 +47,8 @@ MipsAssembler::DelaySlot::DelaySlot() fpr_outs_mask_(0), fpr_ins_mask_(0), cc_outs_mask_(0), - cc_ins_mask_(0) {} + cc_ins_mask_(0), + patcher_label_(nullptr) {} void MipsAssembler::DsFsmInstr(uint32_t instruction, uint32_t gpr_outs_mask, @@ -55,7 +56,8 @@ void MipsAssembler::DsFsmInstr(uint32_t instruction, uint32_t fpr_outs_mask, uint32_t fpr_ins_mask, uint32_t cc_outs_mask, - uint32_t cc_ins_mask) { + uint32_t cc_ins_mask, + MipsLabel* patcher_label) { if (!reordering_) { CHECK_EQ(ds_fsm_state_, kExpectingLabel); CHECK_EQ(delay_slot_.instruction_, 0u); @@ -96,6 +98,7 @@ void MipsAssembler::DsFsmInstr(uint32_t instruction, delay_slot_.fpr_ins_mask_ = fpr_ins_mask; delay_slot_.cc_outs_mask_ = cc_outs_mask; delay_slot_.cc_ins_mask_ = cc_ins_mask; + delay_slot_.patcher_label_ = patcher_label; } void MipsAssembler::DsFsmLabel() { @@ -167,8 +170,12 @@ void MipsAssembler::DsFsmInstrNop(uint32_t instruction ATTRIBUTE_UNUSED) { DsFsmInstr(0, 0, 0, 0, 0, 0, 0); } -void MipsAssembler::DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2) { - DsFsmInstr(instruction, (1u << out), (1u << in1) | (1u << in2), 0, 0, 0, 0); +void MipsAssembler::DsFsmInstrRrr(uint32_t instruction, + Register out, + Register in1, + Register in2, + MipsLabel* patcher_label) { + DsFsmInstr(instruction, (1u << out), (1u << in1) | (1u << in2), 0, 0, 0, 0, patcher_label); } void MipsAssembler::DsFsmInstrRrrr(uint32_t instruction, @@ -310,8 +317,8 @@ void MipsAssembler::EmitBranches() { // Switch from appending instructions at the end of the buffer to overwriting // existing instructions (branch placeholders) in the buffer. 
overwriting_ = true; - for (auto& branch : branches_) { - EmitBranch(&branch); + for (size_t id = 0; id < branches_.size(); id++) { + EmitBranch(id); } overwriting_ = false; } @@ -531,8 +538,15 @@ void MipsAssembler::Addu(Register rd, Register rs, Register rt) { DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x21), rd, rs, rt); } +void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) { + if (patcher_label != nullptr) { + Bind(patcher_label); + } + DsFsmInstrRrr(EmitI(0x9, rs, rt, imm16), rt, rs, rs, patcher_label); +} + void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16) { - DsFsmInstrRrr(EmitI(0x9, rs, rt, imm16), rt, rs, rs); + Addiu(rt, rs, imm16, /* patcher_label */ nullptr); } void MipsAssembler::Subu(Register rd, Register rs, Register rt) { @@ -791,8 +805,15 @@ void MipsAssembler::Lh(Register rt, Register rs, uint16_t imm16) { DsFsmInstrRrr(EmitI(0x21, rs, rt, imm16), rt, rs, rs); } +void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) { + if (patcher_label != nullptr) { + Bind(patcher_label); + } + DsFsmInstrRrr(EmitI(0x23, rs, rt, imm16), rt, rs, rs, patcher_label); +} + void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16) { - DsFsmInstrRrr(EmitI(0x23, rs, rt, imm16), rt, rs, rs); + Lw(rt, rs, imm16, /* patcher_label */ nullptr); } void MipsAssembler::Lwl(Register rt, Register rs, uint16_t imm16) { @@ -866,8 +887,15 @@ void MipsAssembler::Sh(Register rt, Register rs, uint16_t imm16) { DsFsmInstrRrr(EmitI(0x29, rs, rt, imm16), ZERO, rt, rs); } +void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) { + if (patcher_label != nullptr) { + Bind(patcher_label); + } + DsFsmInstrRrr(EmitI(0x2b, rs, rt, imm16), ZERO, rt, rs, patcher_label); +} + void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16) { - DsFsmInstrRrr(EmitI(0x2b, rs, rt, imm16), ZERO, rt, rs); + Sw(rt, rs, imm16, /* patcher_label */ nullptr); } void MipsAssembler::Swl(Register rt, Register rs, uint16_t imm16) { @@ -935,11 +963,11 @@ void MipsAssembler::Bne(Register rs, Register rt, uint16_t imm16) { } void MipsAssembler::Beqz(Register rt, uint16_t imm16) { - Beq(ZERO, rt, imm16); + Beq(rt, ZERO, imm16); } void MipsAssembler::Bnez(Register rt, uint16_t imm16) { - Bne(ZERO, rt, imm16); + Bne(rt, ZERO, imm16); } void MipsAssembler::Bltz(Register rt, uint16_t imm16) { @@ -991,6 +1019,7 @@ void MipsAssembler::Jal(uint32_t addr26) { void MipsAssembler::Jalr(Register rd, Register rs) { uint32_t last_instruction = delay_slot_.instruction_; + MipsLabel* patcher_label = delay_slot_.patcher_label_; bool exchange = (last_instruction != 0 && (delay_slot_.gpr_outs_mask_ & (1u << rs)) == 0 && ((delay_slot_.gpr_ins_mask_ | delay_slot_.gpr_outs_mask_) & (1u << rd)) == 0); @@ -1011,6 +1040,10 @@ void MipsAssembler::Jalr(Register rd, Register rs) { CHECK_EQ(instr1, last_instruction); buffer_.Store<uint32_t>(pos1, instr2); buffer_.Store<uint32_t>(pos2, instr1); + // Move the patcher label along with the patched instruction. 
+ if (patcher_label != nullptr) { + patcher_label->AdjustBoundPosition(sizeof(uint32_t)); + } } else if (reordering_) { Nop(); } @@ -3118,7 +3151,7 @@ void MipsAssembler::Branch::InitShortOrLong(MipsAssembler::Branch::OffsetBits of } void MipsAssembler::Branch::InitializeType(Type initial_type, bool is_r6) { - OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); + OffsetBits offset_size_needed = GetOffsetSizeNeeded(location_, target_); if (is_r6) { // R6 switch (initial_type) { @@ -3131,23 +3164,31 @@ void MipsAssembler::Branch::InitializeType(Type initial_type, bool is_r6) { type_ = kR6Literal; break; case kCall: - InitShortOrLong(offset_size, kR6Call, kR6LongCall); + InitShortOrLong(offset_size_needed, kR6Call, kR6LongCall); break; case kCondBranch: switch (condition_) { case kUncond: - InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch); + InitShortOrLong(offset_size_needed, kR6UncondBranch, kR6LongUncondBranch); break; case kCondEQZ: case kCondNEZ: // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. - type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch; + type_ = (offset_size_needed <= kOffset23) ? kR6CondBranch : kR6LongCondBranch; break; default: - InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch); + InitShortOrLong(offset_size_needed, kR6CondBranch, kR6LongCondBranch); break; } break; + case kBareCall: + type_ = kR6BareCall; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + case kBareCondBranch: + type_ = (condition_ == kUncond) ? kR6BareUncondBranch : kR6BareCondBranch; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; default: LOG(FATAL) << "Unexpected branch type " << initial_type; UNREACHABLE(); @@ -3164,18 +3205,26 @@ void MipsAssembler::Branch::InitializeType(Type initial_type, bool is_r6) { type_ = kLiteral; break; case kCall: - InitShortOrLong(offset_size, kCall, kLongCall); + InitShortOrLong(offset_size_needed, kCall, kLongCall); break; case kCondBranch: switch (condition_) { case kUncond: - InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + InitShortOrLong(offset_size_needed, kUncondBranch, kLongUncondBranch); break; default: - InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + InitShortOrLong(offset_size_needed, kCondBranch, kLongCondBranch); break; } break; + case kBareCall: + type_ = kBareCall; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + case kBareCondBranch: + type_ = (condition_ == kUncond) ? kBareUncondBranch : kBareCondBranch; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; default: LOG(FATAL) << "Unexpected branch type " << initial_type; UNREACHABLE(); @@ -3210,15 +3259,22 @@ bool MipsAssembler::Branch::IsUncond(BranchCondition condition, Register lhs, Re } } -MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call) +MipsAssembler::Branch::Branch(bool is_r6, + uint32_t location, + uint32_t target, + bool is_call, + bool is_bare) : old_location_(location), location_(location), target_(target), lhs_reg_(0), rhs_reg_(0), condition_(kUncond), - delayed_instruction_(kUnfilledDelaySlot) { - InitializeType((is_call ? kCall : kCondBranch), is_r6); + delayed_instruction_(kUnfilledDelaySlot), + patcher_label_(nullptr) { + InitializeType( + (is_call ? (is_bare ? kBareCall : kCall) : (is_bare ? 
kBareCondBranch : kCondBranch)), + is_r6); } MipsAssembler::Branch::Branch(bool is_r6, @@ -3226,14 +3282,16 @@ MipsAssembler::Branch::Branch(bool is_r6, uint32_t target, MipsAssembler::BranchCondition condition, Register lhs_reg, - Register rhs_reg) + Register rhs_reg, + bool is_bare) : old_location_(location), location_(location), target_(target), lhs_reg_(lhs_reg), rhs_reg_(rhs_reg), condition_(condition), - delayed_instruction_(kUnfilledDelaySlot) { + delayed_instruction_(kUnfilledDelaySlot), + patcher_label_(nullptr) { CHECK_NE(condition, kUncond); switch (condition) { case kCondLT: @@ -3276,7 +3334,7 @@ MipsAssembler::Branch::Branch(bool is_r6, // Branch condition is always true, make the branch unconditional. condition_ = kUncond; } - InitializeType(kCondBranch, is_r6); + InitializeType((is_bare ? kBareCondBranch : kCondBranch), is_r6); } MipsAssembler::Branch::Branch(bool is_r6, @@ -3290,7 +3348,8 @@ MipsAssembler::Branch::Branch(bool is_r6, lhs_reg_(dest_reg), rhs_reg_(base_reg), condition_(kUncond), - delayed_instruction_(kUnfilledDelaySlot) { + delayed_instruction_(kUnfilledDelaySlot), + patcher_label_(nullptr) { CHECK_NE(dest_reg, ZERO); if (is_r6) { CHECK_EQ(base_reg, ZERO); @@ -3419,20 +3478,44 @@ uint32_t MipsAssembler::Branch::GetOldEndLocation() const { return GetOldLocation() + GetOldSize(); } +bool MipsAssembler::Branch::IsBare() const { + switch (type_) { + // R2 short branches (can't be promoted to long), delay slots filled manually. + case kBareUncondBranch: + case kBareCondBranch: + case kBareCall: + // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually. + case kR6BareUncondBranch: + case kR6BareCondBranch: + case kR6BareCall: + return true; + default: + return false; + } +} + bool MipsAssembler::Branch::IsLong() const { switch (type_) { - // R2 short branches. + // R2 short branches (can be promoted to long). case kUncondBranch: case kCondBranch: case kCall: + // R2 short branches (can't be promoted to long), delay slots filled manually. + case kBareUncondBranch: + case kBareCondBranch: + case kBareCall: // R2 near label. case kLabel: // R2 near literal. case kLiteral: - // R6 short branches. + // R6 short branches (can be promoted to long). case kR6UncondBranch: case kR6CondBranch: case kR6Call: + // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually. + case kR6BareUncondBranch: + case kR6BareCondBranch: + case kR6BareCall: // R6 near label. case kR6Label: // R6 near literal. @@ -3464,8 +3547,9 @@ bool MipsAssembler::Branch::IsResolved() const { } MipsAssembler::Branch::OffsetBits MipsAssembler::Branch::GetOffsetSize() const { + bool r6_cond_branch = (type_ == kR6CondBranch || type_ == kR6BareCondBranch); OffsetBits offset_size = - (type_ == kR6CondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ)) + (r6_cond_branch && (condition_ == kCondEQZ || condition_ == kCondNEZ)) ? kOffset23 : branch_info_[type_].offset_size; return offset_size; @@ -3511,8 +3595,9 @@ void MipsAssembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) { } void MipsAssembler::Branch::PromoteToLong() { + CHECK(!IsBare()); // Bare branches do not promote. switch (type_) { - // R2 short branches. + // R2 short branches (can be promoted to long). case kUncondBranch: type_ = kLongUncondBranch; break; @@ -3530,7 +3615,7 @@ void MipsAssembler::Branch::PromoteToLong() { case kLiteral: type_ = kFarLiteral; break; - // R6 short branches. + // R6 short branches (can be promoted to long). 
case kR6UncondBranch: type_ = kR6LongUncondBranch; break; @@ -3585,7 +3670,7 @@ uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_ } // The following logic is for debugging/testing purposes. // Promote some short branches to long when it's not really required. - if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) { + if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max() && !IsBare())) { int64_t distance = static_cast<int64_t>(target_) - location; distance = (distance >= 0) ? distance : -distance; if (distance >= max_short_distance) { @@ -3641,6 +3726,17 @@ const MipsAssembler::Branch* MipsAssembler::GetBranch(uint32_t branch_id) const return &branches_[branch_id]; } +void MipsAssembler::BindRelativeToPrecedingBranch(MipsLabel* label, + uint32_t prev_branch_id_plus_one, + uint32_t position) { + if (prev_branch_id_plus_one != 0) { + const Branch* branch = GetBranch(prev_branch_id_plus_one - 1); + position -= branch->GetEndLocation(); + } + label->prev_branch_id_plus_one_ = prev_branch_id_plus_one; + label->BindTo(position); +} + void MipsAssembler::Bind(MipsLabel* label) { CHECK(!label->IsBound()); uint32_t bound_pc = buffer_.Size(); @@ -3666,22 +3762,15 @@ void MipsAssembler::Bind(MipsLabel* label) { // Now make the label object contain its own location (relative to the end of the preceding // branch, if any; it will be used by the branches referring to and following this label). - label->prev_branch_id_plus_one_ = branches_.size(); - if (label->prev_branch_id_plus_one_) { - uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; - const Branch* branch = GetBranch(branch_id); - bound_pc -= branch->GetEndLocation(); - } - label->BindTo(bound_pc); + BindRelativeToPrecedingBranch(label, branches_.size(), bound_pc); } uint32_t MipsAssembler::GetLabelLocation(const MipsLabel* label) const { CHECK(label->IsBound()); uint32_t target = label->Position(); - if (label->prev_branch_id_plus_one_) { + if (label->prev_branch_id_plus_one_ != 0) { // Get label location based on the branch preceding it. - uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; - const Branch* branch = GetBranch(branch_id); + const Branch* branch = GetBranch(label->prev_branch_id_plus_one_ - 1); target += branch->GetEndLocation(); } return target; @@ -3823,10 +3912,15 @@ uint32_t MipsAssembler::Branch::GetDelayedInstruction() const { return delayed_instruction_; } -void MipsAssembler::Branch::SetDelayedInstruction(uint32_t instruction) { +MipsLabel* MipsAssembler::Branch::GetPatcherLabel() const { + return patcher_label_; +} + +void MipsAssembler::Branch::SetDelayedInstruction(uint32_t instruction, MipsLabel* patcher_label) { CHECK_NE(instruction, kUnfilledDelaySlot); CHECK_EQ(delayed_instruction_, kUnfilledDelaySlot); delayed_instruction_ = instruction; + patcher_label_ = patcher_label; } void MipsAssembler::Branch::DecrementLocations() { @@ -3851,6 +3945,10 @@ void MipsAssembler::Branch::DecrementLocations() { } void MipsAssembler::MoveInstructionToDelaySlot(Branch& branch) { + if (branch.IsBare()) { + // Delay slots are filled manually in bare branches. + return; + } if (branch.CanHaveDelayedInstruction(delay_slot_)) { // The last instruction cannot be used in a different delay slot, // do not commit the label before it (if any). @@ -3863,34 +3961,39 @@ void MipsAssembler::MoveInstructionToDelaySlot(Branch& branch) { buffer_.Resize(size); // Attach it to the branch and adjust the branch locations. 
branch.DecrementLocations(); - branch.SetDelayedInstruction(delay_slot_.instruction_); + branch.SetDelayedInstruction(delay_slot_.instruction_, delay_slot_.patcher_label_); } else if (!reordering_ && branch.GetType() == Branch::kUncondBranch) { // If reordering is disabled, prevent absorption of the target instruction. branch.SetDelayedInstruction(Branch::kUnfillableDelaySlot); } } -void MipsAssembler::Buncond(MipsLabel* label) { +void MipsAssembler::Buncond(MipsLabel* label, bool is_r6, bool is_bare) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false); + branches_.emplace_back(is_r6, buffer_.Size(), target, /* is_call */ false, is_bare); MoveInstructionToDelaySlot(branches_.back()); FinalizeLabeledBranch(label); } -void MipsAssembler::Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs) { +void MipsAssembler::Bcond(MipsLabel* label, + bool is_r6, + bool is_bare, + BranchCondition condition, + Register lhs, + Register rhs) { // If lhs = rhs, this can be a NOP. if (Branch::IsNop(condition, lhs, rhs)) { return; } uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(IsR6(), buffer_.Size(), target, condition, lhs, rhs); + branches_.emplace_back(is_r6, buffer_.Size(), target, condition, lhs, rhs, is_bare); MoveInstructionToDelaySlot(branches_.back()); FinalizeLabeledBranch(label); } -void MipsAssembler::Call(MipsLabel* label) { +void MipsAssembler::Call(MipsLabel* label, bool is_r6, bool is_bare) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true); + branches_.emplace_back(is_r6, buffer_.Size(), target, /* is_call */ true, is_bare); MoveInstructionToDelaySlot(branches_.back()); FinalizeLabeledBranch(label); } @@ -4038,10 +4141,14 @@ void MipsAssembler::PromoteBranches() { // Note: make sure branch_info_[] and EmitBranch() are kept synchronized. const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = { - // R2 short branches. + // R2 short branches (can be promoted to long). { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kUncondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCall + // R2 short branches (can't be promoted to long), delay slots filled manually. + { 1, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kBareUncondBranch + { 1, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kBareCondBranch + { 1, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kBareCall // R2 near label. { 1, 0, 0, MipsAssembler::Branch::kOffset16, 0 }, // kLabel // R2 near literal. @@ -4054,11 +4161,16 @@ const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kFarLabel // R2 far literal. { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kFarLiteral - // R6 short branches. + // R6 short branches (can be promoted to long). { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6UncondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kR6CondBranch // Exception: kOffset23 for beqzc/bnezc. { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6Call + // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+ { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6BareUncondBranch + { 1, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kR6BareCondBranch + // Exception: kOffset23 for beqzc/bnezc. + { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6BareCall // R6 near label. { 1, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Label // R6 near literal. @@ -4073,15 +4185,49 @@ const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6FarLiteral }; +static inline bool IsAbsorbableInstruction(uint32_t instruction) { + // The relative patcher patches addiu, lw and sw with an immediate operand of 0x5678. + // We want to make sure that these instructions do not get absorbed into delay slots + // of unconditional branches on R2. Absorption would otherwise make copies of + // unpatched instructions. + if ((instruction & 0xFFFF) != 0x5678) { + return true; + } + switch (instruction >> kOpcodeShift) { + case 0x09: // Addiu. + case 0x23: // Lw. + case 0x2B: // Sw. + return false; + default: + return true; + } +} + // Note: make sure branch_info_[] and EmitBranch() are kept synchronized. -void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { +void MipsAssembler::EmitBranch(uint32_t branch_id) { CHECK_EQ(overwriting_, true); + Branch* branch = GetBranch(branch_id); overwrite_location_ = branch->GetLocation(); uint32_t offset = branch->GetOffset(GetBranchOrPcRelBaseForEncoding(branch)); BranchCondition condition = branch->GetCondition(); Register lhs = branch->GetLeftRegister(); Register rhs = branch->GetRightRegister(); uint32_t delayed_instruction = branch->GetDelayedInstruction(); + MipsLabel* patcher_label = branch->GetPatcherLabel(); + if (patcher_label != nullptr) { + // Update the patcher label location to account for branch promotion and + // delay slot filling. + CHECK(patcher_label->IsBound()); + uint32_t bound_pc = branch->GetLocation(); + if (!branch->IsLong()) { + // Short branches precede delay slots. + // Long branches follow "delay slots". + bound_pc += sizeof(uint32_t); + } + // Rebind the label. + patcher_label->Reinitialize(); + BindRelativeToPrecedingBranch(patcher_label, branch_id, bound_pc); + } switch (branch->GetType()) { // R2 short branches. case Branch::kUncondBranch: @@ -4097,8 +4243,11 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { if (offset != 0x7FFF) { uint32_t target = branch->GetTarget(); if (std::binary_search(ds_fsm_target_pcs_.begin(), ds_fsm_target_pcs_.end(), target)) { - delayed_instruction = buffer_.Load<uint32_t>(target); - offset++; + uint32_t target_instruction = buffer_.Load<uint32_t>(target); + if (IsAbsorbableInstruction(target_instruction)) { + delayed_instruction = target_instruction; + offset++; + } } } } @@ -4124,6 +4273,21 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Bal(offset); Emit(delayed_instruction); break; + case Branch::kBareUncondBranch: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + B(offset); + break; + case Branch::kBareCondBranch: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcondR2(condition, lhs, rhs, offset); + break; + case Branch::kBareCall: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Bal(offset); + break; // R2 near label. 
case Branch::kLabel: @@ -4249,6 +4413,21 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Balc(offset); break; + case Branch::kR6BareUncondBranch: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Bc(offset); + break; + case Branch::kR6BareCondBranch: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcondR6(condition, lhs, rhs, offset); + break; + case Branch::kR6BareCall: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Balc(offset); + break; // R6 near label. case Branch::kR6Label: @@ -4309,46 +4488,51 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { } CHECK_EQ(overwrite_location_, branch->GetEndLocation()); CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize)); + if (patcher_label != nullptr) { + // The patched instruction should look like one. + uint32_t patched_instruction = buffer_.Load<uint32_t>(GetLabelLocation(patcher_label)); + CHECK(!IsAbsorbableInstruction(patched_instruction)); + } } -void MipsAssembler::B(MipsLabel* label) { - Buncond(label); +void MipsAssembler::B(MipsLabel* label, bool is_bare) { + Buncond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare); } -void MipsAssembler::Bal(MipsLabel* label) { - Call(label); +void MipsAssembler::Bal(MipsLabel* label, bool is_bare) { + Call(label, /* is_r6 */ (IsR6() && !is_bare), is_bare); } -void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) { - Bcond(label, kCondEQ, rs, rt); +void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondEQ, rs, rt); } -void MipsAssembler::Bne(Register rs, Register rt, MipsLabel* label) { - Bcond(label, kCondNE, rs, rt); +void MipsAssembler::Bne(Register rs, Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondNE, rs, rt); } -void MipsAssembler::Beqz(Register rt, MipsLabel* label) { - Bcond(label, kCondEQZ, rt); +void MipsAssembler::Beqz(Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondEQZ, rt); } -void MipsAssembler::Bnez(Register rt, MipsLabel* label) { - Bcond(label, kCondNEZ, rt); +void MipsAssembler::Bnez(Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondNEZ, rt); } -void MipsAssembler::Bltz(Register rt, MipsLabel* label) { - Bcond(label, kCondLTZ, rt); +void MipsAssembler::Bltz(Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondLTZ, rt); } -void MipsAssembler::Bgez(Register rt, MipsLabel* label) { - Bcond(label, kCondGEZ, rt); +void MipsAssembler::Bgez(Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondGEZ, rt); } -void MipsAssembler::Blez(Register rt, MipsLabel* label) { - Bcond(label, kCondLEZ, rt); +void MipsAssembler::Blez(Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondLEZ, rt); } -void MipsAssembler::Bgtz(Register rt, MipsLabel* label) { - Bcond(label, kCondGTZ, rt); +void MipsAssembler::Bgtz(Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ (IsR6() && !is_bare), 
is_bare, kCondGTZ, rt); } bool MipsAssembler::CanExchangeWithSlt(Register rs, Register rt) const { @@ -4399,74 +4583,130 @@ void MipsAssembler::GenerateSltForCondBranch(bool unsigned_slt, Register rs, Reg } } -void MipsAssembler::Blt(Register rs, Register rt, MipsLabel* label) { - if (IsR6()) { - Bcond(label, kCondLT, rs, rt); +void MipsAssembler::Blt(Register rs, Register rt, MipsLabel* label, bool is_bare) { + if (IsR6() && !is_bare) { + Bcond(label, IsR6(), is_bare, kCondLT, rs, rt); } else if (!Branch::IsNop(kCondLT, rs, rt)) { // Synthesize the instruction (not available on R2). GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt); - Bnez(AT, label); + Bnez(AT, label, is_bare); } } -void MipsAssembler::Bge(Register rs, Register rt, MipsLabel* label) { - if (IsR6()) { - Bcond(label, kCondGE, rs, rt); +void MipsAssembler::Bge(Register rs, Register rt, MipsLabel* label, bool is_bare) { + if (IsR6() && !is_bare) { + Bcond(label, IsR6(), is_bare, kCondGE, rs, rt); } else if (Branch::IsUncond(kCondGE, rs, rt)) { - B(label); + B(label, is_bare); } else { // Synthesize the instruction (not available on R2). GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt); - Beqz(AT, label); + Beqz(AT, label, is_bare); } } -void MipsAssembler::Bltu(Register rs, Register rt, MipsLabel* label) { - if (IsR6()) { - Bcond(label, kCondLTU, rs, rt); +void MipsAssembler::Bltu(Register rs, Register rt, MipsLabel* label, bool is_bare) { + if (IsR6() && !is_bare) { + Bcond(label, IsR6(), is_bare, kCondLTU, rs, rt); } else if (!Branch::IsNop(kCondLTU, rs, rt)) { // Synthesize the instruction (not available on R2). GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt); - Bnez(AT, label); + Bnez(AT, label, is_bare); } } -void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label) { - if (IsR6()) { - Bcond(label, kCondGEU, rs, rt); +void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label, bool is_bare) { + if (IsR6() && !is_bare) { + Bcond(label, IsR6(), is_bare, kCondGEU, rs, rt); } else if (Branch::IsUncond(kCondGEU, rs, rt)) { - B(label); + B(label, is_bare); } else { // Synthesize the instruction (not available on R2). 
GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt); - Beqz(AT, label); + Beqz(AT, label, is_bare); } } -void MipsAssembler::Bc1f(MipsLabel* label) { - Bc1f(0, label); +void MipsAssembler::Bc1f(MipsLabel* label, bool is_bare) { + Bc1f(0, label, is_bare); } -void MipsAssembler::Bc1f(int cc, MipsLabel* label) { +void MipsAssembler::Bc1f(int cc, MipsLabel* label, bool is_bare) { CHECK(IsUint<3>(cc)) << cc; - Bcond(label, kCondF, static_cast<Register>(cc), ZERO); + Bcond(label, /* is_r6 */ false, is_bare, kCondF, static_cast<Register>(cc), ZERO); } -void MipsAssembler::Bc1t(MipsLabel* label) { - Bc1t(0, label); +void MipsAssembler::Bc1t(MipsLabel* label, bool is_bare) { + Bc1t(0, label, is_bare); } -void MipsAssembler::Bc1t(int cc, MipsLabel* label) { +void MipsAssembler::Bc1t(int cc, MipsLabel* label, bool is_bare) { CHECK(IsUint<3>(cc)) << cc; - Bcond(label, kCondT, static_cast<Register>(cc), ZERO); + Bcond(label, /* is_r6 */ false, is_bare, kCondT, static_cast<Register>(cc), ZERO); +} + +void MipsAssembler::Bc(MipsLabel* label, bool is_bare) { + Buncond(label, /* is_r6 */ true, is_bare); +} + +void MipsAssembler::Balc(MipsLabel* label, bool is_bare) { + Call(label, /* is_r6 */ true, is_bare); +} + +void MipsAssembler::Beqc(Register rs, Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondEQ, rs, rt); +} + +void MipsAssembler::Bnec(Register rs, Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondNE, rs, rt); +} + +void MipsAssembler::Beqzc(Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondEQZ, rt); +} + +void MipsAssembler::Bnezc(Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondNEZ, rt); +} + +void MipsAssembler::Bltzc(Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondLTZ, rt); +} + +void MipsAssembler::Bgezc(Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondGEZ, rt); +} + +void MipsAssembler::Blezc(Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondLEZ, rt); +} + +void MipsAssembler::Bgtzc(Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondGTZ, rt); +} + +void MipsAssembler::Bltc(Register rs, Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondLT, rs, rt); +} + +void MipsAssembler::Bgec(Register rs, Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondGE, rs, rt); +} + +void MipsAssembler::Bltuc(Register rs, Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondLTU, rs, rt); +} + +void MipsAssembler::Bgeuc(Register rs, Register rt, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondGEU, rs, rt); } -void MipsAssembler::Bc1eqz(FRegister ft, MipsLabel* label) { - Bcond(label, kCondF, static_cast<Register>(ft), ZERO); +void MipsAssembler::Bc1eqz(FRegister ft, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondF, static_cast<Register>(ft), ZERO); } -void MipsAssembler::Bc1nez(FRegister ft, MipsLabel* label) { - Bcond(label, kCondT, static_cast<Register>(ft), ZERO); +void MipsAssembler::Bc1nez(FRegister ft, MipsLabel* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondT, static_cast<Register>(ft), ZERO); } void 
MipsAssembler::AdjustBaseAndOffset(Register& base, diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index a7ff931e7e..0f163ac83f 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -80,6 +80,12 @@ class MipsLabel : public Label { MipsLabel(MipsLabel&& src) : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {} + void AdjustBoundPosition(int delta) { + CHECK(IsBound()); + // Bound label's position is negative, hence decrementing it. + position_ -= delta; + } + private: uint32_t prev_branch_id_plus_one_; // To get distance from preceding branch, if any. @@ -215,6 +221,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi // Emit Machine Instructions. void Addu(Register rd, Register rs, Register rt); + void Addiu(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label); void Addiu(Register rt, Register rs, uint16_t imm16); void Subu(Register rd, Register rs, Register rt); @@ -272,6 +279,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Lb(Register rt, Register rs, uint16_t imm16); void Lh(Register rt, Register rs, uint16_t imm16); + void Lw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label); void Lw(Register rt, Register rs, uint16_t imm16); void Lwl(Register rt, Register rs, uint16_t imm16); void Lwr(Register rt, Register rs, uint16_t imm16); @@ -287,6 +295,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Sb(Register rt, Register rs, uint16_t imm16); void Sh(Register rt, Register rs, uint16_t imm16); + void Sw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label); void Sw(Register rt, Register rs, uint16_t imm16); void Swl(Register rt, Register rs, uint16_t imm16); void Swr(Register rt, Register rs, uint16_t imm16); @@ -636,29 +645,69 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void LoadSConst32(FRegister r, int32_t value, Register temp); void Addiu32(Register rt, Register rs, int32_t value, Register rtmp = AT); - // These will generate R2 branches or R6 branches as appropriate and take care of - // the delay/forbidden slots. void Bind(MipsLabel* label); - void B(MipsLabel* label); - void Bal(MipsLabel* label); - void Beq(Register rs, Register rt, MipsLabel* label); - void Bne(Register rs, Register rt, MipsLabel* label); - void Beqz(Register rt, MipsLabel* label); - void Bnez(Register rt, MipsLabel* label); - void Bltz(Register rt, MipsLabel* label); - void Bgez(Register rt, MipsLabel* label); - void Blez(Register rt, MipsLabel* label); - void Bgtz(Register rt, MipsLabel* label); - void Blt(Register rs, Register rt, MipsLabel* label); - void Bge(Register rs, Register rt, MipsLabel* label); - void Bltu(Register rs, Register rt, MipsLabel* label); - void Bgeu(Register rs, Register rt, MipsLabel* label); - void Bc1f(MipsLabel* label); // R2 - void Bc1f(int cc, MipsLabel* label); // R2 - void Bc1t(MipsLabel* label); // R2 - void Bc1t(int cc, MipsLabel* label); // R2 - void Bc1eqz(FRegister ft, MipsLabel* label); // R6 - void Bc1nez(FRegister ft, MipsLabel* label); // R6 + // When `is_bare` is false, the branches will promote to long (if the range + // of the individual branch instruction is insufficient) and the delay/ + // forbidden slots will be taken care of. + // Use `is_bare = false` when the branch target may be out of reach of the + // individual branch instruction. 
In other words, this is for general purpose use. + // + // When `is_bare` is true, just the branch instructions will be generated + // leaving delay/forbidden slot filling up to the caller and the branches + // won't promote to long if the range is insufficient (you'll get a + // compilation error when the range is exceeded). + // Use `is_bare = true` when the branch target is known to be within reach + // of the individual branch instruction. This is intended for small local + // optimizations around delay/forbidden slots. + // Also prefer using `is_bare = true` if the code near the branch is to be + // patched or analyzed at run time (e.g. introspection) to + // - show the intent and + // - fail during compilation rather than during patching/execution if the + // bare branch range is insufficient but the code size and layout are + // expected to remain unchanged + // + // R2 branches with delay slots that are also available on R6. + // On R6 when `is_bare` is false these convert to equivalent R6 compact + // branches (to reduce code size). On R2 or when `is_bare` is true they + // remain R2 branches with delay slots. + void B(MipsLabel* label, bool is_bare = false); + void Bal(MipsLabel* label, bool is_bare = false); + void Beq(Register rs, Register rt, MipsLabel* label, bool is_bare = false); + void Bne(Register rs, Register rt, MipsLabel* label, bool is_bare = false); + void Beqz(Register rt, MipsLabel* label, bool is_bare = false); + void Bnez(Register rt, MipsLabel* label, bool is_bare = false); + void Bltz(Register rt, MipsLabel* label, bool is_bare = false); + void Bgez(Register rt, MipsLabel* label, bool is_bare = false); + void Blez(Register rt, MipsLabel* label, bool is_bare = false); + void Bgtz(Register rt, MipsLabel* label, bool is_bare = false); + void Blt(Register rs, Register rt, MipsLabel* label, bool is_bare = false); + void Bge(Register rs, Register rt, MipsLabel* label, bool is_bare = false); + void Bltu(Register rs, Register rt, MipsLabel* label, bool is_bare = false); + void Bgeu(Register rs, Register rt, MipsLabel* label, bool is_bare = false); + // R2-only branches with delay slots. + void Bc1f(MipsLabel* label, bool is_bare = false); // R2 + void Bc1f(int cc, MipsLabel* label, bool is_bare = false); // R2 + void Bc1t(MipsLabel* label, bool is_bare = false); // R2 + void Bc1t(int cc, MipsLabel* label, bool is_bare = false); // R2 + // R6-only compact branches without delay/forbidden slots. + void Bc(MipsLabel* label, bool is_bare = false); // R6 + void Balc(MipsLabel* label, bool is_bare = false); // R6 + // R6-only compact branches with forbidden slots.
+ void Beqc(Register rs, Register rt, MipsLabel* label, bool is_bare = false); // R6 + void Bnec(Register rs, Register rt, MipsLabel* label, bool is_bare = false); // R6 + void Beqzc(Register rt, MipsLabel* label, bool is_bare = false); // R6 + void Bnezc(Register rt, MipsLabel* label, bool is_bare = false); // R6 + void Bltzc(Register rt, MipsLabel* label, bool is_bare = false); // R6 + void Bgezc(Register rt, MipsLabel* label, bool is_bare = false); // R6 + void Blezc(Register rt, MipsLabel* label, bool is_bare = false); // R6 + void Bgtzc(Register rt, MipsLabel* label, bool is_bare = false); // R6 + void Bltc(Register rs, Register rt, MipsLabel* label, bool is_bare = false); // R6 + void Bgec(Register rs, Register rt, MipsLabel* label, bool is_bare = false); // R6 + void Bltuc(Register rs, Register rt, MipsLabel* label, bool is_bare = false); // R6 + void Bgeuc(Register rs, Register rt, MipsLabel* label, bool is_bare = false); // R6 + // R6-only branches with delay slots. + void Bc1eqz(FRegister ft, MipsLabel* label, bool is_bare = false); // R6 + void Bc1nez(FRegister ft, MipsLabel* label, bool is_bare = false); // R6 void EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, size_t size); void AdjustBaseAndOffset(Register& base, @@ -1248,6 +1297,9 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi uint32_t cc_ins_mask_; // Branches never operate on the LO and HI registers, hence there's // no mask for LO and HI. + + // Label for patchable instructions to allow moving them into delay slots. + MipsLabel* patcher_label_; }; // Delay slot finite state machine's (DS FSM's) state. The FSM state is updated @@ -1268,10 +1320,14 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi class Branch { public: enum Type { - // R2 short branches. + // R2 short branches (can be promoted to long). kUncondBranch, kCondBranch, kCall, + // R2 short branches (can't be promoted to long), delay slots filled manually. + kBareUncondBranch, + kBareCondBranch, + kBareCall, // R2 near label. kLabel, // R2 near literal. @@ -1284,10 +1340,14 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi kFarLabel, // R2 far literal. kFarLiteral, - // R6 short branches. + // R6 short branches (can be promoted to long). kR6UncondBranch, kR6CondBranch, kR6Call, + // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually. + kR6BareUncondBranch, + kR6BareCondBranch, + kR6BareCall, // R6 near label. kR6Label, // R6 near literal. @@ -1337,7 +1397,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi // instructions) from the instruction containing the offset. uint32_t pc_org; // How large (in bits) a PC-relative offset can be for a given type of branch (kR6CondBranch - // is an exception: use kOffset23 for beqzc/bnezc). + // and kR6BareCondBranch are an exception: use kOffset23 for beqzc/bnezc). OffsetBits offset_size; // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift // count. @@ -1346,14 +1406,15 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi static const BranchInfo branch_info_[/* Type */]; // Unconditional branch or call. - Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call); + Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call, bool is_bare); // Conditional branch. 
Branch(bool is_r6, uint32_t location, uint32_t target, BranchCondition condition, Register lhs_reg, - Register rhs_reg); + Register rhs_reg, + bool is_bare); // Label address (in literal area) or literal. Branch(bool is_r6, uint32_t location, @@ -1385,13 +1446,15 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi uint32_t GetOldSize() const; uint32_t GetEndLocation() const; uint32_t GetOldEndLocation() const; + bool IsBare() const; bool IsLong() const; bool IsResolved() const; // Various helpers for branch delay slot management. bool CanHaveDelayedInstruction(const DelaySlot& delay_slot) const; - void SetDelayedInstruction(uint32_t instruction); + void SetDelayedInstruction(uint32_t instruction, MipsLabel* patcher_label = nullptr); uint32_t GetDelayedInstruction() const; + MipsLabel* GetPatcherLabel() const; void DecrementLocations(); // Returns the bit size of the signed offset that the branch instruction can handle. @@ -1476,6 +1539,8 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi // kUnfillableDelaySlot if none and unfillable // (the latter is only used for unconditional R2 // branches). + + MipsLabel* patcher_label_; // Patcher label for the instruction in the delay slot. }; friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs); friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs); @@ -1513,9 +1578,14 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi VectorRegister wd, int minor_opcode); - void Buncond(MipsLabel* label); - void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO); - void Call(MipsLabel* label); + void Buncond(MipsLabel* label, bool is_r6, bool is_bare); + void Bcond(MipsLabel* label, + bool is_r6, + bool is_bare, + BranchCondition condition, + Register lhs, + Register rhs = ZERO); + void Call(MipsLabel* label, bool is_r6, bool is_bare); void FinalizeLabeledBranch(MipsLabel* label); // Various helpers for branch delay slot management. 
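To make the new `is_bare` contract concrete, here is a minimal caller-side sketch. It is not part of the patch: the helper function, register choices and label are illustrative, and only the assembler entry points declared above (SetReorder, Bnez, Addu, Bind) are assumed. With reordering disabled and `is_bare = true`, the assembler emits just the branch instruction, so the caller supplies the delay-slot instruction itself; the branch is never promoted to a long sequence and instead fails a CHECK at assembly time if the target falls out of the short-branch range.

#include "utils/mips/assembler_mips.h"  // assumed ART-internal include path

namespace art {
namespace mips {

// Hypothetical helper: a short, fixed-layout skip using a bare branch.
void EmitBareBranchExample(MipsAssembler* assembler) {
  MipsLabel done;
  assembler->SetReorder(false);                    // the caller manages delay slots explicitly
  assembler->Bnez(T0, &done, /* is_bare */ true);  // only the bnez is emitted, slot left open
  assembler->Addu(V0, V0, T1);                     // manually placed delay-slot instruction
  // ... code whose size and layout must not change, e.g. instructions that a
  // relative patcher or run-time introspection will look at ...
  assembler->Bind(&done);
}

}  // namespace mips
}  // namespace art

A non-bare Bnez at the same spot would instead get an automatic nop (or a reordered instruction) in its slot and could still grow into a long branch, which is exactly what patched or analyzed code sequences need to avoid.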
@@ -1525,9 +1595,14 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi uint32_t fpr_outs_mask, uint32_t fpr_ins_mask, uint32_t cc_outs_mask, - uint32_t cc_ins_mask); + uint32_t cc_ins_mask, + MipsLabel* patcher_label = nullptr); void DsFsmInstrNop(uint32_t instruction); - void DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2); + void DsFsmInstrRrr(uint32_t instruction, + Register out, + Register in1, + Register in2, + MipsLabel* patcher_label = nullptr); void DsFsmInstrRrrr(uint32_t instruction, Register in1_out, Register in2, Register in3); void DsFsmInstrFff(uint32_t instruction, FRegister out, FRegister in1, FRegister in2); void DsFsmInstrFfff(uint32_t instruction, FRegister in1_out, FRegister in2, FRegister in3); @@ -1550,12 +1625,15 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi const Branch* GetBranch(uint32_t branch_id) const; uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const; uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const; + void BindRelativeToPrecedingBranch(MipsLabel* label, + uint32_t prev_branch_id_plus_one, + uint32_t position); void EmitLiterals(); void ReserveJumpTableSpace(); void EmitJumpTables(); void PromoteBranches(); - void EmitBranch(Branch* branch); + void EmitBranch(uint32_t branch_id); void EmitBranches(); void PatchCFI(size_t number_of_delayed_adjust_pcs); diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc index b72a14e906..a5cd5a7c65 100644 --- a/compiler/utils/mips/assembler_mips32r6_test.cc +++ b/compiler/utils/mips/assembler_mips32r6_test.cc @@ -259,12 +259,86 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, return result; } + void BranchHelper(void (mips::MipsAssembler::*f)(mips::MipsLabel*, + bool), + const std::string& instr_name, + bool has_slot, + bool is_bare = false) { + __ SetReorder(false); + mips::MipsLabel label1, label2; + (Base::GetAssembler()->*f)(&label1, is_bare); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label1); + (Base::GetAssembler()->*f)(&label2, is_bare); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label2); + (Base::GetAssembler()->*f)(&label1, is_bare); + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + + std::string expected = + ".set noreorder\n" + + instr_name + " 1f\n" + + ((is_bare || !has_slot) ? "" : "nop\n") + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + instr_name + " 2f\n" + + ((is_bare || !has_slot) ? "" : "nop\n") + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + + instr_name + " 1b\n" + + ((is_bare || !has_slot) ? 
"" : "nop\n") + + "addu $zero, $zero, $zero\n"; + DriverStr(expected, instr_name); + } + + void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register, + mips::MipsLabel*, + bool), + const std::string& instr_name, + bool is_bare = false) { + __ SetReorder(false); + mips::MipsLabel label; + (Base::GetAssembler()->*f)(mips::A0, &label, is_bare); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + (Base::GetAssembler()->*f)(mips::A1, &label, is_bare); + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, 1f\n" + + (is_bare ? "" : "nop\n") + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a1, 1b\n" + + (is_bare ? "" : "nop\n") + + "addu $zero, $zero, $zero\n"; + DriverStr(expected, instr_name); + } + void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register, mips::Register, - mips::MipsLabel*), - const std::string& instr_name) { + mips::MipsLabel*, + bool), + const std::string& instr_name, + bool is_bare = false) { + __ SetReorder(false); mips::MipsLabel label; - (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label); + (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label, is_bare); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); @@ -274,17 +348,52 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); } - (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label); + (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label, is_bare); + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); std::string expected = ".set noreorder\n" + - instr_name + " $a0, $a1, 1f\n" - "nop\n" + + instr_name + " $a0, $a1, 1f\n" + + (is_bare ? "" : "nop\n") + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - instr_name + " $a2, $a3, 1b\n" - "nop\n"; + instr_name + " $a2, $a3, 1b\n" + + (is_bare ? "" : "nop\n") + + "addu $zero, $zero, $zero\n"; + DriverStr(expected, instr_name); + } + + void BranchFpuCondHelper(void (mips::MipsAssembler::*f)(mips::FRegister, + mips::MipsLabel*, + bool), + const std::string& instr_name, + bool is_bare = false) { + __ SetReorder(false); + mips::MipsLabel label; + (Base::GetAssembler()->*f)(mips::F0, &label, is_bare); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + (Base::GetAssembler()->*f)(mips::F30, &label, is_bare); + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + + std::string expected = + ".set noreorder\n" + + instr_name + " $f0, 1f\n" + + (is_bare ? "" : "nop\n") + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $f30, 1b\n" + + (is_bare ? 
"" : "nop\n") + + "addu $zero, $zero, $zero\n"; DriverStr(expected, instr_name); } @@ -947,78 +1056,386 @@ TEST_F(AssemblerMIPS32r6Test, StoreQToOffset) { DriverStr(expected, "StoreQToOffset"); } -TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLabelAddress) { +////////////// +// BRANCHES // +////////////// + +TEST_F(AssemblerMIPS32r6Test, Bc) { + BranchHelper(&mips::MipsAssembler::Bc, "Bc", /* has_slot */ false); +} + +TEST_F(AssemblerMIPS32r6Test, Balc) { + BranchHelper(&mips::MipsAssembler::Balc, "Balc", /* has_slot */ false); +} + +TEST_F(AssemblerMIPS32r6Test, Beqc) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Beqc, "Beqc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bnec) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bnec, "Bnec"); +} + +TEST_F(AssemblerMIPS32r6Test, Beqzc) { + BranchCondOneRegHelper(&mips::MipsAssembler::Beqzc, "Beqzc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bnezc) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bnezc, "Bnezc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bltzc) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bltzc, "Bltzc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bgezc) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bgezc, "Bgezc"); +} + +TEST_F(AssemblerMIPS32r6Test, Blezc) { + BranchCondOneRegHelper(&mips::MipsAssembler::Blezc, "Blezc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bgtzc) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bgtzc, "Bgtzc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bltc) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltc, "Bltc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bgec) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgec, "Bgec"); +} + +TEST_F(AssemblerMIPS32r6Test, Bltuc) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltuc, "Bltuc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bgeuc) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeuc, "Bgeuc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bc1eqz) { + BranchFpuCondHelper(&mips::MipsAssembler::Bc1eqz, "Bc1eqz"); +} + +TEST_F(AssemblerMIPS32r6Test, Bc1nez) { + BranchFpuCondHelper(&mips::MipsAssembler::Bc1nez, "Bc1nez"); +} + +TEST_F(AssemblerMIPS32r6Test, B) { + BranchHelper(&mips::MipsAssembler::B, "Bc", /* has_slot */ false); +} + +TEST_F(AssemblerMIPS32r6Test, Bal) { + BranchHelper(&mips::MipsAssembler::Bal, "Balc", /* has_slot */ false); +} + +TEST_F(AssemblerMIPS32r6Test, Beq) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beqc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bne) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bnec"); +} + +TEST_F(AssemblerMIPS32r6Test, Beqz) { + BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqzc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bnez) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnezc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bltz) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltzc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bgez) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgezc"); +} + +TEST_F(AssemblerMIPS32r6Test, Blez) { + BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blezc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bgtz) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtzc"); +} + +TEST_F(AssemblerMIPS32r6Test, Blt) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Bltc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bge) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bgec"); +} + +TEST_F(AssemblerMIPS32r6Test, Bltu) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltuc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bgeu) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, 
"Bgeuc"); +} + +TEST_F(AssemblerMIPS32r6Test, BareBc) { + BranchHelper(&mips::MipsAssembler::Bc, "Bc", /* has_slot */ false, /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBalc) { + BranchHelper(&mips::MipsAssembler::Balc, "Balc", /* has_slot */ false, /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBeqc) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Beqc, "Beqc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBnec) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bnec, "Bnec", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBeqzc) { + BranchCondOneRegHelper(&mips::MipsAssembler::Beqzc, "Beqzc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBnezc) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bnezc, "Bnezc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBltzc) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bltzc, "Bltzc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBgezc) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bgezc, "Bgezc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBlezc) { + BranchCondOneRegHelper(&mips::MipsAssembler::Blezc, "Blezc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBgtzc) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bgtzc, "Bgtzc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBltc) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltc, "Bltc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBgec) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgec, "Bgec", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBltuc) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltuc, "Bltuc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBgeuc) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeuc, "Bgeuc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBc1eqz) { + BranchFpuCondHelper(&mips::MipsAssembler::Bc1eqz, "Bc1eqz", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBc1nez) { + BranchFpuCondHelper(&mips::MipsAssembler::Bc1nez, "Bc1nez", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareB) { + BranchHelper(&mips::MipsAssembler::B, "B", /* has_slot */ true, /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBal) { + BranchHelper(&mips::MipsAssembler::Bal, "Bal", /* has_slot */ true, /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBeq) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBne) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBeqz) { + BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqz", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBnez) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBltz) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBgez) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBlez) { + BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBgtz) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBlt) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, 
"Blt", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBge) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBltu) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, BareBgeu) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS32r6Test, LongBeqc) { mips::MipsLabel label; - __ LoadLabelAddress(mips::V0, mips::ZERO, &label); - constexpr size_t kAdduCount = 0x3FFDE; - for (size_t i = 0; i != kAdduCount; ++i) { + __ Beqc(mips::A0, mips::A1, &label); + constexpr uint32_t kAdduCount1 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); } __ Bind(&label); + constexpr uint32_t kAdduCount2 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Beqc(mips::A2, mips::A3, &label); - std::string expected = - "lapc $v0, 1f\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "1:\n"; - DriverStr(expected, "LoadFarthestNearLabelAddress"); + uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic. + offset_forward <<= 2; + offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic. + + uint32_t offset_back = -(kAdduCount2 + 1); // 1: account for bnec. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic. + + std::ostringstream oss; + oss << + ".set noreorder\n" + "bnec $a0, $a1, 1f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "1:\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "2:\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "bnec $a2, $a3, 3f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "3:\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBeqc"); } -TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLabelAddress) { +TEST_F(AssemblerMIPS32r6Test, LongBeqzc) { + constexpr uint32_t kNopCount1 = (1u << 20) + 1; + constexpr uint32_t kNopCount2 = (1u << 20) + 1; + constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u; + ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity); + __ GetBuffer()->ExtendCapacity(kRequiredCapacity); mips::MipsLabel label; - __ LoadLabelAddress(mips::V0, mips::ZERO, &label); - constexpr size_t kAdduCount = 0x3FFDF; - for (size_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + __ Beqzc(mips::A0, &label); + for (uint32_t i = 0; i != kNopCount1; ++i) { + __ Nop(); } __ Bind(&label); + for (uint32_t i = 0; i != kNopCount2; ++i) { + __ Nop(); + } + __ Beqzc(mips::A2, &label); - std::string expected = - "1:\n" - "auipc $at, %hi(2f - 1b)\n" - "addiu $v0, $at, %lo(2f - 1b)\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "2:\n"; - DriverStr(expected, "LoadNearestFarLabelAddress"); -} + uint32_t offset_forward = 2 + kNopCount1; // 2: account for auipc and jic. + offset_forward <<= 2; + offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic. 
-TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) { - mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); - __ LoadLiteral(mips::V0, mips::ZERO, literal); - constexpr size_t kAdduCount = 0x3FFDE; - for (size_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + uint32_t offset_back = -(kNopCount2 + 1); // 1: account for bnezc. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic. + + // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs + // instead of generating them ourselves in the source code. This saves test time. + std::ostringstream oss; + oss << + ".set noreorder\n" + "bnezc $a0, 1f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "1:\n" << + ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n" + "2:\n" << + ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n" + "bnezc $a2, 3f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "3:\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBeqzc"); +} + +TEST_F(AssemblerMIPS32r6Test, LongBc) { + constexpr uint32_t kNopCount1 = (1u << 25) + 1; + constexpr uint32_t kNopCount2 = (1u << 25) + 1; + constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u; + ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity); + __ GetBuffer()->ExtendCapacity(kRequiredCapacity); + mips::MipsLabel label1, label2; + __ Bc(&label1); + for (uint32_t i = 0; i != kNopCount1; ++i) { + __ Nop(); + } + __ Bind(&label1); + __ Bc(&label2); + for (uint32_t i = 0; i != kNopCount2; ++i) { + __ Nop(); } + __ Bind(&label2); + __ Bc(&label1); - std::string expected = - "lwpc $v0, 1f\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "1:\n" - ".word 0x12345678\n"; - DriverStr(expected, "LoadFarthestNearLiteral"); -} + uint32_t offset_forward1 = 2 + kNopCount1; // 2: account for auipc and jic. + offset_forward1 <<= 2; + offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in jic. -TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) { - mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); - __ LoadLiteral(mips::V0, mips::ZERO, literal); - constexpr size_t kAdduCount = 0x3FFDF; - for (size_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } + uint32_t offset_forward2 = 2 + kNopCount2; // 2: account for auipc and jic. + offset_forward2 <<= 2; + offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in jic. - std::string expected = + uint32_t offset_back = -(2 + kNopCount2); // 2: account for auipc and jic. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic. + + // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs + // instead of generating them ourselves in the source code. This saves a few minutes + // of test time. 
+ std::ostringstream oss; + oss << + ".set noreorder\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward1) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" + ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n" "1:\n" - "auipc $at, %hi(2f - 1b)\n" - "lw $v0, %lo(2f - 1b)($at)\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "auipc $at, 0x" << std::hex << High16Bits(offset_forward2) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" + ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n" "2:\n" - ".word 0x12345678\n"; - DriverStr(expected, "LoadNearestFarLiteral"); + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBc"); } -////////////// -// BRANCHES // -////////////// - TEST_F(AssemblerMIPS32r6Test, ImpossibleReordering) { mips::MipsLabel label; __ SetReorder(true); @@ -1112,10 +1529,62 @@ TEST_F(AssemblerMIPS32r6Test, SetReorder) { DriverStr(expected, "SetReorder"); } +TEST_F(AssemblerMIPS32r6Test, ReorderPatchedInstruction) { + __ SetReorder(true); + mips::MipsLabel label1, label2; + mips::MipsLabel patcher_label1, patcher_label2, patcher_label3, patcher_label4, patcher_label5; + __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label1); + __ Bc1eqz(mips::F0, &label1); + constexpr uint32_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label1); + __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label2); + __ Bc1nez(mips::F2, &label2); + constexpr uint32_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label2); + __ Addiu(mips::V0, mips::A0, 0x5678, &patcher_label3); + __ Bc1eqz(mips::F4, &label1); + __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label4); + __ Jalr(mips::T9); + __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label5); + __ Bltc(mips::V0, mips::V1, &label2); + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + + std::string expected = + ".set noreorder\n" + "bc1eqz $f0, 1f\n" + "lw $v0, 0x5678($a0)\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "bc1nez $f2, 2f\n" + "sw $v0, 0x5678($a0)\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "bc1eqz $f4, 1b\n" + "addiu $v0, $a0, 0x5678\n" + "jalr $t9\n" + "lw $v0, 0x5678($a0)\n" + "sw $v0, 0x5678($a0)\n" + "bltc $v0, $v1, 2b\n" + "nop\n" + "addu $zero, $zero, $zero\n"; + DriverStr(expected, "ReorderPatchedInstruction"); + EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 1 * 4u); + EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + 3) * 4u); + EXPECT_EQ(__ GetLabelLocation(&patcher_label3), (kAdduCount1 + kAdduCount2 + 5) * 4u); + EXPECT_EQ(__ GetLabelLocation(&patcher_label4), (kAdduCount1 + kAdduCount2 + 7) * 4u); + EXPECT_EQ(__ GetLabelLocation(&patcher_label5), (kAdduCount1 + kAdduCount2 + 8) * 4u); +} + TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) { - mips::MipsLabel label; + mips::MipsLabel label, patcher_label1, patcher_label2; __ SetReorder(true); - __ Subu(mips::T0, mips::T1, mips::T2); + __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label1); __ Bc1nez(mips::F0, &label); constexpr uint32_t kAdduCount1 = (1u << 15) + 1; for (uint32_t i = 0; i != kAdduCount1; ++i) { @@ -1126,7 +1595,7 @@ TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) { for (uint32_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips::ZERO, mips::ZERO, 
mips::ZERO); } - __ Subu(mips::T0, mips::T1, mips::T2); + __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label2); __ Bc1eqz(mips::F0, &label); uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic. @@ -1140,7 +1609,7 @@ TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) { std::ostringstream oss; oss << ".set noreorder\n" - "subu $t0, $t1, $t2\n" + "addiu $t0, $t1, 0x5678\n" "bc1eqz $f0, 1f\n" "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" @@ -1148,49 +1617,88 @@ TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) { RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << "2:\n" << RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << - "subu $t0, $t1, $t2\n" + "addiu $t0, $t1, 0x5678\n" "bc1nez $f0, 3f\n" "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" "3:\n"; std::string expected = oss.str(); - DriverStr(expected, "LongBeqc"); + DriverStr(expected, "LongBranchReorder"); + EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 0 * 4u); + EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + kAdduCount2 + 4) * 4u); +} + +/////////////////////// +// Loading Constants // +/////////////////////// + +TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLabelAddress) { + mips::MipsLabel label; + __ LoadLabelAddress(mips::V0, mips::ZERO, &label); + constexpr size_t kAdduCount = 0x3FFDE; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + + std::string expected = + "lapc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n"; + DriverStr(expected, "LoadFarthestNearLabelAddress"); +} + +TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLabelAddress) { + mips::MipsLabel label; + __ LoadLabelAddress(mips::V0, mips::ZERO, &label); + constexpr size_t kAdduCount = 0x3FFDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "addiu $v0, $at, %lo(2f - 1b)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n"; + DriverStr(expected, "LoadNearestFarLabelAddress"); } -// TODO: MipsAssembler::Bc -// MipsAssembler::Jic -// MipsAssembler::Jialc -// MipsAssembler::Bltc -// MipsAssembler::Bltzc -// MipsAssembler::Bgtzc -// MipsAssembler::Bgec -// MipsAssembler::Bgezc -// MipsAssembler::Blezc -// MipsAssembler::Bltuc -// MipsAssembler::Bgeuc -// MipsAssembler::Beqc -// MipsAssembler::Bnec -// MipsAssembler::Beqzc -// MipsAssembler::Bnezc -// MipsAssembler::Bc1eqz -// MipsAssembler::Bc1nez -// MipsAssembler::Buncond -// MipsAssembler::Bcond -// MipsAssembler::Call - -// TODO: AssemblerMIPS32r6Test.B -// AssemblerMIPS32r6Test.Beq -// AssemblerMIPS32r6Test.Bne -// AssemblerMIPS32r6Test.Beqz -// AssemblerMIPS32r6Test.Bnez -// AssemblerMIPS32r6Test.Bltz -// AssemblerMIPS32r6Test.Bgez -// AssemblerMIPS32r6Test.Blez -// AssemblerMIPS32r6Test.Bgtz -// AssemblerMIPS32r6Test.Blt -// AssemblerMIPS32r6Test.Bge -// AssemblerMIPS32r6Test.Bltu -// AssemblerMIPS32r6Test.Bgeu +TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips::V0, mips::ZERO, literal); + constexpr size_t kAdduCount = 0x3FFDE; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "lwpc $v0, 1f\n" + + 
RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); +} + +TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips::V0, mips::ZERO, literal); + constexpr size_t kAdduCount = 0x3FFDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); +} // MSA instructions. diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index 09175309f9..680c347fef 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -186,11 +186,51 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler, return result; } + void BranchHelper(void (mips::MipsAssembler::*f)(mips::MipsLabel*, + bool), + const std::string& instr_name, + bool is_bare = false) { + __ SetReorder(false); + mips::MipsLabel label1, label2; + (Base::GetAssembler()->*f)(&label1, is_bare); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label1); + (Base::GetAssembler()->*f)(&label2, is_bare); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label2); + (Base::GetAssembler()->*f)(&label1, is_bare); + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + + std::string expected = + ".set noreorder\n" + + instr_name + " 1f\n" + + (is_bare ? "" : "nop\n") + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + instr_name + " 2f\n" + + (is_bare ? "" : "nop\n") + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + + instr_name + " 1b\n" + + (is_bare ? "" : "nop\n") + + "addu $zero, $zero, $zero\n"; + DriverStr(expected, instr_name); + } + void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register, - mips::MipsLabel*), - const std::string& instr_name) { + mips::MipsLabel*, + bool), + const std::string& instr_name, + bool is_bare = false) { + __ SetReorder(false); mips::MipsLabel label; - (Base::GetAssembler()->*f)(mips::A0, &label); + (Base::GetAssembler()->*f)(mips::A0, &label, is_bare); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); @@ -200,26 +240,64 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler, for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); } - (Base::GetAssembler()->*f)(mips::A1, &label); + (Base::GetAssembler()->*f)(mips::A1, &label, is_bare); + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); std::string expected = ".set noreorder\n" + - instr_name + " $a0, 1f\n" - "nop\n" + + instr_name + " $a0, 1f\n" + + (is_bare ? "" : "nop\n") + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - instr_name + " $a1, 1b\n" - "nop\n"; + instr_name + " $a1, 1b\n" + + (is_bare ? 
"" : "nop\n") + + "addu $zero, $zero, $zero\n"; DriverStr(expected, instr_name); } void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register, mips::Register, - mips::MipsLabel*), - const std::string& instr_name) { + mips::MipsLabel*, + bool), + const std::string& instr_name, + bool is_bare = false) { + __ SetReorder(false); + mips::MipsLabel label; + (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label, is_bare); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label, is_bare); + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, $a1, 1f\n" + + (is_bare ? "" : "nop\n") + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a2, $a3, 1b\n" + + (is_bare ? "" : "nop\n") + + "addu $zero, $zero, $zero\n"; + DriverStr(expected, instr_name); + } + + void BranchFpuCondCodeHelper(void (mips::MipsAssembler::*f)(int, + mips::MipsLabel*, + bool), + const std::string& instr_name, + bool is_bare = false) { + __ SetReorder(false); mips::MipsLabel label; - (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label); + (Base::GetAssembler()->*f)(0, &label, is_bare); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); @@ -229,17 +307,19 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler, for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); } - (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label); + (Base::GetAssembler()->*f)(7, &label, is_bare); + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); std::string expected = ".set noreorder\n" + - instr_name + " $a0, $a1, 1f\n" - "nop\n" + + instr_name + " $fcc0, 1f\n" + + (is_bare ? "" : "nop\n") + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - instr_name + " $a2, $a3, 1b\n" - "nop\n"; + instr_name + " $fcc7, 1b\n" + + (is_bare ? 
"" : "nop\n") + + "addu $zero, $zero, $zero\n"; DriverStr(expected, instr_name); } @@ -2072,410 +2152,136 @@ TEST_F(AssemblerMIPSTest, StoreConstToOffset) { DriverStr(expected, "StoreConstToOffset"); } +////////////// +// BRANCHES // +////////////// + TEST_F(AssemblerMIPSTest, B) { - mips::MipsLabel label1, label2; - __ B(&label1); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label1); - __ B(&label2); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label2); - __ B(&label1); + BranchHelper(&mips::MipsAssembler::B, "B"); +} - std::string expected = - ".set noreorder\n" - "b 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" - "b 2f\n" - "nop\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "2:\n" - "b 1b\n" - "nop\n"; - DriverStr(expected, "B"); +TEST_F(AssemblerMIPSTest, Bal) { + BranchHelper(&mips::MipsAssembler::Bal, "Bal"); } TEST_F(AssemblerMIPSTest, Beq) { - __ SetReorder(false); BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq"); } TEST_F(AssemblerMIPSTest, Bne) { - __ SetReorder(false); BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne"); } TEST_F(AssemblerMIPSTest, Beqz) { - __ SetReorder(false); - mips::MipsLabel label; - __ Beqz(mips::A0, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Beqz(mips::A1, &label); - - std::string expected = - ".set noreorder\n" - "beq $zero, $a0, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "beq $zero, $a1, 1b\n" - "nop\n"; - DriverStr(expected, "Beqz"); + BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqz"); } TEST_F(AssemblerMIPSTest, Bnez) { - __ SetReorder(false); - mips::MipsLabel label; - __ Bnez(mips::A0, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bnez(mips::A1, &label); - - std::string expected = - ".set noreorder\n" - "bne $zero, $a0, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "bne $zero, $a1, 1b\n" - "nop\n"; - DriverStr(expected, "Bnez"); + BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez"); } TEST_F(AssemblerMIPSTest, Bltz) { - __ SetReorder(false); BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz"); } TEST_F(AssemblerMIPSTest, Bgez) { - __ SetReorder(false); BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez"); } TEST_F(AssemblerMIPSTest, Blez) { - __ SetReorder(false); BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez"); } TEST_F(AssemblerMIPSTest, Bgtz) { - __ SetReorder(false); BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz"); } TEST_F(AssemblerMIPSTest, Blt) { - __ SetReorder(false); - mips::MipsLabel label; - __ Blt(mips::A0, mips::A1, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ 
Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Blt(mips::A2, mips::A3, &label); - - std::string expected = - ".set noreorder\n" - "slt $at, $a0, $a1\n" - "bne $zero, $at, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "slt $at, $a2, $a3\n" - "bne $zero, $at, 1b\n" - "nop\n"; - DriverStr(expected, "Blt"); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Blt"); } TEST_F(AssemblerMIPSTest, Bge) { - __ SetReorder(false); - mips::MipsLabel label; - __ Bge(mips::A0, mips::A1, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bge(mips::A2, mips::A3, &label); - - std::string expected = - ".set noreorder\n" - "slt $at, $a0, $a1\n" - "beq $zero, $at, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "slt $at, $a2, $a3\n" - "beq $zero, $at, 1b\n" - "nop\n"; - DriverStr(expected, "Bge"); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge"); } TEST_F(AssemblerMIPSTest, Bltu) { - __ SetReorder(false); - mips::MipsLabel label; - __ Bltu(mips::A0, mips::A1, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bltu(mips::A2, mips::A3, &label); - - std::string expected = - ".set noreorder\n" - "sltu $at, $a0, $a1\n" - "bne $zero, $at, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "sltu $at, $a2, $a3\n" - "bne $zero, $at, 1b\n" - "nop\n"; - DriverStr(expected, "Bltu"); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu"); } TEST_F(AssemblerMIPSTest, Bgeu) { - __ SetReorder(false); - mips::MipsLabel label; - __ Bgeu(mips::A0, mips::A1, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bgeu(mips::A2, mips::A3, &label); - - std::string expected = - ".set noreorder\n" - "sltu $at, $a0, $a1\n" - "beq $zero, $at, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "sltu $at, $a2, $a3\n" - "beq $zero, $at, 1b\n" - "nop\n"; - DriverStr(expected, "Bgeu"); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu"); } TEST_F(AssemblerMIPSTest, Bc1f) { - __ SetReorder(false); - mips::MipsLabel label; - __ Bc1f(0, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bc1f(7, 
&label); - - std::string expected = - ".set noreorder\n" - "bc1f $fcc0, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "bc1f $fcc7, 1b\n" - "nop\n"; - DriverStr(expected, "Bc1f"); + BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1f, "Bc1f"); } TEST_F(AssemblerMIPSTest, Bc1t) { - __ SetReorder(false); - mips::MipsLabel label; - __ Bc1t(0, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bc1t(7, &label); + BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1t, "Bc1t"); +} - std::string expected = - ".set noreorder\n" - "bc1t $fcc0, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "bc1t $fcc7, 1b\n" - "nop\n"; - DriverStr(expected, "Bc1t"); +TEST_F(AssemblerMIPSTest, BareB) { + BranchHelper(&mips::MipsAssembler::B, "B", /* is_bare */ true); } -/////////////////////// -// Loading Constants // -/////////////////////// +TEST_F(AssemblerMIPSTest, BareBal) { + BranchHelper(&mips::MipsAssembler::Bal, "Bal", /* is_bare */ true); +} -TEST_F(AssemblerMIPSTest, LoadConst32) { - // IsUint<16>(value) - __ LoadConst32(mips::V0, 0); - __ LoadConst32(mips::V0, 65535); - // IsInt<16>(value) - __ LoadConst32(mips::V0, -1); - __ LoadConst32(mips::V0, -32768); - // Everything else - __ LoadConst32(mips::V0, 65536); - __ LoadConst32(mips::V0, 65537); - __ LoadConst32(mips::V0, 2147483647); - __ LoadConst32(mips::V0, -32769); - __ LoadConst32(mips::V0, -65536); - __ LoadConst32(mips::V0, -65537); - __ LoadConst32(mips::V0, -2147483647); - __ LoadConst32(mips::V0, -2147483648); +TEST_F(AssemblerMIPSTest, BareBeq) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq", /* is_bare */ true); +} - const char* expected = - // IsUint<16>(value) - "ori $v0, $zero, 0\n" // __ LoadConst32(mips::V0, 0); - "ori $v0, $zero, 65535\n" // __ LoadConst32(mips::V0, 65535); - // IsInt<16>(value) - "addiu $v0, $zero, -1\n" // __ LoadConst32(mips::V0, -1); - "addiu $v0, $zero, -32768\n" // __ LoadConst32(mips::V0, -32768); - // Everything else - "lui $v0, 1\n" // __ LoadConst32(mips::V0, 65536); - "lui $v0, 1\n" // __ LoadConst32(mips::V0, 65537); - "ori $v0, 1\n" // " - "lui $v0, 32767\n" // __ LoadConst32(mips::V0, 2147483647); - "ori $v0, 65535\n" // " - "lui $v0, 65535\n" // __ LoadConst32(mips::V0, -32769); - "ori $v0, 32767\n" // " - "lui $v0, 65535\n" // __ LoadConst32(mips::V0, -65536); - "lui $v0, 65534\n" // __ LoadConst32(mips::V0, -65537); - "ori $v0, 65535\n" // " - "lui $v0, 32768\n" // __ LoadConst32(mips::V0, -2147483647); - "ori $v0, 1\n" // " - "lui $v0, 32768\n"; // __ LoadConst32(mips::V0, -2147483648); - DriverStr(expected, "LoadConst32"); +TEST_F(AssemblerMIPSTest, BareBne) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne", /* is_bare */ true); } -TEST_F(AssemblerMIPSTest, LoadFarthestNearLabelAddress) { - mips::MipsLabel label; - __ BindPcRelBaseLabel(); - __ LoadLabelAddress(mips::V0, mips::V1, &label); - constexpr size_t kAddiuCount = 0x1FDE; - for (size_t i = 0; i != kAddiuCount; ++i) { - __ Addiu(mips::A0, mips::A1, 0); - } - __ Bind(&label); +TEST_F(AssemblerMIPSTest, BareBeqz) { + BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqz", /* is_bare 
*/ true); +} - std::string expected = - "1:\n" - "addiu $v0, $v1, %lo(2f - 1b)\n" + - RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") + - "2:\n"; - DriverStr(expected, "LoadFarthestNearLabelAddress"); +TEST_F(AssemblerMIPSTest, BareBnez) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez", /* is_bare */ true); } -TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddress) { - mips::MipsLabel label; - __ BindPcRelBaseLabel(); - __ LoadLabelAddress(mips::V0, mips::V1, &label); - constexpr size_t kAdduCount = 0x1FDF; - for (size_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } - __ Bind(&label); +TEST_F(AssemblerMIPSTest, BareBltz) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz", /* is_bare */ true); +} - std::string expected = - "1:\n" - "lui $at, %hi(2f - 1b)\n" - "ori $at, $at, %lo(2f - 1b)\n" - "addu $v0, $at, $v1\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "2:\n"; - DriverStr(expected, "LoadNearestFarLabelAddress"); +TEST_F(AssemblerMIPSTest, BareBgez) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez", /* is_bare */ true); } -TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) { - mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); - __ BindPcRelBaseLabel(); - __ LoadLiteral(mips::V0, mips::V1, literal); - constexpr size_t kAddiuCount = 0x1FDE; - for (size_t i = 0; i != kAddiuCount; ++i) { - __ Addiu(mips::A0, mips::A1, 0); - } +TEST_F(AssemblerMIPSTest, BareBlez) { + BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez", /* is_bare */ true); +} - std::string expected = - "1:\n" - "lw $v0, %lo(2f - 1b)($v1)\n" + - RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") + - "2:\n" - ".word 0x12345678\n"; - DriverStr(expected, "LoadFarthestNearLiteral"); +TEST_F(AssemblerMIPSTest, BareBgtz) { + BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz", /* is_bare */ true); } -TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) { - mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); - __ BindPcRelBaseLabel(); - __ LoadLiteral(mips::V0, mips::V1, literal); - constexpr size_t kAdduCount = 0x1FDF; - for (size_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); - } +TEST_F(AssemblerMIPSTest, BareBlt) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Blt", /* is_bare */ true); +} - std::string expected = - "1:\n" - "lui $at, %hi(2f - 1b)\n" - "addu $at, $at, $v1\n" - "lw $v0, %lo(2f - 1b)($at)\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "2:\n" - ".word 0x12345678\n"; - DriverStr(expected, "LoadNearestFarLiteral"); +TEST_F(AssemblerMIPSTest, BareBge) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge", /* is_bare */ true); +} + +TEST_F(AssemblerMIPSTest, BareBltu) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu", /* is_bare */ true); +} + +TEST_F(AssemblerMIPSTest, BareBgeu) { + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu", /* is_bare */ true); +} + +TEST_F(AssemblerMIPSTest, BareBc1f) { + BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1f, "Bc1f", /* is_bare */ true); +} + +TEST_F(AssemblerMIPSTest, BareBc1t) { + BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1t, "Bc1t", /* is_bare */ true); } TEST_F(AssemblerMIPSTest, ImpossibleReordering) { @@ -2554,7 +2360,7 @@ TEST_F(AssemblerMIPSTest, ImpossibleReordering) { "nop\n" "addu $t0, $t1, $t2\n" - "beq $zero, $t0, 1b\n" + "beqz $t0, 1b\n" "nop\n" "or $t1, $t2, $t3\n" @@ -2563,17 +2369,17 @@ TEST_F(AssemblerMIPSTest, ImpossibleReordering) { 
"and $t0, $t1, $t2\n" "slt $at, $t1, $t0\n" - "bne $zero, $at, 1b\n" + "bnez $at, 1b\n" "nop\n" "xor $at, $t0, $t1\n" "slt $at, $t1, $t0\n" - "beq $zero, $at, 1b\n" + "beqz $at, 1b\n" "nop\n" "subu $t0, $t1, $at\n" "sltu $at, $t1, $t0\n" - "bne $zero, $at, 1b\n" + "bnez $at, 1b\n" "nop\n" "c.olt.s $fcc1, $f2, $f4\n" @@ -2606,11 +2412,11 @@ TEST_F(AssemblerMIPSTest, ImpossibleReordering) { "2:\n" - "bne $zero, $t0, 2b\n" + "bnez $t0, 2b\n" "nop\n" "sltu $at, $t1, $t0\n" - "beq $zero, $at, 2b\n" + "beqz $at, 2b\n" "nop\n" "bc1f $fcc2, 2b\n" @@ -2666,22 +2472,22 @@ TEST_F(AssemblerMIPSTest, Reordering) { ".set noreorder\n" "1:\n" - "beq $zero, $t1, 1b\n" + "beqz $t1, 1b\n" "addu $t0, $t1, $t2\n" "bne $t2, $t3, 1b\n" "or $t1, $t2, $t3\n" "slt $at, $t1, $t2\n" - "bne $zero, $at, 1b\n" + "bnez $at, 1b\n" "and $t0, $t1, $t2\n" "slt $at, $t1, $t0\n" - "beq $zero, $at, 1b\n" + "beqz $at, 1b\n" "xor $t2, $t0, $t1\n" "sltu $at, $t1, $t0\n" - "bne $zero, $at, 1b\n" + "bnez $at, 1b\n" "subu $t2, $t1, $t0\n" "bc1t $fcc1, 1b\n" @@ -2700,6 +2506,7 @@ TEST_F(AssemblerMIPSTest, Reordering) { TEST_F(AssemblerMIPSTest, AbsorbTargetInstruction) { mips::MipsLabel label1, label2, label3, label4, label5, label6; + mips::MipsLabel label7, label8, label9, label10, label11, label12, label13; __ SetReorder(true); __ B(&label1); @@ -2723,6 +2530,41 @@ TEST_F(AssemblerMIPSTest, AbsorbTargetInstruction) { __ Bind(&label6); __ CodePosition(); // Even across Bind(), CodePosition() prevents absorbing the ADDU above. + __ Nop(); + __ B(&label7); + __ Bind(&label7); + __ Lw(mips::V0, mips::A0, 0x5678); // Possibly patchable instruction, not absorbed. + + __ Nop(); + __ B(&label8); + __ Bind(&label8); + __ Sw(mips::V0, mips::A0, 0x5678); // Possibly patchable instruction, not absorbed. + + __ Nop(); + __ B(&label9); + __ Bind(&label9); + __ Addiu(mips::V0, mips::A0, 0x5678); // Possibly patchable instruction, not absorbed. + + __ Nop(); + __ B(&label10); + __ Bind(&label10); + __ Lw(mips::V0, mips::A0, 0x5680); // Immediate isn't 0x5678, absorbed. + + __ Nop(); + __ B(&label11); + __ Bind(&label11); + __ Sw(mips::V0, mips::A0, 0x5680); // Immediate isn't 0x5678, absorbed. + + __ Nop(); + __ B(&label12); + __ Bind(&label12); + __ Addiu(mips::V0, mips::A0, 0x5680); // Immediate isn't 0x5678, absorbed. + + __ Nop(); + __ B(&label13); + __ Bind(&label13); + __ Andi(mips::V0, mips::A0, 0x5678); // Not one of patchable instructions, absorbed. 
+ std::string expected = ".set noreorder\n" "b 1f\n" @@ -2744,7 +2586,49 @@ TEST_F(AssemblerMIPSTest, AbsorbTargetInstruction) { "b 5f\n" "nop\n" "5:\n" - "addu $t0, $t1, $t2\n"; + "addu $t0, $t1, $t2\n" + + "nop\n" + "b 7f\n" + "nop\n" + "7:\n" + "lw $v0, 0x5678($a0)\n" + + "nop\n" + "b 8f\n" + "nop\n" + "8:\n" + "sw $v0, 0x5678($a0)\n" + + "nop\n" + "b 9f\n" + "nop\n" + "9:\n" + "addiu $v0, $a0, 0x5678\n" + + "nop\n" + "b 10f\n" + "lw $v0, 0x5680($a0)\n" + "lw $v0, 0x5680($a0)\n" + "10:\n" + + "nop\n" + "b 11f\n" + "sw $v0, 0x5680($a0)\n" + "sw $v0, 0x5680($a0)\n" + "11:\n" + + "nop\n" + "b 12f\n" + "addiu $v0, $a0, 0x5680\n" + "addiu $v0, $a0, 0x5680\n" + "12:\n" + + "nop\n" + "b 13f\n" + "andi $v0, $a0, 0x5678\n" + "andi $v0, $a0, 0x5678\n" + "13:\n"; DriverStr(expected, "AbsorbTargetInstruction"); } @@ -2831,10 +2715,62 @@ TEST_F(AssemblerMIPSTest, SetReorder) { DriverStr(expected, "SetReorder"); } +TEST_F(AssemblerMIPSTest, ReorderPatchedInstruction) { + __ SetReorder(true); + mips::MipsLabel label1, label2; + mips::MipsLabel patcher_label1, patcher_label2, patcher_label3, patcher_label4, patcher_label5; + __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label1); + __ Beq(mips::A0, mips::A1, &label1); + constexpr uint32_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label1); + __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label2); + __ Bltz(mips::V1, &label2); + constexpr uint32_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label2); + __ Addiu(mips::V0, mips::A0, 0x5678, &patcher_label3); + __ B(&label1); + __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label4); + __ Jalr(mips::T9); + __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label5); + __ Blt(mips::V0, mips::V1, &label2); + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + + std::string expected = + ".set noreorder\n" + "beq $a0, $a1, 1f\n" + "lw $v0, 0x5678($a0)\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "bltz $v1, 2f\n" + "sw $v0, 0x5678($a0)\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "b 1b\n" + "addiu $v0, $a0, 0x5678\n" + "jalr $t9\n" + "lw $v0, 0x5678($a0)\n" + "slt $at, $v0, $v1\n" + "bnez $at, 2b\n" + "sw $v0, 0x5678($a0)\n" + "addu $zero, $zero, $zero\n"; + DriverStr(expected, "ReorderPatchedInstruction"); + EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 1 * 4u); + EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + 3) * 4u); + EXPECT_EQ(__ GetLabelLocation(&patcher_label3), (kAdduCount1 + kAdduCount2 + 5) * 4u); + EXPECT_EQ(__ GetLabelLocation(&patcher_label4), (kAdduCount1 + kAdduCount2 + 7) * 4u); + EXPECT_EQ(__ GetLabelLocation(&patcher_label5), (kAdduCount1 + kAdduCount2 + 10) * 4u); +} + TEST_F(AssemblerMIPSTest, LongBranchReorder) { - mips::MipsLabel label; + mips::MipsLabel label, patcher_label1, patcher_label2; __ SetReorder(true); - __ Subu(mips::T0, mips::T1, mips::T2); + __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label1); __ B(&label); constexpr uint32_t kAdduCount1 = (1u << 15) + 1; for (size_t i = 0; i != kAdduCount1; ++i) { @@ -2845,7 +2781,7 @@ TEST_F(AssemblerMIPSTest, LongBranchReorder) { for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); } - __ Subu(mips::T0, mips::T1, mips::T2); + __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label2); __ B(&label); // Account for 5 extra instructions: ori, addu, lw, jalr, addiu. 
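For context on the patcher labels exercised above: the tests mark possibly patchable lw/sw/addiu instructions with the 0x5678 placeholder immediate and an extra MipsLabel recording where the instruction finally lands, so the reorderer may move such an instruction into a delay slot but must never duplicate it (hence "not absorbed") and must keep the label's location accurate. A minimal, assumed usage sketch (the surrounding function and the art::mips namespace qualification are illustrative, not from the patch):

// Sketch only: emit a patchable lw and recover its final location afterwards.
void EmitPatchableLoad(art::mips::MipsAssembler* assembler) {
  art::mips::MipsLabel patcher_label;
  // 0x5678 is the placeholder immediate the tests use for patchable code; the
  // patcher label tracks the lw even if reordering moves it into a delay slot.
  assembler->Lw(art::mips::V0, art::mips::A0, 0x5678, &patcher_label);
  // After the code is finalized, the byte offset of the instruction to patch
  // would be read back as in the tests:
  //   uint32_t patch_location = assembler->GetLabelLocation(&patcher_label);
}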
@@ -2856,7 +2792,7 @@ TEST_F(AssemblerMIPSTest, LongBranchReorder) { std::ostringstream oss; oss << ".set noreorder\n" - "subu $t0, $t1, $t2\n" + "addiu $t0, $t1, 0x5678\n" "addiu $sp, $sp, -4\n" "sw $ra, 0($sp)\n" "bltzal $zero, .+4\n" @@ -2868,7 +2804,7 @@ TEST_F(AssemblerMIPSTest, LongBranchReorder) { "addiu $sp, $sp, 4\n" << RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << - "subu $t0, $t1, $t2\n" + "addiu $t0, $t1, 0x5678\n" "addiu $sp, $sp, -4\n" "sw $ra, 0($sp)\n" "bltzal $zero, .+4\n" @@ -2880,6 +2816,129 @@ TEST_F(AssemblerMIPSTest, LongBranchReorder) { "addiu $sp, $sp, 4\n"; std::string expected = oss.str(); DriverStr(expected, "LongBranchReorder"); + EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 0 * 4u); + EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + kAdduCount2 + 10) * 4u); +} + +/////////////////////// +// Loading Constants // +/////////////////////// + +TEST_F(AssemblerMIPSTest, LoadConst32) { + // IsUint<16>(value) + __ LoadConst32(mips::V0, 0); + __ LoadConst32(mips::V0, 65535); + // IsInt<16>(value) + __ LoadConst32(mips::V0, -1); + __ LoadConst32(mips::V0, -32768); + // Everything else + __ LoadConst32(mips::V0, 65536); + __ LoadConst32(mips::V0, 65537); + __ LoadConst32(mips::V0, 2147483647); + __ LoadConst32(mips::V0, -32769); + __ LoadConst32(mips::V0, -65536); + __ LoadConst32(mips::V0, -65537); + __ LoadConst32(mips::V0, -2147483647); + __ LoadConst32(mips::V0, -2147483648); + + const char* expected = + // IsUint<16>(value) + "ori $v0, $zero, 0\n" // __ LoadConst32(mips::V0, 0); + "ori $v0, $zero, 65535\n" // __ LoadConst32(mips::V0, 65535); + // IsInt<16>(value) + "addiu $v0, $zero, -1\n" // __ LoadConst32(mips::V0, -1); + "addiu $v0, $zero, -32768\n" // __ LoadConst32(mips::V0, -32768); + // Everything else + "lui $v0, 1\n" // __ LoadConst32(mips::V0, 65536); + "lui $v0, 1\n" // __ LoadConst32(mips::V0, 65537); + "ori $v0, 1\n" // " + "lui $v0, 32767\n" // __ LoadConst32(mips::V0, 2147483647); + "ori $v0, 65535\n" // " + "lui $v0, 65535\n" // __ LoadConst32(mips::V0, -32769); + "ori $v0, 32767\n" // " + "lui $v0, 65535\n" // __ LoadConst32(mips::V0, -65536); + "lui $v0, 65534\n" // __ LoadConst32(mips::V0, -65537); + "ori $v0, 65535\n" // " + "lui $v0, 32768\n" // __ LoadConst32(mips::V0, -2147483647); + "ori $v0, 1\n" // " + "lui $v0, 32768\n"; // __ LoadConst32(mips::V0, -2147483648); + DriverStr(expected, "LoadConst32"); +} + +TEST_F(AssemblerMIPSTest, LoadFarthestNearLabelAddress) { + mips::MipsLabel label; + __ BindPcRelBaseLabel(); + __ LoadLabelAddress(mips::V0, mips::V1, &label); + constexpr size_t kAddiuCount = 0x1FDE; + for (size_t i = 0; i != kAddiuCount; ++i) { + __ Addiu(mips::A0, mips::A1, 0); + } + __ Bind(&label); + + std::string expected = + "1:\n" + "addiu $v0, $v1, %lo(2f - 1b)\n" + + RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") + + "2:\n"; + DriverStr(expected, "LoadFarthestNearLabelAddress"); +} + +TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddress) { + mips::MipsLabel label; + __ BindPcRelBaseLabel(); + __ LoadLabelAddress(mips::V0, mips::V1, &label); + constexpr size_t kAdduCount = 0x1FDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + + std::string expected = + "1:\n" + "lui $at, %hi(2f - 1b)\n" + "ori $at, $at, %lo(2f - 1b)\n" + "addu $v0, $at, $v1\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n"; + DriverStr(expected, "LoadNearestFarLabelAddress"); +} 
+ +TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ BindPcRelBaseLabel(); + __ LoadLiteral(mips::V0, mips::V1, literal); + constexpr size_t kAddiuCount = 0x1FDE; + for (size_t i = 0; i != kAddiuCount; ++i) { + __ Addiu(mips::A0, mips::A1, 0); + } + + std::string expected = + "1:\n" + "lw $v0, %lo(2f - 1b)($v1)\n" + + RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); +} + +TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ BindPcRelBaseLabel(); + __ LoadLiteral(mips::V0, mips::V1, literal); + constexpr size_t kAdduCount = 0x1FDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "1:\n" + "lui $at, %hi(2f - 1b)\n" + "addu $at, $at, $v1\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); } #undef __ diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 7a1beb656b..183b5e507b 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -795,14 +795,42 @@ void Mips64Assembler::Bc1nez(FpuRegister ft, uint16_t imm16) { EmitFI(0x11, 0xD, ft, imm16); } +void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) { + EmitI(0x4, rs, rt, imm16); +} + +void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) { + EmitI(0x5, rs, rt, imm16); +} + void Mips64Assembler::Beqz(GpuRegister rt, uint16_t imm16) { - EmitI(0x4, ZERO, rt, imm16); + Beq(rt, ZERO, imm16); +} + +void Mips64Assembler::Bnez(GpuRegister rt, uint16_t imm16) { + Bne(rt, ZERO, imm16); +} + +void Mips64Assembler::Bltz(GpuRegister rt, uint16_t imm16) { + EmitI(0x1, rt, static_cast<GpuRegister>(0), imm16); } -void Mips64Assembler::EmitBcondc(BranchCondition cond, - GpuRegister rs, - GpuRegister rt, - uint32_t imm16_21) { +void Mips64Assembler::Bgez(GpuRegister rt, uint16_t imm16) { + EmitI(0x1, rt, static_cast<GpuRegister>(0x1), imm16); +} + +void Mips64Assembler::Blez(GpuRegister rt, uint16_t imm16) { + EmitI(0x6, rt, static_cast<GpuRegister>(0), imm16); +} + +void Mips64Assembler::Bgtz(GpuRegister rt, uint16_t imm16) { + EmitI(0x7, rt, static_cast<GpuRegister>(0), imm16); +} + +void Mips64Assembler::EmitBcondR6(BranchCondition cond, + GpuRegister rs, + GpuRegister rt, + uint32_t imm16_21) { switch (cond) { case kCondLT: Bltc(rs, rt, imm16_21); @@ -866,6 +894,55 @@ void Mips64Assembler::EmitBcondc(BranchCondition cond, } } +void Mips64Assembler::EmitBcondR2(BranchCondition cond, + GpuRegister rs, + GpuRegister rt, + uint16_t imm16) { + switch (cond) { + case kCondLTZ: + CHECK_EQ(rt, ZERO); + Bltz(rs, imm16); + break; + case kCondGEZ: + CHECK_EQ(rt, ZERO); + Bgez(rs, imm16); + break; + case kCondLEZ: + CHECK_EQ(rt, ZERO); + Blez(rs, imm16); + break; + case kCondGTZ: + CHECK_EQ(rt, ZERO); + Bgtz(rs, imm16); + break; + case kCondEQ: + Beq(rs, rt, imm16); + break; + case kCondNE: + Bne(rs, rt, imm16); + break; + case kCondEQZ: + CHECK_EQ(rt, ZERO); + Beqz(rs, imm16); + break; + case kCondNEZ: + CHECK_EQ(rt, ZERO); + Bnez(rs, imm16); + break; + case kCondF: + case kCondT: + case kCondLT: + case kCondGE: + case kCondLE: + case kCondGT: + case kCondLTU: + case kCondGEU: + case kUncond: + LOG(FATAL) 
<< "Unexpected branch condition " << cond; + UNREACHABLE(); + } +} + void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { EmitFR(0x11, 0x10, ft, fs, fd, 0x0); } @@ -1002,6 +1079,22 @@ void Mips64Assembler::SelD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { EmitFR(0x11, 0x11, ft, fs, fd, 0x10); } +void Mips64Assembler::SeleqzS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x10, ft, fs, fd, 0x14); +} + +void Mips64Assembler::SeleqzD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x11, ft, fs, fd, 0x14); +} + +void Mips64Assembler::SelnezS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x10, ft, fs, fd, 0x17); +} + +void Mips64Assembler::SelnezD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x11, ft, fs, fd, 0x17); +} + void Mips64Assembler::RintS(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x1a); } @@ -2013,37 +2106,67 @@ void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBit type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type; } -void Mips64Assembler::Branch::InitializeType(Type initial_type) { - OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); - switch (initial_type) { - case kLabel: - case kLiteral: - case kLiteralUnsigned: - case kLiteralLong: - CHECK(!IsResolved()); - type_ = initial_type; - break; - case kCall: - InitShortOrLong(offset_size, kCall, kLongCall); - break; - case kCondBranch: - switch (condition_) { - case kUncond: - InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); - break; - case kCondEQZ: - case kCondNEZ: - // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. - type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch; - break; - default: - InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); - break; - } - break; - default: - LOG(FATAL) << "Unexpected branch type " << initial_type; - UNREACHABLE(); +void Mips64Assembler::Branch::InitializeType(Type initial_type, bool is_r6) { + OffsetBits offset_size_needed = GetOffsetSizeNeeded(location_, target_); + if (is_r6) { + // R6 + switch (initial_type) { + case kLabel: + case kLiteral: + case kLiteralUnsigned: + case kLiteralLong: + CHECK(!IsResolved()); + type_ = initial_type; + break; + case kCall: + InitShortOrLong(offset_size_needed, kCall, kLongCall); + break; + case kCondBranch: + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size_needed, kUncondBranch, kLongUncondBranch); + break; + case kCondEQZ: + case kCondNEZ: + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size_needed <= kOffset23) ? kCondBranch : kLongCondBranch; + break; + default: + InitShortOrLong(offset_size_needed, kCondBranch, kLongCondBranch); + break; + } + break; + case kBareCall: + type_ = kBareCall; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + case kBareCondBranch: + type_ = (condition_ == kUncond) ? 
kBareUncondBranch : kBareCondBranch; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + default: + LOG(FATAL) << "Unexpected branch type " << initial_type; + UNREACHABLE(); + } + } else { + // R2 + CHECK_EQ(initial_type, kBareCondBranch); + switch (condition_) { + case kCondLTZ: + case kCondGEZ: + case kCondLEZ: + case kCondGTZ: + case kCondEQ: + case kCondNE: + case kCondEQZ: + case kCondNEZ: + break; + default: + LOG(FATAL) << "Unexpected R2 branch condition " << condition_; + UNREACHABLE(); + } + type_ = kR2BareCondBranch; + CHECK_LE(offset_size_needed, GetOffsetSize()); } old_type_ = type_; } @@ -2076,21 +2199,25 @@ bool Mips64Assembler::Branch::IsUncond(BranchCondition condition, } } -Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, bool is_call) +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, bool is_call, bool is_bare) : old_location_(location), location_(location), target_(target), lhs_reg_(ZERO), rhs_reg_(ZERO), condition_(kUncond) { - InitializeType(is_call ? kCall : kCondBranch); + InitializeType( + (is_call ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareCondBranch : kCondBranch)), + /* is_r6 */ true); } -Mips64Assembler::Branch::Branch(uint32_t location, +Mips64Assembler::Branch::Branch(bool is_r6, + uint32_t location, uint32_t target, Mips64Assembler::BranchCondition condition, GpuRegister lhs_reg, - GpuRegister rhs_reg) + GpuRegister rhs_reg, + bool is_bare) : old_location_(location), location_(location), target_(target), @@ -2131,7 +2258,7 @@ Mips64Assembler::Branch::Branch(uint32_t location, // Branch condition is always true, make the branch unconditional. condition_ = kUncond; } - InitializeType(kCondBranch); + InitializeType((is_bare ? kBareCondBranch : kCondBranch), is_r6); } Mips64Assembler::Branch::Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type) @@ -2142,7 +2269,7 @@ Mips64Assembler::Branch::Branch(uint32_t location, GpuRegister dest_reg, Type la rhs_reg_(ZERO), condition_(kUncond) { CHECK_NE(dest_reg, ZERO); - InitializeType(label_or_literal_type); + InitializeType(label_or_literal_type, /* is_r6 */ true); } Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition( @@ -2238,12 +2365,32 @@ uint32_t Mips64Assembler::Branch::GetOldEndLocation() const { return GetOldLocation() + GetOldSize(); } +bool Mips64Assembler::Branch::IsBare() const { + switch (type_) { + // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually. + case kBareUncondBranch: + case kBareCondBranch: + case kBareCall: + // R2 short branches (can't be promoted to long), delay slots filled manually. + case kR2BareCondBranch: + return true; + default: + return false; + } +} + bool Mips64Assembler::Branch::IsLong() const { switch (type_) { - // Short branches. + // R6 short branches (can be promoted to long). case kUncondBranch: case kCondBranch: case kCall: + // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually. + case kBareUncondBranch: + case kBareCondBranch: + case kBareCall: + // R2 short branches (can't be promoted to long), delay slots filled manually. + case kR2BareCondBranch: // Near label. case kLabel: // Near literals. 
@@ -2271,8 +2418,9 @@ bool Mips64Assembler::Branch::IsResolved() const { } Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const { + bool r6_cond_branch = (type_ == kCondBranch || type_ == kBareCondBranch); OffsetBits offset_size = - (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ)) + (r6_cond_branch && (condition_ == kCondEQZ || condition_ == kCondNEZ)) ? kOffset23 : branch_info_[type_].offset_size; return offset_size; @@ -2318,8 +2466,9 @@ void Mips64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) } void Mips64Assembler::Branch::PromoteToLong() { + CHECK(!IsBare()); // Bare branches do not promote. switch (type_) { - // Short branches. + // R6 short branches (can be promoted to long). case kUncondBranch: type_ = kLongUncondBranch; break; @@ -2366,7 +2515,7 @@ uint32_t Mips64Assembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { } // The following logic is for debugging/testing purposes. // Promote some short branches to long when it's not really required. - if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) { + if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max() && !IsBare())) { int64_t distance = static_cast<int64_t>(target_) - location_; distance = (distance >= 0) ? distance : -distance; if (distance >= max_short_distance) { @@ -2498,13 +2647,15 @@ void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) { } } -void Mips64Assembler::Buncond(Mips64Label* label) { +void Mips64Assembler::Buncond(Mips64Label* label, bool is_bare) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(buffer_.Size(), target, /* is_call */ false); + branches_.emplace_back(buffer_.Size(), target, /* is_call */ false, is_bare); FinalizeLabeledBranch(label); } void Mips64Assembler::Bcond(Mips64Label* label, + bool is_r6, + bool is_bare, BranchCondition condition, GpuRegister lhs, GpuRegister rhs) { @@ -2513,13 +2664,13 @@ void Mips64Assembler::Bcond(Mips64Label* label, return; } uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs); + branches_.emplace_back(is_r6, buffer_.Size(), target, condition, lhs, rhs, is_bare); FinalizeLabeledBranch(label); } -void Mips64Assembler::Call(Mips64Label* label) { +void Mips64Assembler::Call(Mips64Label* label, bool is_bare) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(buffer_.Size(), target, /* is_call */ true); + branches_.emplace_back(buffer_.Size(), target, /* is_call */ true, is_bare); FinalizeLabeledBranch(label); } @@ -2730,11 +2881,18 @@ void Mips64Assembler::PromoteBranches() { // Note: make sure branch_info_[] and EmitBranch() are kept synchronized. const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = { - // Short branches. + // R6 short branches (can be promoted to long). { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kUncondBranch { 2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kCondBranch // Exception: kOffset23 for beqzc/bnezc { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kCall + // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually. 
+ { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kBareUncondBranch + { 1, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kBareCondBranch + // Exception: kOffset23 for beqzc/bnezc + { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kBareCall + // R2 short branches (can't be promoted to long), delay slots filled manually. + { 1, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kR2BareCondBranch // Near label. { 1, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kLabel // Near literals. @@ -2769,13 +2927,29 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { break; case Branch::kCondBranch: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - EmitBcondc(condition, lhs, rhs, offset); + EmitBcondR6(condition, lhs, rhs, offset); Nop(); // TODO: improve by filling the forbidden/delay slot. break; case Branch::kCall: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Balc(offset); break; + case Branch::kBareUncondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Bc(offset); + break; + case Branch::kBareCondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcondR6(condition, lhs, rhs, offset); + break; + case Branch::kBareCall: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Balc(offset); + break; + case Branch::kR2BareCondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcondR2(condition, lhs, rhs, offset); + break; // Near label. case Branch::kLabel: @@ -2804,7 +2978,7 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { Jic(AT, Low16Bits(offset)); break; case Branch::kLongCondBranch: - EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2); + EmitBcondR6(Branch::OppositeCondition(condition), lhs, rhs, 2); offset += (offset & 0x8000) << 1; // Account for sign extension in jic. 
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Auipc(AT, High16Bits(offset)); @@ -2848,68 +3022,108 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize)); } -void Mips64Assembler::Bc(Mips64Label* label) { - Buncond(label); +void Mips64Assembler::Bc(Mips64Label* label, bool is_bare) { + Buncond(label, is_bare); +} + +void Mips64Assembler::Balc(Mips64Label* label, bool is_bare) { + Call(label, is_bare); +} + +void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondLT, rs, rt); +} + +void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondLTZ, rt); +} + +void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondGTZ, rt); +} + +void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondGE, rs, rt); +} + +void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondGEZ, rt); +} + +void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondLEZ, rt); +} + +void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondLTU, rs, rt); } -void Mips64Assembler::Balc(Mips64Label* label) { - Call(label); +void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondGEU, rs, rt); } -void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { - Bcond(label, kCondLT, rs, rt); +void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondEQ, rs, rt); } -void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) { - Bcond(label, kCondLTZ, rt); +void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondNE, rs, rt); } -void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) { - Bcond(label, kCondGTZ, rt); +void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondEQZ, rs); } -void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { - Bcond(label, kCondGE, rs, rt); +void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondNEZ, rs); } -void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) { - Bcond(label, kCondGEZ, rt); +void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondF, static_cast<GpuRegister>(ft), ZERO); } -void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) { - Bcond(label, kCondLEZ, rt); +void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label, bool is_bare) { + Bcond(label, /* is_r6 */ true, is_bare, kCondT, static_cast<GpuRegister>(ft), ZERO); } -void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { - Bcond(label, kCondLTU, rs, rt); +void Mips64Assembler::Bltz(GpuRegister rt, Mips64Label* label, bool is_bare) { + 
CHECK(is_bare); + Bcond(label, /* is_r6 */ false, is_bare, kCondLTZ, rt); } -void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { - Bcond(label, kCondGEU, rs, rt); +void Mips64Assembler::Bgtz(GpuRegister rt, Mips64Label* label, bool is_bare) { + CHECK(is_bare); + Bcond(label, /* is_r6 */ false, is_bare, kCondGTZ, rt); } -void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { - Bcond(label, kCondEQ, rs, rt); +void Mips64Assembler::Bgez(GpuRegister rt, Mips64Label* label, bool is_bare) { + CHECK(is_bare); + Bcond(label, /* is_r6 */ false, is_bare, kCondGEZ, rt); } -void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { - Bcond(label, kCondNE, rs, rt); +void Mips64Assembler::Blez(GpuRegister rt, Mips64Label* label, bool is_bare) { + CHECK(is_bare); + Bcond(label, /* is_r6 */ false, is_bare, kCondLEZ, rt); } -void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) { - Bcond(label, kCondEQZ, rs); +void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { + CHECK(is_bare); + Bcond(label, /* is_r6 */ false, is_bare, kCondEQ, rs, rt); } -void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) { - Bcond(label, kCondNEZ, rs); +void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { + CHECK(is_bare); + Bcond(label, /* is_r6 */ false, is_bare, kCondNE, rs, rt); } -void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label) { - Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO); +void Mips64Assembler::Beqz(GpuRegister rs, Mips64Label* label, bool is_bare) { + CHECK(is_bare); + Bcond(label, /* is_r6 */ false, is_bare, kCondEQZ, rs); } -void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) { - Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO); +void Mips64Assembler::Bnez(GpuRegister rs, Mips64Label* label, bool is_bare) { + CHECK(is_bare); + Bcond(label, /* is_r6 */ false, is_bare, kCondNEZ, rs); } void Mips64Assembler::AdjustBaseAndOffset(GpuRegister& base, diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index c39d120bce..dd6dcd1896 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -86,7 +86,7 @@ static inline int InstrCountForLoadReplicatedConst32(int64_t value) { int32_t y = High32Bits(value); if (x == y) { - return (IsUint<16>(x) || IsInt<16>(x) || ((x & 0xFFFF) == 0 && IsInt<16>(value >> 16))) ? 2 : 3; + return (IsUint<16>(x) || IsInt<16>(x) || ((x & 0xFFFF) == 0)) ? 
2 : 3; } return INT_MAX; @@ -563,7 +563,14 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Bnezc(GpuRegister rs, uint32_t imm21); void Bc1eqz(FpuRegister ft, uint16_t imm16); void Bc1nez(FpuRegister ft, uint16_t imm16); - void Beqz(GpuRegister rt, uint16_t imm16); + void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16); // R2 + void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16); // R2 + void Beqz(GpuRegister rt, uint16_t imm16); // R2 + void Bnez(GpuRegister rt, uint16_t imm16); // R2 + void Bltz(GpuRegister rt, uint16_t imm16); // R2 + void Bgez(GpuRegister rt, uint16_t imm16); // R2 + void Blez(GpuRegister rt, uint16_t imm16); // R2 + void Bgtz(GpuRegister rt, uint16_t imm16); // R2 void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft); void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft); @@ -599,6 +606,10 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void FloorWD(FpuRegister fd, FpuRegister fs); void SelS(FpuRegister fd, FpuRegister fs, FpuRegister ft); void SelD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void SeleqzS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void SeleqzD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void SelnezS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void SelnezD(FpuRegister fd, FpuRegister fs, FpuRegister ft); void RintS(FpuRegister fd, FpuRegister fs); void RintD(FpuRegister fd, FpuRegister fs); void ClassS(FpuRegister fd, FpuRegister fs); @@ -922,22 +933,57 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer // the table data) and should be loaded using LoadLabelAddress(). JumpTable* CreateJumpTable(std::vector<Mips64Label*>&& labels); - void Bc(Mips64Label* label); - void Balc(Mips64Label* label); - void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label); - void Bltzc(GpuRegister rt, Mips64Label* label); - void Bgtzc(GpuRegister rt, Mips64Label* label); - void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label); - void Bgezc(GpuRegister rt, Mips64Label* label); - void Blezc(GpuRegister rt, Mips64Label* label); - void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); - void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); - void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label); - void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label); - void Beqzc(GpuRegister rs, Mips64Label* label); - void Bnezc(GpuRegister rs, Mips64Label* label); - void Bc1eqz(FpuRegister ft, Mips64Label* label); - void Bc1nez(FpuRegister ft, Mips64Label* label); + // When `is_bare` is false, the branches will promote to long (if the range + // of the individual branch instruction is insufficient) and the delay/ + // forbidden slots will be taken care of. + // Use `is_bare = false` when the branch target may be out of reach of the + // individual branch instruction. IOW, this is for general purpose use. + // + // When `is_bare` is true, just the branch instructions will be generated + // leaving delay/forbidden slot filling up to the caller and the branches + // won't promote to long if the range is insufficient (you'll get a + // compilation error when the range is exceeded). + // Use `is_bare = true` when the branch target is known to be within reach + // of the individual branch instruction. This is intended for small local + // optimizations around delay/forbidden slots. 
+ // Also prefer using `is_bare = true` if the code near the branch is to be + // patched or analyzed at run time (e.g. introspection) to + // - show the intent and + // - fail during compilation rather than during patching/execution if the + // bare branch range is insufficient but the code size and layout are + // expected to remain unchanged + // + // R6 compact branches without delay/forbidden slots. + void Bc(Mips64Label* label, bool is_bare = false); + void Balc(Mips64Label* label, bool is_bare = false); + // R6 compact branches with forbidden slots. + void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); + void Bltzc(GpuRegister rt, Mips64Label* label, bool is_bare = false); + void Bgtzc(GpuRegister rt, Mips64Label* label, bool is_bare = false); + void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); + void Bgezc(GpuRegister rt, Mips64Label* label, bool is_bare = false); + void Blezc(GpuRegister rt, Mips64Label* label, bool is_bare = false); + void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); + void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); + void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); + void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); + void Beqzc(GpuRegister rs, Mips64Label* label, bool is_bare = false); + void Bnezc(GpuRegister rs, Mips64Label* label, bool is_bare = false); + // R6 branches with delay slots. + void Bc1eqz(FpuRegister ft, Mips64Label* label, bool is_bare = false); + void Bc1nez(FpuRegister ft, Mips64Label* label, bool is_bare = false); + // R2 branches with delay slots that are also available on R6. + // The `is_bare` parameter exists and is checked in these branches only to + // prevent programming mistakes. These branches never promote to long, not + // even if `is_bare` is false. + void Bltz(GpuRegister rt, Mips64Label* label, bool is_bare = false); // R2 + void Bgtz(GpuRegister rt, Mips64Label* label, bool is_bare = false); // R2 + void Bgez(GpuRegister rt, Mips64Label* label, bool is_bare = false); // R2 + void Blez(GpuRegister rt, Mips64Label* label, bool is_bare = false); // R2 + void Beq(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); // R2 + void Bne(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); // R2 + void Beqz(GpuRegister rs, Mips64Label* label, bool is_bare = false); // R2 + void Bnez(GpuRegister rs, Mips64Label* label, bool is_bare = false); // R2 void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); void AdjustBaseAndOffset(GpuRegister& base, int32_t& offset, bool is_doubleword); @@ -1379,10 +1425,16 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer class Branch { public: enum Type { - // Short branches. + // R6 short branches (can be promoted to long). kUncondBranch, kCondBranch, kCall, + // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually. + kBareUncondBranch, + kBareCondBranch, + kBareCall, + // R2 short branches (can't be promoted to long), delay slots filled manually. + kR2BareCondBranch, // Near label. kLabel, // Near literals. @@ -1425,8 +1477,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer // different origins, e.g. to PC or PC+4.
Encode the origin distance (as a number of 4-byte // instructions) from the instruction containing the offset. uint32_t pc_org; - // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is - // an exception: use kOffset23 for beqzc/bnezc). + // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch + // and kBareCondBranch are an exception: use kOffset23 for beqzc/bnezc). OffsetBits offset_size; // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift // count. @@ -1435,13 +1487,15 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer static const BranchInfo branch_info_[/* Type */]; // Unconditional branch or call. - Branch(uint32_t location, uint32_t target, bool is_call); + Branch(uint32_t location, uint32_t target, bool is_call, bool is_bare); // Conditional branch. - Branch(uint32_t location, + Branch(bool is_r6, + uint32_t location, uint32_t target, BranchCondition condition, GpuRegister lhs_reg, - GpuRegister rhs_reg); + GpuRegister rhs_reg, + bool is_bare); // Label address (in literal area) or literal. Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type); @@ -1467,6 +1521,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer uint32_t GetOldSize() const; uint32_t GetEndLocation() const; uint32_t GetOldEndLocation() const; + bool IsBare() const; bool IsLong() const; bool IsResolved() const; @@ -1527,7 +1582,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer private: // Completes branch construction by determining and recording its type. - void InitializeType(Type initial_type); + void InitializeType(Type initial_type, bool is_r6); // Helper for the above. 
void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); @@ -1554,7 +1609,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void EmitI26(int opcode, uint32_t imm26); void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct); void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm); - void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21); + void EmitBcondR6(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21); + void EmitBcondR2(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint16_t imm16); void EmitMsa3R(int operation, int df, VectorRegister wt, @@ -1568,12 +1624,14 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); void EmitMsa2RF(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); - void Buncond(Mips64Label* label); + void Buncond(Mips64Label* label, bool is_bare); void Bcond(Mips64Label* label, + bool is_r6, + bool is_bare, BranchCondition condition, GpuRegister lhs, GpuRegister rhs = ZERO); - void Call(Mips64Label* label); + void Call(Mips64Label* label, bool is_bare); void FinalizeLabeledBranch(Mips64Label* label); Branch* GetBranch(uint32_t branch_id); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 021e335697..fc0bd368ea 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -257,11 +257,46 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return result; } + void BranchHelper(void (mips64::Mips64Assembler::*f)(mips64::Mips64Label*, + bool), + const std::string& instr_name, + bool is_bare = false) { + mips64::Mips64Label label1, label2; + (Base::GetAssembler()->*f)(&label1, is_bare); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + (Base::GetAssembler()->*f)(&label2, is_bare); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + (Base::GetAssembler()->*f)(&label1, is_bare); + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + + std::string expected = + ".set noreorder\n" + + instr_name + " 1f\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + instr_name + " 2f\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + + instr_name + " 1b\n" + + "addu $zero, $zero, $zero\n"; + DriverStr(expected, instr_name); + } + void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, - mips64::Mips64Label*), - const std::string& instr_name) { + mips64::Mips64Label*, + bool), + const std::string& instr_name, + bool is_bare = false) { mips64::Mips64Label label; - (Base::GetAssembler()->*f)(mips64::A0, &label); + (Base::GetAssembler()->*f)(mips64::A0, &label, is_bare); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); @@ -271,26 +306,30 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); } - (Base::GetAssembler()->*f)(mips64::A1, &label); + 
(Base::GetAssembler()->*f)(mips64::A1, &label, is_bare); + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); std::string expected = ".set noreorder\n" + - instr_name + " $a0, 1f\n" - "nop\n" + + instr_name + " $a0, 1f\n" + + (is_bare ? "" : "nop\n") + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - instr_name + " $a1, 1b\n" - "nop\n"; + instr_name + " $a1, 1b\n" + + (is_bare ? "" : "nop\n") + + "addu $zero, $zero, $zero\n"; DriverStr(expected, instr_name); } void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, mips64::GpuRegister, - mips64::Mips64Label*), - const std::string& instr_name) { + mips64::Mips64Label*, + bool), + const std::string& instr_name, + bool is_bare = false) { mips64::Mips64Label label; - (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label); + (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label, is_bare); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); @@ -300,17 +339,51 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); } - (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label); + (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label, is_bare); + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); std::string expected = ".set noreorder\n" + - instr_name + " $a0, $a1, 1f\n" - "nop\n" + + instr_name + " $a0, $a1, 1f\n" + + (is_bare ? "" : "nop\n") + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - instr_name + " $a2, $a3, 1b\n" - "nop\n"; + instr_name + " $a2, $a3, 1b\n" + + (is_bare ? "" : "nop\n") + + "addu $zero, $zero, $zero\n"; + DriverStr(expected, instr_name); + } + + void BranchFpuCondHelper(void (mips64::Mips64Assembler::*f)(mips64::FpuRegister, + mips64::Mips64Label*, + bool), + const std::string& instr_name, + bool is_bare = false) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::F0, &label, is_bare); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::F31, &label, is_bare); + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + + std::string expected = + ".set noreorder\n" + + instr_name + " $f0, 1f\n" + + (is_bare ? "" : "nop\n") + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $f31, 1b\n" + + (is_bare ? 
"" : "nop\n") + + "addu $zero, $zero, $zero\n"; DriverStr(expected, instr_name); } @@ -452,6 +525,26 @@ TEST_F(AssemblerMIPS64Test, SelD) { DriverStr(RepeatFFF(&mips64::Mips64Assembler::SelD, "sel.d ${reg1}, ${reg2}, ${reg3}"), "sel.d"); } +TEST_F(AssemblerMIPS64Test, SeleqzS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::SeleqzS, "seleqz.s ${reg1}, ${reg2}, ${reg3}"), + "seleqz.s"); +} + +TEST_F(AssemblerMIPS64Test, SeleqzD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::SeleqzD, "seleqz.d ${reg1}, ${reg2}, ${reg3}"), + "seleqz.d"); +} + +TEST_F(AssemblerMIPS64Test, SelnezS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::SelnezS, "selnez.s ${reg1}, ${reg2}, ${reg3}"), + "selnez.s"); +} + +TEST_F(AssemblerMIPS64Test, SelnezD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::SelnezD, "selnez.d ${reg1}, ${reg2}, ${reg3}"), + "selnez.d"); +} + TEST_F(AssemblerMIPS64Test, RintS) { DriverStr(RepeatFF(&mips64::Mips64Assembler::RintS, "rint.s ${reg1}, ${reg2}"), "rint.s"); } @@ -668,120 +761,21 @@ TEST_F(AssemblerMIPS64Test, Sdc1) { "sdc1"); } -//////////////// -// CALL / JMP // -//////////////// +////////////// +// BRANCHES // +////////////// TEST_F(AssemblerMIPS64Test, Jalr) { DriverStr(".set noreorder\n" + RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); } -TEST_F(AssemblerMIPS64Test, Balc) { - mips64::Mips64Label label1, label2; - __ Balc(&label1); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - __ Bind(&label1); - __ Balc(&label2); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - __ Bind(&label2); - __ Balc(&label1); - - std::string expected = - ".set noreorder\n" - "balc 1f\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" - "balc 2f\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "2:\n" - "balc 1b\n"; - DriverStr(expected, "Balc"); -} - -TEST_F(AssemblerMIPS64Test, LongBalc) { - constexpr uint32_t kNopCount1 = (1u << 25) + 1; - constexpr uint32_t kNopCount2 = (1u << 25) + 1; - constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u; - ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity); - __ GetBuffer()->ExtendCapacity(kRequiredCapacity); - mips64::Mips64Label label1, label2; - __ Balc(&label1); - for (uint32_t i = 0; i != kNopCount1; ++i) { - __ Nop(); - } - __ Bind(&label1); - __ Balc(&label2); - for (uint32_t i = 0; i != kNopCount2; ++i) { - __ Nop(); - } - __ Bind(&label2); - __ Balc(&label1); - - uint32_t offset_forward1 = 2 + kNopCount1; // 2: account for auipc and jialc. - offset_forward1 <<= 2; - offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in jialc. - - uint32_t offset_forward2 = 2 + kNopCount2; // 2: account for auipc and jialc. - offset_forward2 <<= 2; - offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in jialc. - - uint32_t offset_back = -(2 + kNopCount2); // 2: account for auipc and jialc. - offset_back <<= 2; - offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jialc. - - // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs - // instead of generating them ourselves in the source code. This saves a few minutes - // of test time. 
- std::ostringstream oss; - oss << - ".set noreorder\n" - "auipc $at, 0x" << std::hex << High16Bits(offset_forward1) << "\n" - "jialc $at, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" - ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n" - "1:\n" - "auipc $at, 0x" << std::hex << High16Bits(offset_forward2) << "\n" - "jialc $at, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" - ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n" - "2:\n" - "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" - "jialc $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"; - std::string expected = oss.str(); - DriverStr(expected, "LongBalc"); -} - TEST_F(AssemblerMIPS64Test, Bc) { - mips64::Mips64Label label1, label2; - __ Bc(&label1); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - __ Bind(&label1); - __ Bc(&label2); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - __ Bind(&label2); - __ Bc(&label1); + BranchHelper(&mips64::Mips64Assembler::Bc, "Bc"); +} - std::string expected = - ".set noreorder\n" - "bc 1f\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" - "bc 2f\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "2:\n" - "bc 1b\n"; - DriverStr(expected, "Bc"); +TEST_F(AssemblerMIPS64Test, Balc) { + BranchHelper(&mips64::Mips64Assembler::Balc, "Balc"); } TEST_F(AssemblerMIPS64Test, Beqzc) { @@ -833,55 +827,107 @@ TEST_F(AssemblerMIPS64Test, Bgeuc) { } TEST_F(AssemblerMIPS64Test, Bc1eqz) { - mips64::Mips64Label label; - __ Bc1eqz(mips64::F0, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - __ Bc1eqz(mips64::F31, &label); - - std::string expected = - ".set noreorder\n" - "bc1eqz $f0, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "bc1eqz $f31, 1b\n" - "nop\n"; - DriverStr(expected, "Bc1eqz"); + BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1eqz, "Bc1eqz"); } TEST_F(AssemblerMIPS64Test, Bc1nez) { - mips64::Mips64Label label; - __ Bc1nez(mips64::F0, &label); - constexpr size_t kAdduCount1 = 63; - for (size_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - __ Bind(&label); - constexpr size_t kAdduCount2 = 64; - for (size_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - __ Bc1nez(mips64::F31, &label); + BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1nez, "Bc1nez"); +} - std::string expected = - ".set noreorder\n" - "bc1nez $f0, 1f\n" - "nop\n" + - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + - "1:\n" + - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + - "bc1nez $f31, 1b\n" - "nop\n"; - DriverStr(expected, "Bc1nez"); +TEST_F(AssemblerMIPS64Test, BareBc) { + BranchHelper(&mips64::Mips64Assembler::Bc, "Bc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBalc) { + BranchHelper(&mips64::Mips64Assembler::Balc, "Balc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBeqzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc", /* is_bare */ true); +} + 
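The BareB* tests above and below exercise the `is_bare` parameter documented in assembler_mips64.h earlier in this change. A minimal usage sketch (illustrative only, not part of the patch; it assumes a mips64::Mips64Assembler* named `assembler` and a mips64::Mips64Label `done` that is bound later):

  // Default (is_bare = false): the assembler fills the slot after the compact
  // branch with a nop and, if `done` ends up out of range of the short beqzc
  // encoding, promotes the branch to a long sequence (an inverted bnezc over
  // auipc + jic, as checked by LongBeqzc below).
  assembler->Beqzc(mips64::A0, &done);

  // Bare (is_bare = true): only the beqzc instruction is emitted; the caller
  // schedules the slot itself, and an out-of-range target fails at assembly
  // time instead of being silently expanded.
  assembler->Beqzc(mips64::A0, &done, /* is_bare */ true);
  assembler->Nop();  // Caller-chosen slot filler; a useful instruction could go here instead.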
+TEST_F(AssemblerMIPS64Test, BareBnezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBltzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBgezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBlezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBgtzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBeqc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBnec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBltc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBgec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBltuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBgeuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBc1eqz) { + BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1eqz, "Bc1eqz", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBc1nez) { + BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1nez, "Bc1nez", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBeqz) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqz, "Beqz", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBnez) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnez, "Bnez", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBltz) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltz, "Bltz", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBgez) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgez, "Bgez", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBlez) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Blez, "Blez", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBgtz) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtz, "Bgtz", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBeq) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beq, "Beq", /* is_bare */ true); +} + +TEST_F(AssemblerMIPS64Test, BareBne) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bne, "Bne", /* is_bare */ true); } TEST_F(AssemblerMIPS64Test, LongBeqc) { @@ -924,6 +970,102 @@ TEST_F(AssemblerMIPS64Test, LongBeqc) { DriverStr(expected, "LongBeqc"); } +TEST_F(AssemblerMIPS64Test, LongBeqzc) { + constexpr uint32_t kNopCount1 = (1u << 20) + 1; + constexpr uint32_t kNopCount2 = (1u << 20) + 1; + constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u; + ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity); + __ GetBuffer()->ExtendCapacity(kRequiredCapacity); + mips64::Mips64Label label; + __ Beqzc(mips64::A0, &label); + for (uint32_t i = 0; i != kNopCount1; ++i) { + __ Nop(); + } + __ Bind(&label); + for (uint32_t i = 0; i != kNopCount2; ++i) { + __ Nop(); + } + __ Beqzc(mips64::A2, &label); + + uint32_t offset_forward = 2 + kNopCount1; // 
2: account for auipc and jic. + offset_forward <<= 2; + offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic. + + uint32_t offset_back = -(kNopCount2 + 1); // 1: account for bnezc. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic. + + // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs + // instead of generating them ourselves in the source code. This saves test time. + std::ostringstream oss; + oss << + ".set noreorder\n" + "bnezc $a0, 1f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "1:\n" << + ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n" + "2:\n" << + ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n" + "bnezc $a2, 3f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "3:\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBeqzc"); +} + +TEST_F(AssemblerMIPS64Test, LongBalc) { + constexpr uint32_t kNopCount1 = (1u << 25) + 1; + constexpr uint32_t kNopCount2 = (1u << 25) + 1; + constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u; + ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity); + __ GetBuffer()->ExtendCapacity(kRequiredCapacity); + mips64::Mips64Label label1, label2; + __ Balc(&label1); + for (uint32_t i = 0; i != kNopCount1; ++i) { + __ Nop(); + } + __ Bind(&label1); + __ Balc(&label2); + for (uint32_t i = 0; i != kNopCount2; ++i) { + __ Nop(); + } + __ Bind(&label2); + __ Balc(&label1); + + uint32_t offset_forward1 = 2 + kNopCount1; // 2: account for auipc and jialc. + offset_forward1 <<= 2; + offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in jialc. + + uint32_t offset_forward2 = 2 + kNopCount2; // 2: account for auipc and jialc. + offset_forward2 <<= 2; + offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in jialc. + + uint32_t offset_back = -(2 + kNopCount2); // 2: account for auipc and jialc. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jialc. + + // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs + // instead of generating them ourselves in the source code. This saves a few minutes + // of test time. 
+ std::ostringstream oss; + oss << + ".set noreorder\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward1) << "\n" + "jialc $at, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" + ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n" + "1:\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward2) << "\n" + "jialc $at, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" + ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n" + "2:\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jialc $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBalc"); +} + ////////// // MISC // ////////// @@ -961,235 +1103,6 @@ TEST_F(AssemblerMIPS64Test, Addiupc) { DriverStr(RepeatRIb(&mips64::Mips64Assembler::Addiupc, 19, code), "Addiupc"); } -TEST_F(AssemblerMIPS64Test, LoadFarthestNearLabelAddress) { - mips64::Mips64Label label; - __ LoadLabelAddress(mips64::V0, &label); - constexpr uint32_t kAdduCount = 0x3FFDE; - for (uint32_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - __ Bind(&label); - - std::string expected = - "lapc $v0, 1f\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "1:\n"; - DriverStr(expected, "LoadFarthestNearLabelAddress"); - EXPECT_EQ(__ GetLabelLocation(&label), (1 + kAdduCount) * 4); -} - -TEST_F(AssemblerMIPS64Test, LoadNearestFarLabelAddress) { - mips64::Mips64Label label; - __ LoadLabelAddress(mips64::V0, &label); - constexpr uint32_t kAdduCount = 0x3FFDF; - for (uint32_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - __ Bind(&label); - - std::string expected = - "1:\n" - "auipc $at, %hi(2f - 1b)\n" - "daddiu $v0, $at, %lo(2f - 1b)\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "2:\n"; - DriverStr(expected, "LoadNearestFarLabelAddress"); - EXPECT_EQ(__ GetLabelLocation(&label), (2 + kAdduCount) * 4); -} - -TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteral) { - mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); - __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal); - constexpr uint32_t kAdduCount = 0x3FFDE; - for (uint32_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - - std::string expected = - "lwpc $v0, 1f\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "1:\n" - ".word 0x12345678\n"; - DriverStr(expected, "LoadFarthestNearLiteral"); - EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4); -} - -TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteral) { - mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); - __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal); - constexpr uint32_t kAdduCount = 0x3FFDF; - for (uint32_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - - std::string expected = - "1:\n" - "auipc $at, %hi(2f - 1b)\n" - "lw $v0, %lo(2f - 1b)($at)\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "2:\n" - ".word 0x12345678\n"; - DriverStr(expected, "LoadNearestFarLiteral"); - EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4); -} - -TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralUnsigned) { - mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); - __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal); - constexpr uint32_t kAdduCount = 0x3FFDE; - for (uint32_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - - 
std::string expected = - "lwupc $v0, 1f\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "1:\n" - ".word 0x12345678\n"; - DriverStr(expected, "LoadFarthestNearLiteralUnsigned"); - EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4); -} - -TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralUnsigned) { - mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); - __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal); - constexpr uint32_t kAdduCount = 0x3FFDF; - for (uint32_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - - std::string expected = - "1:\n" - "auipc $at, %hi(2f - 1b)\n" - "lwu $v0, %lo(2f - 1b)($at)\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "2:\n" - ".word 0x12345678\n"; - DriverStr(expected, "LoadNearestFarLiteralUnsigned"); - EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4); -} - -TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralLong) { - mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); - __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal); - constexpr uint32_t kAdduCount = 0x3FFDD; - for (uint32_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - - std::string expected = - "ldpc $v0, 1f\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "1:\n" - ".dword 0x0123456789ABCDEF\n"; - DriverStr(expected, "LoadFarthestNearLiteralLong"); - EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4); -} - -TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralLong) { - mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); - __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal); - constexpr uint32_t kAdduCount = 0x3FFDE; - for (uint32_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - - std::string expected = - "1:\n" - "auipc $at, %hi(2f - 1b)\n" - "ld $v0, %lo(2f - 1b)($at)\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "2:\n" - ".dword 0x0123456789ABCDEF\n"; - DriverStr(expected, "LoadNearestFarLiteralLong"); - EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4); -} - -TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNop) { - mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); - mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555)); - mips64::Literal* literal3 = __ NewLiteral<uint64_t>(UINT64_C(0xAAAAAAAAAAAAAAAA)); - __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1); - __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2); - __ LoadLiteral(mips64::A3, mips64::kLoadDoubleword, literal3); - __ LoadLabelAddress(mips64::V0, literal1->GetLabel()); - __ LoadLabelAddress(mips64::V1, literal2->GetLabel()); - // A nop will be inserted here before the 64-bit literals. - - std::string expected = - "ldpc $a1, 1f\n" - // The GNU assembler incorrectly requires the ldpc instruction to be located - // at an address that's a multiple of 8. TODO: Remove this workaround if/when - // the assembler is fixed. 
- // "ldpc $a2, 2f\n" - ".word 0xECD80004\n" - "ldpc $a3, 3f\n" - "lapc $v0, 1f\n" - "lapc $v1, 2f\n" - "nop\n" - "1:\n" - ".dword 0x0123456789ABCDEF\n" - "2:\n" - ".dword 0x5555555555555555\n" - "3:\n" - ".dword 0xAAAAAAAAAAAAAAAA\n"; - DriverStr(expected, "LongLiteralAlignmentNop"); - EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 6 * 4u); - EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 8 * 4u); - EXPECT_EQ(__ GetLabelLocation(literal3->GetLabel()), 10 * 4u); -} - -TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNoNop) { - mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); - mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555)); - __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1); - __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2); - __ LoadLabelAddress(mips64::V0, literal1->GetLabel()); - __ LoadLabelAddress(mips64::V1, literal2->GetLabel()); - - std::string expected = - "ldpc $a1, 1f\n" - // The GNU assembler incorrectly requires the ldpc instruction to be located - // at an address that's a multiple of 8. TODO: Remove this workaround if/when - // the assembler is fixed. - // "ldpc $a2, 2f\n" - ".word 0xECD80003\n" - "lapc $v0, 1f\n" - "lapc $v1, 2f\n" - "1:\n" - ".dword 0x0123456789ABCDEF\n" - "2:\n" - ".dword 0x5555555555555555\n"; - DriverStr(expected, "LongLiteralAlignmentNoNop"); - EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 4 * 4u); - EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 6 * 4u); -} - -TEST_F(AssemblerMIPS64Test, FarLongLiteralAlignmentNop) { - mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); - __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal); - __ LoadLabelAddress(mips64::V1, literal->GetLabel()); - constexpr uint32_t kAdduCount = 0x3FFDF; - for (uint32_t i = 0; i != kAdduCount; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); - } - // A nop will be inserted here before the 64-bit literal. 
- - std::string expected = - "1:\n" - "auipc $at, %hi(3f - 1b)\n" - "ld $v0, %lo(3f - 1b)($at)\n" - "2:\n" - "auipc $at, %hi(3f - 2b)\n" - "daddiu $v1, $at, %lo(3f - 2b)\n" + - RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + - "nop\n" - "3:\n" - ".dword 0x0123456789ABCDEF\n"; - DriverStr(expected, "FarLongLiteralAlignmentNop"); - EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (5 + kAdduCount) * 4); -} - TEST_F(AssemblerMIPS64Test, Addu) { DriverStr(RepeatRRR(&mips64::Mips64Assembler::Addu, "addu ${reg1}, ${reg2}, ${reg3}"), "addu"); } @@ -2740,6 +2653,235 @@ TEST_F(AssemblerMIPS64Test, LoadConst64) { EXPECT_EQ(tester.GetPathsCovered(), art::mips64::kLoadConst64PathAllPaths); } +TEST_F(AssemblerMIPS64Test, LoadFarthestNearLabelAddress) { + mips64::Mips64Label label; + __ LoadLabelAddress(mips64::V0, &label); + constexpr uint32_t kAdduCount = 0x3FFDE; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + + std::string expected = + "lapc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n"; + DriverStr(expected, "LoadFarthestNearLabelAddress"); + EXPECT_EQ(__ GetLabelLocation(&label), (1 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadNearestFarLabelAddress) { + mips64::Mips64Label label; + __ LoadLabelAddress(mips64::V0, &label); + constexpr uint32_t kAdduCount = 0x3FFDF; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "daddiu $v0, $at, %lo(2f - 1b)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n"; + DriverStr(expected, "LoadNearestFarLabelAddress"); + EXPECT_EQ(__ GetLabelLocation(&label), (2 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteral) { + mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal); + constexpr uint32_t kAdduCount = 0x3FFDE; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "lwpc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteral) { + mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal); + constexpr uint32_t kAdduCount = 0x3FFDF; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralUnsigned) { + mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal); + constexpr uint32_t kAdduCount = 0x3FFDE; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "lwupc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".word 0x12345678\n"; + 
DriverStr(expected, "LoadFarthestNearLiteralUnsigned"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralUnsigned) { + mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal); + constexpr uint32_t kAdduCount = 0x3FFDF; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "lwu $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteralUnsigned"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralLong) { + mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal); + constexpr uint32_t kAdduCount = 0x3FFDD; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "ldpc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".dword 0x0123456789ABCDEF\n"; + DriverStr(expected, "LoadFarthestNearLiteralLong"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralLong) { + mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal); + constexpr uint32_t kAdduCount = 0x3FFDE; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "ld $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".dword 0x0123456789ABCDEF\n"; + DriverStr(expected, "LoadNearestFarLiteralLong"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNop) { + mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555)); + mips64::Literal* literal3 = __ NewLiteral<uint64_t>(UINT64_C(0xAAAAAAAAAAAAAAAA)); + __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1); + __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2); + __ LoadLiteral(mips64::A3, mips64::kLoadDoubleword, literal3); + __ LoadLabelAddress(mips64::V0, literal1->GetLabel()); + __ LoadLabelAddress(mips64::V1, literal2->GetLabel()); + // A nop will be inserted here before the 64-bit literals. + + std::string expected = + "ldpc $a1, 1f\n" + // The GNU assembler incorrectly requires the ldpc instruction to be located + // at an address that's a multiple of 8. TODO: Remove this workaround if/when + // the assembler is fixed. 
+ // "ldpc $a2, 2f\n" + ".word 0xECD80004\n" + "ldpc $a3, 3f\n" + "lapc $v0, 1f\n" + "lapc $v1, 2f\n" + "nop\n" + "1:\n" + ".dword 0x0123456789ABCDEF\n" + "2:\n" + ".dword 0x5555555555555555\n" + "3:\n" + ".dword 0xAAAAAAAAAAAAAAAA\n"; + DriverStr(expected, "LongLiteralAlignmentNop"); + EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 6 * 4u); + EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 8 * 4u); + EXPECT_EQ(__ GetLabelLocation(literal3->GetLabel()), 10 * 4u); +} + +TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNoNop) { + mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555)); + __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1); + __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2); + __ LoadLabelAddress(mips64::V0, literal1->GetLabel()); + __ LoadLabelAddress(mips64::V1, literal2->GetLabel()); + + std::string expected = + "ldpc $a1, 1f\n" + // The GNU assembler incorrectly requires the ldpc instruction to be located + // at an address that's a multiple of 8. TODO: Remove this workaround if/when + // the assembler is fixed. + // "ldpc $a2, 2f\n" + ".word 0xECD80003\n" + "lapc $v0, 1f\n" + "lapc $v1, 2f\n" + "1:\n" + ".dword 0x0123456789ABCDEF\n" + "2:\n" + ".dword 0x5555555555555555\n"; + DriverStr(expected, "LongLiteralAlignmentNoNop"); + EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 4 * 4u); + EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 6 * 4u); +} + +TEST_F(AssemblerMIPS64Test, FarLongLiteralAlignmentNop) { + mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal); + __ LoadLabelAddress(mips64::V1, literal->GetLabel()); + constexpr uint32_t kAdduCount = 0x3FFDF; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + // A nop will be inserted here before the 64-bit literal. + + std::string expected = + "1:\n" + "auipc $at, %hi(3f - 1b)\n" + "ld $v0, %lo(3f - 1b)($at)\n" + "2:\n" + "auipc $at, %hi(3f - 2b)\n" + "daddiu $v1, $at, %lo(3f - 2b)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "nop\n" + "3:\n" + ".dword 0x0123456789ABCDEF\n"; + DriverStr(expected, "FarLongLiteralAlignmentNop"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (5 + kAdduCount) * 4); +} + // MSA instructions. 
TEST_F(AssemblerMIPS64Test, AndV) { diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index b50f1af8f9..b89af10749 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1606,6 +1606,42 @@ void X86Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) { } +void X86Assembler::punpckhbw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x68); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::punpckhwd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x69); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::punpckhdq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x6A); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x6D); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::psllw(XmmRegister reg, const Immediate& shift_count) { DCHECK(shift_count.is_uint8()); AssemblerBuffer::EnsureCapacity ensured(&buffer_); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 8578340ea7..511eeb9973 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -546,6 +546,11 @@ class X86Assembler FINAL : public Assembler { void punpckldq(XmmRegister dst, XmmRegister src); void punpcklqdq(XmmRegister dst, XmmRegister src); + void punpckhbw(XmmRegister dst, XmmRegister src); + void punpckhwd(XmmRegister dst, XmmRegister src); + void punpckhdq(XmmRegister dst, XmmRegister src); + void punpckhqdq(XmmRegister dst, XmmRegister src); + void psllw(XmmRegister reg, const Immediate& shift_count); void pslld(XmmRegister reg, const Immediate& shift_count); void psllq(XmmRegister reg, const Immediate& shift_count); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 3e1244ed5d..d2122db3fa 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -777,6 +777,22 @@ TEST_F(AssemblerX86Test, Punpcklqdq) { DriverStr(RepeatFF(&x86::X86Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq"); } +TEST_F(AssemblerX86Test, Punpckhbw) { + DriverStr(RepeatFF(&x86::X86Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw"); +} + +TEST_F(AssemblerX86Test, Punpckhwd) { + DriverStr(RepeatFF(&x86::X86Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd"); +} + +TEST_F(AssemblerX86Test, Punpckhdq) { + DriverStr(RepeatFF(&x86::X86Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq"); +} + +TEST_F(AssemblerX86Test, Punpckhqdq) { + DriverStr(RepeatFF(&x86::X86Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq"); +} + TEST_F(AssemblerX86Test, psllw) { GetAssembler()->psllw(x86::XMM0, CreateImmediate(16)); DriverStr("psllw $0x10, %xmm0\n", "psllwi"); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index ea69a1c9be..3bff67d2f2 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1835,6 +1835,46 @@ void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) { } +void 
X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x68); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x69); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x6A); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x6D); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) { DCHECK(shift_count.is_uint8()); AssemblerBuffer::EnsureCapacity ensured(&buffer_); @@ -1931,6 +1971,18 @@ void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { } +void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(3, reg); + EmitUint8(shift_count.value()); +} + + void X86_64Assembler::fldl(const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xDD); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 41450bff4f..3dab235d1c 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -574,6 +574,11 @@ class X86_64Assembler FINAL : public Assembler { void punpckldq(XmmRegister dst, XmmRegister src); void punpcklqdq(XmmRegister dst, XmmRegister src); + void punpckhbw(XmmRegister dst, XmmRegister src); + void punpckhwd(XmmRegister dst, XmmRegister src); + void punpckhdq(XmmRegister dst, XmmRegister src); + void punpckhqdq(XmmRegister dst, XmmRegister src); + void psllw(XmmRegister reg, const Immediate& shift_count); void pslld(XmmRegister reg, const Immediate& shift_count); void psllq(XmmRegister reg, const Immediate& shift_count); @@ -585,6 +590,7 @@ class X86_64Assembler FINAL : public Assembler { void psrlw(XmmRegister reg, const Immediate& shift_count); void psrld(XmmRegister reg, const Immediate& shift_count); void psrlq(XmmRegister reg, const Immediate& shift_count); + void psrldq(XmmRegister reg, const Immediate& shift_count); void flds(const Address& src); void fstps(const Address& dst); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index ec14e7a825..85afee0746 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1465,6 +1465,22 @@ TEST_F(AssemblerX86_64Test, Punpcklqdq) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq"); } +TEST_F(AssemblerX86_64Test, Punpckhbw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw"); +} + +TEST_F(AssemblerX86_64Test, 
Punpckhwd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd"); +} + +TEST_F(AssemblerX86_64Test, Punpckhdq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq"); +} + +TEST_F(AssemblerX86_64Test, Punpckhqdq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq"); +} + TEST_F(AssemblerX86_64Test, Psllw) { GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); @@ -1521,6 +1537,13 @@ TEST_F(AssemblerX86_64Test, Psrlq) { "psrlq $2, %xmm15\n", "pslrqi"); } +TEST_F(AssemblerX86_64Test, Psrldq) { + GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psrldq $1, %xmm0\n" + "psrldq $2, %xmm15\n", "pslrdqi"); +} + TEST_F(AssemblerX86_64Test, UcomissAddress) { GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address( x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); @@ -2012,7 +2035,7 @@ std::string buildframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test x86_64::X86_64ManagedRegister method_reg = ManagedFromCpu(x86_64::RDI); size_t frame_size = 10 * kStackAlignment; - assembler->BuildFrame(10 * kStackAlignment, method_reg, spill_regs, entry_spills); + assembler->BuildFrame(frame_size, method_reg, spill_regs, entry_spills); // Construct assembly text counterpart. std::ostringstream str; @@ -2048,7 +2071,7 @@ std::string removeframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_tes ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs); size_t frame_size = 10 * kStackAlignment; - assembler->RemoveFrame(10 * kStackAlignment, spill_regs); + assembler->RemoveFrame(frame_size, spill_regs); // Construct assembly text counterpart. 
std::ostringstream str; diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc index 65389252e2..5c097da16f 100644 --- a/compiler/verifier_deps_test.cc +++ b/compiler/verifier_deps_test.cc @@ -624,7 +624,7 @@ TEST_F(VerifierDepsTest, ConstClass_Resolved) { } TEST_F(VerifierDepsTest, ConstClass_Unresolved) { - ASSERT_TRUE(VerifyMethod("ConstClass_Unresolved")); + ASSERT_FALSE(VerifyMethod("ConstClass_Unresolved")); ASSERT_TRUE(HasClass("LUnresolvedClass;", false)); } @@ -634,7 +634,7 @@ TEST_F(VerifierDepsTest, CheckCast_Resolved) { } TEST_F(VerifierDepsTest, CheckCast_Unresolved) { - ASSERT_TRUE(VerifyMethod("CheckCast_Unresolved")); + ASSERT_FALSE(VerifyMethod("CheckCast_Unresolved")); ASSERT_TRUE(HasClass("LUnresolvedClass;", false)); } @@ -644,7 +644,7 @@ TEST_F(VerifierDepsTest, InstanceOf_Resolved) { } TEST_F(VerifierDepsTest, InstanceOf_Unresolved) { - ASSERT_TRUE(VerifyMethod("InstanceOf_Unresolved")); + ASSERT_FALSE(VerifyMethod("InstanceOf_Unresolved")); ASSERT_TRUE(HasClass("LUnresolvedClass;", false)); } @@ -654,12 +654,12 @@ TEST_F(VerifierDepsTest, NewInstance_Resolved) { } TEST_F(VerifierDepsTest, NewInstance_Unresolved) { - ASSERT_TRUE(VerifyMethod("NewInstance_Unresolved")); + ASSERT_FALSE(VerifyMethod("NewInstance_Unresolved")); ASSERT_TRUE(HasClass("LUnresolvedClass;", false)); } TEST_F(VerifierDepsTest, NewArray_Unresolved) { - ASSERT_TRUE(VerifyMethod("NewArray_Unresolved")); + ASSERT_FALSE(VerifyMethod("NewArray_Unresolved")); ASSERT_TRUE(HasClass("[LUnresolvedClass;", false)); } |
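To close, a brief illustration of the new x86-64 SIMD entry points introduced earlier in this change (a sketch only, not part of the patch; it assumes an x86_64::X86_64Assembler* named `assembler` obtained elsewhere, e.g. from a test harness):

  // punpckhbw interleaves the high-order bytes of the two operands,
  // complementing the existing low-order unpacks (punpckldq, punpcklqdq)
  // visible in the surrounding context; it is encoded as 66 0F 68 /r,
  // matching the bytes emitted by the implementation above.
  assembler->punpckhbw(x86_64::XmmRegister(x86_64::XMM0), x86_64::XmmRegister(x86_64::XMM1));

  // psrldq shifts the entire 128-bit register right by a byte count,
  // unlike psrlq, which shifts each 64-bit lane right by a bit count.
  assembler->psrldq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(8));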